(git:34ef472)
cp_fm_basic_linalg.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \brief basic linear algebra operations for full matrices
10 !> \par History
11 !> 08.2002 split out of qs_blacs [fawzi]
12 !> \author Fawzi Mohamed
13 ! **************************************************************************************************
15  USE cp_blacs_env, ONLY: cp_blacs_env_type
17  USE cp_fm_types, ONLY: &
19  cp_fm_release, cp_fm_set_all, cp_fm_set_element, cp_fm_set_submatrix, cp_fm_to_fm, &
20  cp_fm_type
22  cp_to_string
23  USE kahan_sum, ONLY: accurate_dot_product, &
24  accurate_sum
25  USE kinds, ONLY: dp, &
26  int_8, &
27  sp
28  USE machine, ONLY: m_memory
29  USE mathlib, ONLY: get_pseudo_inverse_svd, &
30  invert_matrix
31  USE message_passing, ONLY: mp_comm_type
32 #include "../base/base_uses.f90"
33 
34  IMPLICIT NONE
35  PRIVATE
36 
37  LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .true.
38  CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_basic_linalg'
39 
40  PUBLIC :: cp_fm_scale, & ! scale a matrix
41  cp_fm_scale_and_add, & ! scale and add two matrices
42  cp_fm_geadd, & ! general addition
43  cp_fm_column_scale, & ! scale columns of a matrix
44  cp_fm_row_scale, & ! scale rows of a matrix
45  cp_fm_trace, & ! trace of the transpose(A)*B
46  cp_fm_contracted_trace, & ! sum_{i,...,k} Tr [A(i,...,k)^T * B(i,...,k)]
47  cp_fm_norm, & ! different norms of A
48  cp_fm_schur_product, & ! schur product
49  cp_fm_transpose, & ! transpose a matrix
50  cp_fm_upper_to_full, & ! symmetrise an upper symmetric matrix
51  cp_fm_syrk, & ! rank k update
52  cp_fm_triangular_multiply, & ! triangular matrix multiply / solve
53  cp_fm_symm, & ! multiply a symmetric with a non-symmetric matrix
54  cp_fm_gemm, & ! multiply two matrices
55  cp_complex_fm_gemm, & ! multiply two complex matrices, represented by non_complex fm matrices
56  cp_fm_invert, & ! computes the inverse and determinant
57  cp_fm_frobenius_norm, & ! frobenius norm
58  cp_fm_triangular_invert, & ! compute the reciprocal of a triangular matrix
59  cp_fm_qr_factorization, & ! compute the QR factorization of a rectangular matrix
60  cp_fm_solve, & ! solves the equation A*B=C A and C are input
61  cp_fm_pdgeqpf, & ! compute a QR factorization with column pivoting of a M-by-N distributed matrix
62  cp_fm_pdorgqr, & ! generates an M-by-N as first N columns of a product of K elementary reflectors
63  cp_fm_potrf, & ! Cholesky decomposition
64  cp_fm_potri, & ! Invert triangular matrix
65  cp_fm_rot_rows, & ! rotates two rows
66  cp_fm_rot_cols, & ! rotates two columns
67  cp_fm_cholesky_restore, & ! apply Cholesky decomposition
68  cp_fm_gram_schmidt_orthonorm, & ! Gram-Schmidt orthonormalization of columns of a full matrix, &
69  cp_fm_det ! determinant of a real matrix with correct sign
70 
71  REAL(KIND=dp), EXTERNAL :: dlange, pdlange, pdlatra
72  REAL(KIND=sp), EXTERNAL :: slange, pslange, pslatra
73 
74  INTERFACE cp_fm_trace
75  MODULE PROCEDURE cp_fm_trace_a0b0t0
76  MODULE PROCEDURE cp_fm_trace_a1b0t1_a
77  MODULE PROCEDURE cp_fm_trace_a1b0t1_p
78  MODULE PROCEDURE cp_fm_trace_a1b1t1_aa
79  MODULE PROCEDURE cp_fm_trace_a1b1t1_ap
80  MODULE PROCEDURE cp_fm_trace_a1b1t1_pa
81  MODULE PROCEDURE cp_fm_trace_a1b1t1_pp
82  END INTERFACE cp_fm_trace
83 
84  INTERFACE cp_fm_contracted_trace
85  MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_aa
86  MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_ap
87  MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pa
88  MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pp
89  END INTERFACE cp_fm_contracted_trace
90 CONTAINS
91 
92 ! **************************************************************************************************
93 !> \brief Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix
94 !> \author A. Sinyavskiy (andrey.sinyavskiy@chem.uzh.ch)
95 ! **************************************************************************************************
96  SUBROUTINE cp_fm_det(matrix_a, det_a)
97 
98  TYPE(cp_fm_type), INTENT(IN) :: matrix_a
99  REAL(kind=dp), INTENT(OUT) :: det_a
100  REAL(kind=dp) :: determinant
101  TYPE(cp_fm_type) :: matrix_lu
102  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
103  INTEGER :: n, i, info, p
104  INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
105  REAL(kind=dp), DIMENSION(:), POINTER :: diag
106 
107 #if defined(__SCALAPACK)
108  INTEGER, EXTERNAL :: indxl2g
109  INTEGER :: myprow, nprow, npcol, nrow_local, nrow_block, irow_local
110  INTEGER, DIMENSION(9) :: desca
111 #endif
112 
113  CALL cp_fm_create(matrix=matrix_lu, &
114  matrix_struct=matrix_a%matrix_struct, &
115  name="A_lu"//trim(adjustl(cp_to_string(1)))//"MATRIX")
116  CALL cp_fm_to_fm(matrix_a, matrix_lu)
117 
118  a => matrix_lu%local_data
119  n = matrix_lu%matrix_struct%nrow_global
120  ALLOCATE (ipivot(n))
121  ipivot(:) = 0
122  p = 0
123  ALLOCATE (diag(n))
124  diag(:) = 0.0_dp
125 #if defined(__SCALAPACK)
126  ! Use LU decomposition
127  desca(:) = matrix_lu%matrix_struct%descriptor(:)
128  CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
129  CALL cp_fm_get_diag(matrix_lu, diag)
130  determinant = product(diag)
131  myprow = matrix_lu%matrix_struct%context%mepos(1)
132  nprow = matrix_lu%matrix_struct%context%num_pe(1)
133  npcol = matrix_lu%matrix_struct%context%num_pe(2)
134  nrow_local = matrix_lu%matrix_struct%nrow_locals(myprow)
135  nrow_block = matrix_lu%matrix_struct%nrow_block
136  DO irow_local = 1, nrow_local
137  i = indxl2g(irow_local, nrow_block, myprow, matrix_lu%matrix_struct%first_p_pos(1), nprow)
138  IF (ipivot(irow_local) /= i) p = p + 1
139  END DO
140  CALL matrix_lu%matrix_struct%para_env%sum(p)
141  ! very important fix
142  p = p/npcol
143 #else
144  CALL dgetrf(n, n, a, n, ipivot, info)
145  CALL cp_fm_get_diag(matrix_lu, diag)
146  determinant = product(diag)
147  DO i = 1, n
148  IF (ipivot(i) /= i) p = p + 1
149  END DO
150 #endif
151  DEALLOCATE (ipivot)
152  DEALLOCATE (diag)
153  CALL cp_fm_release(matrix_lu)
154  det_a = determinant*(-2*mod(p, 2) + 1.0_dp)
155  END SUBROUTINE cp_fm_det
156 
157 ! **************************************************************************************************
158 !> \brief calc A <- alpha*A + beta*B
159 !> optimized for alpha == 1.0 (just add beta*B) and beta == 0.0 (just
160 !> scale A)
161 !> \param alpha ...
162 !> \param matrix_a ...
163 !> \param beta ...
164 !> \param matrix_b ...
165 ! **************************************************************************************************
166  SUBROUTINE cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
167 
168  REAL(kind=dp), INTENT(IN) :: alpha
169  TYPE(cp_fm_type), INTENT(IN) :: matrix_a
170  REAL(kind=dp), INTENT(in), OPTIONAL :: beta
171  TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: matrix_b
172 
173  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_scale_and_add'
174 
175  INTEGER :: handle, size_a, size_b
176  REAL(kind=dp) :: my_beta
177  REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
178  REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
179 
180  CALL timeset(routinen, handle)
181 
182  IF (PRESENT(matrix_b)) THEN
183  my_beta = 1.0_dp
184  ELSE
185  my_beta = 0.0_dp
186  END IF
187  IF (PRESENT(beta)) my_beta = beta
188  NULLIFY (a, b)
189 
190  IF (PRESENT(beta)) THEN
191  cpassert(PRESENT(matrix_b))
192  IF (ASSOCIATED(matrix_a%local_data, matrix_b%local_data)) THEN
193  cpwarn("Bad use of routine. Call cp_fm_scale instead")
194  CALL cp_fm_scale(alpha + beta, matrix_a)
195  CALL timestop(handle)
196  RETURN
197  END IF
198  END IF
199 
200  a => matrix_a%local_data
201  a_sp => matrix_a%local_data_sp
202 
203  IF (matrix_a%use_sp) THEN
204  size_a = SIZE(a_sp, 1)*SIZE(a_sp, 2)
205  ELSE
206  size_a = SIZE(a, 1)*SIZE(a, 2)
207  END IF
208 
209  IF (alpha /= 1.0_dp) THEN
210  IF (matrix_a%use_sp) THEN
211  CALL sscal(size_a, real(alpha, sp), a_sp, 1)
212  ELSE
213  CALL dscal(size_a, alpha, a, 1)
214  END IF
215  END IF
216  IF (my_beta .NE. 0.0_dp) THEN
217  IF (matrix_a%matrix_struct%context /= matrix_b%matrix_struct%context) &
218  cpabort("matrixes must be in the same blacs context")
219 
220  IF (cp_fm_struct_equivalent(matrix_a%matrix_struct, &
221  matrix_b%matrix_struct)) THEN
222 
223  b => matrix_b%local_data
224  b_sp => matrix_b%local_data_sp
225  IF (matrix_b%use_sp) THEN
226  size_b = SIZE(b_sp, 1)*SIZE(b_sp, 2)
227  ELSE
228  size_b = SIZE(b, 1)*SIZE(b, 2)
229  END IF
230  IF (size_a /= size_b) &
231  cpabort("Matrixes must have same locale sizes")
232 
233  IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
234  CALL saxpy(size_a, real(my_beta, sp), b_sp, 1, a_sp, 1)
235  ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
236  CALL saxpy(size_a, real(my_beta, sp), real(b, sp), 1, a_sp, 1)
237  ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
238  CALL daxpy(size_a, my_beta, real(b_sp, dp), 1, a, 1)
239  ELSE
240  CALL daxpy(size_a, my_beta, b, 1, a, 1)
241  END IF
242 
243  ELSE
244 #ifdef __SCALAPACK
245  cpabort("to do (pdscal,pdcopy,pdaxpy)")
246 #else
247  cpabort("")
248 #endif
249  END IF
250 
251  END IF
252 
253  CALL timestop(handle)
254 
255  END SUBROUTINE cp_fm_scale_and_add
256 
257 ! **************************************************************************************************
258 !> \brief interface to BLACS geadd:
259 !> matrix_b = beta*matrix_b + alpha*opt(matrix_a)
260 !> where opt(matrix_a) can be either:
261 !> 'N': matrix_a
262 !> 'T': matrix_a^T
263 !> 'C': matrix_a^H (Hermitian conjugate)
264 !> note that this is a level three routine, use cp_fm_scale_and_add if that
265 !> is sufficient for your needs
266 !> \param alpha : complex scalar
267 !> \param trans : 'N' normal, 'T' transposed
268 !> \param matrix_a : input matrix_a
269 !> \param beta : complex scalar
270 !> \param matrix_b : input matrix_b, upon out put the updated matrix_b
271 !> \author Lianheng Tong
272 ! **************************************************************************************************
273  SUBROUTINE cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
274  REAL(kind=dp), INTENT(IN) :: alpha, beta
275  CHARACTER, INTENT(IN) :: trans
276  TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
277 
278  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_geadd'
279 
280  INTEGER :: nrow_global, ncol_global, handle
281  REAL(kind=dp), DIMENSION(:, :), POINTER :: aa, bb
282 #if defined(__SCALAPACK)
283  INTEGER, DIMENSION(9) :: desca, descb
284 #else
285  INTEGER :: ii, jj
286 #endif
287 
288  CALL timeset(routinen, handle)
289 
290  nrow_global = matrix_a%matrix_struct%nrow_global
291  ncol_global = matrix_a%matrix_struct%ncol_global
292  cpassert(nrow_global .EQ. matrix_b%matrix_struct%nrow_global)
293  cpassert(ncol_global .EQ. matrix_b%matrix_struct%ncol_global)
294 
295  aa => matrix_a%local_data
296  bb => matrix_b%local_data
297 
298 #if defined(__SCALAPACK)
299  desca = matrix_a%matrix_struct%descriptor
300  descb = matrix_b%matrix_struct%descriptor
301  CALL pdgeadd(trans, &
302  nrow_global, &
303  ncol_global, &
304  alpha, &
305  aa, &
306  1, 1, &
307  desca, &
308  beta, &
309  bb, &
310  1, 1, &
311  descb)
312 #else
313  ! dgeadd is not a standard BLAS function, although is implemented
314  ! in some libraries like OpenBLAS, so not going to use it here
315  SELECT CASE (trans)
316  CASE ('T')
317  DO jj = 1, ncol_global
318  DO ii = 1, nrow_global
319  bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(jj, ii)
320  END DO
321  END DO
322  CASE DEFAULT
323  DO jj = 1, ncol_global
324  DO ii = 1, nrow_global
325  bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(ii, jj)
326  END DO
327  END DO
328  END SELECT
329 #endif
330 
331  CALL timestop(handle)
332 
333  END SUBROUTINE cp_fm_geadd
334 
335 ! **************************************************************************************************
336 !> \brief Computes the LU-decomposition of the matrix, and the determinant of the matrix
337 !> IMPORTANT : the sign of the determinant is not defined correctly yet ....
338 !> \param matrix_a ...
339 !> \param almost_determinant ...
340 !> \param correct_sign ...
341 !> \par History
342 !> added correct_sign 02.07 (fschiff)
343 !> \author Joost VandeVondele
344 !> \note
345 !> - matrix_a is overwritten
346 !> - the sign of the determinant might be wrong
347 !> - SERIOUS WARNING (KNOWN BUG) : the sign of the determinant depends on ipivot
348 !> - one should be able to find out if ipivot is an even or an odd permutation...
349 !> if you need the correct sign, just add correct_sign==.TRUE. (fschiff)
350 !> - Use cp_fm_get_diag instead of n times cp_fm_get_element (A. Bussy)
351 ! **************************************************************************************************
352  SUBROUTINE cp_fm_lu_decompose(matrix_a, almost_determinant, correct_sign)
353  TYPE(cp_fm_type), INTENT(IN) :: matrix_a
354  REAL(kind=dp), INTENT(OUT) :: almost_determinant
355  LOGICAL, INTENT(IN), OPTIONAL :: correct_sign
356 
357  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_lu_decompose'
358 
359  INTEGER :: handle, i, info, n
360  INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
361  REAL(kind=dp) :: determinant
362  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
363 #if defined(__SCALAPACK)
364  INTEGER, DIMENSION(9) :: desca
365  REAL(kind=dp), DIMENSION(:), POINTER :: diag
366 #else
367  INTEGER :: lda
368 #endif
369 
370  CALL timeset(routinen, handle)
371 
372  a => matrix_a%local_data
373  n = matrix_a%matrix_struct%nrow_global
374  ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
375 
376 #if defined(__SCALAPACK)
377  mark_used(correct_sign)
378  desca(:) = matrix_a%matrix_struct%descriptor(:)
379  CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
380 
381  ALLOCATE (diag(n))
382  diag(:) = 0.0_dp
383  CALL cp_fm_get_diag(matrix_a, diag)
384  determinant = 1.0_dp
385  DO i = 1, n
386  determinant = determinant*diag(i)
387  END DO
388  DEALLOCATE (diag)
389 #else
390  lda = SIZE(a, 1)
391  CALL dgetrf(n, n, a, lda, ipivot, info)
392  determinant = 1.0_dp
393  IF (correct_sign) THEN
394  DO i = 1, n
395  IF (ipivot(i) .NE. i) THEN
396  determinant = -determinant*a(i, i)
397  ELSE
398  determinant = determinant*a(i, i)
399  END IF
400  END DO
401  ELSE
402  DO i = 1, n
403  determinant = determinant*a(i, i)
404  END DO
405  END IF
406 #endif
407  ! info is allowed to be zero
408  ! this does just signal a zero diagonal element
409  DEALLOCATE (ipivot)
410  almost_determinant = determinant ! notice that the sign is random
411  CALL timestop(handle)
412  END SUBROUTINE
413 
414 ! **************************************************************************************************
415 !> \brief computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
416 !> \param transa : 'N' -> normal 'T' -> transpose
417 !> alpha,beta :: can be 0.0_dp and 1.0_dp
418 !> \param transb ...
419 !> \param m ...
420 !> \param n ...
421 !> \param k ...
422 !> \param alpha ...
423 !> \param matrix_a : m x k matrix ( ! for transa = 'N')
424 !> \param matrix_b : k x n matrix ( ! for transb = 'N')
425 !> \param beta ...
426 !> \param matrix_c : m x n matrix
427 !> \param a_first_col ...
428 !> \param a_first_row ...
429 !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
430 !> \param b_first_row ...
431 !> \param c_first_col ...
432 !> \param c_first_row ...
433 !> \author Matthias Krack
434 !> \note
435 !> matrix_c should have no overlap with matrix_a, matrix_b
436 ! **************************************************************************************************
437  SUBROUTINE cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, &
438  matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, &
439  c_first_col, c_first_row)
440 
441  CHARACTER(LEN=1), INTENT(IN) :: transa, transb
442  INTEGER, INTENT(IN) :: m, n, k
443  REAL(kind=dp), INTENT(IN) :: alpha
444  TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
445  REAL(kind=dp), INTENT(IN) :: beta
446  TYPE(cp_fm_type), INTENT(IN) :: matrix_c
447  INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, &
448  b_first_col, b_first_row, &
449  c_first_col, c_first_row
450 
451  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_gemm'
452 
453  INTEGER :: handle, i_a, i_b, i_c, j_a, &
454  j_b, j_c
455  REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, c
456  REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp, c_sp
457 #if defined(__SCALAPACK)
458  INTEGER, DIMENSION(9) :: desca, descb, descc
459 #else
460  INTEGER :: lda, ldb, ldc
461 #endif
462 
463  CALL timeset(routinen, handle)
464 
465  !sample peak memory
466  CALL m_memory()
467 
468  a => matrix_a%local_data
469  b => matrix_b%local_data
470  c => matrix_c%local_data
471 
472  a_sp => matrix_a%local_data_sp
473  b_sp => matrix_b%local_data_sp
474  c_sp => matrix_c%local_data_sp
475 
476  IF (PRESENT(a_first_row)) THEN
477  i_a = a_first_row
478  ELSE
479  i_a = 1
480  END IF
481  IF (PRESENT(a_first_col)) THEN
482  j_a = a_first_col
483  ELSE
484  j_a = 1
485  END IF
486  IF (PRESENT(b_first_row)) THEN
487  i_b = b_first_row
488  ELSE
489  i_b = 1
490  END IF
491  IF (PRESENT(b_first_col)) THEN
492  j_b = b_first_col
493  ELSE
494  j_b = 1
495  END IF
496  IF (PRESENT(c_first_row)) THEN
497  i_c = c_first_row
498  ELSE
499  i_c = 1
500  END IF
501  IF (PRESENT(c_first_col)) THEN
502  j_c = c_first_col
503  ELSE
504  j_c = 1
505  END IF
506 
507 #if defined(__SCALAPACK)
508 
509  desca(:) = matrix_a%matrix_struct%descriptor(:)
510  descb(:) = matrix_b%matrix_struct%descriptor(:)
511  descc(:) = matrix_c%matrix_struct%descriptor(:)
512 
513  IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
514 
515  CALL psgemm(transa, transb, m, n, k, real(alpha, sp), a_sp(1, 1), i_a, j_a, desca, b_sp(1, 1), i_b, j_b, &
516  descb, real(beta, sp), c_sp(1, 1), i_c, j_c, descc)
517 
518  ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
519 
520  CALL pdgemm(transa, transb, m, n, k, alpha, a, i_a, j_a, desca, b, i_b, j_b, &
521  descb, beta, c, i_c, j_c, descc)
522 
523  ELSE
524  cpabort("Mixed precision gemm NYI")
525  END IF
526 #else
527 
528  IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
529 
530  lda = SIZE(a_sp, 1)
531  ldb = SIZE(b_sp, 1)
532  ldc = SIZE(c_sp, 1)
533 
534  CALL sgemm(transa, transb, m, n, k, real(alpha, sp), a_sp(i_a, j_a), lda, b_sp(i_b, j_b), ldb, &
535  REAL(beta, sp), c_sp(i_c, j_c), ldc)
536 
537  ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
538 
539  lda = SIZE(a, 1)
540  ldb = SIZE(b, 1)
541  ldc = SIZE(c, 1)
542 
543  CALL dgemm(transa, transb, m, n, k, alpha, a(i_a, j_a), lda, b(i_b, j_b), ldb, beta, c(i_c, j_c), ldc)
544 
545  ELSE
546  cpabort("Mixed precision gemm NYI")
547  END IF
548 
549 #endif
550  CALL timestop(handle)
551 
552  END SUBROUTINE cp_fm_gemm
553 
554 ! **************************************************************************************************
555 !> \brief computes matrix_c = beta * matrix_c + alpha * matrix_a * matrix_b
556 !> computes matrix_c = beta * matrix_c + alpha * matrix_b * matrix_a
557 !> where matrix_a is symmetric
558 !> \param side : 'L' -> matrix_a is on the left 'R' -> matrix_a is on the right
559 !> alpha,beta :: can be 0.0_dp and 1.0_dp
560 !> \param uplo ...
561 !> \param m ...
562 !> \param n ...
563 !> \param alpha ...
564 !> \param matrix_a : m x m matrix
565 !> \param matrix_b : m x n matrix
566 !> \param beta ...
567 !> \param matrix_c : m x n matrix
568 !> \author Matthias Krack
569 !> \note
570 !> matrix_c should have no overlap with matrix_a, matrix_b
571 !> all matrices in QS are upper triangular, so uplo should be 'U' always
572 !> matrix_a is always an m x m matrix
573 !> it is typically slower to do cp_fm_symm than cp_fm_gemm (especially in parallel easily 50 percent !)
574 ! **************************************************************************************************
575  SUBROUTINE cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
576 
577  CHARACTER(LEN=1), INTENT(IN) :: side, uplo
578  INTEGER, INTENT(IN) :: m, n
579  REAL(kind=dp), INTENT(IN) :: alpha
580  TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
581  REAL(kind=dp), INTENT(IN) :: beta
582  TYPE(cp_fm_type), INTENT(IN) :: matrix_c
583 
584  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_symm'
585 
586  INTEGER :: handle
587  REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, c
588 #if defined(__SCALAPACK)
589  INTEGER, DIMENSION(9) :: desca, descb, descc
590 #else
591  INTEGER :: lda, ldb, ldc
592 #endif
593 
594  CALL timeset(routinen, handle)
595 
596  a => matrix_a%local_data
597  b => matrix_b%local_data
598  c => matrix_c%local_data
599 
600 #if defined(__SCALAPACK)
601 
602  desca(:) = matrix_a%matrix_struct%descriptor(:)
603  descb(:) = matrix_b%matrix_struct%descriptor(:)
604  descc(:) = matrix_c%matrix_struct%descriptor(:)
605 
606  CALL pdsymm(side, uplo, m, n, alpha, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, beta, c(1, 1), 1, 1, descc)
607 
608 #else
609 
610  lda = matrix_a%matrix_struct%local_leading_dimension
611  ldb = matrix_b%matrix_struct%local_leading_dimension
612  ldc = matrix_c%matrix_struct%local_leading_dimension
613 
614  CALL dsymm(side, uplo, m, n, alpha, a(1, 1), lda, b(1, 1), ldb, beta, c(1, 1), ldc)
615 
616 #endif
617  CALL timestop(handle)
618 
619  END SUBROUTINE cp_fm_symm
620 
621 ! **************************************************************************************************
622 !> \brief computes the Frobenius norm of matrix_a
623 !> \brief computes the Frobenius norm of matrix_a
624 !> \param matrix_a : m x n matrix
625 !> \return ...
626 !> \author VW
627 ! **************************************************************************************************
628  FUNCTION cp_fm_frobenius_norm(matrix_a) RESULT(norm)
629  TYPE(cp_fm_type), INTENT(IN) :: matrix_a
630  REAL(kind=dp) :: norm
631 
632  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_frobenius_norm'
633 
634  INTEGER :: handle, size_a
635  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
636  REAL(kind=dp), EXTERNAL :: ddot
637 #if defined(__SCALAPACK)
638  TYPE(mp_comm_type) :: group
639 #endif
640 
641  CALL timeset(routinen, handle)
642 
643  norm = 0.0_dp
644  a => matrix_a%local_data
645  size_a = SIZE(a, 1)*SIZE(a, 2)
646  norm = ddot(size_a, a(1, 1), 1, a(1, 1), 1)
647 #if defined(__SCALAPACK)
648  group = matrix_a%matrix_struct%para_env
649  CALL group%sum(norm)
650 #endif
651  norm = sqrt(norm)
652 
653  CALL timestop(handle)
654 
655  END FUNCTION cp_fm_frobenius_norm
656 
657 ! **************************************************************************************************
658 !> \brief performs a rank-k update of a symmetric matrix_c
659 !> matrix_c = beta * matrix_c + alpha * matrix_a * transpose ( matrix_a )
660 !> \param uplo : 'U' ('L')
661 !> \param trans : 'N' ('T')
662 !> \param k : number of cols to use in matrix_a
663 !> ia,ja :: 1,1 (could be used for selecting subblock of a)
664 !> \param alpha ...
665 !> \param matrix_a ...
666 !> \param ia ...
667 !> \param ja ...
668 !> \param beta ...
669 !> \param matrix_c ...
670 !> \author Matthias Krack
671 !> \note
672 !> In QS uplo should 'U' (upper part updated)
673 ! **************************************************************************************************
674  SUBROUTINE cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
675  CHARACTER(LEN=1), INTENT(IN) :: uplo, trans
676  INTEGER, INTENT(IN) :: k
677  REAL(kind=dp), INTENT(IN) :: alpha
678  TYPE(cp_fm_type), INTENT(IN) :: matrix_a
679  INTEGER, INTENT(IN) :: ia, ja
680  REAL(kind=dp), INTENT(IN) :: beta
681  TYPE(cp_fm_type), INTENT(IN) :: matrix_c
682 
683  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_syrk'
684 
685  INTEGER :: handle, n
686  REAL(kind=dp), DIMENSION(:, :), POINTER :: a, c
687 #if defined(__SCALAPACK)
688  INTEGER, DIMENSION(9) :: desca, descc
689 #else
690  INTEGER :: lda, ldc
691 #endif
692 
693  CALL timeset(routinen, handle)
694 
695  n = matrix_c%matrix_struct%nrow_global
696 
697  a => matrix_a%local_data
698  c => matrix_c%local_data
699 
700 #if defined(__SCALAPACK)
701 
702  desca(:) = matrix_a%matrix_struct%descriptor(:)
703  descc(:) = matrix_c%matrix_struct%descriptor(:)
704 
705  CALL pdsyrk(uplo, trans, n, k, alpha, a(1, 1), ia, ja, desca, beta, c(1, 1), 1, 1, descc)
706 
707 #else
708 
709  lda = SIZE(a, 1)
710  ldc = SIZE(c, 1)
711 
712  CALL dsyrk(uplo, trans, n, k, alpha, a(ia, ja), lda, beta, c(1, 1), ldc)
713 
714 #endif
715  CALL timestop(handle)
716 
717  END SUBROUTINE cp_fm_syrk
718 
719 ! **************************************************************************************************
720 !> \brief computes the schur product of two matrices
721 !> c_ij = a_ij * b_ij
722 !> \param matrix_a ...
723 !> \param matrix_b ...
724 !> \param matrix_c ...
725 !> \author Joost VandeVondele
726 ! **************************************************************************************************
727  SUBROUTINE cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
728 
729  TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b, matrix_c
730 
731  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_schur_product'
732 
733  INTEGER :: handle, icol_local, irow_local, mypcol, &
734  myprow, ncol_local, npcol, nprow, &
735  nrow_local
736  REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, c
737  TYPE(cp_blacs_env_type), POINTER :: context
738 
739  CALL timeset(routinen, handle)
740 
741  context => matrix_a%matrix_struct%context
742  myprow = context%mepos(1)
743  mypcol = context%mepos(2)
744  nprow = context%num_pe(1)
745  npcol = context%num_pe(2)
746 
747  a => matrix_a%local_data
748  b => matrix_b%local_data
749  c => matrix_c%local_data
750 
751  nrow_local = matrix_a%matrix_struct%nrow_locals(myprow)
752  ncol_local = matrix_a%matrix_struct%ncol_locals(mypcol)
753 
754  DO icol_local = 1, ncol_local
755  DO irow_local = 1, nrow_local
756  c(irow_local, icol_local) = a(irow_local, icol_local)*b(irow_local, icol_local)
757  END DO
758  END DO
759 
760  CALL timestop(handle)
761 
762  END SUBROUTINE cp_fm_schur_product
763 
764 ! **************************************************************************************************
765 !> \brief returns the trace of matrix_a^T matrix_b, i.e
766 !> sum_{i,j}(matrix_a(i,j)*matrix_b(i,j))
767 !> \param matrix_a a matrix
768 !> \param matrix_b another matrix
769 !> \param trace ...
770 !> \par History
771 !> 11.06.2001 Creation (Matthias Krack)
772 !> 12.2002 added doc [fawzi]
773 !> \author Matthias Krack
774 !> \note
775 !> note the transposition of matrix_a!
776 ! **************************************************************************************************
777  SUBROUTINE cp_fm_trace_a0b0t0(matrix_a, matrix_b, trace)
778 
779  TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
780  REAL(kind=dp), INTENT(OUT) :: trace
781 
782  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_trace_a0b0t0'
783 
784  INTEGER :: handle, mypcol, myprow, ncol_local, &
785  npcol, nprow, nrow_local
786  REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
787  REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
788  TYPE(cp_blacs_env_type), POINTER :: context
789  TYPE(mp_comm_type) :: group
790 
791  CALL timeset(routinen, handle)
792 
793  context => matrix_a%matrix_struct%context
794  myprow = context%mepos(1)
795  mypcol = context%mepos(2)
796  nprow = context%num_pe(1)
797  npcol = context%num_pe(2)
798 
799  group = matrix_a%matrix_struct%para_env
800 
801  a => matrix_a%local_data
802  b => matrix_b%local_data
803 
804  a_sp => matrix_a%local_data_sp
805  b_sp => matrix_b%local_data_sp
806 
807  nrow_local = min(matrix_a%matrix_struct%nrow_locals(myprow), matrix_b%matrix_struct%nrow_locals(myprow))
808  ncol_local = min(matrix_a%matrix_struct%ncol_locals(mypcol), matrix_b%matrix_struct%ncol_locals(mypcol))
809 
810  ! cries for an accurate_dot_product
811  IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
812  trace = accurate_sum(real(a_sp(1:nrow_local, 1:ncol_local)* &
813  b_sp(1:nrow_local, 1:ncol_local), dp))
814  ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
815  trace = accurate_sum(real(a_sp(1:nrow_local, 1:ncol_local), dp)* &
816  b(1:nrow_local, 1:ncol_local))
817  ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
818  trace = accurate_sum(a(1:nrow_local, 1:ncol_local)* &
819  REAL(b_sp(1:nrow_local, 1:ncol_local), dp))
820  ELSE
821  trace = accurate_dot_product(a(1:nrow_local, 1:ncol_local), &
822  b(1:nrow_local, 1:ncol_local))
823  END IF
824 
825  CALL group%sum(trace)
826 
827  CALL timestop(handle)
828 
829  END SUBROUTINE cp_fm_trace_a0b0t0
830 
831 
832 ! **************************************************************************************************
833 !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b) for each pair of matrices A_k and B.
834 !> \param matrix_a list of A matrices
835 !> \param matrix_b B matrix
836 !> \param trace computed traces
837 !> \par History
838 !> * 08.2018 forked from cp_fm_trace() [Sergey Chulkov]
839 !> \note \parblock
840 !> Computing the trace requires collective communication between involved MPI processes
841 !> that implies a synchronisation point between them. The aim of this subroutine is to reduce
842 !> the amount of time wasted in such synchronisation by performing one large collective
843 !> operation which involves all the matrices in question.
844 !>
845 !> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b0t1' means that
846 !> the dummy variables 'matrix_a' and 'trace' are 1-dimensional arrays, while the variable
847 !> 'matrix_b' is a single matrix.
848 !> \endparblock
849 ! **************************************************************************************************
850  SUBROUTINE cp_fm_trace_a1b0t1_a (matrix_a, matrix_b, trace)
851  TYPE(cp_fm_type), DIMENSION(:), INTENT(in) :: matrix_a
852  TYPE(cp_fm_type), INTENT(IN) :: matrix_b
853  REAL(kind=dp), DIMENSION(:), INTENT(out) :: trace
854 
855  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_trace_a1b0t1_a'
856 
857  INTEGER :: handle, imatrix, n_matrices, &
858  ncols_local, nrows_local
859  LOGICAL :: use_sp_a, use_sp_b
860  REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
861  REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
862  TYPE(mp_comm_type) :: group
863 
864  CALL timeset(routinen, handle)
865 
866  n_matrices = SIZE(trace)
867  cpassert(SIZE(matrix_a) == n_matrices)
868 
869  CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
870  use_sp_b = matrix_b%use_sp
871 
872  IF (use_sp_b) THEN
873  ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
874  ELSE
875  ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
876  END IF
877 
878 !$OMP PARALLEL DO DEFAULT(NONE), &
879 !$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
880 !$OMP SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
881 !$OMP SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
882 
883  DO imatrix = 1, n_matrices
884 
885  use_sp_a = matrix_a(imatrix) %use_sp
886 
887  ! assume that the matrices A(i) and B have identical shapes and distribution schemes
888  IF (use_sp_a .AND. use_sp_b) THEN
889  ldata_a_sp => matrix_a(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
890  trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
891  ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
892  ldata_a => matrix_a(imatrix) %local_data(1:nrows_local, 1:ncols_local)
893  trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
894  ELSE
895  cpabort("Matrices A and B are of different types")
896  END IF
897  END DO
898 !$OMP END PARALLEL DO
899 
900  group = matrix_b%matrix_struct%para_env
901  CALL group%sum(trace)
902 
903  CALL timestop(handle)
904  END SUBROUTINE cp_fm_trace_a1b0t1_a
905  SUBROUTINE cp_fm_trace_a1b0t1_p (matrix_a, matrix_b, trace)
906  TYPE(cp_fm_p_type), DIMENSION(:), INTENT(in) :: matrix_a
907  TYPE(cp_fm_type), INTENT(IN) :: matrix_b
908  REAL(kind=dp), DIMENSION(:), INTENT(out) :: trace
909 
910  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_trace_a1b0t1_p'
911 
912  INTEGER :: handle, imatrix, n_matrices, &
913  ncols_local, nrows_local
914  LOGICAL :: use_sp_a, use_sp_b
915  REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
916  REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
917  TYPE(mp_comm_type) :: group
918 
919  CALL timeset(routinen, handle)
920 
921  n_matrices = SIZE(trace)
922  cpassert(SIZE(matrix_a) == n_matrices)
923 
924  CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
925  use_sp_b = matrix_b%use_sp
926 
927  IF (use_sp_b) THEN
928  ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
929  ELSE
930  ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
931  END IF
932 
933 !$OMP PARALLEL DO DEFAULT(NONE), &
934 !$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
935 !$OMP SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
936 !$OMP SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
937 
938  DO imatrix = 1, n_matrices
939 
940  use_sp_a = matrix_a(imatrix) %matrix%use_sp
941 
942  ! assume that the matrices A(i) and B have identical shapes and distribution schemes
943  IF (use_sp_a .AND. use_sp_b) THEN
944  ldata_a_sp => matrix_a(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
945  trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
946  ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
947  ldata_a => matrix_a(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
948  trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
949  ELSE
950  cpabort("Matrices A and B are of different types")
951  END IF
952  END DO
953 !$OMP END PARALLEL DO
954 
955  group = matrix_b%matrix_struct%para_env
956  CALL group%sum(trace)
957 
958  CALL timestop(handle)
959  END SUBROUTINE cp_fm_trace_a1b0t1_p
960 
961 ! **************************************************************************************************
962 !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b(k)) for each pair of matrices A_k and B_k.
963 !> \param matrix_a list of A matrices
964 !> \param matrix_b list of B matrices
965 !> \param trace computed traces
966 !> \param accurate ...
967 !> \par History
968 !> * 11.2016 forked from cp_fm_trace() [Sergey Chulkov]
969 !> \note \parblock
970 !> Computing the trace requires collective communication between involved MPI processes
971 !> that implies a synchronisation point between them. The aim of this subroutine is to reduce
972 !> the amount of time wasted in such synchronisation by performing one large collective
973 !> operation which involves all the matrices in question.
974 !>
975 !> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b1t1' means that
976 !> all dummy variables (matrix_a, matrix_b, and trace) are 1-dimensional arrays.
977 !> \endparblock
978 ! **************************************************************************************************
979  SUBROUTINE cp_fm_trace_a1b1t1_aa (matrix_a, matrix_b, trace, accurate)
980  TYPE(cp_fm_type), DIMENSION(:), INTENT(in) :: matrix_a
981  TYPE(cp_fm_type), DIMENSION(:), INTENT(in) :: matrix_b
982  REAL(kind=dp), DIMENSION(:), INTENT(out) :: trace
983  LOGICAL, INTENT(IN), OPTIONAL :: accurate
984 
985  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_trace_a1b1t1_aa'
986 
987  INTEGER :: handle, imatrix, n_matrices, &
988  ncols_local, nrows_local
989  LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
990  REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
991  REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
992  TYPE(mp_comm_type) :: group
993 
994  CALL timeset(routinen, handle)
995 
996  n_matrices = SIZE(trace)
997  cpassert(SIZE(matrix_a) == n_matrices)
998  cpassert(SIZE(matrix_b) == n_matrices)
999 
1000  use_accurate_sum = .true.
1001  IF (PRESENT(accurate)) use_accurate_sum = accurate
1002 
1003 !$OMP PARALLEL DO DEFAULT(NONE), &
1004 !$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1005 !$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1006 !$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1007  DO imatrix = 1, n_matrices
1008  CALL cp_fm_get_info(matrix_a(imatrix) , nrow_local=nrows_local, ncol_local=ncols_local)
1009 
1010  use_sp_a = matrix_a(imatrix) %use_sp
1011  use_sp_b = matrix_b(imatrix) %use_sp
1012 
1013  ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1014  IF (use_sp_a .AND. use_sp_b) THEN
1015  ldata_a_sp => matrix_a(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1016  ldata_b_sp => matrix_b(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1017  IF (use_accurate_sum) THEN
1018  trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1019  ELSE
1020  trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1021  END IF
1022  ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1023  ldata_a => matrix_a(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1024  ldata_b => matrix_b(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1025  IF (use_accurate_sum) THEN
1026  trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1027  ELSE
1028  trace(imatrix) = sum(ldata_a*ldata_b)
1029  END IF
1030  ELSE
1031  cpabort("Matrices A and B are of different types")
1032  END IF
1033  END DO
1034 !$OMP END PARALLEL DO
1035 
1036  group = matrix_a(1) %matrix_struct%para_env
1037  CALL group%sum(trace)
1038 
1039  CALL timestop(handle)
1040  END SUBROUTINE cp_fm_trace_a1b1t1_aa
1041  SUBROUTINE cp_fm_trace_a1b1t1_ap (matrix_a, matrix_b, trace, accurate)
1042  TYPE(cp_fm_type), DIMENSION(:), INTENT(in) :: matrix_a
1043  TYPE(cp_fm_p_type), DIMENSION(:), INTENT(in) :: matrix_b
1044  REAL(kind=dp), DIMENSION(:), INTENT(out) :: trace
1045  LOGICAL, INTENT(IN), OPTIONAL :: accurate
1046 
1047  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_trace_a1b1t1_ap'
1048 
1049  INTEGER :: handle, imatrix, n_matrices, &
1050  ncols_local, nrows_local
1051  LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1052  REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1053  REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1054  TYPE(mp_comm_type) :: group
1055 
1056  CALL timeset(routinen, handle)
1057 
1058  n_matrices = SIZE(trace)
1059  cpassert(SIZE(matrix_a) == n_matrices)
1060  cpassert(SIZE(matrix_b) == n_matrices)
1061 
1062  use_accurate_sum = .true.
1063  IF (PRESENT(accurate)) use_accurate_sum = accurate
1064 
1065 !$OMP PARALLEL DO DEFAULT(NONE), &
1066 !$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1067 !$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1068 !$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1069  DO imatrix = 1, n_matrices
1070  CALL cp_fm_get_info(matrix_a(imatrix) , nrow_local=nrows_local, ncol_local=ncols_local)
1071 
1072  use_sp_a = matrix_a(imatrix) %use_sp
1073  use_sp_b = matrix_b(imatrix) %matrix%use_sp
1074 
1075  ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1076  IF (use_sp_a .AND. use_sp_b) THEN
1077  ldata_a_sp => matrix_a(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1078  ldata_b_sp => matrix_b(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1079  IF (use_accurate_sum) THEN
1080  trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1081  ELSE
1082  trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1083  END IF
1084  ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1085  ldata_a => matrix_a(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1086  ldata_b => matrix_b(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1087  IF (use_accurate_sum) THEN
1088  trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1089  ELSE
1090  trace(imatrix) = sum(ldata_a*ldata_b)
1091  END IF
1092  ELSE
1093  cpabort("Matrices A and B are of different types")
1094  END IF
1095  END DO
1096 !$OMP END PARALLEL DO
1097 
1098  group = matrix_a(1) %matrix_struct%para_env
1099  CALL group%sum(trace)
1100 
1101  CALL timestop(handle)
1102  END SUBROUTINE cp_fm_trace_a1b1t1_ap
1103  SUBROUTINE cp_fm_trace_a1b1t1_pa (matrix_a, matrix_b, trace, accurate)
1104  TYPE(cp_fm_p_type), DIMENSION(:), INTENT(in) :: matrix_a
1105  TYPE(cp_fm_type), DIMENSION(:), INTENT(in) :: matrix_b
1106  REAL(kind=dp), DIMENSION(:), INTENT(out) :: trace
1107  LOGICAL, INTENT(IN), OPTIONAL :: accurate
1108 
1109  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_trace_a1b1t1_pa'
1110 
1111  INTEGER :: handle, imatrix, n_matrices, &
1112  ncols_local, nrows_local
1113  LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1114  REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1115  REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1116  TYPE(mp_comm_type) :: group
1117 
1118  CALL timeset(routinen, handle)
1119 
1120  n_matrices = SIZE(trace)
1121  cpassert(SIZE(matrix_a) == n_matrices)
1122  cpassert(SIZE(matrix_b) == n_matrices)
1123 
1124  use_accurate_sum = .true.
1125  IF (PRESENT(accurate)) use_accurate_sum = accurate
1126 
1127 !$OMP PARALLEL DO DEFAULT(NONE), &
1128 !$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1129 !$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1130 !$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1131  DO imatrix = 1, n_matrices
1132  CALL cp_fm_get_info(matrix_a(imatrix) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1133 
1134  use_sp_a = matrix_a(imatrix) %matrix%use_sp
1135  use_sp_b = matrix_b(imatrix) %use_sp
1136 
1137  ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1138  IF (use_sp_a .AND. use_sp_b) THEN
1139  ldata_a_sp => matrix_a(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1140  ldata_b_sp => matrix_b(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1141  IF (use_accurate_sum) THEN
1142  trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1143  ELSE
1144  trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1145  END IF
1146  ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1147  ldata_a => matrix_a(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1148  ldata_b => matrix_b(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1149  IF (use_accurate_sum) THEN
1150  trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1151  ELSE
1152  trace(imatrix) = sum(ldata_a*ldata_b)
1153  END IF
1154  ELSE
1155  cpabort("Matrices A and B are of different types")
1156  END IF
1157  END DO
1158 !$OMP END PARALLEL DO
1159 
1160  group = matrix_a(1) %matrix%matrix_struct%para_env
1161  CALL group%sum(trace)
1162 
1163  CALL timestop(handle)
1164  END SUBROUTINE cp_fm_trace_a1b1t1_pa
1165  SUBROUTINE cp_fm_trace_a1b1t1_pp (matrix_a, matrix_b, trace, accurate)
1166  TYPE(cp_fm_p_type), DIMENSION(:), INTENT(in) :: matrix_a
1167  TYPE(cp_fm_p_type), DIMENSION(:), INTENT(in) :: matrix_b
1168  REAL(kind=dp), DIMENSION(:), INTENT(out) :: trace
1169  LOGICAL, INTENT(IN), OPTIONAL :: accurate
1170 
1171  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_trace_a1b1t1_pp'
1172 
1173  INTEGER :: handle, imatrix, n_matrices, &
1174  ncols_local, nrows_local
1175  LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1176  REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1177  REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1178  TYPE(mp_comm_type) :: group
1179 
1180  CALL timeset(routinen, handle)
1181 
1182  n_matrices = SIZE(trace)
1183  cpassert(SIZE(matrix_a) == n_matrices)
1184  cpassert(SIZE(matrix_b) == n_matrices)
1185 
1186  use_accurate_sum = .true.
1187  IF (PRESENT(accurate)) use_accurate_sum = accurate
1188 
1189 !$OMP PARALLEL DO DEFAULT(NONE), &
1190 !$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1191 !$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1192 !$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1193  DO imatrix = 1, n_matrices
1194  CALL cp_fm_get_info(matrix_a(imatrix) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1195 
1196  use_sp_a = matrix_a(imatrix) %matrix%use_sp
1197  use_sp_b = matrix_b(imatrix) %matrix%use_sp
1198 
1199  ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1200  IF (use_sp_a .AND. use_sp_b) THEN
1201  ldata_a_sp => matrix_a(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1202  ldata_b_sp => matrix_b(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1203  IF (use_accurate_sum) THEN
1204  trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1205  ELSE
1206  trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1207  END IF
1208  ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1209  ldata_a => matrix_a(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1210  ldata_b => matrix_b(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1211  IF (use_accurate_sum) THEN
1212  trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1213  ELSE
1214  trace(imatrix) = sum(ldata_a*ldata_b)
1215  END IF
1216  ELSE
1217  cpabort("Matrices A and B are of different types")
1218  END IF
1219  END DO
1220 !$OMP END PARALLEL DO
1221 
1222  group = matrix_a(1) %matrix%matrix_struct%para_env
1223  CALL group%sum(trace)
1224 
1225  CALL timestop(handle)
1226  END SUBROUTINE cp_fm_trace_a1b1t1_pp
1227 
1228 ! **************************************************************************************************
1229 !> \brief Compute trace(i,j) = \sum_k Tr (matrix_a(k,i)^T matrix_b(k,j)).
1230 !> \param matrix_a list of A matrices
1231 !> \param matrix_b list of B matrices
1232 !> \param trace computed traces
1233 !> \param accurate ...
1234 ! **************************************************************************************************
1235  SUBROUTINE cp_fm_contracted_trace_a2b2t2_aa (matrix_a, matrix_b, trace, accurate)
1236  TYPE(cp_fm_type), DIMENSION(:, :), INTENT(in) :: matrix_a
1237  TYPE(cp_fm_type), DIMENSION(:, :), INTENT(in) :: matrix_b
1238  REAL(kind=dp), DIMENSION(:, :), INTENT(out) :: trace
1239  LOGICAL, INTENT(IN), OPTIONAL :: accurate
1240 
1241  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_contracted_trace_a2b2t2_aa'
1242 
1243  INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1244  nrows_local, nz
1245  INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1246  LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1247  REAL(kind=dp) :: t
1248  REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1249  REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1250  TYPE(mp_comm_type) :: group
1251 
1252  CALL timeset(routinen, handle)
1253 
1254  nz = SIZE(matrix_a, 1)
1255  cpassert(SIZE(matrix_b, 1) == nz)
1256 
1257  na = SIZE(matrix_a, 2)
1258  nb = SIZE(matrix_b, 2)
1259  cpassert(SIZE(trace, 1) == na)
1260  cpassert(SIZE(trace, 2) == nb)
1261 
1262  use_accurate_sum = .true.
1263  IF (PRESENT(accurate)) use_accurate_sum = accurate
1264 
1265  ! here we use one running index (itrace) instead of two (ia, ib) in order to
1266  ! improve load balance between shared-memory threads
1267  ntraces = na*nb
1268  na8 = int(na, kind=int_8)
1269 
1270 !$OMP PARALLEL DO DEFAULT(NONE), &
1271 !$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1272 !$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1273 !$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1274  DO itrace = 1, ntraces
1275  ib8 = (itrace - 1)/na8
1276  ia = int(itrace - ib8*na8)
1277  ib = int(ib8) + 1
1278 
1279  t = 0.0_dp
1280  DO iz = 1, nz
1281  CALL cp_fm_get_info(matrix_a(iz, ia) , nrow_local=nrows_local, ncol_local=ncols_local)
1282  use_sp_a = matrix_a(iz, ia) %use_sp
1283  use_sp_b = matrix_b(iz, ib) %use_sp
1284 
1285  ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1286  IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1287  ldata_a => matrix_a(iz, ia) %local_data(1:nrows_local, 1:ncols_local)
1288  ldata_b => matrix_b(iz, ib) %local_data(1:nrows_local, 1:ncols_local)
1289  IF (use_accurate_sum) THEN
1290  t = t + accurate_dot_product(ldata_a, ldata_b)
1291  ELSE
1292  t = t + sum(ldata_a*ldata_b)
1293  END IF
1294  ELSE IF (use_sp_a .AND. use_sp_b) THEN
1295  ldata_a_sp => matrix_a(iz, ia) %local_data_sp(1:nrows_local, 1:ncols_local)
1296  ldata_b_sp => matrix_b(iz, ib) %local_data_sp(1:nrows_local, 1:ncols_local)
1297  IF (use_accurate_sum) THEN
1298  t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1299  ELSE
1300  t = t + sum(ldata_a_sp*ldata_b_sp)
1301  END IF
1302  ELSE
1303  cpabort("Matrices A and B are of different types")
1304  END IF
1305  END DO
1306  trace(ia, ib) = t
1307  END DO
1308 !$OMP END PARALLEL DO
1309 
1310  group = matrix_a(1, 1) %matrix_struct%para_env
1311  CALL group%sum(trace)
1312 
1313  CALL timestop(handle)
1314  END SUBROUTINE cp_fm_contracted_trace_a2b2t2_aa
1315  SUBROUTINE cp_fm_contracted_trace_a2b2t2_ap (matrix_a, matrix_b, trace, accurate)
1316  TYPE(cp_fm_type), DIMENSION(:, :), INTENT(in) :: matrix_a
1317  TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(in) :: matrix_b
1318  REAL(kind=dp), DIMENSION(:, :), INTENT(out) :: trace
1319  LOGICAL, INTENT(IN), OPTIONAL :: accurate
1320 
1321  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_contracted_trace_a2b2t2_ap'
1322 
1323  INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1324  nrows_local, nz
1325  INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1326  LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1327  REAL(kind=dp) :: t
1328  REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1329  REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1330  TYPE(mp_comm_type) :: group
1331 
1332  CALL timeset(routinen, handle)
1333 
1334  nz = SIZE(matrix_a, 1)
1335  cpassert(SIZE(matrix_b, 1) == nz)
1336 
1337  na = SIZE(matrix_a, 2)
1338  nb = SIZE(matrix_b, 2)
1339  cpassert(SIZE(trace, 1) == na)
1340  cpassert(SIZE(trace, 2) == nb)
1341 
1342  use_accurate_sum = .true.
1343  IF (PRESENT(accurate)) use_accurate_sum = accurate
1344 
1345  ! here we use one running index (itrace) instead of two (ia, ib) in order to
1346  ! improve load balance between shared-memory threads
1347  ntraces = na*nb
1348  na8 = int(na, kind=int_8)
1349 
1350 !$OMP PARALLEL DO DEFAULT(NONE), &
1351 !$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1352 !$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1353 !$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1354  DO itrace = 1, ntraces
1355  ib8 = (itrace - 1)/na8
1356  ia = int(itrace - ib8*na8)
1357  ib = int(ib8) + 1
1358 
1359  t = 0.0_dp
1360  DO iz = 1, nz
1361  CALL cp_fm_get_info(matrix_a(iz, ia) , nrow_local=nrows_local, ncol_local=ncols_local)
1362  use_sp_a = matrix_a(iz, ia) %use_sp
1363  use_sp_b = matrix_b(iz, ib) %matrix%use_sp
1364 
1365  ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1366  IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1367  ldata_a => matrix_a(iz, ia) %local_data(1:nrows_local, 1:ncols_local)
1368  ldata_b => matrix_b(iz, ib) %matrix%local_data(1:nrows_local, 1:ncols_local)
1369  IF (use_accurate_sum) THEN
1370  t = t + accurate_dot_product(ldata_a, ldata_b)
1371  ELSE
1372  t = t + sum(ldata_a*ldata_b)
1373  END IF
1374  ELSE IF (use_sp_a .AND. use_sp_b) THEN
1375  ldata_a_sp => matrix_a(iz, ia) %local_data_sp(1:nrows_local, 1:ncols_local)
1376  ldata_b_sp => matrix_b(iz, ib) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1377  IF (use_accurate_sum) THEN
1378  t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1379  ELSE
1380  t = t + sum(ldata_a_sp*ldata_b_sp)
1381  END IF
1382  ELSE
1383  cpabort("Matrices A and B are of different types")
1384  END IF
1385  END DO
1386  trace(ia, ib) = t
1387  END DO
1388 !$OMP END PARALLEL DO
1389 
1390  group = matrix_a(1, 1) %matrix_struct%para_env
1391  CALL group%sum(trace)
1392 
1393  CALL timestop(handle)
1394  END SUBROUTINE cp_fm_contracted_trace_a2b2t2_ap
1395  SUBROUTINE cp_fm_contracted_trace_a2b2t2_pa (matrix_a, matrix_b, trace, accurate)
1396  TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(in) :: matrix_a
1397  TYPE(cp_fm_type), DIMENSION(:, :), INTENT(in) :: matrix_b
1398  REAL(kind=dp), DIMENSION(:, :), INTENT(out) :: trace
1399  LOGICAL, INTENT(IN), OPTIONAL :: accurate
1400 
1401  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_contracted_trace_a2b2t2_pa'
1402 
1403  INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1404  nrows_local, nz
1405  INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1406  LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1407  REAL(kind=dp) :: t
1408  REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1409  REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1410  TYPE(mp_comm_type) :: group
1411 
1412  CALL timeset(routinen, handle)
1413 
1414  nz = SIZE(matrix_a, 1)
1415  cpassert(SIZE(matrix_b, 1) == nz)
1416 
1417  na = SIZE(matrix_a, 2)
1418  nb = SIZE(matrix_b, 2)
1419  cpassert(SIZE(trace, 1) == na)
1420  cpassert(SIZE(trace, 2) == nb)
1421 
1422  use_accurate_sum = .true.
1423  IF (PRESENT(accurate)) use_accurate_sum = accurate
1424 
1425  ! here we use one running index (itrace) instead of two (ia, ib) in order to
1426  ! improve load balance between shared-memory threads
1427  ntraces = na*nb
1428  na8 = int(na, kind=int_8)
1429 
1430 !$OMP PARALLEL DO DEFAULT(NONE), &
1431 !$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1432 !$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1433 !$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1434  DO itrace = 1, ntraces
1435  ib8 = (itrace - 1)/na8
1436  ia = int(itrace - ib8*na8)
1437  ib = int(ib8) + 1
1438 
1439  t = 0.0_dp
1440  DO iz = 1, nz
1441  CALL cp_fm_get_info(matrix_a(iz, ia) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1442  use_sp_a = matrix_a(iz, ia) %matrix%use_sp
1443  use_sp_b = matrix_b(iz, ib) %use_sp
1444 
1445  ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1446  IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1447  ldata_a => matrix_a(iz, ia) %matrix%local_data(1:nrows_local, 1:ncols_local)
1448  ldata_b => matrix_b(iz, ib) %local_data(1:nrows_local, 1:ncols_local)
1449  IF (use_accurate_sum) THEN
1450  t = t + accurate_dot_product(ldata_a, ldata_b)
1451  ELSE
1452  t = t + sum(ldata_a*ldata_b)
1453  END IF
1454  ELSE IF (use_sp_a .AND. use_sp_b) THEN
1455  ldata_a_sp => matrix_a(iz, ia) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1456  ldata_b_sp => matrix_b(iz, ib) %local_data_sp(1:nrows_local, 1:ncols_local)
1457  IF (use_accurate_sum) THEN
1458  t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1459  ELSE
1460  t = t + sum(ldata_a_sp*ldata_b_sp)
1461  END IF
1462  ELSE
1463  cpabort("Matrices A and B are of different types")
1464  END IF
1465  END DO
1466  trace(ia, ib) = t
1467  END DO
1468 !$OMP END PARALLEL DO
1469 
1470  group = matrix_a(1, 1) %matrix%matrix_struct%para_env
1471  CALL group%sum(trace)
1472 
1473  CALL timestop(handle)
1474  END SUBROUTINE cp_fm_contracted_trace_a2b2t2_pa
1475  SUBROUTINE cp_fm_contracted_trace_a2b2t2_pp (matrix_a, matrix_b, trace, accurate)
1476  TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(in) :: matrix_a
1477  TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(in) :: matrix_b
1478  REAL(kind=dp), DIMENSION(:, :), INTENT(out) :: trace
1479  LOGICAL, INTENT(IN), OPTIONAL :: accurate
1480 
1481  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_contracted_trace_a2b2t2_pp'
1482 
1483  INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1484  nrows_local, nz
1485  INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1486  LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1487  REAL(kind=dp) :: t
1488  REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1489  REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1490  TYPE(mp_comm_type) :: group
1491 
1492  CALL timeset(routinen, handle)
1493 
1494  nz = SIZE(matrix_a, 1)
1495  cpassert(SIZE(matrix_b, 1) == nz)
1496 
1497  na = SIZE(matrix_a, 2)
1498  nb = SIZE(matrix_b, 2)
1499  cpassert(SIZE(trace, 1) == na)
1500  cpassert(SIZE(trace, 2) == nb)
1501 
1502  use_accurate_sum = .true.
1503  IF (PRESENT(accurate)) use_accurate_sum = accurate
1504 
1505  ! here we use one running index (itrace) instead of two (ia, ib) in order to
1506  ! improve load balance between shared-memory threads
1507  ntraces = na*nb
1508  na8 = int(na, kind=int_8)
1509 
1510 !$OMP PARALLEL DO DEFAULT(NONE), &
1511 !$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1512 !$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1513 !$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1514  DO itrace = 1, ntraces
1515  ib8 = (itrace - 1)/na8
1516  ia = int(itrace - ib8*na8)
1517  ib = int(ib8) + 1
1518 
1519  t = 0.0_dp
1520  DO iz = 1, nz
1521  CALL cp_fm_get_info(matrix_a(iz, ia) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1522  use_sp_a = matrix_a(iz, ia) %matrix%use_sp
1523  use_sp_b = matrix_b(iz, ib) %matrix%use_sp
1524 
1525  ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1526  IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1527  ldata_a => matrix_a(iz, ia) %matrix%local_data(1:nrows_local, 1:ncols_local)
1528  ldata_b => matrix_b(iz, ib) %matrix%local_data(1:nrows_local, 1:ncols_local)
1529  IF (use_accurate_sum) THEN
1530  t = t + accurate_dot_product(ldata_a, ldata_b)
1531  ELSE
1532  t = t + sum(ldata_a*ldata_b)
1533  END IF
1534  ELSE IF (use_sp_a .AND. use_sp_b) THEN
1535  ldata_a_sp => matrix_a(iz, ia) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1536  ldata_b_sp => matrix_b(iz, ib) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1537  IF (use_accurate_sum) THEN
1538  t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1539  ELSE
1540  t = t + sum(ldata_a_sp*ldata_b_sp)
1541  END IF
1542  ELSE
1543  cpabort("Matrices A and B are of different types")
1544  END IF
1545  END DO
1546  trace(ia, ib) = t
1547  END DO
1548 !$OMP END PARALLEL DO
1549 
1550  group = matrix_a(1, 1) %matrix%matrix_struct%para_env
1551  CALL group%sum(trace)
1552 
1553  CALL timestop(handle)
1554  END SUBROUTINE cp_fm_contracted_trace_a2b2t2_pp
1555 
1556 ! **************************************************************************************************
1557 !> \brief multiplies in place by a triangular matrix:
1558 !> matrix_b = alpha op(triangular_matrix) matrix_b
1559 !> or (if side='R')
1560 !> matrix_b = alpha matrix_b op(triangular_matrix)
1561 !> op(triangular_matrix) is:
1562 !> triangular_matrix (if transpose_tr=.false. and invert_tr=.false.)
1563 !> triangular_matrix^T (if transpose_tr=.true. and invert_tr=.false.)
1564 !> triangular_matrix^(-1) (if transpose_tr=.false. and invert_tr=.true.)
1565 !> triangular_matrix^(-T) (if transpose_tr=.true. and invert_tr=.true.)
1566 !> \param triangular_matrix the triangular matrix that multiplies the other
1567 !> \param matrix_b the matrix that gets multiplied and stores the result
1568 !> \param side on which side of matrix_b stays op(triangular_matrix)
1569 !> (defaults to 'L')
1570 !> \param transpose_tr if the triangular matrix should be transposed
1571 !> (defaults to false)
1572 !> \param invert_tr if the triangular matrix should be inverted
1573 !> (defaults to false)
1574 !> \param uplo_tr if triangular_matrix is stored in the upper ('U') or
1575 !> lower ('L') triangle (defaults to 'U')
1576 !> \param unit_diag_tr if the diagonal elements of triangular_matrix should
1577 !> be assumed to be 1 (defaults to false)
1578 !> \param n_rows the number of rows of the result (defaults to
1579 !> size(matrix_b,1))
1580 !> \param n_cols the number of columns of the result (defaults to
1581 !> size(matrix_b,2))
1582 !> \param alpha ...
1583 !> \par History
1584 !> 08.2002 created [fawzi]
1585 !> \author Fawzi Mohamed
1586 !> \note
1587 !> needs an mpi env
1588 ! **************************************************************************************************
1589  SUBROUTINE cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, &
1590  transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, &
1591  alpha)
1592  TYPE(cp_fm_type), INTENT(IN) :: triangular_matrix, matrix_b
1593  CHARACTER, INTENT(in), OPTIONAL :: side
1594  LOGICAL, INTENT(in), OPTIONAL :: transpose_tr, invert_tr
1595  CHARACTER, INTENT(in), OPTIONAL :: uplo_tr
1596  LOGICAL, INTENT(in), OPTIONAL :: unit_diag_tr
1597  INTEGER, INTENT(in), OPTIONAL :: n_rows, n_cols
1598  REAL(kind=dp), INTENT(in), OPTIONAL :: alpha
1599 
1600  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_triangular_multiply'
1601 
1602  CHARACTER :: side_char, transa, unit_diag, uplo
1603  INTEGER :: handle, m, n
1604  LOGICAL :: invert
1605  REAL(kind=dp) :: al
1606 
1607  CALL timeset(routinen, handle)
1608  side_char = 'L'
1609  unit_diag = 'N'
1610  uplo = 'U'
1611  transa = 'N'
1612  invert = .false.
1613  al = 1.0_dp
1614  CALL cp_fm_get_info(matrix_b, nrow_global=m, ncol_global=n)
1615  IF (PRESENT(side)) side_char = side
1616  IF (PRESENT(invert_tr)) invert = invert_tr
1617  IF (PRESENT(uplo_tr)) uplo = uplo_tr
1618  IF (PRESENT(unit_diag_tr)) THEN
1619  IF (unit_diag_tr) THEN
1620  unit_diag = 'U'
1621  ELSE
1622  unit_diag = 'N'
1623  END IF
1624  END IF
1625  IF (PRESENT(transpose_tr)) THEN
1626  IF (transpose_tr) THEN
1627  transa = 'T'
1628  ELSE
1629  transa = 'N'
1630  END IF
1631  END IF
1632  IF (PRESENT(alpha)) al = alpha
1633  IF (PRESENT(n_rows)) m = n_rows
1634  IF (PRESENT(n_cols)) n = n_cols
1635 
1636  IF (invert) THEN
1637 
1638 #if defined(__SCALAPACK)
1639  CALL pdtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1640  triangular_matrix%local_data(1, 1), 1, 1, &
1641  triangular_matrix%matrix_struct%descriptor, &
1642  matrix_b%local_data(1, 1), 1, 1, &
1643  matrix_b%matrix_struct%descriptor(1))
1644 #else
1645  CALL dtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1646  triangular_matrix%local_data(1, 1), &
1647  SIZE(triangular_matrix%local_data, 1), &
1648  matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1649 #endif
1650 
1651  ELSE
1652 
1653 #if defined(__SCALAPACK)
1654  CALL pdtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1655  triangular_matrix%local_data(1, 1), 1, 1, &
1656  triangular_matrix%matrix_struct%descriptor, &
1657  matrix_b%local_data(1, 1), 1, 1, &
1658  matrix_b%matrix_struct%descriptor(1))
1659 #else
1660  CALL dtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1661  triangular_matrix%local_data(1, 1), &
1662  SIZE(triangular_matrix%local_data, 1), &
1663  matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1664 #endif
1665 
1666  END IF
1667 
1668  CALL timestop(handle)
1669  END SUBROUTINE cp_fm_triangular_multiply
1670 
1671 ! **************************************************************************************************
1672 !> \brief scales a matrix
1673 !> matrix_a = alpha * matrix_b
1674 !> \param alpha ...
1675 !> \param matrix_a ...
1676 !> \note
1677 !> use cp_fm_set_all to zero (avoids problems with nan)
1678 ! **************************************************************************************************
1679  SUBROUTINE cp_fm_scale(alpha, matrix_a)
1680  REAL(kind=dp), INTENT(IN) :: alpha
1681  TYPE(cp_fm_type), INTENT(IN) :: matrix_a
1682 
1683  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_scale'
1684 
1685  INTEGER :: handle, size_a
1686  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1687 
1688  CALL timeset(routinen, handle)
1689 
1690  NULLIFY (a)
1691 
1692  a => matrix_a%local_data
1693  size_a = SIZE(a, 1)*SIZE(a, 2)
1694 
1695  CALL dscal(size_a, alpha, a, 1)
1696 
1697  CALL timestop(handle)
1698 
1699  END SUBROUTINE cp_fm_scale
1700 
1701 ! **************************************************************************************************
1702 !> \brief transposes a matrix
1703 !> matrixt = matrix ^ T
1704 !> \param matrix ...
1705 !> \param matrixt ...
1706 !> \note
1707 !> all matrix elements are transposed (see cp_fm_upper_to_half to symmetrise a matrix)
1708 ! **************************************************************************************************
1709  SUBROUTINE cp_fm_transpose(matrix, matrixt)
1710  TYPE(cp_fm_type), INTENT(IN) :: matrix, matrixt
1711 
1712  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_transpose'
1713 
1714  INTEGER :: handle, ncol_global, &
1715  nrow_global, ncol_globalt, nrow_globalt
1716  REAL(kind=dp), DIMENSION(:, :), POINTER :: a, c
1717 #if defined(__SCALAPACK)
1718  INTEGER, DIMENSION(9) :: desca, descc
1719 #else
1720  INTEGER :: i, j
1721 #endif
1722 
1723  nrow_global = matrix%matrix_struct%nrow_global
1724  ncol_global = matrix%matrix_struct%ncol_global
1725  nrow_globalt = matrixt%matrix_struct%nrow_global
1726  ncol_globalt = matrixt%matrix_struct%ncol_global
1727  cpassert(nrow_global == ncol_globalt)
1728  cpassert(nrow_globalt == ncol_global)
1729 
1730  CALL timeset(routinen, handle)
1731 
1732  a => matrix%local_data
1733  c => matrixt%local_data
1734 
1735 #if defined(__SCALAPACK)
1736  desca(:) = matrix%matrix_struct%descriptor(:)
1737  descc(:) = matrixt%matrix_struct%descriptor(:)
1738  CALL pdtran(ncol_global, nrow_global, 1.0_dp, a(1, 1), 1, 1, desca, 0.0_dp, c(1, 1), 1, 1, descc)
1739 #else
1740  DO j = 1, ncol_global
1741  DO i = 1, nrow_global
1742  c(j, i) = a(i, j)
1743  END DO
1744  END DO
1745 #endif
1746  CALL timestop(handle)
1747 
1748  END SUBROUTINE cp_fm_transpose
1749 
1750 ! **************************************************************************************************
1751 !> \brief given an upper triangular matrix computes the corresponding full matrix
1752 !> \param matrix the upper triangular matrix as input, the full matrix as output
1753 !> \param work a matrix of the same size as matrix
1754 !> \author Matthias Krack
1755 !> \note
1756 !> the lower triangular part is irrelevant
1757 ! **************************************************************************************************
1758  SUBROUTINE cp_fm_upper_to_full(matrix, work)
1759 
1760  TYPE(cp_fm_type), INTENT(IN) :: matrix, work
1761 
1762  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_upper_to_full'
1763 
1764  INTEGER :: handle, icol_global, irow_global, &
1765  mypcol, myprow, ncol_global, &
1766  npcol, nprow, nrow_global
1767  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1768  REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
1769  TYPE(cp_blacs_env_type), POINTER :: context
1770 
1771 #if defined(__SCALAPACK)
1772  INTEGER :: icol_local, irow_local, &
1773  ncol_block, ncol_local, &
1774  nrow_block, nrow_local
1775  INTEGER, DIMENSION(9) :: desca, descc
1776  INTEGER, EXTERNAL :: indxl2g
1777  REAL(kind=dp), DIMENSION(:, :), POINTER :: c
1778  REAL(kind=sp), DIMENSION(:, :), POINTER :: c_sp
1779 #endif
1780 
1781  nrow_global = matrix%matrix_struct%nrow_global
1782  ncol_global = matrix%matrix_struct%ncol_global
1783  cpassert(nrow_global == ncol_global)
1784  nrow_global = work%matrix_struct%nrow_global
1785  ncol_global = work%matrix_struct%ncol_global
1786  cpassert(nrow_global == ncol_global)
1787  cpassert(matrix%use_sp .EQV. work%use_sp)
1788 
1789  CALL timeset(routinen, handle)
1790 
1791  context => matrix%matrix_struct%context
1792  myprow = context%mepos(1)
1793  mypcol = context%mepos(2)
1794  nprow = context%num_pe(1)
1795  npcol = context%num_pe(2)
1796 
1797 #if defined(__SCALAPACK)
1798 
1799  nrow_block = matrix%matrix_struct%nrow_block
1800  ncol_block = matrix%matrix_struct%ncol_block
1801 
1802  nrow_local = matrix%matrix_struct%nrow_locals(myprow)
1803  ncol_local = matrix%matrix_struct%ncol_locals(mypcol)
1804 
1805  a => work%local_data
1806  a_sp => work%local_data_sp
1807  desca(:) = work%matrix_struct%descriptor(:)
1808  c => matrix%local_data
1809  c_sp => matrix%local_data_sp
1810  descc(:) = matrix%matrix_struct%descriptor(:)
1811 
1812  DO icol_local = 1, ncol_local
1813  icol_global = indxl2g(icol_local, ncol_block, mypcol, &
1814  matrix%matrix_struct%first_p_pos(2), npcol)
1815  DO irow_local = 1, nrow_local
1816  irow_global = indxl2g(irow_local, nrow_block, myprow, &
1817  matrix%matrix_struct%first_p_pos(1), nprow)
1818  IF (irow_global > icol_global) THEN
1819  IF (matrix%use_sp) THEN
1820  c_sp(irow_local, icol_local) = 0.0_sp
1821  ELSE
1822  c(irow_local, icol_local) = 0.0_dp
1823  END IF
1824  ELSE IF (irow_global == icol_global) THEN
1825  IF (matrix%use_sp) THEN
1826  c_sp(irow_local, icol_local) = 0.5_sp*c_sp(irow_local, icol_local)
1827  ELSE
1828  c(irow_local, icol_local) = 0.5_dp*c(irow_local, icol_local)
1829  END IF
1830  END IF
1831  END DO
1832  END DO
1833 
1834  DO icol_local = 1, ncol_local
1835  DO irow_local = 1, nrow_local
1836  IF (matrix%use_sp) THEN
1837  a_sp(irow_local, icol_local) = c_sp(irow_local, icol_local)
1838  ELSE
1839  a(irow_local, icol_local) = c(irow_local, icol_local)
1840  END IF
1841  END DO
1842  END DO
1843 
1844  IF (matrix%use_sp) THEN
1845  CALL pstran(nrow_global, ncol_global, 1.0_sp, a_sp(1, 1), 1, 1, desca, 1.0_sp, c_sp(1, 1), 1, 1, descc)
1846  ELSE
1847  CALL pdtran(nrow_global, ncol_global, 1.0_dp, a(1, 1), 1, 1, desca, 1.0_dp, c(1, 1), 1, 1, descc)
1848  END IF
1849 
1850 #else
1851 
1852  a => matrix%local_data
1853  a_sp => matrix%local_data_sp
1854  DO irow_global = 1, nrow_global
1855  DO icol_global = irow_global + 1, ncol_global
1856  IF (matrix%use_sp) THEN
1857  a_sp(icol_global, irow_global) = a_sp(irow_global, icol_global)
1858  ELSE
1859  a(icol_global, irow_global) = a(irow_global, icol_global)
1860  END IF
1861  END DO
1862  END DO
1863 
1864 #endif
1865  CALL timestop(handle)
1866 
1867  END SUBROUTINE cp_fm_upper_to_full
1868 
1869 ! **************************************************************************************************
1870 !> \brief scales column i of matrix a with scaling(i)
1871 !> \param matrixa ...
1872 !> \param scaling : an array used for scaling the columns,
1873 !> SIZE(scaling) determines the number of columns to be scaled
1874 !> \author Joost VandeVondele
1875 !> \note
1876 !> this is very useful as a first step in the computation of C = sum_i alpha_i A_i transpose (A_i)
1877 !> that is a rank-k update (cp_fm_syrk , cp_sm_plus_fm_fm_t)
1878 !> this procedure can be up to 20 times faster than calling cp_fm_syrk n times
1879 !> where every vector has a different prefactor
1880 ! **************************************************************************************************
1881  SUBROUTINE cp_fm_column_scale(matrixa, scaling)
1882  TYPE(cp_fm_type), INTENT(IN) :: matrixa
1883  REAL(kind=dp), DIMENSION(:), INTENT(in) :: scaling
1884 
1885  INTEGER :: k, mypcol, myprow, n, ncol_global, &
1886  npcol, nprow
1887  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1888  REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
1889 #if defined(__SCALAPACK)
1890  INTEGER :: icol_global, icol_local, &
1891  ipcol, iprow, irow_local
1892 #else
1893  INTEGER :: i
1894 #endif
1895 
1896  myprow = matrixa%matrix_struct%context%mepos(1)
1897  mypcol = matrixa%matrix_struct%context%mepos(2)
1898  nprow = matrixa%matrix_struct%context%num_pe(1)
1899  npcol = matrixa%matrix_struct%context%num_pe(2)
1900 
1901  ncol_global = matrixa%matrix_struct%ncol_global
1902 
1903  a => matrixa%local_data
1904  a_sp => matrixa%local_data_sp
1905  IF (matrixa%use_sp) THEN
1906  n = SIZE(a_sp, 1)
1907  ELSE
1908  n = SIZE(a, 1)
1909  END IF
1910  k = min(SIZE(scaling), ncol_global)
1911 
1912 #if defined(__SCALAPACK)
1913 
1914  DO icol_global = 1, k
1915  CALL infog2l(1, icol_global, matrixa%matrix_struct%descriptor, &
1916  nprow, npcol, myprow, mypcol, &
1917  irow_local, icol_local, iprow, ipcol)
1918  IF ((ipcol == mypcol)) THEN
1919  IF (matrixa%use_sp) THEN
1920  CALL sscal(n, real(scaling(icol_global), sp), a_sp(:, icol_local), 1)
1921  ELSE
1922  CALL dscal(n, scaling(icol_global), a(:, icol_local), 1)
1923  END IF
1924  END IF
1925  END DO
1926 #else
1927  DO i = 1, k
1928  IF (matrixa%use_sp) THEN
1929  CALL sscal(n, real(scaling(i), sp), a_sp(:, i), 1)
1930  ELSE
1931  CALL dscal(n, scaling(i), a(:, i), 1)
1932  END IF
1933  END DO
1934 #endif
1935  END SUBROUTINE cp_fm_column_scale
1936 
1937 ! **************************************************************************************************
1938 !> \brief scales row i of matrix a with scaling(i)
1939 !> \param matrixa ...
1940 !> \param scaling : an array used for scaling the columns,
1941 !> \author JGH
1942 !> \note
1943 ! **************************************************************************************************
1944  SUBROUTINE cp_fm_row_scale(matrixa, scaling)
1945  TYPE(cp_fm_type), INTENT(IN) :: matrixa
1946  REAL(kind=dp), DIMENSION(:), INTENT(in) :: scaling
1947 
1948  INTEGER :: n, m, nrow_global, nrow_local, ncol_local
1949  INTEGER, DIMENSION(:), POINTER :: row_indices
1950  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1951  REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
1952 #if defined(__SCALAPACK)
1953  INTEGER :: irow_global, icol, irow
1954 #else
1955  INTEGER :: j
1956 #endif
1957 
1958  CALL cp_fm_get_info(matrixa, row_indices=row_indices, nrow_global=nrow_global, &
1959  nrow_local=nrow_local, ncol_local=ncol_local)
1960  cpassert(SIZE(scaling) == nrow_global)
1961 
1962  a => matrixa%local_data
1963  a_sp => matrixa%local_data_sp
1964  IF (matrixa%use_sp) THEN
1965  n = SIZE(a_sp, 1)
1966  m = SIZE(a_sp, 2)
1967  ELSE
1968  n = SIZE(a, 1)
1969  m = SIZE(a, 2)
1970  END IF
1971 
1972 #if defined(__SCALAPACK)
1973  DO icol = 1, ncol_local
1974  IF (matrixa%use_sp) THEN
1975  DO irow = 1, nrow_local
1976  irow_global = row_indices(irow)
1977  a(irow, icol) = real(scaling(irow_global), dp)*a(irow, icol)
1978  END DO
1979  ELSE
1980  DO irow = 1, nrow_local
1981  irow_global = row_indices(irow)
1982  a(irow, icol) = scaling(irow_global)*a(irow, icol)
1983  END DO
1984  END IF
1985  END DO
1986 #else
1987  IF (matrixa%use_sp) THEN
1988  DO j = 1, m
1989  a_sp(1:n, j) = real(scaling(1:n), sp)*a_sp(1:n, j)
1990  END DO
1991  ELSE
1992  DO j = 1, m
1993  a(1:n, j) = scaling(1:n)*a(1:n, j)
1994  END DO
1995  END IF
1996 #endif
1997  END SUBROUTINE cp_fm_row_scale
1998 ! **************************************************************************************************
1999 !> \brief Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix
2000 !> \param matrix_a the matrix to invert
2001 !> \param matrix_inverse the inverse of matrix_a
2002 !> \param det_a the determinant of matrix_a
2003 !> \param eps_svd optional parameter to active SVD based inversion, singular values below eps_svd
2004 !> are screened
2005 !> \param eigval optionally return matrix eigenvalues/singular values
2006 !> \par History
2007 !> note of Jan Wilhelm (12.2015)
2008 !> - computation of determinant corrected
2009 !> - determinant only computed if det_a is present
2010 !> 12.2016 added option to use SVD instead of LU [Nico Holmberg]
2011 !> - Use cp_fm_get diag instead of n times cp_fm_get_element (A. Bussy)
2012 !> \author Florian Schiffmann(02.2007)
2013 ! **************************************************************************************************
2014  SUBROUTINE cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
2015 
2016  TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_inverse
2017  REAL(kind=dp), INTENT(OUT), OPTIONAL :: det_a
2018  REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_svd
2019  REAL(kind=dp), DIMENSION(:), POINTER, &
2020  INTENT(INOUT), OPTIONAL :: eigval
2021 
2022  INTEGER :: n
2023  INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
2024  REAL(kind=dp) :: determinant, my_eps_svd
2025  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2026  TYPE(cp_fm_type) :: matrix_lu
2027 
2028 #if defined(__SCALAPACK)
2029  TYPE(cp_fm_type) :: u, vt, sigma, inv_sigma_ut
2030  TYPE(mp_comm_type) :: group
2031  INTEGER :: i, info, liwork, lwork, exponent_of_minus_one
2032  INTEGER, DIMENSION(9) :: desca
2033  LOGICAL :: quenched
2034  REAL(kind=dp) :: alpha, beta
2035  REAL(kind=dp), DIMENSION(:), POINTER :: diag
2036  REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: work
2037 #else
2038  LOGICAL :: sign
2039  REAL(kind=dp) :: eps1
2040 #endif
2041 
2042  my_eps_svd = 0.0_dp
2043  IF (PRESENT(eps_svd)) my_eps_svd = eps_svd
2044 
2045  CALL cp_fm_create(matrix=matrix_lu, &
2046  matrix_struct=matrix_a%matrix_struct, &
2047  name="A_lu"//trim(adjustl(cp_to_string(1)))//"MATRIX")
2048  CALL cp_fm_to_fm(matrix_a, matrix_lu)
2049 
2050  a => matrix_lu%local_data
2051  n = matrix_lu%matrix_struct%nrow_global
2052  ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
2053  ipivot(:) = 0
2054 #if defined(__SCALAPACK)
2055  IF (my_eps_svd .EQ. 0.0_dp) THEN
2056  ! Use LU decomposition
2057  lwork = 3*n
2058  liwork = 3*n
2059  desca(:) = matrix_lu%matrix_struct%descriptor(:)
2060  CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
2061 
2062  IF (PRESENT(det_a) .OR. PRESENT(eigval)) THEN
2063 
2064  ALLOCATE (diag(n))
2065  diag(:) = 0.0_dp
2066  CALL cp_fm_get_diag(matrix_lu, diag)
2067 
2068  exponent_of_minus_one = 0
2069  determinant = 1.0_dp
2070  DO i = 1, n
2071  determinant = determinant*diag(i)
2072  IF (ipivot(i) .NE. i) THEN
2073  exponent_of_minus_one = exponent_of_minus_one + 1
2074  END IF
2075  END DO
2076  IF (PRESENT(eigval)) THEN
2077  cpassert(.NOT. ASSOCIATED(eigval))
2078  ALLOCATE (eigval(n))
2079  eigval(:) = diag
2080  END IF
2081  DEALLOCATE (diag)
2082 
2083  group = matrix_lu%matrix_struct%para_env
2084  CALL group%sum(exponent_of_minus_one)
2085 
2086  determinant = determinant*(-1.0_dp)**exponent_of_minus_one
2087 
2088  END IF
2089 
2090  alpha = 0.0_dp
2091  beta = 1.0_dp
2092  CALL cp_fm_set_all(matrix_inverse, alpha, beta)
2093  CALL pdgetrs('N', n, n, matrix_lu%local_data, 1, 1, desca, ipivot, matrix_inverse%local_data, 1, 1, desca, info)
2094  ELSE
2095  ! Use singular value decomposition
2096  CALL cp_fm_create(matrix=u, &
2097  matrix_struct=matrix_a%matrix_struct, &
2098  name="LEFT_SINGULAR_MATRIX")
2099  CALL cp_fm_set_all(u, alpha=0.0_dp)
2100  CALL cp_fm_create(matrix=vt, &
2101  matrix_struct=matrix_a%matrix_struct, &
2102  name="RIGHT_SINGULAR_MATRIX")
2103  CALL cp_fm_set_all(vt, alpha=0.0_dp)
2104  ALLOCATE (diag(n))
2105  diag(:) = 0.0_dp
2106  desca(:) = matrix_lu%matrix_struct%descriptor(:)
2107  ALLOCATE (work(1))
2108  ! Workspace query
2109  lwork = -1
2110  CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
2111  1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
2112  lwork = int(work(1))
2113  DEALLOCATE (work)
2114  ALLOCATE (work(lwork))
2115  ! SVD
2116  CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
2117  1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
2118  ! info == n+1 implies homogeneity error when the number of procs is large
2119  ! this likely isnt a problem, but maybe we should handle it separately
2120  IF (info /= 0 .AND. info /= n + 1) &
2121  cpabort("Singular value decomposition of matrix failed.")
2122  ! (Pseudo)inverse and (pseudo)determinant
2123  CALL cp_fm_create(matrix=sigma, &
2124  matrix_struct=matrix_a%matrix_struct, &
2125  name="SINGULAR_VALUE_MATRIX")
2126  CALL cp_fm_set_all(sigma, alpha=0.0_dp)
2127  determinant = 1.0_dp
2128  quenched = .false.
2129  IF (PRESENT(eigval)) THEN
2130  cpassert(.NOT. ASSOCIATED(eigval))
2131  ALLOCATE (eigval(n))
2132  eigval(:) = diag
2133  END IF
2134  DO i = 1, n
2135  IF (diag(i) < my_eps_svd) THEN
2136  diag(i) = 0.0_dp
2137  quenched = .true.
2138  ELSE
2139  determinant = determinant*diag(i)
2140  diag(i) = 1.0_dp/diag(i)
2141  END IF
2142  CALL cp_fm_set_element(sigma, i, i, diag(i))
2143  END DO
2144  DEALLOCATE (diag)
2145  IF (quenched) &
2146  CALL cp_warn(__location__, &
2147  "Linear dependencies were detected in the SVD inversion of matrix "//trim(adjustl(matrix_a%name))// &
2148  ". At least one singular value has been quenched.")
2149  ! Sigma^-1 * U^T
2150  CALL cp_fm_create(matrix=inv_sigma_ut, &
2151  matrix_struct=matrix_a%matrix_struct, &
2152  name="SINGULAR_VALUE_MATRIX")
2153  CALL cp_fm_set_all(inv_sigma_ut, alpha=0.0_dp)
2154  CALL pdgemm('N', 'T', n, n, n, 1.0_dp, sigma%local_data, 1, 1, desca, &
2155  u%local_data, 1, 1, desca, 0.0_dp, inv_sigma_ut%local_data, 1, 1, desca)
2156  ! A^-1 = V * (Sigma^-1 * U^T)
2157  CALL cp_fm_set_all(matrix_inverse, alpha=0.0_dp)
2158  CALL pdgemm('T', 'N', n, n, n, 1.0_dp, vt%local_data, 1, 1, desca, &
2159  inv_sigma_ut%local_data, 1, 1, desca, 0.0_dp, matrix_inverse%local_data, 1, 1, desca)
2160  ! Clean up
2161  DEALLOCATE (work)
2162  CALL cp_fm_release(u)
2163  CALL cp_fm_release(vt)
2164  CALL cp_fm_release(sigma)
2165  CALL cp_fm_release(inv_sigma_ut)
2166  END IF
2167 #else
2168  IF (my_eps_svd .EQ. 0.0_dp) THEN
2169  sign = .true.
2170  CALL invert_matrix(matrix_a%local_data, matrix_inverse%local_data, &
2171  eval_error=eps1)
2172  CALL cp_fm_lu_decompose(matrix_lu, determinant, correct_sign=sign)
2173  IF (PRESENT(eigval)) &
2174  CALL cp_abort(__location__, &
2175  "NYI. Eigenvalues not available for return without SCALAPACK.")
2176  ELSE
2177  CALL get_pseudo_inverse_svd(matrix_a%local_data, matrix_inverse%local_data, eps_svd, &
2178  determinant, eigval)
2179  END IF
2180 #endif
2181  CALL cp_fm_release(matrix_lu)
2182  DEALLOCATE (ipivot)
2183  IF (PRESENT(det_a)) det_a = determinant
2184  END SUBROUTINE cp_fm_invert
2185 
2186 ! **************************************************************************************************
2187 !> \brief inverts a triangular matrix
2188 !> \param matrix_a ...
2189 !> \param uplo_tr ...
2190 !> \author MI
2191 ! **************************************************************************************************
2192  SUBROUTINE cp_fm_triangular_invert(matrix_a, uplo_tr)
2193 
2194  TYPE(cp_fm_type), INTENT(IN) :: matrix_a
2195  CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
2196 
2197  CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_triangular_invert'
2198 
2199  CHARACTER :: unit_diag, uplo
2200  INTEGER :: handle, info, ncol_global
2201  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2202 #if defined(__SCALAPACK)
2203  INTEGER, DIMENSION(9) :: desca
2204 #endif
2205 
2206  CALL timeset(routinen, handle)
2207 
2208  unit_diag = 'N'
2209  uplo = 'U'
2210  IF (PRESENT(uplo_tr)) uplo = uplo_tr
2211 
2212  ncol_global = matrix_a%matrix_struct%ncol_global
2213 
2214  a => matrix_a%local_data
2215 
2216 #if defined(__SCALAPACK)
2217 
2218  desca(:) = matrix_a%matrix_struct%descriptor(:)
2219 
2220  CALL pdtrtri(uplo, unit_diag, ncol_global, a(1, 1), 1, 1, desca, info)
2221 
2222 #else
2223  CALL dtrtri(uplo, unit_diag, ncol_global, a(1, 1), ncol_global, info)
2224 #endif
2225 
2226  CALL timestop(handle)
2227  END SUBROUTINE cp_fm_triangular_invert
2228 
2229 ! **************************************************************************************************
2230 !> \brief performs a QR factorization of the input rectangular matrix A or of a submatrix of A
2231 !> the computed upper triangular matrix R is in output in the submatrix sub(A) of size NxN
2232 !> M and M give the dimension of the submatrix that has to be factorized (MxN) with M>N
2233 !> \param matrix_a ...
2234 !> \param matrix_r ...
2235 !> \param nrow_fact ...
2236 !> \param ncol_fact ...
2237 !> \param first_row ...
2238 !> \param first_col ...
2239 !> \author MI
2240 ! **************************************************************************************************
2241  SUBROUTINE cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col)
2242  TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_r
2243  INTEGER, INTENT(IN), OPTIONAL :: nrow_fact, ncol_fact, &
2244  first_row, first_col
2245 
2246  CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_qr_factorization'
2247 
2248  INTEGER :: handle, i, icol, info, irow, &
2249  j, lda, lwork, ncol, &
2250  ndim, nrow
2251  REAL(dp), ALLOCATABLE, DIMENSION(:) :: tau, work
2252  REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: r_mat
2253  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2254 #if defined(__SCALAPACK)
2255  INTEGER, DIMENSION(9) :: desca
2256 #endif
2257 
2258  CALL timeset(routinen, handle)
2259 
2260  ncol = matrix_a%matrix_struct%ncol_global
2261  nrow = matrix_a%matrix_struct%nrow_global
2262  lda = nrow
2263 
2264  a => matrix_a%local_data
2265 
2266  IF (PRESENT(nrow_fact)) nrow = nrow_fact
2267  IF (PRESENT(ncol_fact)) ncol = ncol_fact
2268  irow = 1
2269  IF (PRESENT(first_row)) irow = first_row
2270  icol = 1
2271  IF (PRESENT(first_col)) icol = first_col
2272 
2273  cpassert(nrow >= ncol)
2274  ndim = SIZE(a, 2)
2275 ! ALLOCATE(ipiv(ndim))
2276  ALLOCATE (tau(ndim))
2277 
2278 #if defined(__SCALAPACK)
2279 
2280  desca(:) = matrix_a%matrix_struct%descriptor(:)
2281 
2282  lwork = -1
2283  ALLOCATE (work(2*ndim))
2284  CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
2285  lwork = int(work(1))
2286  DEALLOCATE (work)
2287  ALLOCATE (work(lwork))
2288  CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
2289 
2290 #else
2291  lwork = -1
2292  ALLOCATE (work(2*ndim))
2293  CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
2294  lwork = int(work(1))
2295  DEALLOCATE (work)
2296  ALLOCATE (work(lwork))
2297  CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
2298 
2299 #endif
2300 
2301  ALLOCATE (r_mat(ncol, ncol))
2302  CALL cp_fm_get_submatrix(matrix_a, r_mat, 1, 1, ncol, ncol)
2303  DO i = 1, ncol
2304  DO j = i + 1, ncol
2305  r_mat(j, i) = 0.0_dp
2306  END DO
2307  END DO
2308  CALL cp_fm_set_submatrix(matrix_r, r_mat, 1, 1, ncol, ncol)
2309 
2310  DEALLOCATE (tau, work, r_mat)
2311 
2312  CALL timestop(handle)
2313 
2314  END SUBROUTINE cp_fm_qr_factorization
2315 
2316 ! **************************************************************************************************
2317 !> \brief computes the the solution to A*b=A_general using lu decomposition
2318 !> pay attention, both matrices are overwritten, a_general contais the result
2319 !> \param matrix_a ...
2320 !> \param general_a ...
2321 !> \author Florian Schiffmann
2322 ! **************************************************************************************************
2323  SUBROUTINE cp_fm_solve(matrix_a, general_a)
2324  TYPE(cp_fm_type), INTENT(IN) :: matrix_a, general_a
2325 
2326  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_solve'
2327 
2328  INTEGER :: handle, info, n
2329  INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
2330  REAL(kind=dp), DIMENSION(:, :), POINTER :: a, a_general
2331 #if defined(__SCALAPACK)
2332  INTEGER, DIMENSION(9) :: desca, descb
2333 #else
2334  INTEGER :: lda, ldb
2335 #endif
2336 
2337  CALL timeset(routinen, handle)
2338 
2339  a => matrix_a%local_data
2340  a_general => general_a%local_data
2341  n = matrix_a%matrix_struct%nrow_global
2342  ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
2343 
2344 #if defined(__SCALAPACK)
2345  desca(:) = matrix_a%matrix_struct%descriptor(:)
2346  descb(:) = general_a%matrix_struct%descriptor(:)
2347  CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
2348  CALL pdgetrs("N", n, n, a, 1, 1, desca, ipivot, a_general, &
2349  1, 1, descb, info)
2350 
2351 #else
2352  lda = SIZE(a, 1)
2353  ldb = SIZE(a_general, 1)
2354  CALL dgetrf(n, n, a, lda, ipivot, info)
2355  CALL dgetrs("N", n, n, a, lda, ipivot, a_general, ldb, info)
2356 
2357 #endif
2358  ! info is allowed to be zero
2359  ! this does just signal a zero diagonal element
2360  DEALLOCATE (ipivot)
2361  CALL timestop(handle)
2362  END SUBROUTINE
2363 
2364 ! **************************************************************************************************
2365 !> \brief Convenience function. Computes the matrix multiplications needed
2366 !> for the multiplication of complex matrices.
2367 !> C = beta * C + alpha * ( A ** transa ) * ( B ** transb )
2368 !> \param transa : 'N' -> normal 'T' -> transpose
2369 !> alpha,beta :: can be 0.0_dp and 1.0_dp
2370 !> \param transb ...
2371 !> \param m ...
2372 !> \param n ...
2373 !> \param k ...
2374 !> \param alpha ...
2375 !> \param A_re m x k matrix ( ! for transa = 'N'), real part
2376 !> \param A_im m x k matrix ( ! for transa = 'N'), imaginary part
2377 !> \param B_re k x n matrix ( ! for transa = 'N'), real part
2378 !> \param B_im k x n matrix ( ! for transa = 'N'), imaginary part
2379 !> \param beta ...
2380 !> \param C_re m x n matrix, real part
2381 !> \param C_im m x n matrix, imaginary part
2382 !> \param a_first_col ...
2383 !> \param a_first_row ...
2384 !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
2385 !> \param b_first_row ...
2386 !> \param c_first_col ...
2387 !> \param c_first_row ...
2388 !> \author Samuel Andermatt
2389 !> \note
2390 !> C should have no overlap with A, B
2391 ! **************************************************************************************************
2392  SUBROUTINE cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, &
2393  C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, &
2394  c_first_row)
2395  CHARACTER(LEN=1), INTENT(IN) :: transa, transb
2396  INTEGER, INTENT(IN) :: m, n, k
2397  REAL(kind=dp), INTENT(IN) :: alpha
2398  TYPE(cp_fm_type), INTENT(IN) :: a_re, a_im, b_re, b_im
2399  REAL(kind=dp), INTENT(IN) :: beta
2400  TYPE(cp_fm_type), INTENT(IN) :: c_re, c_im
2401  INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, b_first_col, &
2402  b_first_row, c_first_col, c_first_row
2403 
2404  CHARACTER(len=*), PARAMETER :: routinen = 'cp_complex_fm_gemm'
2405 
2406  INTEGER :: handle
2407 
2408  CALL timeset(routinen, handle)
2409 
2410  CALL cp_fm_gemm(transa, transb, m, n, k, alpha, a_re, b_re, beta, c_re, &
2411  a_first_col=a_first_col, &
2412  a_first_row=a_first_row, &
2413  b_first_col=b_first_col, &
2414  b_first_row=b_first_row, &
2415  c_first_col=c_first_col, &
2416  c_first_row=c_first_row)
2417  CALL cp_fm_gemm(transa, transb, m, n, k, -alpha, a_im, b_im, 1.0_dp, c_re, &
2418  a_first_col=a_first_col, &
2419  a_first_row=a_first_row, &
2420  b_first_col=b_first_col, &
2421  b_first_row=b_first_row, &
2422  c_first_col=c_first_col, &
2423  c_first_row=c_first_row)
2424  CALL cp_fm_gemm(transa, transb, m, n, k, alpha, a_re, b_im, beta, c_im, &
2425  a_first_col=a_first_col, &
2426  a_first_row=a_first_row, &
2427  b_first_col=b_first_col, &
2428  b_first_row=b_first_row, &
2429  c_first_col=c_first_col, &
2430  c_first_row=c_first_row)
2431  CALL cp_fm_gemm(transa, transb, m, n, k, alpha, a_im, b_re, 1.0_dp, c_im, &
2432  a_first_col=a_first_col, &
2433  a_first_row=a_first_row, &
2434  b_first_col=b_first_col, &
2435  b_first_row=b_first_row, &
2436  c_first_col=c_first_col, &
2437  c_first_row=c_first_row)
2438 
2439  CALL timestop(handle)
2440 
2441  END SUBROUTINE cp_complex_fm_gemm
2442 
2443 ! **************************************************************************************************
2444 !> \brief inverts a matrix using LU decomposition
2445 !> the input matrix will be overwritten
2446 !> \param matrix : input a general square non-singular matrix, outputs its inverse
2447 !> \param info_out : optional, if present outputs the info from (p)zgetri
2448 !> \author Lianheng Tong
2449 ! **************************************************************************************************
2450  SUBROUTINE cp_fm_lu_invert(matrix, info_out)
2451  TYPE(cp_fm_type), INTENT(IN) :: matrix
2452  INTEGER, INTENT(OUT), OPTIONAL :: info_out
2453 
2454  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_lu_invert'
2455 
2456  INTEGER :: nrows_global, handle, info, lwork
2457  INTEGER, DIMENSION(:), ALLOCATABLE :: ipivot
2458  REAL(kind=dp), DIMENSION(:, :), POINTER :: mat
2459  REAL(kind=sp), DIMENSION(:, :), POINTER :: mat_sp
2460  REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: work
2461  REAL(kind=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2462 #if defined(__SCALAPACK)
2463  INTEGER :: liwork
2464  INTEGER, DIMENSION(9) :: desca
2465  INTEGER, DIMENSION(:), ALLOCATABLE :: iwork
2466 #else
2467  INTEGER :: lda
2468 #endif
2469 
2470  CALL timeset(routinen, handle)
2471 
2472  mat => matrix%local_data
2473  mat_sp => matrix%local_data_sp
2474  nrows_global = matrix%matrix_struct%nrow_global
2475  cpassert(nrows_global .EQ. matrix%matrix_struct%ncol_global)
2476  ALLOCATE (ipivot(nrows_global))
2477  ! do LU decomposition
2478 #if defined(__SCALAPACK)
2479  desca = matrix%matrix_struct%descriptor
2480  IF (matrix%use_sp) THEN
2481  CALL psgetrf(nrows_global, nrows_global, &
2482  mat_sp, 1, 1, desca, ipivot, info)
2483  ELSE
2484  CALL pdgetrf(nrows_global, nrows_global, &
2485  mat, 1, 1, desca, ipivot, info)
2486  END IF
2487 #else
2488  lda = SIZE(mat, 1)
2489  IF (matrix%use_sp) THEN
2490  CALL sgetrf(nrows_global, nrows_global, &
2491  mat_sp, lda, ipivot, info)
2492  ELSE
2493  CALL dgetrf(nrows_global, nrows_global, &
2494  mat, lda, ipivot, info)
2495  END IF
2496 #endif
2497  IF (info /= 0) THEN
2498  CALL cp_abort(__location__, "LU decomposition has failed")
2499  END IF
2500  ! do inversion
2501  IF (matrix%use_sp) THEN
2502  ALLOCATE (work(1))
2503  ELSE
2504  ALLOCATE (work_sp(1))
2505  END IF
2506 #if defined(__SCALAPACK)
2507  ALLOCATE (iwork(1))
2508  IF (matrix%use_sp) THEN
2509  CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2510  ipivot, work_sp, -1, iwork, -1, info)
2511  lwork = int(work_sp(1))
2512  DEALLOCATE (work_sp)
2513  ALLOCATE (work_sp(lwork))
2514  ELSE
2515  CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2516  ipivot, work, -1, iwork, -1, info)
2517  lwork = int(work(1))
2518  DEALLOCATE (work)
2519  ALLOCATE (work(lwork))
2520  END IF
2521  liwork = int(iwork(1))
2522  DEALLOCATE (iwork)
2523  ALLOCATE (iwork(liwork))
2524  IF (matrix%use_sp) THEN
2525  CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2526  ipivot, work_sp, lwork, iwork, liwork, info)
2527  ELSE
2528  CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2529  ipivot, work, lwork, iwork, liwork, info)
2530  END IF
2531  DEALLOCATE (iwork)
2532 #else
2533  IF (matrix%use_sp) THEN
2534  CALL sgetri(nrows_global, mat_sp, lda, &
2535  ipivot, work_sp, -1, info)
2536  lwork = int(work_sp(1))
2537  DEALLOCATE (work_sp)
2538  ALLOCATE (work_sp(lwork))
2539  CALL sgetri(nrows_global, mat_sp, lda, &
2540  ipivot, work_sp, lwork, info)
2541  ELSE
2542  CALL dgetri(nrows_global, mat, lda, &
2543  ipivot, work, -1, info)
2544  lwork = int(work(1))
2545  DEALLOCATE (work)
2546  ALLOCATE (work(lwork))
2547  CALL dgetri(nrows_global, mat, lda, &
2548  ipivot, work, lwork, info)
2549  END IF
2550 #endif
2551  IF (matrix%use_sp) THEN
2552  DEALLOCATE (work_sp)
2553  ELSE
2554  DEALLOCATE (work)
2555  END IF
2556  DEALLOCATE (ipivot)
2557 
2558  IF (PRESENT(info_out)) THEN
2559  info_out = info
2560  ELSE
2561  IF (info /= 0) &
2562  CALL cp_abort(__location__, "LU inversion has failed")
2563  END IF
2564 
2565  CALL timestop(handle)
2566 
2567  END SUBROUTINE cp_fm_lu_invert
2568 
2569 ! **************************************************************************************************
2570 !> \brief norm of matrix using (p)dlange
2571 !> \param matrix : input a general matrix
2572 !> \param mode : 'M' max abs element value,
2573 !> '1' or 'O' one norm, i.e. maximum column sum
2574 !> 'I' infinity norm, i.e. maximum row sum
2575 !> 'F' or 'E' Frobenius norm, i.e. sqrt of sum of all squares of elements
2576 !> \return : the norm according to mode
2577 !> \author Lianheng Tong
2578 ! **************************************************************************************************
2579  FUNCTION cp_fm_norm(matrix, mode) RESULT(res)
2580  TYPE(cp_fm_type), INTENT(IN) :: matrix
2581  CHARACTER, INTENT(IN) :: mode
2582  REAL(kind=dp) :: res
2583 
2584  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_norm'
2585 
2586  INTEGER :: nrows, ncols, handle, lwork, nrows_local, ncols_local
2587  REAL(kind=sp) :: res_sp
2588  REAL(kind=dp), DIMENSION(:, :), POINTER :: aa
2589  REAL(kind=sp), DIMENSION(:, :), POINTER :: aa_sp
2590  REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: work
2591  REAL(kind=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2592 #if defined(__SCALAPACK)
2593  INTEGER, DIMENSION(9) :: desca
2594 #else
2595  INTEGER :: lda
2596 #endif
2597 
2598  CALL timeset(routinen, handle)
2599 
2600  CALL cp_fm_get_info(matrix=matrix, &
2601  nrow_global=nrows, &
2602  ncol_global=ncols, &
2603  nrow_local=nrows_local, &
2604  ncol_local=ncols_local)
2605  aa => matrix%local_data
2606  aa_sp => matrix%local_data_sp
2607 
2608 #if defined(__SCALAPACK)
2609  desca = matrix%matrix_struct%descriptor
2610  SELECT CASE (mode)
2611  CASE ('M', 'm')
2612  lwork = 1
2613  CASE ('1', 'O', 'o')
2614  lwork = ncols_local
2615  CASE ('I', 'i')
2616  lwork = nrows_local
2617  CASE ('F', 'f', 'E', 'e')
2618  lwork = 1
2619  CASE DEFAULT
2620  cpabort("mode input is not valid")
2621  END SELECT
2622  IF (matrix%use_sp) THEN
2623  ALLOCATE (work_sp(lwork))
2624  res_sp = pslange(mode, nrows, ncols, aa_sp, 1, 1, desca, work_sp)
2625  DEALLOCATE (work_sp)
2626  res = real(res_sp, kind=dp)
2627  ELSE
2628  ALLOCATE (work(lwork))
2629  res = pdlange(mode, nrows, ncols, aa, 1, 1, desca, work)
2630  DEALLOCATE (work)
2631  END IF
2632 #else
2633  SELECT CASE (mode)
2634  CASE ('M', 'm')
2635  lwork = 1
2636  CASE ('1', 'O', 'o')
2637  lwork = 1
2638  CASE ('I', 'i')
2639  lwork = nrows
2640  CASE ('F', 'f', 'E', 'e')
2641  lwork = 1
2642  CASE DEFAULT
2643  cpabort("mode input is not valid")
2644  END SELECT
2645  IF (matrix%use_sp) THEN
2646  ALLOCATE (work_sp(lwork))
2647  lda = SIZE(aa_sp, 1)
2648  res_sp = slange(mode, nrows, ncols, aa_sp, lda, work_sp)
2649  DEALLOCATE (work_sp)
2650  res = real(res_sp, kind=dp)
2651  ELSE
2652  ALLOCATE (work(lwork))
2653  lda = SIZE(aa, 1)
2654  res = dlange(mode, nrows, ncols, aa, lda, work)
2655  DEALLOCATE (work)
2656  END IF
2657 #endif
2658 
2659  CALL timestop(handle)
2660 
2661  END FUNCTION cp_fm_norm
2662 
2663 ! **************************************************************************************************
2664 !> \brief trace of a matrix using pdlatra
2665 !> \param matrix : input a square matrix
2666 !> \return : the trace
2667 !> \author Lianheng Tong
2668 ! **************************************************************************************************
2669  FUNCTION cp_fm_latra(matrix) RESULT(res)
2670  TYPE(cp_fm_type), INTENT(IN) :: matrix
2671  REAL(kind=dp) :: res
2672 
2673  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_latra'
2674 
2675  INTEGER :: nrows, ncols, handle
2676  REAL(kind=sp) :: res_sp
2677  REAL(kind=dp), DIMENSION(:, :), POINTER :: aa
2678  REAL(kind=sp), DIMENSION(:, :), POINTER :: aa_sp
2679 #if defined(__SCALAPACK)
2680  INTEGER, DIMENSION(9) :: desca
2681 #else
2682  INTEGER :: ii
2683 #endif
2684 
2685  CALL timeset(routinen, handle)
2686 
2687  nrows = matrix%matrix_struct%nrow_global
2688  ncols = matrix%matrix_struct%ncol_global
2689  cpassert(nrows .EQ. ncols)
2690  aa => matrix%local_data
2691  aa_sp => matrix%local_data_sp
2692 
2693 #if defined(__SCALAPACK)
2694  desca = matrix%matrix_struct%descriptor
2695  IF (matrix%use_sp) THEN
2696  res_sp = pslatra(nrows, aa_sp, 1, 1, desca)
2697  res = real(res_sp, kind=dp)
2698  ELSE
2699  res = pdlatra(nrows, aa, 1, 1, desca)
2700  END IF
2701 #else
2702  IF (matrix%use_sp) THEN
2703  res_sp = 0.0_sp
2704  DO ii = 1, nrows
2705  res_sp = res_sp + aa_sp(ii, ii)
2706  END DO
2707  res = real(res_sp, kind=dp)
2708  ELSE
2709  res = 0.0_dp
2710  DO ii = 1, nrows
2711  res = res + aa(ii, ii)
2712  END DO
2713  END IF
2714 #endif
2715 
2716  CALL timestop(handle)
2717 
2718  END FUNCTION cp_fm_latra
2719 
2720 ! **************************************************************************************************
2721 !> \brief compute a QR factorization with column pivoting of a M-by-N distributed matrix
2722 !> sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
2723 !> \param matrix : input M-by-N distributed matrix sub( A ) which is to be factored
2724 !> \param tau : scalar factors TAU of the elementary reflectors. TAU is tied to the distributed matrix A
2725 !> \param nrow ...
2726 !> \param ncol ...
2727 !> \param first_row ...
2728 !> \param first_col ...
2729 !> \author MI
2730 ! **************************************************************************************************
2731  SUBROUTINE cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
2732 
2733  TYPE(cp_fm_type), INTENT(IN) :: matrix
2734  REAL(kind=dp), DIMENSION(:), POINTER :: tau
2735  INTEGER, INTENT(IN) :: nrow, ncol, first_row, first_col
2736 
2737  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_pdgeqpf'
2738 
2739  INTEGER :: handle
2740  INTEGER :: info, lwork
2741  INTEGER, ALLOCATABLE, DIMENSION(:) :: ipiv
2742  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2743  REAL(kind=dp), DIMENSION(:), POINTER :: work
2744 #if defined(__SCALAPACK)
2745  INTEGER, DIMENSION(9) :: descc
2746 #else
2747  INTEGER :: lda
2748 #endif
2749 
2750  CALL timeset(routinen, handle)
2751 
2752  a => matrix%local_data
2753  lwork = -1
2754  ALLOCATE (work(2*nrow))
2755  ALLOCATE (ipiv(ncol))
2756  info = 0
2757 
2758 #if defined(__SCALAPACK)
2759  descc(:) = matrix%matrix_struct%descriptor(:)
2760  ! Call SCALAPACK routine to get optimal work dimension
2761  CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2762  lwork = int(work(1))
2763  DEALLOCATE (work)
2764  ALLOCATE (work(lwork))
2765  tau = 0.0_dp
2766  ipiv = 0
2767 
2768  ! Call SCALAPACK routine to get QR decomposition of CTs
2769  CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2770 #else
2771  cpassert(first_row == 1 .AND. first_col == 1)
2772  lda = SIZE(a, 1)
2773  CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2774  lwork = int(work(1))
2775  DEALLOCATE (work)
2776  ALLOCATE (work(lwork))
2777  tau = 0.0_dp
2778  ipiv = 0
2779  CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2780 #endif
2781  cpassert(info == 0)
2782 
2783  DEALLOCATE (work)
2784  DEALLOCATE (ipiv)
2785 
2786  CALL timestop(handle)
2787 
2788  END SUBROUTINE cp_fm_pdgeqpf
2789 
2790 ! **************************************************************************************************
2791 !> \brief generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1)
2792 !> with orthonormal columns, which is defined as the first N columns of a product of K
2793 !> elementary reflectors of order M
2794 !> \param matrix : On entry, the j-th column must contain the vector which defines the elementary reflector
2795 !> as returned from PDGEQRF
2796 !> On exit it contains the M-by-N distributed matrix Q
2797 !> \param tau : contains the scalar factors TAU of elementary reflectors as returned by PDGEQRF
2798 !> \param nrow ...
2799 !> \param first_row ...
2800 !> \param first_col ...
2801 !> \author MI
2802 ! **************************************************************************************************
2803  SUBROUTINE cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
2804 
2805  TYPE(cp_fm_type), INTENT(IN) :: matrix
2806  REAL(kind=dp), DIMENSION(:), POINTER :: tau
2807  INTEGER, INTENT(IN) :: nrow, first_row, first_col
2808 
2809  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_pdorgqr'
2810 
2811  INTEGER :: handle
2812  INTEGER :: info, lwork
2813  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2814  REAL(kind=dp), DIMENSION(:), POINTER :: work
2815 #if defined(__SCALAPACK)
2816  INTEGER, DIMENSION(9) :: descc
2817 #else
2818  INTEGER :: lda
2819 #endif
2820 
2821  CALL timeset(routinen, handle)
2822 
2823  a => matrix%local_data
2824  lwork = -1
2825  ALLOCATE (work(2*nrow))
2826  info = 0
2827 
2828 #if defined(__SCALAPACK)
2829  descc(:) = matrix%matrix_struct%descriptor(:)
2830 
2831  CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2832  cpassert(info == 0)
2833  lwork = int(work(1))
2834  DEALLOCATE (work)
2835  ALLOCATE (work(lwork))
2836 
2837  ! Call SCALAPACK routine to get Q
2838  CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2839 #else
2840  cpassert(first_row == 1 .AND. first_col == 1)
2841  lda = SIZE(a, 1)
2842  CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2843  lwork = int(work(1))
2844  DEALLOCATE (work)
2845  ALLOCATE (work(lwork))
2846  CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2847 #endif
2848  cpassert(info == 0)
2849 
2850  DEALLOCATE (work)
2851  CALL timestop(handle)
2852 
2853  END SUBROUTINE cp_fm_pdorgqr
2854 
2855 ! **************************************************************************************************
2856 !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
2857 !> \param cs cosine of the rotation angle
2858 !> \param sn sinus of the rotation angle
2859 !> \param irow ...
2860 !> \param jrow ...
2861 !> \author Ole Schuett
2862 ! **************************************************************************************************
2863  SUBROUTINE cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
2864  TYPE(cp_fm_type), INTENT(IN) :: matrix
2865  INTEGER, INTENT(IN) :: irow, jrow
2866  REAL(dp), INTENT(IN) :: cs, sn
2867 
2868  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_rot_rows'
2869  INTEGER :: handle, nrow, ncol
2870 
2871 #if defined(__SCALAPACK)
2872  INTEGER :: info, lwork
2873  INTEGER, DIMENSION(9) :: desc
2874  REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2875 #endif
2876 
2877  CALL timeset(routinen, handle)
2878  CALL cp_fm_get_info(matrix, nrow_global=nrow, ncol_global=ncol)
2879 
2880 #if defined(__SCALAPACK)
2881  lwork = 2*ncol + 1
2882  ALLOCATE (work(lwork))
2883  desc(:) = matrix%matrix_struct%descriptor(:)
2884  CALL pdrot(ncol, &
2885  matrix%local_data(1, 1), irow, 1, desc, ncol, &
2886  matrix%local_data(1, 1), jrow, 1, desc, ncol, &
2887  cs, sn, work, lwork, info)
2888  cpassert(info == 0)
2889  DEALLOCATE (work)
2890 #else
2891  CALL drot(ncol, matrix%local_data(irow, 1), ncol, matrix%local_data(jrow, 1), ncol, cs, sn)
2892 #endif
2893 
2894  CALL timestop(handle)
2895  END SUBROUTINE cp_fm_rot_rows
2896 
2897 ! **************************************************************************************************
2898 !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
2899 !> \param cs cosine of the rotation angle
2900 !> \param sn sinus of the rotation angle
2901 !> \param icol ...
2902 !> \param jcol ...
2903 !> \author Ole Schuett
2904 ! **************************************************************************************************
2905  SUBROUTINE cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
2906  TYPE(cp_fm_type), INTENT(IN) :: matrix
2907  INTEGER, INTENT(IN) :: icol, jcol
2908  REAL(dp), INTENT(IN) :: cs, sn
2909 
2910  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_rot_cols'
2911  INTEGER :: handle, nrow, ncol
2912 
2913 #if defined(__SCALAPACK)
2914  INTEGER :: info, lwork
2915  INTEGER, DIMENSION(9) :: desc
2916  REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2917 #endif
2918 
2919  CALL timeset(routinen, handle)
2920  CALL cp_fm_get_info(matrix, nrow_global=nrow, ncol_global=ncol)
2921 
2922 #if defined(__SCALAPACK)
2923  lwork = 2*nrow + 1
2924  ALLOCATE (work(lwork))
2925  desc(:) = matrix%matrix_struct%descriptor(:)
2926  CALL pdrot(nrow, &
2927  matrix%local_data(1, 1), 1, icol, desc, 1, &
2928  matrix%local_data(1, 1), 1, jcol, desc, 1, &
2929  cs, sn, work, lwork, info)
2930  cpassert(info == 0)
2931  DEALLOCATE (work)
2932 #else
2933  CALL drot(nrow, matrix%local_data(1, icol), 1, matrix%local_data(1, jcol), 1, cs, sn)
2934 #endif
2935 
2936  CALL timestop(handle)
2937  END SUBROUTINE cp_fm_rot_cols
2938 
2939 ! **************************************************************************************************
2940 !> \brief Orthonormalizes selected rows and columns of a full matrix, matrix_a
2941 !> \param matrix_a ...
2942 !> \param B ...
2943 !> \param nrows number of rows of matrix_a, optional, defaults to size(matrix_a,1)
2944 !> \param ncols number of columns of matrix_a, optional, defaults to size(matrix_a, 2)
2945 !> \param start_row starting index of rows, optional, defaults to 1
2946 !> \param start_col starting index of columns, optional, defaults to 1
2947 !> \param do_norm ...
2948 !> \param do_print ...
2949 ! **************************************************************************************************
2950  SUBROUTINE cp_fm_gram_schmidt_orthonorm(matrix_a, B, nrows, ncols, start_row, start_col, &
2951  do_norm, do_print)
2952 
2953  TYPE(cp_fm_type), INTENT(IN) :: matrix_a
2954  REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: b
2955  INTEGER, INTENT(IN), OPTIONAL :: nrows, ncols, start_row, start_col
2956  LOGICAL, INTENT(IN), OPTIONAL :: do_norm, do_print
2957 
2958  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_Gram_Schmidt_orthonorm'
2959 
2960  INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
2961  j_col, ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, &
2962  start_col_local, start_row_global, start_row_local, this_col, unit_nr
2963  INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
2964  LOGICAL :: my_do_norm, my_do_print
2965  REAL(kind=dp) :: norm
2966  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2967 
2968  CALL timeset(routinen, handle)
2969 
2970  my_do_norm = .true.
2971  IF (PRESENT(do_norm)) my_do_norm = do_norm
2972 
2973  my_do_print = .false.
2974  IF (PRESENT(do_print) .AND. (my_do_norm)) my_do_print = do_print
2975 
2976  unit_nr = -1
2977  IF (my_do_print) THEN
2978  unit_nr = cp_logger_get_default_unit_nr()
2979  IF (unit_nr < 1) my_do_print = .false.
2980  END IF
2981 
2982  IF (SIZE(b) /= 0) THEN
2983  IF (PRESENT(nrows)) THEN
2984  nrow_global = nrows
2985  ELSE
2986  nrow_global = SIZE(b, 1)
2987  END IF
2988 
2989  IF (PRESENT(ncols)) THEN
2990  ncol_global = ncols
2991  ELSE
2992  ncol_global = SIZE(b, 2)
2993  END IF
2994 
2995  IF (PRESENT(start_row)) THEN
2996  start_row_global = start_row
2997  ELSE
2998  start_row_global = 1
2999  END IF
3000 
3001  IF (PRESENT(start_col)) THEN
3002  start_col_global = start_col
3003  ELSE
3004  start_col_global = 1
3005  END IF
3006 
3007  end_row_global = start_row_global + nrow_global - 1
3008  end_col_global = start_col_global + ncol_global - 1
3009 
3010  CALL cp_fm_get_info(matrix=matrix_a, &
3011  nrow_global=nrow_global, ncol_global=ncol_global, &
3012  nrow_local=nrow_local, ncol_local=ncol_local, &
3013  row_indices=row_indices, col_indices=col_indices)
3014  IF (end_row_global > nrow_global) THEN
3015  end_row_global = nrow_global
3016  END IF
3017  IF (end_col_global > ncol_global) THEN
3018  end_col_global = ncol_global
3019  END IF
3020 
3021  ! find out row/column indices of locally stored matrix elements that
3022  ! needs to be copied.
3023  ! Arrays row_indices and col_indices are assumed to be sorted in
3024  ! ascending order
3025  DO start_row_local = 1, nrow_local
3026  IF (row_indices(start_row_local) >= start_row_global) EXIT
3027  END DO
3028 
3029  DO end_row_local = start_row_local, nrow_local
3030  IF (row_indices(end_row_local) > end_row_global) EXIT
3031  END DO
3032  end_row_local = end_row_local - 1
3033 
3034  DO start_col_local = 1, ncol_local
3035  IF (col_indices(start_col_local) >= start_col_global) EXIT
3036  END DO
3037 
3038  DO end_col_local = start_col_local, ncol_local
3039  IF (col_indices(end_col_local) > end_col_global) EXIT
3040  END DO
3041  end_col_local = end_col_local - 1
3042 
3043  a => matrix_a%local_data
3044 
3045  this_col = col_indices(start_col_local) - start_col_global + 1
3046 
3047  b(:, this_col) = a(:, start_col_local)
3048 
3049  IF (my_do_norm) THEN
3050  norm = sqrt(accurate_dot_product(b(:, this_col), b(:, this_col)))
3051  b(:, this_col) = b(:, this_col)/norm
3052  IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
3053  END IF
3054 
3055  DO i = start_col_local + 1, end_col_local
3056  this_col = col_indices(i) - start_col_global + 1
3057  b(:, this_col) = a(:, i)
3058  DO j = start_col_local, i - 1
3059  j_col = col_indices(j) - start_col_global + 1
3060  b(:, this_col) = b(:, this_col) - &
3061  accurate_dot_product(b(:, j_col), b(:, this_col))* &
3062  b(:, j_col)/accurate_dot_product(b(:, j_col), b(:, j_col))
3063  END DO
3064 
3065  IF (my_do_norm) THEN
3066  norm = sqrt(accurate_dot_product(b(:, this_col), b(:, this_col)))
3067  b(:, this_col) = b(:, this_col)/norm
3068  IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
3069  END IF
3070 
3071  END DO
3072  CALL matrix_a%matrix_struct%para_env%sum(b)
3073  END IF
3074 
3075  CALL timestop(handle)
3076 
3077  END SUBROUTINE cp_fm_gram_schmidt_orthonorm
3078 
3079 ! **************************************************************************************************
3080 !> \brief Cholesky decomposition
3081 !> \param fm_matrix ...
3082 !> \param n ...
3083 ! **************************************************************************************************
3084  SUBROUTINE cp_fm_potrf(fm_matrix, n)
3085  TYPE(cp_fm_type) :: fm_matrix
3086  INTEGER, INTENT(in) :: n
3087 
3088  INTEGER :: info
3089  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
3090  REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
3091 #if defined(__SCALAPACK)
3092  INTEGER, DIMENSION(9) :: desca
3093 #endif
3094 
3095  a => fm_matrix%local_data
3096  a_sp => fm_matrix%local_data_sp
3097 #if defined(__SCALAPACK)
3098  desca(:) = fm_matrix%matrix_struct%descriptor(:)
3099  IF (fm_matrix%use_sp) THEN
3100  CALL pspotrf('U', n, a_sp(1, 1), 1, 1, desca, info)
3101  ELSE
3102  CALL pdpotrf('U', n, a(1, 1), 1, 1, desca, info)
3103  END IF
3104 #else
3105  IF (fm_matrix%use_sp) THEN
3106  CALL spotrf('U', n, a_sp(1, 1), SIZE(a_sp, 1), info)
3107  ELSE
3108  CALL dpotrf('U', n, a(1, 1), SIZE(a, 1), info)
3109  END IF
3110 #endif
3111  IF (info /= 0) &
3112  cpabort("Cholesky decomposition failed. Matrix ill conditioned ?")
3113 
3114  END SUBROUTINE cp_fm_potrf
3115 
3116 ! **************************************************************************************************
3117 !> \brief Invert trianguar matrix
3118 !> \param fm_matrix the matrix to invert (must be an upper triangular matrix)
3119 !> \param n size of the matrix to invert
3120 ! **************************************************************************************************
3121  SUBROUTINE cp_fm_potri(fm_matrix, n)
3122  TYPE(cp_fm_type) :: fm_matrix
3123  INTEGER, INTENT(in) :: n
3124 
3125  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
3126  REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
3127  INTEGER :: info
3128 #if defined(__SCALAPACK)
3129  INTEGER, DIMENSION(9) :: desca
3130 #endif
3131 
3132  a => fm_matrix%local_data
3133  a_sp => fm_matrix%local_data_sp
3134 #if defined(__SCALAPACK)
3135  desca(:) = fm_matrix%matrix_struct%descriptor(:)
3136  IF (fm_matrix%use_sp) THEN
3137  CALL pspotri('U', n, a_sp(1, 1), 1, 1, desca, info)
3138  ELSE
3139  CALL pdpotri('U', n, a(1, 1), 1, 1, desca, info)
3140  END IF
3141 #else
3142  IF (fm_matrix%use_sp) THEN
3143  CALL spotri('U', n, a_sp(1, 1), SIZE(a_sp, 1), info)
3144  ELSE
3145  CALL dpotri('U', n, a(1, 1), SIZE(a, 1), info)
3146  END IF
3147 #endif
3148  cpassert(info == 0)
3149  END SUBROUTINE cp_fm_potri
3150 
3151 ! **************************************************************************************************
3152 !> \brief ...
3153 !> \param fm_matrix ...
3154 !> \param neig ...
3155 !> \param fm_matrixb ...
3156 !> \param fm_matrixout ...
3157 !> \param op ...
3158 !> \param pos ...
3159 !> \param transa ...
3160 ! **************************************************************************************************
3161  SUBROUTINE cp_fm_cholesky_restore(fm_matrix, neig, fm_matrixb, fm_matrixout, op, pos, transa)
3162  TYPE(cp_fm_type) :: fm_matrix
3163  TYPE(cp_fm_type) :: fm_matrixb
3164  TYPE(cp_fm_type) :: fm_matrixout
3165  INTEGER, INTENT(IN) :: neig
3166  CHARACTER(LEN=*), INTENT(IN) :: op
3167  CHARACTER(LEN=*), INTENT(IN) :: pos
3168  CHARACTER(LEN=*), INTENT(IN) :: transa
3169 
3170  REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, outm
3171  REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp, outm_sp
3172  INTEGER :: n, itype
3173  REAL(kind=dp) :: alpha
3174 #if defined(__SCALAPACK)
3175  INTEGER :: i
3176  INTEGER, DIMENSION(9) :: desca, descb, descout
3177 #endif
3178 
3179  ! notice b is the cholesky guy
3180  a => fm_matrix%local_data
3181  b => fm_matrixb%local_data
3182  outm => fm_matrixout%local_data
3183  a_sp => fm_matrix%local_data_sp
3184  b_sp => fm_matrixb%local_data_sp
3185  outm_sp => fm_matrixout%local_data_sp
3186 
3187  n = fm_matrix%matrix_struct%nrow_global
3188  itype = 1
3189 
3190 #if defined(__SCALAPACK)
3191  desca(:) = fm_matrix%matrix_struct%descriptor(:)
3192  descb(:) = fm_matrixb%matrix_struct%descriptor(:)
3193  descout(:) = fm_matrixout%matrix_struct%descriptor(:)
3194  alpha = 1.0_dp
3195  DO i = 1, neig
3196  IF (fm_matrix%use_sp) THEN
3197  CALL pscopy(n, a_sp(1, 1), 1, i, desca, 1, outm_sp(1, 1), 1, i, descout, 1)
3198  ELSE
3199  CALL pdcopy(n, a(1, 1), 1, i, desca, 1, outm(1, 1), 1, i, descout, 1)
3200  END IF
3201  END DO
3202  IF (op .EQ. "SOLVE") THEN
3203  IF (fm_matrix%use_sp) THEN
3204  CALL pstrsm(pos, 'U', transa, 'N', n, neig, real(alpha, sp), b_sp(1, 1), 1, 1, descb, &
3205  outm_sp(1, 1), 1, 1, descout)
3206  ELSE
3207  CALL pdtrsm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), 1, 1, descb, outm(1, 1), 1, 1, descout)
3208  END IF
3209  ELSE
3210  IF (fm_matrix%use_sp) THEN
3211  CALL pstrmm(pos, 'U', transa, 'N', n, neig, real(alpha, sp), b_sp(1, 1), 1, 1, descb, &
3212  outm_sp(1, 1), 1, 1, descout)
3213  ELSE
3214  CALL pdtrmm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), 1, 1, descb, outm(1, 1), 1, 1, descout)
3215  END IF
3216  END IF
3217 #else
3218  alpha = 1.0_dp
3219  IF (fm_matrix%use_sp) THEN
3220  CALL scopy(neig*n, a_sp(1, 1), 1, outm_sp(1, 1), 1)
3221  ELSE
3222  CALL dcopy(neig*n, a(1, 1), 1, outm(1, 1), 1)
3223  END IF
3224  IF (op .EQ. "SOLVE") THEN
3225  IF (fm_matrix%use_sp) THEN
3226  CALL strsm(pos, 'U', transa, 'N', n, neig, real(alpha, sp), b_sp(1, 1), SIZE(b_sp, 1), outm_sp(1, 1), n)
3227  ELSE
3228  CALL dtrsm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), SIZE(b, 1), outm(1, 1), n)
3229  END IF
3230  ELSE
3231  IF (fm_matrix%use_sp) THEN
3232  CALL strmm(pos, 'U', transa, 'N', n, neig, real(alpha, sp), b_sp(1, 1), n, outm_sp(1, 1), n)
3233  ELSE
3234  CALL dtrmm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), n, outm(1, 1), n)
3235  END IF
3236  END IF
3237 #endif
3238 
3239  END SUBROUTINE cp_fm_cholesky_restore
3240 
3241 END MODULE cp_fm_basic_linalg
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
methods related to the blacs parallel environment
Definition: cp_blacs_env.F:15
basic linear algebra operations for full matrices
subroutine, public cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
subroutine, public cp_fm_row_scale(matrixa, scaling)
scales row i of matrix a with scaling(i)
subroutine, public cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
subroutine, public cp_fm_column_scale(matrixa, scaling)
scales column i of matrix a with scaling(i)
subroutine, public cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
subroutine, public cp_fm_potrf(fm_matrix, n)
Cholesky decomposition.
subroutine, public cp_fm_solve(matrix_a, general_a)
computes the the solution to A*b=A_general using lu decomposition pay attention, both matrices are ov...
subroutine, public cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
compute a QR factorization with column pivoting of a M-by-N distributed matrix sub( A ) = A(IA:IA+M-1...
real(kind=dp) function, public cp_fm_frobenius_norm(matrix_a)
computes the Frobenius norm of matrix_a
subroutine, public cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
Convenience function. Computes the matrix multiplications needed for the multiplication of complex ma...
subroutine, public cp_fm_det(matrix_a, det_a)
Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix.
subroutine, public cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col)
performs a QR factorization of the input rectangular matrix A or of a submatrix of A the computed upp...
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
performs a rank-k update of a symmetric matrix_c matrix_c = beta * matrix_c + alpha * matrix_a * tran...
subroutine, public cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
interface to BLACS geadd: matrix_b = beta*matrix_b + alpha*opt(matrix_a) where opt(matrix_a) can be e...
subroutine, public cp_fm_upper_to_full(matrix, work)
given an upper triangular matrix computes the corresponding full matrix
subroutine, public cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
computes the schur product of two matrices c_ij = a_ij * b_ij
real(kind=dp) function, public cp_fm_norm(matrix, mode)
norm of matrix using (p)dlange
subroutine, public cp_fm_cholesky_restore(fm_matrix, neig, fm_matrixb, fm_matrixout, op, pos, transa)
...
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix.
subroutine, public cp_fm_scale(alpha, matrix_a)
scales a matrix matrix_a = alpha * matrix_b
subroutine, public cp_fm_triangular_invert(matrix_a, uplo_tr)
inverts a triangular matrix
subroutine, public cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
computes matrix_c = beta * matrix_c + alpha * matrix_a * matrix_b computes matrix_c = beta * matrix_c...
subroutine, public cp_fm_gram_schmidt_orthonorm(matrix_a, B, nrows, ncols, start_row, start_col, do_norm, do_print)
Orthonormalizes selected rows and columns of a full matrix, matrix_a.
subroutine, public cp_fm_potri(fm_matrix, n)
Invert trianguar matrix.
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
subroutine, public cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1) with orthonormal column...
represent the structure of a full matrix
Definition: cp_fm_struct.F:14
logical function, public cp_fm_struct_equivalent(fmstruct1, fmstruct2)
returns true if the two matrix structures are equivalent, false otherwise.
Definition: cp_fm_struct.F:357
represent a full matrix distributed on many processors
Definition: cp_fm_types.F:15
subroutine, public cp_fm_get_diag(matrix, diag)
returns the diagonal elements of a fm
Definition: cp_fm_types.F:570
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
Definition: cp_fm_types.F:1016
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
Definition: cp_fm_types.F:768
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
Definition: cp_fm_types.F:535
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
Definition: cp_fm_types.F:901
subroutine, public cp_fm_set_element(matrix, irow_global, icol_global, alpha)
sets an element of a matrix
Definition: cp_fm_types.F:700
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
Definition: cp_fm_types.F:167
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Definition: kahan_sum.F:29
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public int_8
Definition: kinds.F:54
integer, parameter, public dp
Definition: kinds.F:34
integer, parameter, public sp
Definition: kinds.F:33
Machine interface based on Fortran 2003 and POSIX.
Definition: machine.F:17
subroutine, public m_memory(mem)
Returns the total amount of memory [bytes] in use, if known, zero otherwise.
Definition: machine.F:332
Collection of simple mathematical functions and subroutines.
Definition: mathlib.F:15
subroutine, public get_pseudo_inverse_svd(a, a_pinverse, rskip, determinant, sval)
returns the pseudoinverse of a real, square matrix using singular value decomposition
Definition: mathlib.F:949
Interface to the message passing library MPI.