(git:d18deda)
Loading...
Searching...
No Matches
cp_fm_basic_linalg.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief basic linear algebra operations for full matrices
10!> \par History
11!> 08.2002 split out of qs_blacs [fawzi]
12!> \author Fawzi Mohamed
13! **************************************************************************************************
17 USE cp_fm_types, ONLY: &
25 USE kinds, ONLY: dp, &
26 int_8, &
27 sp
28 USE machine, ONLY: m_memory
32#include "../base/base_uses.f90"
33
34 IMPLICIT NONE
35 PRIVATE
36
37 LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .true.
38 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_basic_linalg'
39
40 PUBLIC :: cp_fm_scale, & ! scale a matrix
41 cp_fm_scale_and_add, & ! scale and add two matrices
42 cp_fm_geadd, & ! general addition
43 cp_fm_column_scale, & ! scale columns of a matrix
44 cp_fm_row_scale, & ! scale rows of a matrix
45 cp_fm_trace, & ! trace of the transpose(A)*B
46 cp_fm_contracted_trace, & ! sum_{i,...,k} Tr [A(i,...,k)^T * B(i,...,k)]
47 cp_fm_norm, & ! different norms of A
48 cp_fm_schur_product, & ! schur product
49 cp_fm_transpose, & ! transpose a matrix
50 cp_fm_uplo_to_full, & ! symmetrise a triangular matrix
51 cp_fm_syrk, & ! rank k update
52 cp_fm_triangular_multiply, & ! triangular matrix multiply / solve
53 cp_fm_symm, & ! multiply a symmetric with a non-symmetric matrix
54 cp_fm_gemm, & ! multiply two matrices
55 cp_complex_fm_gemm, & ! multiply two complex matrices, represented by non_complex fm matrices
56 cp_fm_invert, & ! computes the inverse and determinant
57 cp_fm_frobenius_norm, & ! frobenius norm
58 cp_fm_triangular_invert, & ! compute the reciprocal of a triangular matrix
59 cp_fm_qr_factorization, & ! compute the QR factorization of a rectangular matrix
60 cp_fm_solve, & ! solves the equation A*B=C A and C are input
61 cp_fm_pdgeqpf, & ! compute a QR factorization with column pivoting of a M-by-N distributed matrix
62 cp_fm_pdorgqr, & ! generates an M-by-N as first N columns of a product of K elementary reflectors
63 cp_fm_potrf, & ! Cholesky decomposition
64 cp_fm_potri, & ! Invert triangular matrix
65 cp_fm_rot_rows, & ! rotates two rows
66 cp_fm_rot_cols, & ! rotates two columns
67 cp_fm_cholesky_restore, & ! apply Cholesky decomposition
68 cp_fm_gram_schmidt_orthonorm, & ! Gram-Schmidt orthonormalization of columns of a full matrix, &
69 cp_fm_det, & ! determinant of a real matrix with correct sign
70 cp_fm_matvec ! matrix-vector multiplication (vector replicated)
71
72 REAL(KIND=dp), EXTERNAL :: dlange, pdlange, pdlatra
73 REAL(KIND=sp), EXTERNAL :: slange, pslange, pslatra
74
75 INTERFACE cp_fm_trace
76 MODULE PROCEDURE cp_fm_trace_a0b0t0
77 MODULE PROCEDURE cp_fm_trace_a1b0t1_a
78 MODULE PROCEDURE cp_fm_trace_a1b0t1_p
79 MODULE PROCEDURE cp_fm_trace_a1b1t1_aa
80 MODULE PROCEDURE cp_fm_trace_a1b1t1_ap
81 MODULE PROCEDURE cp_fm_trace_a1b1t1_pa
82 MODULE PROCEDURE cp_fm_trace_a1b1t1_pp
83 END INTERFACE cp_fm_trace
84
86 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_aa
87 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_ap
88 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pa
89 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pp
90 END INTERFACE cp_fm_contracted_trace
91CONTAINS
92
93! **************************************************************************************************
94!> \brief Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix
95!> \author A. Sinyavskiy (andrey.sinyavskiy@chem.uzh.ch)
96! **************************************************************************************************
97 SUBROUTINE cp_fm_det(matrix_a, det_a)
98
99 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
100 REAL(kind=dp), INTENT(OUT) :: det_a
101 REAL(kind=dp) :: determinant
102 TYPE(cp_fm_type) :: matrix_lu
103 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
104 INTEGER :: n, i, info, p
105 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
106 REAL(kind=dp), DIMENSION(:), POINTER :: diag
107
108#if defined(__parallel)
109 INTEGER :: myprow, nprow, npcol, nrow_local, nrow_block, irow_local
110 INTEGER, DIMENSION(9) :: desca
111#endif
112
113 CALL cp_fm_create(matrix=matrix_lu, &
114 matrix_struct=matrix_a%matrix_struct, &
115 name="A_lu"//trim(adjustl(cp_to_string(1)))//"MATRIX")
116 CALL cp_fm_to_fm(matrix_a, matrix_lu)
117
118 a => matrix_lu%local_data
119 n = matrix_lu%matrix_struct%nrow_global
120 ALLOCATE (ipivot(n))
121 ipivot(:) = 0
122 p = 0
123 ALLOCATE (diag(n))
124 diag(:) = 0.0_dp
125#if defined(__parallel)
126 ! Use LU decomposition
127 desca(:) = matrix_lu%matrix_struct%descriptor(:)
128 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
129 CALL cp_fm_get_diag(matrix_lu, diag)
130 determinant = product(diag)
131 myprow = matrix_lu%matrix_struct%context%mepos(1)
132 nprow = matrix_lu%matrix_struct%context%num_pe(1)
133 npcol = matrix_lu%matrix_struct%context%num_pe(2)
134 nrow_local = matrix_lu%matrix_struct%nrow_locals(myprow)
135 nrow_block = matrix_lu%matrix_struct%nrow_block
136 DO irow_local = 1, nrow_local
137 i = matrix_lu%matrix_struct%row_indices(irow_local)
138 IF (ipivot(irow_local) /= i) p = p + 1
139 END DO
140 CALL matrix_lu%matrix_struct%para_env%sum(p)
141 ! very important fix
142 p = p/npcol
143#else
144 CALL dgetrf(n, n, a, n, ipivot, info)
145 CALL cp_fm_get_diag(matrix_lu, diag)
146 determinant = product(diag)
147 DO i = 1, n
148 IF (ipivot(i) /= i) p = p + 1
149 END DO
150#endif
151 DEALLOCATE (ipivot)
152 DEALLOCATE (diag)
153 CALL cp_fm_release(matrix_lu)
154 det_a = determinant*(-2*mod(p, 2) + 1.0_dp)
155 END SUBROUTINE cp_fm_det
156
157! **************************************************************************************************
158!> \brief calc A <- alpha*A + beta*B
159!> optimized for alpha == 1.0 (just add beta*B) and beta == 0.0 (just
160!> scale A)
161!> \param alpha ...
162!> \param matrix_a ...
163!> \param beta ...
164!> \param matrix_b ...
165! **************************************************************************************************
166 SUBROUTINE cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
167
168 REAL(kind=dp), INTENT(IN) :: alpha
169 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
170 REAL(kind=dp), INTENT(IN), OPTIONAL :: beta
171 TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: matrix_b
172
173 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_scale_and_add'
174
175 INTEGER :: handle, size_a, size_b
176 REAL(kind=dp) :: my_beta
177 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
178 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
179
180 CALL timeset(routinen, handle)
181
182 my_beta = 0.0_dp
183 IF (PRESENT(matrix_b)) my_beta = 1.0_dp
184 IF (PRESENT(beta)) my_beta = beta
185 NULLIFY (a, b)
186
187 IF (PRESENT(beta)) THEN
188 cpassert(PRESENT(matrix_b))
189 IF (ASSOCIATED(matrix_a%local_data, matrix_b%local_data)) THEN
190 cpwarn("Bad use of routine. Call cp_fm_scale instead")
191 CALL cp_fm_scale(alpha + beta, matrix_a)
192 CALL timestop(handle)
193 RETURN
194 END IF
195 END IF
196
197 a => matrix_a%local_data
198 a_sp => matrix_a%local_data_sp
199
200 IF (matrix_a%use_sp) THEN
201 size_a = SIZE(a_sp, 1)*SIZE(a_sp, 2)
202 ELSE
203 size_a = SIZE(a, 1)*SIZE(a, 2)
204 END IF
205
206 IF (alpha .NE. 1.0_dp) THEN
207 IF (matrix_a%use_sp) THEN
208 CALL sscal(size_a, real(alpha, sp), a_sp, 1)
209 ELSE
210 CALL dscal(size_a, alpha, a, 1)
211 END IF
212 END IF
213 IF (my_beta .NE. 0.0_dp) THEN
214 IF (matrix_a%matrix_struct%context .NE. matrix_b%matrix_struct%context) &
215 cpabort("Matrices must be in the same blacs context")
216
217 IF (cp_fm_struct_equivalent(matrix_a%matrix_struct, &
218 matrix_b%matrix_struct)) THEN
219
220 b => matrix_b%local_data
221 b_sp => matrix_b%local_data_sp
222 IF (matrix_b%use_sp) THEN
223 size_b = SIZE(b_sp, 1)*SIZE(b_sp, 2)
224 ELSE
225 size_b = SIZE(b, 1)*SIZE(b, 2)
226 END IF
227 IF (size_a .NE. size_b) &
228 cpabort("Matrices must have same local sizes")
229
230 IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
231 CALL saxpy(size_a, real(my_beta, sp), b_sp, 1, a_sp, 1)
232 ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
233 CALL saxpy(size_a, real(my_beta, sp), real(b, sp), 1, a_sp, 1)
234 ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
235 CALL daxpy(size_a, my_beta, real(b_sp, dp), 1, a, 1)
236 ELSE
237 CALL daxpy(size_a, my_beta, b, 1, a, 1)
238 END IF
239
240 ELSE
241#ifdef __parallel
242 cpabort("to do (pdscal,pdcopy,pdaxpy)")
243#else
244 cpabort("")
245#endif
246 END IF
247
248 END IF
249
250 CALL timestop(handle)
251
252 END SUBROUTINE cp_fm_scale_and_add
253
254! **************************************************************************************************
255!> \brief interface to BLACS geadd:
256!> matrix_b = beta*matrix_b + alpha*opt(matrix_a)
257!> where opt(matrix_a) can be either:
258!> 'N': matrix_a
259!> 'T': matrix_a^T
260!> 'C': matrix_a^H (Hermitian conjugate)
261!> note that this is a level three routine, use cp_fm_scale_and_add if that
262!> is sufficient for your needs
263!> \param alpha : complex scalar
264!> \param trans : 'N' normal, 'T' transposed
265!> \param matrix_a : input matrix_a
266!> \param beta : complex scalar
267!> \param matrix_b : input matrix_b, upon out put the updated matrix_b
268!> \author Lianheng Tong
269! **************************************************************************************************
270 SUBROUTINE cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
271 REAL(kind=dp), INTENT(IN) :: alpha, beta
272 CHARACTER, INTENT(IN) :: trans
273 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
274
275 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_geadd'
276
277 INTEGER :: nrow_global, ncol_global, handle
278 REAL(kind=dp), DIMENSION(:, :), POINTER :: aa, bb
279#if defined(__parallel)
280 INTEGER, DIMENSION(9) :: desca, descb
281#elif !defined(__MKL)
282 INTEGER :: ii, jj
283#endif
284
285 CALL timeset(routinen, handle)
286
287 nrow_global = matrix_a%matrix_struct%nrow_global
288 ncol_global = matrix_a%matrix_struct%ncol_global
289 cpassert(nrow_global .EQ. matrix_b%matrix_struct%nrow_global)
290 cpassert(ncol_global .EQ. matrix_b%matrix_struct%ncol_global)
291
292 aa => matrix_a%local_data
293 bb => matrix_b%local_data
294
295#if defined(__parallel)
296 desca = matrix_a%matrix_struct%descriptor
297 descb = matrix_b%matrix_struct%descriptor
298 CALL pdgeadd(trans, &
299 nrow_global, &
300 ncol_global, &
301 alpha, &
302 aa, &
303 1, 1, &
304 desca, &
305 beta, &
306 bb, &
307 1, 1, &
308 descb)
309#elif defined(__MKL)
310 CALL mkl_domatadd('C', trans, 'N', nrow_global, ncol_global, &
311 alpha, aa, nrow_global, beta, bb, nrow_global, bb, nrow_global)
312#else
313 ! dgeadd is not a standard BLAS function, although it is implemented
314 ! in some libraries like OpenBLAS, so not going to use it here
315 SELECT CASE (trans)
316 CASE ('T')
317 DO jj = 1, ncol_global
318 DO ii = 1, nrow_global
319 bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(jj, ii)
320 END DO
321 END DO
322 CASE DEFAULT
323 DO jj = 1, ncol_global
324 DO ii = 1, nrow_global
325 bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(ii, jj)
326 END DO
327 END DO
328 END SELECT
329#endif
330
331 CALL timestop(handle)
332
333 END SUBROUTINE cp_fm_geadd
334
335! **************************************************************************************************
336!> \brief Computes the LU-decomposition of the matrix, and the determinant of the matrix
337!> IMPORTANT : the sign of the determinant is not defined correctly yet ....
338!> \param matrix_a ...
339!> \param almost_determinant ...
340!> \param correct_sign ...
341!> \par History
342!> added correct_sign 02.07 (fschiff)
343!> \author Joost VandeVondele
344!> \note
345!> - matrix_a is overwritten
346!> - the sign of the determinant might be wrong
347!> - SERIOUS WARNING (KNOWN BUG) : the sign of the determinant depends on ipivot
348!> - one should be able to find out if ipivot is an even or an odd permutation...
349!> if you need the correct sign, just add correct_sign==.TRUE. (fschiff)
350!> - Use cp_fm_get_diag instead of n times cp_fm_get_element (A. Bussy)
351! **************************************************************************************************
352 SUBROUTINE cp_fm_lu_decompose(matrix_a, almost_determinant, correct_sign)
353 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
354 REAL(kind=dp), INTENT(OUT) :: almost_determinant
355 LOGICAL, INTENT(IN), OPTIONAL :: correct_sign
356
357 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_lu_decompose'
358
359 INTEGER :: handle, i, info, n
360 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
361 REAL(kind=dp) :: determinant
362 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
363#if defined(__parallel)
364 INTEGER, DIMENSION(9) :: desca
365 REAL(kind=dp), DIMENSION(:), POINTER :: diag
366#else
367 INTEGER :: lda
368#endif
369
370 CALL timeset(routinen, handle)
371
372 a => matrix_a%local_data
373 n = matrix_a%matrix_struct%nrow_global
374 ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
375
376#if defined(__parallel)
377 mark_used(correct_sign)
378 desca(:) = matrix_a%matrix_struct%descriptor(:)
379 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
380
381 ALLOCATE (diag(n))
382 diag(:) = 0.0_dp
383 CALL cp_fm_get_diag(matrix_a, diag)
384 determinant = 1.0_dp
385 DO i = 1, n
386 determinant = determinant*diag(i)
387 END DO
388 DEALLOCATE (diag)
389#else
390 lda = SIZE(a, 1)
391 CALL dgetrf(n, n, a, lda, ipivot, info)
392 determinant = 1.0_dp
393 IF (correct_sign) THEN
394 DO i = 1, n
395 IF (ipivot(i) .NE. i) THEN
396 determinant = -determinant*a(i, i)
397 ELSE
398 determinant = determinant*a(i, i)
399 END IF
400 END DO
401 ELSE
402 DO i = 1, n
403 determinant = determinant*a(i, i)
404 END DO
405 END IF
406#endif
407 ! info is allowed to be zero
408 ! this does just signal a zero diagonal element
409 DEALLOCATE (ipivot)
410 almost_determinant = determinant ! notice that the sign is random
411 CALL timestop(handle)
412 END SUBROUTINE cp_fm_lu_decompose
413
414! **************************************************************************************************
415!> \brief computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
416!> \param transa : 'N' -> normal 'T' -> transpose
417!> alpha,beta :: can be 0.0_dp and 1.0_dp
418!> \param transb ...
419!> \param m ...
420!> \param n ...
421!> \param k ...
422!> \param alpha ...
423!> \param matrix_a : m x k matrix ( ! for transa = 'N')
424!> \param matrix_b : k x n matrix ( ! for transb = 'N')
425!> \param beta ...
426!> \param matrix_c : m x n matrix
427!> \param a_first_col ...
428!> \param a_first_row ...
429!> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
430!> \param b_first_row ...
431!> \param c_first_col ...
432!> \param c_first_row ...
433!> \author Matthias Krack
434!> \note
435!> matrix_c should have no overlap with matrix_a, matrix_b
436! **************************************************************************************************
437 SUBROUTINE cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, &
438 matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, &
439 c_first_col, c_first_row)
440
441 CHARACTER(LEN=1), INTENT(IN) :: transa, transb
442 INTEGER, INTENT(IN) :: m, n, k
443 REAL(kind=dp), INTENT(IN) :: alpha
444 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
445 REAL(kind=dp), INTENT(IN) :: beta
446 TYPE(cp_fm_type), INTENT(IN) :: matrix_c
447 INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, &
448 b_first_col, b_first_row, &
449 c_first_col, c_first_row
450
451 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_gemm'
452
453 INTEGER :: handle, i_a, i_b, i_c, j_a, &
454 j_b, j_c
455 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, c
456 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp, c_sp
457#if defined(__parallel)
458 INTEGER, DIMENSION(9) :: desca, descb, descc
459#else
460 INTEGER :: lda, ldb, ldc
461#endif
462
463 CALL timeset(routinen, handle)
464
465 !sample peak memory
466 CALL m_memory()
467
468 a => matrix_a%local_data
469 b => matrix_b%local_data
470 c => matrix_c%local_data
471
472 a_sp => matrix_a%local_data_sp
473 b_sp => matrix_b%local_data_sp
474 c_sp => matrix_c%local_data_sp
475
476 i_a = 1
477 IF (PRESENT(a_first_row)) i_a = a_first_row
478
479 j_a = 1
480 IF (PRESENT(a_first_col)) j_a = a_first_col
481
482 i_b = 1
483 IF (PRESENT(b_first_row)) i_b = b_first_row
484
485 j_b = 1
486 IF (PRESENT(b_first_col)) j_b = b_first_col
487
488 i_c = 1
489 IF (PRESENT(c_first_row)) i_c = c_first_row
490
491 j_c = 1
492 IF (PRESENT(c_first_col)) j_c = c_first_col
493
494#if defined(__parallel)
495
496 desca(:) = matrix_a%matrix_struct%descriptor(:)
497 descb(:) = matrix_b%matrix_struct%descriptor(:)
498 descc(:) = matrix_c%matrix_struct%descriptor(:)
499
500 IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
501
502 CALL psgemm(transa, transb, m, n, k, real(alpha, sp), a_sp(1, 1), i_a, j_a, desca, b_sp(1, 1), i_b, j_b, &
503 descb, real(beta, sp), c_sp(1, 1), i_c, j_c, descc)
504
505 ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
506
507 CALL pdgemm(transa, transb, m, n, k, alpha, a, i_a, j_a, desca, b, i_b, j_b, &
508 descb, beta, c, i_c, j_c, descc)
509
510 ELSE
511 cpabort("Mixed precision gemm NYI")
512 END IF
513#else
514
515 IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
516
517 lda = SIZE(a_sp, 1)
518 ldb = SIZE(b_sp, 1)
519 ldc = SIZE(c_sp, 1)
520
521 CALL sgemm(transa, transb, m, n, k, real(alpha, sp), a_sp(i_a, j_a), lda, b_sp(i_b, j_b), ldb, &
522 REAL(beta, sp), c_sp(i_c, j_c), ldc)
523
524 ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
525
526 lda = SIZE(a, 1)
527 ldb = SIZE(b, 1)
528 ldc = SIZE(c, 1)
529
530 CALL dgemm(transa, transb, m, n, k, alpha, a(i_a, j_a), lda, b(i_b, j_b), ldb, beta, c(i_c, j_c), ldc)
531
532 ELSE
533 cpabort("Mixed precision gemm NYI")
534 END IF
535
536#endif
537 CALL timestop(handle)
538
539 END SUBROUTINE cp_fm_gemm
540
541! **************************************************************************************************
542!> \brief computes matrix_c = beta * matrix_c + alpha * matrix_a * matrix_b
543!> computes matrix_c = beta * matrix_c + alpha * matrix_b * matrix_a
544!> where matrix_a is symmetric
545!> \param side : 'L' -> matrix_a is on the left 'R' -> matrix_a is on the right
546!> alpha,beta :: can be 0.0_dp and 1.0_dp
547!> \param uplo triangular format
548!> \param m ...
549!> \param n ...
550!> \param alpha ...
551!> \param matrix_a : m x m matrix
552!> \param matrix_b : m x n matrix
553!> \param beta ...
554!> \param matrix_c : m x n matrix
555!> \author Matthias Krack
556!> \note
557!> matrix_c should have no overlap with matrix_a, matrix_b
558!> all matrices in QS are triangular according to uplo
559!> matrix_a is always an m x m matrix
560!> typically slower than cp_fm_gemm (especially in parallel easily 50 percent)
561! **************************************************************************************************
562 SUBROUTINE cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
563
564 CHARACTER(LEN=1), INTENT(IN) :: side, uplo
565 INTEGER, INTENT(IN) :: m, n
566 REAL(kind=dp), INTENT(IN) :: alpha
567 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
568 REAL(kind=dp), INTENT(IN) :: beta
569 TYPE(cp_fm_type), INTENT(IN) :: matrix_c
570
571 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_symm'
572
573 INTEGER :: handle
574 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, c
575#if defined(__parallel)
576 INTEGER, DIMENSION(9) :: desca, descb, descc
577#else
578 INTEGER :: lda, ldb, ldc
579#endif
580
581 CALL timeset(routinen, handle)
582
583 a => matrix_a%local_data
584 b => matrix_b%local_data
585 c => matrix_c%local_data
586
587#if defined(__parallel)
588
589 desca(:) = matrix_a%matrix_struct%descriptor(:)
590 descb(:) = matrix_b%matrix_struct%descriptor(:)
591 descc(:) = matrix_c%matrix_struct%descriptor(:)
592
593 CALL pdsymm(side, uplo, m, n, alpha, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, beta, c(1, 1), 1, 1, descc)
594
595#else
596
597 lda = matrix_a%matrix_struct%local_leading_dimension
598 ldb = matrix_b%matrix_struct%local_leading_dimension
599 ldc = matrix_c%matrix_struct%local_leading_dimension
600
601 CALL dsymm(side, uplo, m, n, alpha, a(1, 1), lda, b(1, 1), ldb, beta, c(1, 1), ldc)
602
603#endif
604 CALL timestop(handle)
605
606 END SUBROUTINE cp_fm_symm
607
608! **************************************************************************************************
609!> \brief computes the Frobenius norm of matrix_a
610!> \brief computes the Frobenius norm of matrix_a
611!> \param matrix_a : m x n matrix
612!> \return ...
613!> \author VW
614! **************************************************************************************************
615 FUNCTION cp_fm_frobenius_norm(matrix_a) RESULT(norm)
616 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
617 REAL(kind=dp) :: norm
618
619 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_frobenius_norm'
620
621 INTEGER :: handle, size_a
622 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
623 REAL(kind=dp), EXTERNAL :: ddot
624#if defined(__parallel)
625 TYPE(mp_comm_type) :: group
626#endif
627
628 CALL timeset(routinen, handle)
629
630 norm = 0.0_dp
631 a => matrix_a%local_data
632 size_a = SIZE(a, 1)*SIZE(a, 2)
633 norm = ddot(size_a, a(1, 1), 1, a(1, 1), 1)
634#if defined(__parallel)
635 group = matrix_a%matrix_struct%para_env
636 CALL group%sum(norm)
637#endif
638 norm = sqrt(norm)
639
640 CALL timestop(handle)
641
642 END FUNCTION cp_fm_frobenius_norm
643
644! **************************************************************************************************
645!> \brief performs a rank-k update of a symmetric matrix_c
646!> matrix_c = beta * matrix_c + alpha * matrix_a * transpose ( matrix_a )
647!> \param uplo : 'U' ('L')
648!> \param trans : 'N' ('T')
649!> \param k : number of cols to use in matrix_a
650!> ia,ja :: 1,1 (could be used for selecting subblock of a)
651!> \param alpha ...
652!> \param matrix_a ...
653!> \param ia ...
654!> \param ja ...
655!> \param beta ...
656!> \param matrix_c ...
657!> \author Matthias Krack
658! **************************************************************************************************
659 SUBROUTINE cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
660 CHARACTER(LEN=1), INTENT(IN) :: uplo, trans
661 INTEGER, INTENT(IN) :: k
662 REAL(kind=dp), INTENT(IN) :: alpha
663 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
664 INTEGER, INTENT(IN) :: ia, ja
665 REAL(kind=dp), INTENT(IN) :: beta
666 TYPE(cp_fm_type), INTENT(IN) :: matrix_c
667
668 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_syrk'
669
670 INTEGER :: handle, n
671 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, c
672#if defined(__parallel)
673 INTEGER, DIMENSION(9) :: desca, descc
674#else
675 INTEGER :: lda, ldc
676#endif
677
678 CALL timeset(routinen, handle)
679
680 n = matrix_c%matrix_struct%nrow_global
681
682 a => matrix_a%local_data
683 c => matrix_c%local_data
684
685#if defined(__parallel)
686
687 desca(:) = matrix_a%matrix_struct%descriptor(:)
688 descc(:) = matrix_c%matrix_struct%descriptor(:)
689
690 CALL pdsyrk(uplo, trans, n, k, alpha, a(1, 1), ia, ja, desca, beta, c(1, 1), 1, 1, descc)
691
692#else
693
694 lda = SIZE(a, 1)
695 ldc = SIZE(c, 1)
696
697 CALL dsyrk(uplo, trans, n, k, alpha, a(ia, ja), lda, beta, c(1, 1), ldc)
698
699#endif
700 CALL timestop(handle)
701
702 END SUBROUTINE cp_fm_syrk
703
704! **************************************************************************************************
705!> \brief computes the schur product of two matrices
706!> c_ij = a_ij * b_ij
707!> \param matrix_a ...
708!> \param matrix_b ...
709!> \param matrix_c ...
710!> \author Joost VandeVondele
711! **************************************************************************************************
712 SUBROUTINE cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
713
714 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b, matrix_c
715
716 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_schur_product'
717
718 INTEGER :: handle, icol_local, irow_local, mypcol, &
719 myprow, ncol_local, npcol, nprow, &
720 nrow_local
721 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, c
722 TYPE(cp_blacs_env_type), POINTER :: context
723
724 CALL timeset(routinen, handle)
725
726 context => matrix_a%matrix_struct%context
727 myprow = context%mepos(1)
728 mypcol = context%mepos(2)
729 nprow = context%num_pe(1)
730 npcol = context%num_pe(2)
731
732 a => matrix_a%local_data
733 b => matrix_b%local_data
734 c => matrix_c%local_data
735
736 nrow_local = matrix_a%matrix_struct%nrow_locals(myprow)
737 ncol_local = matrix_a%matrix_struct%ncol_locals(mypcol)
738
739 DO icol_local = 1, ncol_local
740 DO irow_local = 1, nrow_local
741 c(irow_local, icol_local) = a(irow_local, icol_local)*b(irow_local, icol_local)
742 END DO
743 END DO
744
745 CALL timestop(handle)
746
747 END SUBROUTINE cp_fm_schur_product
748
749! **************************************************************************************************
750!> \brief returns the trace of matrix_a^T matrix_b, i.e
751!> sum_{i,j}(matrix_a(i,j)*matrix_b(i,j))
752!> \param matrix_a a matrix
753!> \param matrix_b another matrix
754!> \param trace ...
755!> \par History
756!> 11.06.2001 Creation (Matthias Krack)
757!> 12.2002 added doc [fawzi]
758!> \author Matthias Krack
759!> \note
760!> note the transposition of matrix_a!
761! **************************************************************************************************
762 SUBROUTINE cp_fm_trace_a0b0t0(matrix_a, matrix_b, trace)
763
764 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
765 REAL(KIND=dp), INTENT(OUT) :: trace
766
767 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a0b0t0'
768
769 INTEGER :: handle, mypcol, myprow, ncol_local, &
770 npcol, nprow, nrow_local
771 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
772 REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
773 TYPE(cp_blacs_env_type), POINTER :: context
774 TYPE(mp_comm_type) :: group
775
776 CALL timeset(routinen, handle)
777
778 context => matrix_a%matrix_struct%context
779 myprow = context%mepos(1)
780 mypcol = context%mepos(2)
781 nprow = context%num_pe(1)
782 npcol = context%num_pe(2)
783
784 group = matrix_a%matrix_struct%para_env
785
786 a => matrix_a%local_data
787 b => matrix_b%local_data
788
789 a_sp => matrix_a%local_data_sp
790 b_sp => matrix_b%local_data_sp
791
792 nrow_local = min(matrix_a%matrix_struct%nrow_locals(myprow), matrix_b%matrix_struct%nrow_locals(myprow))
793 ncol_local = min(matrix_a%matrix_struct%ncol_locals(mypcol), matrix_b%matrix_struct%ncol_locals(mypcol))
794
795 ! cries for an accurate_dot_product
796 IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
797 trace = accurate_sum(real(a_sp(1:nrow_local, 1:ncol_local)* &
798 b_sp(1:nrow_local, 1:ncol_local), dp))
799 ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
800 trace = accurate_sum(real(a_sp(1:nrow_local, 1:ncol_local), dp)* &
801 b(1:nrow_local, 1:ncol_local))
802 ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
803 trace = accurate_sum(a(1:nrow_local, 1:ncol_local)* &
804 REAL(b_sp(1:nrow_local, 1:ncol_local), dp))
805 ELSE
806 trace = accurate_dot_product(a(1:nrow_local, 1:ncol_local), &
807 b(1:nrow_local, 1:ncol_local))
808 END IF
809
810 CALL group%sum(trace)
811
812 CALL timestop(handle)
813
814 END SUBROUTINE cp_fm_trace_a0b0t0
815
816
817! **************************************************************************************************
818!> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b) for each pair of matrices A_k and B.
819!> \param matrix_a list of A matrices
820!> \param matrix_b B matrix
821!> \param trace computed traces
822!> \par History
823!> * 08.2018 forked from cp_fm_trace() [Sergey Chulkov]
824!> \note \parblock
825!> Computing the trace requires collective communication between involved MPI processes
826!> that implies a synchronisation point between them. The aim of this subroutine is to reduce
827!> the amount of time wasted in such synchronisation by performing one large collective
828!> operation which involves all the matrices in question.
829!>
830!> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b0t1' means that
831!> the dummy variables 'matrix_a' and 'trace' are 1-dimensional arrays, while the variable
832!> 'matrix_b' is a single matrix.
833!> \endparblock
834! **************************************************************************************************
835 SUBROUTINE cp_fm_trace_a1b0t1_a (matrix_a, matrix_b, trace)
836 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_a
837 TYPE(cp_fm_type), INTENT(IN) :: matrix_b
838 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
839
840 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_a'
841
842 INTEGER :: handle, imatrix, n_matrices, &
843 ncols_local, nrows_local
844 LOGICAL :: use_sp_a, use_sp_b
845 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
846 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
847 TYPE(mp_comm_type) :: group
848
849 CALL timeset(routinen, handle)
850
851 n_matrices = SIZE(trace)
852 cpassert(SIZE(matrix_a) == n_matrices)
853
854 CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
855 use_sp_b = matrix_b%use_sp
856
857 IF (use_sp_b) THEN
858 ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
859 ELSE
860 ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
861 END IF
862
863!$OMP PARALLEL DO DEFAULT(NONE), &
864!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
865!$OMP SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
866!$OMP SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
867
868 DO imatrix = 1, n_matrices
869
870 use_sp_a = matrix_a(imatrix) %use_sp
871
872 ! assume that the matrices A(i) and B have identical shapes and distribution schemes
873 IF (use_sp_a .AND. use_sp_b) THEN
874 ldata_a_sp => matrix_a(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
875 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
876 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
877 ldata_a => matrix_a(imatrix) %local_data(1:nrows_local, 1:ncols_local)
878 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
879 ELSE
880 cpabort("Matrices A and B are of different types")
881 END IF
882 END DO
883!$OMP END PARALLEL DO
884
885 group = matrix_b%matrix_struct%para_env
886 CALL group%sum(trace)
887
888 CALL timestop(handle)
889 END SUBROUTINE cp_fm_trace_a1b0t1_a
890 SUBROUTINE cp_fm_trace_a1b0t1_p (matrix_a, matrix_b, trace)
891 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_a
892 TYPE(cp_fm_type), INTENT(IN) :: matrix_b
893 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
894
895 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_p'
896
897 INTEGER :: handle, imatrix, n_matrices, &
898 ncols_local, nrows_local
899 LOGICAL :: use_sp_a, use_sp_b
900 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
901 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
902 TYPE(mp_comm_type) :: group
903
904 CALL timeset(routinen, handle)
905
906 n_matrices = SIZE(trace)
907 cpassert(SIZE(matrix_a) == n_matrices)
908
909 CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
910 use_sp_b = matrix_b%use_sp
911
912 IF (use_sp_b) THEN
913 ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
914 ELSE
915 ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
916 END IF
917
918!$OMP PARALLEL DO DEFAULT(NONE), &
919!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
920!$OMP SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
921!$OMP SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
922
923 DO imatrix = 1, n_matrices
924
925 use_sp_a = matrix_a(imatrix) %matrix%use_sp
926
927 ! assume that the matrices A(i) and B have identical shapes and distribution schemes
928 IF (use_sp_a .AND. use_sp_b) THEN
929 ldata_a_sp => matrix_a(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
930 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
931 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
932 ldata_a => matrix_a(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
933 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
934 ELSE
935 cpabort("Matrices A and B are of different types")
936 END IF
937 END DO
938!$OMP END PARALLEL DO
939
940 group = matrix_b%matrix_struct%para_env
941 CALL group%sum(trace)
942
943 CALL timestop(handle)
944 END SUBROUTINE cp_fm_trace_a1b0t1_p
945
946! **************************************************************************************************
947!> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b(k)) for each pair of matrices A_k and B_k.
948!> \param matrix_a list of A matrices
949!> \param matrix_b list of B matrices
950!> \param trace computed traces
951!> \param accurate ...
952!> \par History
953!> * 11.2016 forked from cp_fm_trace() [Sergey Chulkov]
954!> \note \parblock
955!> Computing the trace requires collective communication between involved MPI processes
956!> that implies a synchronisation point between them. The aim of this subroutine is to reduce
957!> the amount of time wasted in such synchronisation by performing one large collective
958!> operation which involves all the matrices in question.
959!>
960!> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b1t1' means that
961!> all dummy variables (matrix_a, matrix_b, and trace) are 1-dimensional arrays.
962!> \endparblock
963! **************************************************************************************************
964 SUBROUTINE cp_fm_trace_a1b1t1_aa (matrix_a, matrix_b, trace, accurate)
965 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_a
966 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_b
967 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
968 LOGICAL, INTENT(IN), OPTIONAL :: accurate
969
970 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_aa'
971
972 INTEGER :: handle, imatrix, n_matrices, &
973 ncols_local, nrows_local
974 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
975 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
976 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
977 TYPE(mp_comm_type) :: group
978
979 CALL timeset(routinen, handle)
980
981 n_matrices = SIZE(trace)
982 cpassert(SIZE(matrix_a) == n_matrices)
983 cpassert(SIZE(matrix_b) == n_matrices)
984
985 use_accurate_sum = .true.
986 IF (PRESENT(accurate)) use_accurate_sum = accurate
987
988!$OMP PARALLEL DO DEFAULT(NONE), &
989!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
990!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
991!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
992 DO imatrix = 1, n_matrices
993 CALL cp_fm_get_info(matrix_a(imatrix) , nrow_local=nrows_local, ncol_local=ncols_local)
994
995 use_sp_a = matrix_a(imatrix) %use_sp
996 use_sp_b = matrix_b(imatrix) %use_sp
997
998 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
999 IF (use_sp_a .AND. use_sp_b) THEN
1000 ldata_a_sp => matrix_a(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1001 ldata_b_sp => matrix_b(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1002 IF (use_accurate_sum) THEN
1003 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1004 ELSE
1005 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1006 END IF
1007 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1008 ldata_a => matrix_a(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1009 ldata_b => matrix_b(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1010 IF (use_accurate_sum) THEN
1011 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1012 ELSE
1013 trace(imatrix) = sum(ldata_a*ldata_b)
1014 END IF
1015 ELSE
1016 cpabort("Matrices A and B are of different types")
1017 END IF
1018 END DO
1019!$OMP END PARALLEL DO
1020
1021 group = matrix_a(1) %matrix_struct%para_env
1022 CALL group%sum(trace)
1023
1024 CALL timestop(handle)
1025 END SUBROUTINE cp_fm_trace_a1b1t1_aa
1026 SUBROUTINE cp_fm_trace_a1b1t1_ap (matrix_a, matrix_b, trace, accurate)
1027 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_a
1028 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_b
1029 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
1030 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1031
1032 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_ap'
1033
1034 INTEGER :: handle, imatrix, n_matrices, &
1035 ncols_local, nrows_local
1036 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1037 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1038 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1039 TYPE(mp_comm_type) :: group
1040
1041 CALL timeset(routinen, handle)
1042
1043 n_matrices = SIZE(trace)
1044 cpassert(SIZE(matrix_a) == n_matrices)
1045 cpassert(SIZE(matrix_b) == n_matrices)
1046
1047 use_accurate_sum = .true.
1048 IF (PRESENT(accurate)) use_accurate_sum = accurate
1049
1050!$OMP PARALLEL DO DEFAULT(NONE), &
1051!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1052!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1053!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1054 DO imatrix = 1, n_matrices
1055 CALL cp_fm_get_info(matrix_a(imatrix) , nrow_local=nrows_local, ncol_local=ncols_local)
1056
1057 use_sp_a = matrix_a(imatrix) %use_sp
1058 use_sp_b = matrix_b(imatrix) %matrix%use_sp
1059
1060 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1061 IF (use_sp_a .AND. use_sp_b) THEN
1062 ldata_a_sp => matrix_a(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1063 ldata_b_sp => matrix_b(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1064 IF (use_accurate_sum) THEN
1065 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1066 ELSE
1067 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1068 END IF
1069 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1070 ldata_a => matrix_a(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1071 ldata_b => matrix_b(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1072 IF (use_accurate_sum) THEN
1073 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1074 ELSE
1075 trace(imatrix) = sum(ldata_a*ldata_b)
1076 END IF
1077 ELSE
1078 cpabort("Matrices A and B are of different types")
1079 END IF
1080 END DO
1081!$OMP END PARALLEL DO
1082
1083 group = matrix_a(1) %matrix_struct%para_env
1084 CALL group%sum(trace)
1085
1086 CALL timestop(handle)
1087 END SUBROUTINE cp_fm_trace_a1b1t1_ap
1088 SUBROUTINE cp_fm_trace_a1b1t1_pa (matrix_a, matrix_b, trace, accurate)
1089 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_a
1090 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_b
1091 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
1092 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1093
1094 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_pa'
1095
1096 INTEGER :: handle, imatrix, n_matrices, &
1097 ncols_local, nrows_local
1098 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1099 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1100 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1101 TYPE(mp_comm_type) :: group
1102
1103 CALL timeset(routinen, handle)
1104
1105 n_matrices = SIZE(trace)
1106 cpassert(SIZE(matrix_a) == n_matrices)
1107 cpassert(SIZE(matrix_b) == n_matrices)
1108
1109 use_accurate_sum = .true.
1110 IF (PRESENT(accurate)) use_accurate_sum = accurate
1111
1112!$OMP PARALLEL DO DEFAULT(NONE), &
1113!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1114!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1115!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1116 DO imatrix = 1, n_matrices
1117 CALL cp_fm_get_info(matrix_a(imatrix) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1118
1119 use_sp_a = matrix_a(imatrix) %matrix%use_sp
1120 use_sp_b = matrix_b(imatrix) %use_sp
1121
1122 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1123 IF (use_sp_a .AND. use_sp_b) THEN
1124 ldata_a_sp => matrix_a(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1125 ldata_b_sp => matrix_b(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1126 IF (use_accurate_sum) THEN
1127 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1128 ELSE
1129 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1130 END IF
1131 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1132 ldata_a => matrix_a(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1133 ldata_b => matrix_b(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1134 IF (use_accurate_sum) THEN
1135 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1136 ELSE
1137 trace(imatrix) = sum(ldata_a*ldata_b)
1138 END IF
1139 ELSE
1140 cpabort("Matrices A and B are of different types")
1141 END IF
1142 END DO
1143!$OMP END PARALLEL DO
1144
1145 group = matrix_a(1) %matrix%matrix_struct%para_env
1146 CALL group%sum(trace)
1147
1148 CALL timestop(handle)
1149 END SUBROUTINE cp_fm_trace_a1b1t1_pa
1150 SUBROUTINE cp_fm_trace_a1b1t1_pp (matrix_a, matrix_b, trace, accurate)
1151 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_a
1152 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_b
1153 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
1154 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1155
1156 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_pp'
1157
1158 INTEGER :: handle, imatrix, n_matrices, &
1159 ncols_local, nrows_local
1160 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1161 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1162 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1163 TYPE(mp_comm_type) :: group
1164
1165 CALL timeset(routinen, handle)
1166
1167 n_matrices = SIZE(trace)
1168 cpassert(SIZE(matrix_a) == n_matrices)
1169 cpassert(SIZE(matrix_b) == n_matrices)
1170
1171 use_accurate_sum = .true.
1172 IF (PRESENT(accurate)) use_accurate_sum = accurate
1173
1174!$OMP PARALLEL DO DEFAULT(NONE), &
1175!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1176!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1177!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1178 DO imatrix = 1, n_matrices
1179 CALL cp_fm_get_info(matrix_a(imatrix) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1180
1181 use_sp_a = matrix_a(imatrix) %matrix%use_sp
1182 use_sp_b = matrix_b(imatrix) %matrix%use_sp
1183
1184 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1185 IF (use_sp_a .AND. use_sp_b) THEN
1186 ldata_a_sp => matrix_a(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1187 ldata_b_sp => matrix_b(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1188 IF (use_accurate_sum) THEN
1189 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1190 ELSE
1191 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1192 END IF
1193 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1194 ldata_a => matrix_a(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1195 ldata_b => matrix_b(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1196 IF (use_accurate_sum) THEN
1197 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1198 ELSE
1199 trace(imatrix) = sum(ldata_a*ldata_b)
1200 END IF
1201 ELSE
1202 cpabort("Matrices A and B are of different types")
1203 END IF
1204 END DO
1205!$OMP END PARALLEL DO
1206
1207 group = matrix_a(1) %matrix%matrix_struct%para_env
1208 CALL group%sum(trace)
1209
1210 CALL timestop(handle)
1211 END SUBROUTINE cp_fm_trace_a1b1t1_pp
1212
1213! **************************************************************************************************
1214!> \brief Compute trace(i,j) = \sum_k Tr (matrix_a(k,i)^T matrix_b(k,j)).
1215!> \param matrix_a list of A matrices
1216!> \param matrix_b list of B matrices
1217!> \param trace computed traces
1218!> \param accurate ...
1219! **************************************************************************************************
1220 SUBROUTINE cp_fm_contracted_trace_a2b2t2_aa (matrix_a, matrix_b, trace, accurate)
1221 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: matrix_a
1222 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: matrix_b
1223 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
1224 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1225
1226 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_aa'
1227
1228 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1229 nrows_local, nz
1230 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1231 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1232 REAL(kind=dp) :: t
1233 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1234 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1235 TYPE(mp_comm_type) :: group
1236
1237 CALL timeset(routinen, handle)
1238
1239 nz = SIZE(matrix_a, 1)
1240 cpassert(SIZE(matrix_b, 1) == nz)
1241
1242 na = SIZE(matrix_a, 2)
1243 nb = SIZE(matrix_b, 2)
1244 cpassert(SIZE(trace, 1) == na)
1245 cpassert(SIZE(trace, 2) == nb)
1246
1247 use_accurate_sum = .true.
1248 IF (PRESENT(accurate)) use_accurate_sum = accurate
1249
1250 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1251 ! improve load balance between shared-memory threads
1252 ntraces = na*nb
1253 na8 = int(na, kind=int_8)
1254
1255!$OMP PARALLEL DO DEFAULT(NONE), &
1256!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1257!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1258!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1259 DO itrace = 1, ntraces
1260 ib8 = (itrace - 1)/na8
1261 ia = int(itrace - ib8*na8)
1262 ib = int(ib8) + 1
1263
1264 t = 0.0_dp
1265 DO iz = 1, nz
1266 CALL cp_fm_get_info(matrix_a(iz, ia) , nrow_local=nrows_local, ncol_local=ncols_local)
1267 use_sp_a = matrix_a(iz, ia) %use_sp
1268 use_sp_b = matrix_b(iz, ib) %use_sp
1269
1270 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1271 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1272 ldata_a => matrix_a(iz, ia) %local_data(1:nrows_local, 1:ncols_local)
1273 ldata_b => matrix_b(iz, ib) %local_data(1:nrows_local, 1:ncols_local)
1274 IF (use_accurate_sum) THEN
1275 t = t + accurate_dot_product(ldata_a, ldata_b)
1276 ELSE
1277 t = t + sum(ldata_a*ldata_b)
1278 END IF
1279 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1280 ldata_a_sp => matrix_a(iz, ia) %local_data_sp(1:nrows_local, 1:ncols_local)
1281 ldata_b_sp => matrix_b(iz, ib) %local_data_sp(1:nrows_local, 1:ncols_local)
1282 IF (use_accurate_sum) THEN
1283 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1284 ELSE
1285 t = t + sum(ldata_a_sp*ldata_b_sp)
1286 END IF
1287 ELSE
1288 cpabort("Matrices A and B are of different types")
1289 END IF
1290 END DO
1291 trace(ia, ib) = t
1292 END DO
1293!$OMP END PARALLEL DO
1294
1295 group = matrix_a(1, 1) %matrix_struct%para_env
1296 CALL group%sum(trace)
1297
1298 CALL timestop(handle)
1299 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_aa
1300 SUBROUTINE cp_fm_contracted_trace_a2b2t2_ap (matrix_a, matrix_b, trace, accurate)
1301 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: matrix_a
1302 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(IN) :: matrix_b
1303 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
1304 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1305
1306 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_ap'
1307
1308 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1309 nrows_local, nz
1310 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1311 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1312 REAL(kind=dp) :: t
1313 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1314 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1315 TYPE(mp_comm_type) :: group
1316
1317 CALL timeset(routinen, handle)
1318
1319 nz = SIZE(matrix_a, 1)
1320 cpassert(SIZE(matrix_b, 1) == nz)
1321
1322 na = SIZE(matrix_a, 2)
1323 nb = SIZE(matrix_b, 2)
1324 cpassert(SIZE(trace, 1) == na)
1325 cpassert(SIZE(trace, 2) == nb)
1326
1327 use_accurate_sum = .true.
1328 IF (PRESENT(accurate)) use_accurate_sum = accurate
1329
1330 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1331 ! improve load balance between shared-memory threads
1332 ntraces = na*nb
1333 na8 = int(na, kind=int_8)
1334
1335!$OMP PARALLEL DO DEFAULT(NONE), &
1336!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1337!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1338!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1339 DO itrace = 1, ntraces
1340 ib8 = (itrace - 1)/na8
1341 ia = int(itrace - ib8*na8)
1342 ib = int(ib8) + 1
1343
1344 t = 0.0_dp
1345 DO iz = 1, nz
1346 CALL cp_fm_get_info(matrix_a(iz, ia) , nrow_local=nrows_local, ncol_local=ncols_local)
1347 use_sp_a = matrix_a(iz, ia) %use_sp
1348 use_sp_b = matrix_b(iz, ib) %matrix%use_sp
1349
1350 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1351 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1352 ldata_a => matrix_a(iz, ia) %local_data(1:nrows_local, 1:ncols_local)
1353 ldata_b => matrix_b(iz, ib) %matrix%local_data(1:nrows_local, 1:ncols_local)
1354 IF (use_accurate_sum) THEN
1355 t = t + accurate_dot_product(ldata_a, ldata_b)
1356 ELSE
1357 t = t + sum(ldata_a*ldata_b)
1358 END IF
1359 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1360 ldata_a_sp => matrix_a(iz, ia) %local_data_sp(1:nrows_local, 1:ncols_local)
1361 ldata_b_sp => matrix_b(iz, ib) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1362 IF (use_accurate_sum) THEN
1363 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1364 ELSE
1365 t = t + sum(ldata_a_sp*ldata_b_sp)
1366 END IF
1367 ELSE
1368 cpabort("Matrices A and B are of different types")
1369 END IF
1370 END DO
1371 trace(ia, ib) = t
1372 END DO
1373!$OMP END PARALLEL DO
1374
1375 group = matrix_a(1, 1) %matrix_struct%para_env
1376 CALL group%sum(trace)
1377
1378 CALL timestop(handle)
1379 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_ap
1380 SUBROUTINE cp_fm_contracted_trace_a2b2t2_pa (matrix_a, matrix_b, trace, accurate)
1381 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(IN) :: matrix_a
1382 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: matrix_b
1383 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
1384 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1385
1386 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_pa'
1387
1388 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1389 nrows_local, nz
1390 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1391 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1392 REAL(kind=dp) :: t
1393 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1394 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1395 TYPE(mp_comm_type) :: group
1396
1397 CALL timeset(routinen, handle)
1398
1399 nz = SIZE(matrix_a, 1)
1400 cpassert(SIZE(matrix_b, 1) == nz)
1401
1402 na = SIZE(matrix_a, 2)
1403 nb = SIZE(matrix_b, 2)
1404 cpassert(SIZE(trace, 1) == na)
1405 cpassert(SIZE(trace, 2) == nb)
1406
1407 use_accurate_sum = .true.
1408 IF (PRESENT(accurate)) use_accurate_sum = accurate
1409
1410 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1411 ! improve load balance between shared-memory threads
1412 ntraces = na*nb
1413 na8 = int(na, kind=int_8)
1414
1415!$OMP PARALLEL DO DEFAULT(NONE), &
1416!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1417!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1418!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1419 DO itrace = 1, ntraces
1420 ib8 = (itrace - 1)/na8
1421 ia = int(itrace - ib8*na8)
1422 ib = int(ib8) + 1
1423
1424 t = 0.0_dp
1425 DO iz = 1, nz
1426 CALL cp_fm_get_info(matrix_a(iz, ia) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1427 use_sp_a = matrix_a(iz, ia) %matrix%use_sp
1428 use_sp_b = matrix_b(iz, ib) %use_sp
1429
1430 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1431 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1432 ldata_a => matrix_a(iz, ia) %matrix%local_data(1:nrows_local, 1:ncols_local)
1433 ldata_b => matrix_b(iz, ib) %local_data(1:nrows_local, 1:ncols_local)
1434 IF (use_accurate_sum) THEN
1435 t = t + accurate_dot_product(ldata_a, ldata_b)
1436 ELSE
1437 t = t + sum(ldata_a*ldata_b)
1438 END IF
1439 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1440 ldata_a_sp => matrix_a(iz, ia) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1441 ldata_b_sp => matrix_b(iz, ib) %local_data_sp(1:nrows_local, 1:ncols_local)
1442 IF (use_accurate_sum) THEN
1443 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1444 ELSE
1445 t = t + sum(ldata_a_sp*ldata_b_sp)
1446 END IF
1447 ELSE
1448 cpabort("Matrices A and B are of different types")
1449 END IF
1450 END DO
1451 trace(ia, ib) = t
1452 END DO
1453!$OMP END PARALLEL DO
1454
1455 group = matrix_a(1, 1) %matrix%matrix_struct%para_env
1456 CALL group%sum(trace)
1457
1458 CALL timestop(handle)
1459 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_pa
1460 SUBROUTINE cp_fm_contracted_trace_a2b2t2_pp (matrix_a, matrix_b, trace, accurate)
1461 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(IN) :: matrix_a
1462 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(IN) :: matrix_b
1463 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
1464 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1465
1466 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_pp'
1467
1468 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1469 nrows_local, nz
1470 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1471 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1472 REAL(kind=dp) :: t
1473 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1474 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1475 TYPE(mp_comm_type) :: group
1476
1477 CALL timeset(routinen, handle)
1478
1479 nz = SIZE(matrix_a, 1)
1480 cpassert(SIZE(matrix_b, 1) == nz)
1481
1482 na = SIZE(matrix_a, 2)
1483 nb = SIZE(matrix_b, 2)
1484 cpassert(SIZE(trace, 1) == na)
1485 cpassert(SIZE(trace, 2) == nb)
1486
1487 use_accurate_sum = .true.
1488 IF (PRESENT(accurate)) use_accurate_sum = accurate
1489
1490 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1491 ! improve load balance between shared-memory threads
1492 ntraces = na*nb
1493 na8 = int(na, kind=int_8)
1494
1495!$OMP PARALLEL DO DEFAULT(NONE), &
1496!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1497!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1498!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1499 DO itrace = 1, ntraces
1500 ib8 = (itrace - 1)/na8
1501 ia = int(itrace - ib8*na8)
1502 ib = int(ib8) + 1
1503
1504 t = 0.0_dp
1505 DO iz = 1, nz
1506 CALL cp_fm_get_info(matrix_a(iz, ia) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1507 use_sp_a = matrix_a(iz, ia) %matrix%use_sp
1508 use_sp_b = matrix_b(iz, ib) %matrix%use_sp
1509
1510 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1511 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1512 ldata_a => matrix_a(iz, ia) %matrix%local_data(1:nrows_local, 1:ncols_local)
1513 ldata_b => matrix_b(iz, ib) %matrix%local_data(1:nrows_local, 1:ncols_local)
1514 IF (use_accurate_sum) THEN
1515 t = t + accurate_dot_product(ldata_a, ldata_b)
1516 ELSE
1517 t = t + sum(ldata_a*ldata_b)
1518 END IF
1519 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1520 ldata_a_sp => matrix_a(iz, ia) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1521 ldata_b_sp => matrix_b(iz, ib) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1522 IF (use_accurate_sum) THEN
1523 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1524 ELSE
1525 t = t + sum(ldata_a_sp*ldata_b_sp)
1526 END IF
1527 ELSE
1528 cpabort("Matrices A and B are of different types")
1529 END IF
1530 END DO
1531 trace(ia, ib) = t
1532 END DO
1533!$OMP END PARALLEL DO
1534
1535 group = matrix_a(1, 1) %matrix%matrix_struct%para_env
1536 CALL group%sum(trace)
1537
1538 CALL timestop(handle)
1539 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_pp
1540
1541! **************************************************************************************************
1542!> \brief multiplies in place by a triangular matrix:
1543!> matrix_b = alpha op(triangular_matrix) matrix_b
1544!> or (if side='R')
1545!> matrix_b = alpha matrix_b op(triangular_matrix)
1546!> op(triangular_matrix) is:
1547!> triangular_matrix (if transpose_tr=.false. and invert_tr=.false.)
1548!> triangular_matrix^T (if transpose_tr=.true. and invert_tr=.false.)
1549!> triangular_matrix^(-1) (if transpose_tr=.false. and invert_tr=.true.)
1550!> triangular_matrix^(-T) (if transpose_tr=.true. and invert_tr=.true.)
1551!> \param triangular_matrix the triangular matrix that multiplies the other
1552!> \param matrix_b the matrix that gets multiplied and stores the result
1553!> \param side on which side of matrix_b stays op(triangular_matrix)
1554!> (defaults to 'L')
1555!> \param transpose_tr if the triangular matrix should be transposed
1556!> (defaults to false)
1557!> \param invert_tr if the triangular matrix should be inverted
1558!> (defaults to false)
1559!> \param uplo_tr if triangular_matrix is stored in the upper ('U') or
1560!> lower ('L') triangle (defaults to 'U')
1561!> \param unit_diag_tr if the diagonal elements of triangular_matrix should
1562!> be assumed to be 1 (defaults to false)
1563!> \param n_rows the number of rows of the result (defaults to
1564!> size(matrix_b,1))
1565!> \param n_cols the number of columns of the result (defaults to
1566!> size(matrix_b,2))
1567!> \param alpha ...
1568!> \par History
1569!> 08.2002 created [fawzi]
1570!> \author Fawzi Mohamed
1571!> \note
1572!> needs an mpi env
1573! **************************************************************************************************
1574 SUBROUTINE cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, &
1575 transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, &
1576 alpha)
1577 TYPE(cp_fm_type), INTENT(IN) :: triangular_matrix, matrix_b
1578 CHARACTER, INTENT(IN), OPTIONAL :: side
1579 LOGICAL, INTENT(IN), OPTIONAL :: transpose_tr, invert_tr
1580 CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
1581 LOGICAL, INTENT(IN), OPTIONAL :: unit_diag_tr
1582 INTEGER, INTENT(IN), OPTIONAL :: n_rows, n_cols
1583 REAL(kind=dp), INTENT(IN), OPTIONAL :: alpha
1584
1585 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_triangular_multiply'
1586
1587 CHARACTER :: side_char, transa, unit_diag, uplo
1588 INTEGER :: handle, m, n
1589 LOGICAL :: invert
1590 REAL(kind=dp) :: al
1591
1592 CALL timeset(routinen, handle)
1593 side_char = 'L'
1594 unit_diag = 'N'
1595 uplo = 'U'
1596 transa = 'N'
1597 invert = .false.
1598 al = 1.0_dp
1599 CALL cp_fm_get_info(matrix_b, nrow_global=m, ncol_global=n)
1600 IF (PRESENT(side)) side_char = side
1601 IF (PRESENT(invert_tr)) invert = invert_tr
1602 IF (PRESENT(uplo_tr)) uplo = uplo_tr
1603 IF (PRESENT(unit_diag_tr)) THEN
1604 IF (unit_diag_tr) THEN
1605 unit_diag = 'U'
1606 ELSE
1607 unit_diag = 'N'
1608 END IF
1609 END IF
1610 IF (PRESENT(transpose_tr)) THEN
1611 IF (transpose_tr) THEN
1612 transa = 'T'
1613 ELSE
1614 transa = 'N'
1615 END IF
1616 END IF
1617 IF (PRESENT(alpha)) al = alpha
1618 IF (PRESENT(n_rows)) m = n_rows
1619 IF (PRESENT(n_cols)) n = n_cols
1620
1621 IF (invert) THEN
1622
1623#if defined(__parallel)
1624 CALL pdtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1625 triangular_matrix%local_data(1, 1), 1, 1, &
1626 triangular_matrix%matrix_struct%descriptor, &
1627 matrix_b%local_data(1, 1), 1, 1, &
1628 matrix_b%matrix_struct%descriptor(1))
1629#else
1630 CALL dtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1631 triangular_matrix%local_data(1, 1), &
1632 SIZE(triangular_matrix%local_data, 1), &
1633 matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1634#endif
1635
1636 ELSE
1637
1638#if defined(__parallel)
1639 CALL pdtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1640 triangular_matrix%local_data(1, 1), 1, 1, &
1641 triangular_matrix%matrix_struct%descriptor, &
1642 matrix_b%local_data(1, 1), 1, 1, &
1643 matrix_b%matrix_struct%descriptor(1))
1644#else
1645 CALL dtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1646 triangular_matrix%local_data(1, 1), &
1647 SIZE(triangular_matrix%local_data, 1), &
1648 matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1649#endif
1650
1651 END IF
1652
1653 CALL timestop(handle)
1654 END SUBROUTINE cp_fm_triangular_multiply
1655
1656! **************************************************************************************************
1657!> \brief scales a matrix
1658!> matrix_a = alpha * matrix_b
1659!> \param alpha ...
1660!> \param matrix_a ...
1661!> \note
1662!> use cp_fm_set_all to zero (avoids problems with nan)
1663! **************************************************************************************************
1664 SUBROUTINE cp_fm_scale(alpha, matrix_a)
1665 REAL(kind=dp), INTENT(IN) :: alpha
1666 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
1667
1668 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_scale'
1669
1670 INTEGER :: handle, size_a
1671 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1672
1673 CALL timeset(routinen, handle)
1674
1675 NULLIFY (a)
1676
1677 a => matrix_a%local_data
1678 size_a = SIZE(a, 1)*SIZE(a, 2)
1679
1680 CALL dscal(size_a, alpha, a, 1)
1681
1682 CALL timestop(handle)
1683
1684 END SUBROUTINE cp_fm_scale
1685
1686! **************************************************************************************************
1687!> \brief transposes a matrix
1688!> matrixt = matrix ^ T
1689!> \param matrix ...
1690!> \param matrixt ...
1691!> \note
1692!> all matrix elements are transposed (see cp_fm_uplo_to_full to symmetrise a matrix)
1693! **************************************************************************************************
1694 SUBROUTINE cp_fm_transpose(matrix, matrixt)
1695 TYPE(cp_fm_type), INTENT(IN) :: matrix, matrixt
1696
1697 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_transpose'
1698
1699 INTEGER :: handle, ncol_global, &
1700 nrow_global, ncol_globalt, nrow_globalt
1701 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, c
1702#if defined(__parallel)
1703 INTEGER, DIMENSION(9) :: desca, descc
1704#elif !defined(__MKL)
1705 INTEGER :: i, j
1706#endif
1707
1708 nrow_global = matrix%matrix_struct%nrow_global
1709 ncol_global = matrix%matrix_struct%ncol_global
1710 nrow_globalt = matrixt%matrix_struct%nrow_global
1711 ncol_globalt = matrixt%matrix_struct%ncol_global
1712 cpassert(nrow_global == ncol_globalt)
1713 cpassert(nrow_globalt == ncol_global)
1714
1715 CALL timeset(routinen, handle)
1716
1717 a => matrix%local_data
1718 c => matrixt%local_data
1719
1720#if defined(__parallel)
1721 desca(:) = matrix%matrix_struct%descriptor(:)
1722 descc(:) = matrixt%matrix_struct%descriptor(:)
1723 CALL pdtran(ncol_global, nrow_global, 1.0_dp, a(1, 1), 1, 1, desca, 0.0_dp, c(1, 1), 1, 1, descc)
1724#elif defined(__MKL)
1725 CALL mkl_domatcopy('C', 'T', nrow_global, ncol_global, 1.0_dp, a(1, 1), nrow_global, c(1, 1), ncol_global)
1726#else
1727 DO j = 1, ncol_global
1728 DO i = 1, nrow_global
1729 c(j, i) = a(i, j)
1730 END DO
1731 END DO
1732#endif
1733 CALL timestop(handle)
1734
1735 END SUBROUTINE cp_fm_transpose
1736
1737! **************************************************************************************************
1738!> \brief given a triangular matrix according to uplo, computes the corresponding full matrix
1739!> \param matrix the triangular matrix as input, the full matrix as output
1740!> \param work a matrix of the same size as matrix
1741!> \param uplo triangular format; defaults to 'U'
1742!> \author Matthias Krack
1743!> \note
1744!> the opposite triangular part is irrelevant
1745! **************************************************************************************************
1746 SUBROUTINE cp_fm_uplo_to_full(matrix, work, uplo)
1747
1748 TYPE(cp_fm_type), INTENT(IN) :: matrix, work
1749 CHARACTER, INTENT(IN), OPTIONAL :: uplo
1750
1751 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_uplo_to_full'
1752
1753 CHARACTER :: myuplo
1754 INTEGER :: handle, icol_global, irow_global, &
1755 mypcol, myprow, ncol_global, &
1756 npcol, nprow, nrow_global
1757 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1758 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
1759 TYPE(cp_blacs_env_type), POINTER :: context
1760
1761#if defined(__parallel)
1762 INTEGER :: icol_local, irow_local, &
1763 ncol_block, ncol_local, &
1764 nrow_block, nrow_local
1765 INTEGER, DIMENSION(9) :: desca, descc
1766 REAL(kind=dp), DIMENSION(:, :), POINTER :: c
1767 REAL(kind=sp), DIMENSION(:, :), POINTER :: c_sp
1768#endif
1769
1770 myuplo = 'U'
1771 IF (PRESENT(uplo)) myuplo = uplo
1772
1773 nrow_global = matrix%matrix_struct%nrow_global
1774 ncol_global = matrix%matrix_struct%ncol_global
1775 cpassert(nrow_global == ncol_global)
1776 nrow_global = work%matrix_struct%nrow_global
1777 ncol_global = work%matrix_struct%ncol_global
1778 cpassert(nrow_global == ncol_global)
1779 cpassert(matrix%use_sp .EQV. work%use_sp)
1780
1781 CALL timeset(routinen, handle)
1782
1783 context => matrix%matrix_struct%context
1784 myprow = context%mepos(1)
1785 mypcol = context%mepos(2)
1786 nprow = context%num_pe(1)
1787 npcol = context%num_pe(2)
1788
1789#if defined(__parallel)
1790
1791 nrow_block = matrix%matrix_struct%nrow_block
1792 ncol_block = matrix%matrix_struct%ncol_block
1793
1794 nrow_local = matrix%matrix_struct%nrow_locals(myprow)
1795 ncol_local = matrix%matrix_struct%ncol_locals(mypcol)
1796
1797 a => work%local_data
1798 a_sp => work%local_data_sp
1799 desca(:) = work%matrix_struct%descriptor(:)
1800 c => matrix%local_data
1801 c_sp => matrix%local_data_sp
1802 descc(:) = matrix%matrix_struct%descriptor(:)
1803
1804 DO icol_local = 1, ncol_local
1805 icol_global = matrix%matrix_struct%col_indices(icol_local)
1806 DO irow_local = 1, nrow_local
1807 irow_global = matrix%matrix_struct%row_indices(irow_local)
1808 IF (merge(irow_global > icol_global, irow_global < icol_global, (myuplo == "U") .OR. (myuplo == "u"))) THEN
1809 IF (matrix%use_sp) THEN
1810 c_sp(irow_local, icol_local) = 0.0_sp
1811 ELSE
1812 c(irow_local, icol_local) = 0.0_dp
1813 END IF
1814 ELSE IF (irow_global == icol_global) THEN
1815 IF (matrix%use_sp) THEN
1816 c_sp(irow_local, icol_local) = 0.5_sp*c_sp(irow_local, icol_local)
1817 ELSE
1818 c(irow_local, icol_local) = 0.5_dp*c(irow_local, icol_local)
1819 END IF
1820 END IF
1821 END DO
1822 END DO
1823
1824 DO icol_local = 1, ncol_local
1825 DO irow_local = 1, nrow_local
1826 IF (matrix%use_sp) THEN
1827 a_sp(irow_local, icol_local) = c_sp(irow_local, icol_local)
1828 ELSE
1829 a(irow_local, icol_local) = c(irow_local, icol_local)
1830 END IF
1831 END DO
1832 END DO
1833
1834 IF (matrix%use_sp) THEN
1835 CALL pstran(nrow_global, ncol_global, 1.0_sp, a_sp(1, 1), 1, 1, desca, 1.0_sp, c_sp(1, 1), 1, 1, descc)
1836 ELSE
1837 CALL pdtran(nrow_global, ncol_global, 1.0_dp, a(1, 1), 1, 1, desca, 1.0_dp, c(1, 1), 1, 1, descc)
1838 END IF
1839
1840#else
1841
1842 a => matrix%local_data
1843 a_sp => matrix%local_data_sp
1844
1845 IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
1846 DO irow_global = 1, nrow_global
1847 DO icol_global = irow_global + 1, ncol_global
1848 IF (matrix%use_sp) THEN
1849 a_sp(icol_global, irow_global) = a_sp(irow_global, icol_global)
1850 ELSE
1851 a(icol_global, irow_global) = a(irow_global, icol_global)
1852 END IF
1853 END DO
1854 END DO
1855 ELSE
1856 DO icol_global = 1, ncol_global
1857 DO irow_global = icol_global + 1, nrow_global
1858 IF (matrix%use_sp) THEN
1859 a_sp(irow_global, icol_global) = a_sp(icol_global, irow_global)
1860 ELSE
1861 a(irow_global, icol_global) = a(icol_global, irow_global)
1862 END IF
1863 END DO
1864 END DO
1865 END IF
1866
1867#endif
1868 CALL timestop(handle)
1869
1870 END SUBROUTINE cp_fm_uplo_to_full
1871
1872! **************************************************************************************************
1873!> \brief scales column i of matrix a with scaling(i)
1874!> \param matrixa ...
1875!> \param scaling : an array used for scaling the columns,
1876!> SIZE(scaling) determines the number of columns to be scaled
1877!> \author Joost VandeVondele
1878!> \note
1879!> this is very useful as a first step in the computation of C = sum_i alpha_i A_i transpose (A_i)
1880!> that is a rank-k update (cp_fm_syrk , cp_sm_plus_fm_fm_t)
1881!> this procedure can be up to 20 times faster than calling cp_fm_syrk n times
1882!> where every vector has a different prefactor
1883! **************************************************************************************************
1884 SUBROUTINE cp_fm_column_scale(matrixa, scaling)
1885 TYPE(cp_fm_type), INTENT(IN) :: matrixa
1886 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: scaling
1887
1888 INTEGER :: k, mypcol, myprow, n, ncol_global, &
1889 npcol, nprow
1890 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1891 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
1892#if defined(__parallel)
1893 INTEGER :: icol_global, icol_local, &
1894 ipcol, iprow, irow_local
1895#else
1896 INTEGER :: i
1897#endif
1898
1899 myprow = matrixa%matrix_struct%context%mepos(1)
1900 mypcol = matrixa%matrix_struct%context%mepos(2)
1901 nprow = matrixa%matrix_struct%context%num_pe(1)
1902 npcol = matrixa%matrix_struct%context%num_pe(2)
1903
1904 ncol_global = matrixa%matrix_struct%ncol_global
1905
1906 a => matrixa%local_data
1907 a_sp => matrixa%local_data_sp
1908 IF (matrixa%use_sp) THEN
1909 n = SIZE(a_sp, 1)
1910 ELSE
1911 n = SIZE(a, 1)
1912 END IF
1913 k = min(SIZE(scaling), ncol_global)
1914
1915#if defined(__parallel)
1916
1917 DO icol_global = 1, k
1918 CALL infog2l(1, icol_global, matrixa%matrix_struct%descriptor, &
1919 nprow, npcol, myprow, mypcol, &
1920 irow_local, icol_local, iprow, ipcol)
1921 IF ((ipcol == mypcol)) THEN
1922 IF (matrixa%use_sp) THEN
1923 CALL sscal(n, real(scaling(icol_global), sp), a_sp(:, icol_local), 1)
1924 ELSE
1925 CALL dscal(n, scaling(icol_global), a(:, icol_local), 1)
1926 END IF
1927 END IF
1928 END DO
1929#else
1930 DO i = 1, k
1931 IF (matrixa%use_sp) THEN
1932 CALL sscal(n, real(scaling(i), sp), a_sp(:, i), 1)
1933 ELSE
1934 CALL dscal(n, scaling(i), a(:, i), 1)
1935 END IF
1936 END DO
1937#endif
1938 END SUBROUTINE cp_fm_column_scale
1939
1940! **************************************************************************************************
1941!> \brief scales row i of matrix a with scaling(i)
1942!> \param matrixa ...
1943!> \param scaling : an array used for scaling the columns,
1944!> \author JGH
1945!> \note
1946! **************************************************************************************************
1947 SUBROUTINE cp_fm_row_scale(matrixa, scaling)
1948 TYPE(cp_fm_type), INTENT(IN) :: matrixa
1949 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: scaling
1950
1951 INTEGER :: n, m, nrow_global, nrow_local, ncol_local
1952 INTEGER, DIMENSION(:), POINTER :: row_indices
1953 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1954 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
1955#if defined(__parallel)
1956 INTEGER :: irow_global, icol, irow
1957#else
1958 INTEGER :: j
1959#endif
1960
1961 CALL cp_fm_get_info(matrixa, row_indices=row_indices, nrow_global=nrow_global, &
1962 nrow_local=nrow_local, ncol_local=ncol_local)
1963 cpassert(SIZE(scaling) == nrow_global)
1964
1965 a => matrixa%local_data
1966 a_sp => matrixa%local_data_sp
1967 IF (matrixa%use_sp) THEN
1968 n = SIZE(a_sp, 1)
1969 m = SIZE(a_sp, 2)
1970 ELSE
1971 n = SIZE(a, 1)
1972 m = SIZE(a, 2)
1973 END IF
1974
1975#if defined(__parallel)
1976 DO icol = 1, ncol_local
1977 IF (matrixa%use_sp) THEN
1978 DO irow = 1, nrow_local
1979 irow_global = row_indices(irow)
1980 a(irow, icol) = real(scaling(irow_global), dp)*a(irow, icol)
1981 END DO
1982 ELSE
1983 DO irow = 1, nrow_local
1984 irow_global = row_indices(irow)
1985 a(irow, icol) = scaling(irow_global)*a(irow, icol)
1986 END DO
1987 END IF
1988 END DO
1989#else
1990 IF (matrixa%use_sp) THEN
1991 DO j = 1, m
1992 a_sp(1:n, j) = real(scaling(1:n), sp)*a_sp(1:n, j)
1993 END DO
1994 ELSE
1995 DO j = 1, m
1996 a(1:n, j) = scaling(1:n)*a(1:n, j)
1997 END DO
1998 END IF
1999#endif
2000 END SUBROUTINE cp_fm_row_scale
2001
2002! **************************************************************************************************
2003!> \brief Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix
2004!> \param matrix_a the matrix to invert
2005!> \param matrix_inverse the inverse of matrix_a
2006!> \param det_a the determinant of matrix_a
2007!> \param eps_svd optional parameter to active SVD based inversion, singular values below eps_svd
2008!> are screened
2009!> \param eigval optionally return matrix eigenvalues/singular values
2010!> \par History
2011!> note of Jan Wilhelm (12.2015)
2012!> - computation of determinant corrected
2013!> - determinant only computed if det_a is present
2014!> 12.2016 added option to use SVD instead of LU [Nico Holmberg]
2015!> - Use cp_fm_get diag instead of n times cp_fm_get_element (A. Bussy)
2016!> \author Florian Schiffmann(02.2007)
2017! **************************************************************************************************
2018 SUBROUTINE cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
2019
2020 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_inverse
2021 REAL(kind=dp), INTENT(OUT), OPTIONAL :: det_a
2022 REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_svd
2023 REAL(kind=dp), DIMENSION(:), POINTER, &
2024 INTENT(INOUT), OPTIONAL :: eigval
2025
2026 INTEGER :: n
2027 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
2028 REAL(kind=dp) :: determinant, my_eps_svd
2029 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2030 TYPE(cp_fm_type) :: matrix_lu
2031
2032#if defined(__parallel)
2033 TYPE(cp_fm_type) :: u, vt, sigma, inv_sigma_ut
2034 TYPE(mp_comm_type) :: group
2035 INTEGER :: i, info, liwork, lwork, exponent_of_minus_one
2036 INTEGER, DIMENSION(9) :: desca
2037 LOGICAL :: quenched
2038 REAL(kind=dp) :: alpha, beta
2039 REAL(kind=dp), DIMENSION(:), POINTER :: diag
2040 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: work
2041#else
2042 LOGICAL :: sign
2043 REAL(kind=dp) :: eps1
2044#endif
2045
2046 my_eps_svd = 0.0_dp
2047 IF (PRESENT(eps_svd)) my_eps_svd = eps_svd
2048
2049 CALL cp_fm_create(matrix=matrix_lu, &
2050 matrix_struct=matrix_a%matrix_struct, &
2051 name="A_lu"//trim(adjustl(cp_to_string(1)))//"MATRIX")
2052 CALL cp_fm_to_fm(matrix_a, matrix_lu)
2053
2054 a => matrix_lu%local_data
2055 n = matrix_lu%matrix_struct%nrow_global
2056 ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
2057 ipivot(:) = 0
2058#if defined(__parallel)
2059 IF (my_eps_svd .EQ. 0.0_dp) THEN
2060 ! Use LU decomposition
2061 lwork = 3*n
2062 liwork = 3*n
2063 desca(:) = matrix_lu%matrix_struct%descriptor(:)
2064 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
2065
2066 IF (PRESENT(det_a) .OR. PRESENT(eigval)) THEN
2067
2068 ALLOCATE (diag(n))
2069 diag(:) = 0.0_dp
2070 CALL cp_fm_get_diag(matrix_lu, diag)
2071
2072 exponent_of_minus_one = 0
2073 determinant = 1.0_dp
2074 DO i = 1, n
2075 determinant = determinant*diag(i)
2076 IF (ipivot(i) .NE. i) THEN
2077 exponent_of_minus_one = exponent_of_minus_one + 1
2078 END IF
2079 END DO
2080 IF (PRESENT(eigval)) THEN
2081 cpassert(.NOT. ASSOCIATED(eigval))
2082 ALLOCATE (eigval(n))
2083 eigval(:) = diag
2084 END IF
2085 DEALLOCATE (diag)
2086
2087 group = matrix_lu%matrix_struct%para_env
2088 CALL group%sum(exponent_of_minus_one)
2089
2090 determinant = determinant*(-1.0_dp)**exponent_of_minus_one
2091
2092 END IF
2093
2094 alpha = 0.0_dp
2095 beta = 1.0_dp
2096 CALL cp_fm_set_all(matrix_inverse, alpha, beta)
2097 CALL pdgetrs('N', n, n, matrix_lu%local_data, 1, 1, desca, ipivot, matrix_inverse%local_data, 1, 1, desca, info)
2098 ELSE
2099 ! Use singular value decomposition
2100 CALL cp_fm_create(matrix=u, &
2101 matrix_struct=matrix_a%matrix_struct, &
2102 name="LEFT_SINGULAR_MATRIX")
2103 CALL cp_fm_set_all(u, alpha=0.0_dp)
2104 CALL cp_fm_create(matrix=vt, &
2105 matrix_struct=matrix_a%matrix_struct, &
2106 name="RIGHT_SINGULAR_MATRIX")
2107 CALL cp_fm_set_all(vt, alpha=0.0_dp)
2108 ALLOCATE (diag(n))
2109 diag(:) = 0.0_dp
2110 desca(:) = matrix_lu%matrix_struct%descriptor(:)
2111 ALLOCATE (work(1))
2112 ! Workspace query
2113 lwork = -1
2114 CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
2115 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
2116 lwork = int(work(1))
2117 DEALLOCATE (work)
2118 ALLOCATE (work(lwork))
2119 ! SVD
2120 CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
2121 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
2122 ! info == n+1 implies homogeneity error when the number of procs is large
2123 ! this likely isnt a problem, but maybe we should handle it separately
2124 IF (info /= 0 .AND. info /= n + 1) &
2125 cpabort("Singular value decomposition of matrix failed.")
2126 ! (Pseudo)inverse and (pseudo)determinant
2127 CALL cp_fm_create(matrix=sigma, &
2128 matrix_struct=matrix_a%matrix_struct, &
2129 name="SINGULAR_VALUE_MATRIX")
2130 CALL cp_fm_set_all(sigma, alpha=0.0_dp)
2131 determinant = 1.0_dp
2132 quenched = .false.
2133 IF (PRESENT(eigval)) THEN
2134 cpassert(.NOT. ASSOCIATED(eigval))
2135 ALLOCATE (eigval(n))
2136 eigval(:) = diag
2137 END IF
2138 DO i = 1, n
2139 IF (diag(i) < my_eps_svd) THEN
2140 diag(i) = 0.0_dp
2141 quenched = .true.
2142 ELSE
2143 determinant = determinant*diag(i)
2144 diag(i) = 1.0_dp/diag(i)
2145 END IF
2146 CALL cp_fm_set_element(sigma, i, i, diag(i))
2147 END DO
2148 DEALLOCATE (diag)
2149 IF (quenched) &
2150 CALL cp_warn(__location__, &
2151 "Linear dependencies were detected in the SVD inversion of matrix "//trim(adjustl(matrix_a%name))// &
2152 ". At least one singular value has been quenched.")
2153 ! Sigma^-1 * U^T
2154 CALL cp_fm_create(matrix=inv_sigma_ut, &
2155 matrix_struct=matrix_a%matrix_struct, &
2156 name="SINGULAR_VALUE_MATRIX")
2157 CALL cp_fm_set_all(inv_sigma_ut, alpha=0.0_dp)
2158 CALL pdgemm('N', 'T', n, n, n, 1.0_dp, sigma%local_data, 1, 1, desca, &
2159 u%local_data, 1, 1, desca, 0.0_dp, inv_sigma_ut%local_data, 1, 1, desca)
2160 ! A^-1 = V * (Sigma^-1 * U^T)
2161 CALL cp_fm_set_all(matrix_inverse, alpha=0.0_dp)
2162 CALL pdgemm('T', 'N', n, n, n, 1.0_dp, vt%local_data, 1, 1, desca, &
2163 inv_sigma_ut%local_data, 1, 1, desca, 0.0_dp, matrix_inverse%local_data, 1, 1, desca)
2164 ! Clean up
2165 DEALLOCATE (work)
2166 CALL cp_fm_release(u)
2167 CALL cp_fm_release(vt)
2168 CALL cp_fm_release(sigma)
2169 CALL cp_fm_release(inv_sigma_ut)
2170 END IF
2171#else
2172 IF (my_eps_svd .EQ. 0.0_dp) THEN
2173 sign = .true.
2174 CALL invert_matrix(matrix_a%local_data, matrix_inverse%local_data, &
2175 eval_error=eps1)
2176 CALL cp_fm_lu_decompose(matrix_lu, determinant, correct_sign=sign)
2177 IF (PRESENT(eigval)) &
2178 CALL cp_abort(__location__, &
2179 "NYI. Eigenvalues not available for return without SCALAPACK.")
2180 ELSE
2181 CALL get_pseudo_inverse_svd(matrix_a%local_data, matrix_inverse%local_data, eps_svd, &
2182 determinant, eigval)
2183 END IF
2184#endif
2185 CALL cp_fm_release(matrix_lu)
2186 DEALLOCATE (ipivot)
2187 IF (PRESENT(det_a)) det_a = determinant
2188 END SUBROUTINE cp_fm_invert
2189
2190! **************************************************************************************************
2191!> \brief inverts a triangular matrix
2192!> \param matrix_a ...
2193!> \param uplo_tr triangular format; defaults to 'U'
2194!> \author MI
2195! **************************************************************************************************
2196 SUBROUTINE cp_fm_triangular_invert(matrix_a, uplo_tr)
2197
2198 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
2199 CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
2200
2201 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_triangular_invert'
2202
2203 CHARACTER :: unit_diag, uplo
2204 INTEGER :: handle, info, ncol_global
2205 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2206#if defined(__parallel)
2207 INTEGER, DIMENSION(9) :: desca
2208#endif
2209
2210 CALL timeset(routinen, handle)
2211
2212 unit_diag = 'N'
2213 uplo = 'U'
2214 IF (PRESENT(uplo_tr)) uplo = uplo_tr
2215
2216 ncol_global = matrix_a%matrix_struct%ncol_global
2217
2218 a => matrix_a%local_data
2219
2220#if defined(__parallel)
2221 desca(:) = matrix_a%matrix_struct%descriptor(:)
2222
2223 CALL pdtrtri(uplo, unit_diag, ncol_global, a(1, 1), 1, 1, desca, info)
2224
2225#else
2226 CALL dtrtri(uplo, unit_diag, ncol_global, a(1, 1), ncol_global, info)
2227#endif
2228
2229 CALL timestop(handle)
2230 END SUBROUTINE cp_fm_triangular_invert
2231
2232! **************************************************************************************************
2233!> \brief performs a QR factorization of the input rectangular matrix A or of a submatrix of A
2234!> the computed triangular matrix R is in output of the submatrix sub(A) of size NxN
2235!> M and M give the dimension of the submatrix that has to be factorized (MxN) with M>N
2236!> \param matrix_a ...
2237!> \param matrix_r ...
2238!> \param nrow_fact ...
2239!> \param ncol_fact ...
2240!> \param first_row ...
2241!> \param first_col ...
2242!> \author MI
2243! **************************************************************************************************
2244 SUBROUTINE cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col, uplo)
2245 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_r
2246 INTEGER, INTENT(IN), OPTIONAL :: nrow_fact, ncol_fact, &
2247 first_row, first_col
2248 CHARACTER, INTENT(IN), OPTIONAL :: uplo
2249
2250 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_qr_factorization'
2251
2252 CHARACTER :: myuplo
2253 INTEGER :: handle, i, icol, info, irow, &
2254 j, lda, lwork, ncol, &
2255 ndim, nrow
2256 REAL(dp), ALLOCATABLE, DIMENSION(:) :: tau, work
2257 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: r_mat
2258 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2259#if defined(__parallel)
2260 INTEGER, DIMENSION(9) :: desca
2261#endif
2262
2263 CALL timeset(routinen, handle)
2264
2265 myuplo = 'U'
2266 IF (PRESENT(uplo)) myuplo = uplo
2267
2268 ncol = matrix_a%matrix_struct%ncol_global
2269 nrow = matrix_a%matrix_struct%nrow_global
2270 lda = nrow
2271
2272 a => matrix_a%local_data
2273
2274 IF (PRESENT(nrow_fact)) nrow = nrow_fact
2275 IF (PRESENT(ncol_fact)) ncol = ncol_fact
2276 irow = 1
2277 IF (PRESENT(first_row)) irow = first_row
2278 icol = 1
2279 IF (PRESENT(first_col)) icol = first_col
2280
2281 cpassert(nrow >= ncol)
2282 ndim = SIZE(a, 2)
2283 ALLOCATE (tau(ndim))
2284
2285#if defined(__parallel)
2286
2287 desca(:) = matrix_a%matrix_struct%descriptor(:)
2288
2289 lwork = -1
2290 ALLOCATE (work(2*ndim))
2291 CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
2292 lwork = int(work(1))
2293 DEALLOCATE (work)
2294 ALLOCATE (work(lwork))
2295 CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
2296
2297#else
2298 lwork = -1
2299 ALLOCATE (work(2*ndim))
2300 CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
2301 lwork = int(work(1))
2302 DEALLOCATE (work)
2303 ALLOCATE (work(lwork))
2304 CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
2305
2306#endif
2307
2308 ALLOCATE (r_mat(ncol, ncol))
2309 CALL cp_fm_get_submatrix(matrix_a, r_mat, 1, 1, ncol, ncol)
2310 IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
2311 DO i = 1, ncol
2312 DO j = i + 1, ncol
2313 r_mat(j, i) = 0.0_dp
2314 END DO
2315 END DO
2316 ELSE
2317 DO j = 1, ncol
2318 DO i = j + 1, ncol
2319 r_mat(i, j) = 0.0_dp
2320 END DO
2321 END DO
2322 END IF
2323 CALL cp_fm_set_submatrix(matrix_r, r_mat, 1, 1, ncol, ncol)
2324
2325 DEALLOCATE (tau, work, r_mat)
2326
2327 CALL timestop(handle)
2328
2329 END SUBROUTINE cp_fm_qr_factorization
2330
2331! **************************************************************************************************
2332!> \brief computes the the solution to A*b=A_general using lu decomposition
2333!> \param matrix_a input matrix; will be overwritten
2334!> \param general_a contains the result
2335!> \author Florian Schiffmann
2336! **************************************************************************************************
2337 SUBROUTINE cp_fm_solve(matrix_a, general_a)
2338 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, general_a
2339
2340 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_solve'
2341
2342 INTEGER :: handle, info, n, nrhs
2343 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
2344 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, a_general
2345#if defined(__parallel)
2346 INTEGER, DIMENSION(9) :: desca, descb
2347#else
2348 INTEGER :: lda, ldb
2349#endif
2350
2351 CALL timeset(routinen, handle)
2352
2353 a => matrix_a%local_data
2354 a_general => general_a%local_data
2355 n = matrix_a%matrix_struct%nrow_global
2356 nrhs = general_a%matrix_struct%ncol_global
2357 ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
2358
2359#if defined(__parallel)
2360 desca(:) = matrix_a%matrix_struct%descriptor(:)
2361 descb(:) = general_a%matrix_struct%descriptor(:)
2362 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
2363 CALL pdgetrs("N", n, nrhs, a, 1, 1, desca, ipivot, a_general, &
2364 1, 1, descb, info)
2365
2366#else
2367 lda = SIZE(a, 1)
2368 ldb = SIZE(a_general, 1)
2369 CALL dgetrf(n, n, a, lda, ipivot, info)
2370 CALL dgetrs("N", n, nrhs, a, lda, ipivot, a_general, ldb, info)
2371
2372#endif
2373 ! info is allowed to be zero
2374 ! this does just signal a zero diagonal element
2375 DEALLOCATE (ipivot)
2376 CALL timestop(handle)
2377 END SUBROUTINE
2378
2379! **************************************************************************************************
2380!> \brief Convenience function. Computes the matrix multiplications needed
2381!> for the multiplication of complex matrices.
2382!> C = beta * C + alpha * ( A ** transa ) * ( B ** transb )
2383!> \param transa : 'N' -> normal 'T' -> transpose
2384!> alpha,beta :: can be 0.0_dp and 1.0_dp
2385!> \param transb ...
2386!> \param m ...
2387!> \param n ...
2388!> \param k ...
2389!> \param alpha ...
2390!> \param A_re m x k matrix ( ! for transa = 'N'), real part
2391!> \param A_im m x k matrix ( ! for transa = 'N'), imaginary part
2392!> \param B_re k x n matrix ( ! for transa = 'N'), real part
2393!> \param B_im k x n matrix ( ! for transa = 'N'), imaginary part
2394!> \param beta ...
2395!> \param C_re m x n matrix, real part
2396!> \param C_im m x n matrix, imaginary part
2397!> \param a_first_col ...
2398!> \param a_first_row ...
2399!> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
2400!> \param b_first_row ...
2401!> \param c_first_col ...
2402!> \param c_first_row ...
2403!> \author Samuel Andermatt
2404!> \note
2405!> C should have no overlap with A, B
2406! **************************************************************************************************
2407 SUBROUTINE cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, &
2408 C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, &
2409 c_first_row)
2410 CHARACTER(LEN=1), INTENT(IN) :: transa, transb
2411 INTEGER, INTENT(IN) :: m, n, k
2412 REAL(kind=dp), INTENT(IN) :: alpha
2413 TYPE(cp_fm_type), INTENT(IN) :: a_re, a_im, b_re, b_im
2414 REAL(kind=dp), INTENT(IN) :: beta
2415 TYPE(cp_fm_type), INTENT(IN) :: c_re, c_im
2416 INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, b_first_col, &
2417 b_first_row, c_first_col, c_first_row
2418
2419 CHARACTER(len=*), PARAMETER :: routinen = 'cp_complex_fm_gemm'
2420
2421 INTEGER :: handle
2422
2423 CALL timeset(routinen, handle)
2424
2425 CALL cp_fm_gemm(transa, transb, m, n, k, alpha, a_re, b_re, beta, c_re, &
2426 a_first_col=a_first_col, &
2427 a_first_row=a_first_row, &
2428 b_first_col=b_first_col, &
2429 b_first_row=b_first_row, &
2430 c_first_col=c_first_col, &
2431 c_first_row=c_first_row)
2432 CALL cp_fm_gemm(transa, transb, m, n, k, -alpha, a_im, b_im, 1.0_dp, c_re, &
2433 a_first_col=a_first_col, &
2434 a_first_row=a_first_row, &
2435 b_first_col=b_first_col, &
2436 b_first_row=b_first_row, &
2437 c_first_col=c_first_col, &
2438 c_first_row=c_first_row)
2439 CALL cp_fm_gemm(transa, transb, m, n, k, alpha, a_re, b_im, beta, c_im, &
2440 a_first_col=a_first_col, &
2441 a_first_row=a_first_row, &
2442 b_first_col=b_first_col, &
2443 b_first_row=b_first_row, &
2444 c_first_col=c_first_col, &
2445 c_first_row=c_first_row)
2446 CALL cp_fm_gemm(transa, transb, m, n, k, alpha, a_im, b_re, 1.0_dp, c_im, &
2447 a_first_col=a_first_col, &
2448 a_first_row=a_first_row, &
2449 b_first_col=b_first_col, &
2450 b_first_row=b_first_row, &
2451 c_first_col=c_first_col, &
2452 c_first_row=c_first_row)
2453
2454 CALL timestop(handle)
2455
2456 END SUBROUTINE cp_complex_fm_gemm
2457
2458! **************************************************************************************************
2459!> \brief inverts a matrix using LU decomposition
2460!> the input matrix will be overwritten
2461!> \param matrix : input a general square non-singular matrix, outputs its inverse
2462!> \param info_out : optional, if present outputs the info from (p)zgetri
2463!> \author Lianheng Tong
2464! **************************************************************************************************
2465 SUBROUTINE cp_fm_lu_invert(matrix, info_out)
2466 TYPE(cp_fm_type), INTENT(IN) :: matrix
2467 INTEGER, INTENT(OUT), OPTIONAL :: info_out
2468
2469 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_lu_invert'
2470
2471 INTEGER :: nrows_global, handle, info, lwork
2472 INTEGER, DIMENSION(:), ALLOCATABLE :: ipivot
2473 REAL(kind=dp), DIMENSION(:, :), POINTER :: mat
2474 REAL(kind=sp), DIMENSION(:, :), POINTER :: mat_sp
2475 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: work
2476 REAL(kind=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2477#if defined(__parallel)
2478 INTEGER :: liwork
2479 INTEGER, DIMENSION(9) :: desca
2480 INTEGER, DIMENSION(:), ALLOCATABLE :: iwork
2481#else
2482 INTEGER :: lda
2483#endif
2484
2485 CALL timeset(routinen, handle)
2486
2487 mat => matrix%local_data
2488 mat_sp => matrix%local_data_sp
2489 nrows_global = matrix%matrix_struct%nrow_global
2490 cpassert(nrows_global .EQ. matrix%matrix_struct%ncol_global)
2491 ALLOCATE (ipivot(nrows_global))
2492 ! do LU decomposition
2493#if defined(__parallel)
2494 desca = matrix%matrix_struct%descriptor
2495 IF (matrix%use_sp) THEN
2496 CALL psgetrf(nrows_global, nrows_global, &
2497 mat_sp, 1, 1, desca, ipivot, info)
2498 ELSE
2499 CALL pdgetrf(nrows_global, nrows_global, &
2500 mat, 1, 1, desca, ipivot, info)
2501 END IF
2502#else
2503 lda = SIZE(mat, 1)
2504 IF (matrix%use_sp) THEN
2505 CALL sgetrf(nrows_global, nrows_global, &
2506 mat_sp, lda, ipivot, info)
2507 ELSE
2508 CALL dgetrf(nrows_global, nrows_global, &
2509 mat, lda, ipivot, info)
2510 END IF
2511#endif
2512 IF (info /= 0) THEN
2513 CALL cp_abort(__location__, "LU decomposition has failed")
2514 END IF
2515 ! do inversion
2516 IF (matrix%use_sp) THEN
2517 ALLOCATE (work(1))
2518 ELSE
2519 ALLOCATE (work_sp(1))
2520 END IF
2521#if defined(__parallel)
2522 ALLOCATE (iwork(1))
2523 IF (matrix%use_sp) THEN
2524 CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2525 ipivot, work_sp, -1, iwork, -1, info)
2526 lwork = int(work_sp(1))
2527 DEALLOCATE (work_sp)
2528 ALLOCATE (work_sp(lwork))
2529 ELSE
2530 CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2531 ipivot, work, -1, iwork, -1, info)
2532 lwork = int(work(1))
2533 DEALLOCATE (work)
2534 ALLOCATE (work(lwork))
2535 END IF
2536 liwork = int(iwork(1))
2537 DEALLOCATE (iwork)
2538 ALLOCATE (iwork(liwork))
2539 IF (matrix%use_sp) THEN
2540 CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2541 ipivot, work_sp, lwork, iwork, liwork, info)
2542 ELSE
2543 CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2544 ipivot, work, lwork, iwork, liwork, info)
2545 END IF
2546 DEALLOCATE (iwork)
2547#else
2548 IF (matrix%use_sp) THEN
2549 CALL sgetri(nrows_global, mat_sp, lda, &
2550 ipivot, work_sp, -1, info)
2551 lwork = int(work_sp(1))
2552 DEALLOCATE (work_sp)
2553 ALLOCATE (work_sp(lwork))
2554 CALL sgetri(nrows_global, mat_sp, lda, &
2555 ipivot, work_sp, lwork, info)
2556 ELSE
2557 CALL dgetri(nrows_global, mat, lda, &
2558 ipivot, work, -1, info)
2559 lwork = int(work(1))
2560 DEALLOCATE (work)
2561 ALLOCATE (work(lwork))
2562 CALL dgetri(nrows_global, mat, lda, &
2563 ipivot, work, lwork, info)
2564 END IF
2565#endif
2566 IF (matrix%use_sp) THEN
2567 DEALLOCATE (work_sp)
2568 ELSE
2569 DEALLOCATE (work)
2570 END IF
2571 DEALLOCATE (ipivot)
2572
2573 IF (PRESENT(info_out)) THEN
2574 info_out = info
2575 ELSE
2576 IF (info /= 0) &
2577 CALL cp_abort(__location__, "LU inversion has failed")
2578 END IF
2579
2580 CALL timestop(handle)
2581
2582 END SUBROUTINE cp_fm_lu_invert
2583
2584! **************************************************************************************************
2585!> \brief norm of matrix using (p)dlange
2586!> \param matrix : input a general matrix
2587!> \param mode : 'M' max abs element value,
2588!> '1' or 'O' one norm, i.e. maximum column sum
2589!> 'I' infinity norm, i.e. maximum row sum
2590!> 'F' or 'E' Frobenius norm, i.e. sqrt of sum of all squares of elements
2591!> \return : the norm according to mode
2592!> \author Lianheng Tong
2593! **************************************************************************************************
2594 FUNCTION cp_fm_norm(matrix, mode) RESULT(res)
2595 TYPE(cp_fm_type), INTENT(IN) :: matrix
2596 CHARACTER, INTENT(IN) :: mode
2597 REAL(kind=dp) :: res
2598
2599 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_norm'
2600
2601 INTEGER :: nrows, ncols, handle, lwork, nrows_local, ncols_local
2602 REAL(kind=sp) :: res_sp
2603 REAL(kind=dp), DIMENSION(:, :), POINTER :: aa
2604 REAL(kind=sp), DIMENSION(:, :), POINTER :: aa_sp
2605 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: work
2606 REAL(kind=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2607#if defined(__parallel)
2608 INTEGER, DIMENSION(9) :: desca
2609#else
2610 INTEGER :: lda
2611#endif
2612
2613 CALL timeset(routinen, handle)
2614
2615 CALL cp_fm_get_info(matrix=matrix, &
2616 nrow_global=nrows, &
2617 ncol_global=ncols, &
2618 nrow_local=nrows_local, &
2619 ncol_local=ncols_local)
2620 aa => matrix%local_data
2621 aa_sp => matrix%local_data_sp
2622
2623#if defined(__parallel)
2624 desca = matrix%matrix_struct%descriptor
2625 SELECT CASE (mode)
2626 CASE ('M', 'm')
2627 lwork = 1
2628 CASE ('1', 'O', 'o')
2629 lwork = ncols_local
2630 CASE ('I', 'i')
2631 lwork = nrows_local
2632 CASE ('F', 'f', 'E', 'e')
2633 lwork = 1
2634 CASE DEFAULT
2635 cpabort("mode input is not valid")
2636 END SELECT
2637 IF (matrix%use_sp) THEN
2638 ALLOCATE (work_sp(lwork))
2639 res_sp = pslange(mode, nrows, ncols, aa_sp, 1, 1, desca, work_sp)
2640 DEALLOCATE (work_sp)
2641 res = real(res_sp, kind=dp)
2642 ELSE
2643 ALLOCATE (work(lwork))
2644 res = pdlange(mode, nrows, ncols, aa, 1, 1, desca, work)
2645 DEALLOCATE (work)
2646 END IF
2647#else
2648 SELECT CASE (mode)
2649 CASE ('M', 'm')
2650 lwork = 1
2651 CASE ('1', 'O', 'o')
2652 lwork = 1
2653 CASE ('I', 'i')
2654 lwork = nrows
2655 CASE ('F', 'f', 'E', 'e')
2656 lwork = 1
2657 CASE DEFAULT
2658 cpabort("mode input is not valid")
2659 END SELECT
2660 IF (matrix%use_sp) THEN
2661 ALLOCATE (work_sp(lwork))
2662 lda = SIZE(aa_sp, 1)
2663 res_sp = slange(mode, nrows, ncols, aa_sp, lda, work_sp)
2664 DEALLOCATE (work_sp)
2665 res = real(res_sp, kind=dp)
2666 ELSE
2667 ALLOCATE (work(lwork))
2668 lda = SIZE(aa, 1)
2669 res = dlange(mode, nrows, ncols, aa, lda, work)
2670 DEALLOCATE (work)
2671 END IF
2672#endif
2673
2674 CALL timestop(handle)
2675
2676 END FUNCTION cp_fm_norm
2677
2678! **************************************************************************************************
2679!> \brief trace of a matrix using pdlatra
2680!> \param matrix : input a square matrix
2681!> \return : the trace
2682!> \author Lianheng Tong
2683! **************************************************************************************************
2684 FUNCTION cp_fm_latra(matrix) RESULT(res)
2685 TYPE(cp_fm_type), INTENT(IN) :: matrix
2686 REAL(kind=dp) :: res
2687
2688 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_latra'
2689
2690 INTEGER :: nrows, ncols, handle
2691 REAL(kind=sp) :: res_sp
2692 REAL(kind=dp), DIMENSION(:, :), POINTER :: aa
2693 REAL(kind=sp), DIMENSION(:, :), POINTER :: aa_sp
2694#if defined(__parallel)
2695 INTEGER, DIMENSION(9) :: desca
2696#else
2697 INTEGER :: ii
2698#endif
2699
2700 CALL timeset(routinen, handle)
2701
2702 nrows = matrix%matrix_struct%nrow_global
2703 ncols = matrix%matrix_struct%ncol_global
2704 cpassert(nrows .EQ. ncols)
2705 aa => matrix%local_data
2706 aa_sp => matrix%local_data_sp
2707
2708#if defined(__parallel)
2709 desca = matrix%matrix_struct%descriptor
2710 IF (matrix%use_sp) THEN
2711 res_sp = pslatra(nrows, aa_sp, 1, 1, desca)
2712 res = real(res_sp, kind=dp)
2713 ELSE
2714 res = pdlatra(nrows, aa, 1, 1, desca)
2715 END IF
2716#else
2717 IF (matrix%use_sp) THEN
2718 res_sp = 0.0_sp
2719 DO ii = 1, nrows
2720 res_sp = res_sp + aa_sp(ii, ii)
2721 END DO
2722 res = real(res_sp, kind=dp)
2723 ELSE
2724 res = 0.0_dp
2725 DO ii = 1, nrows
2726 res = res + aa(ii, ii)
2727 END DO
2728 END IF
2729#endif
2730
2731 CALL timestop(handle)
2732
2733 END FUNCTION cp_fm_latra
2734
2735! **************************************************************************************************
2736!> \brief compute a QR factorization with column pivoting of a M-by-N distributed matrix
2737!> sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
2738!> \param matrix : input M-by-N distributed matrix sub( A ) which is to be factored
2739!> \param tau : scalar factors TAU of the elementary reflectors. TAU is tied to the distributed matrix A
2740!> \param nrow ...
2741!> \param ncol ...
2742!> \param first_row ...
2743!> \param first_col ...
2744!> \author MI
2745! **************************************************************************************************
2746 SUBROUTINE cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
2747
2748 TYPE(cp_fm_type), INTENT(IN) :: matrix
2749 REAL(kind=dp), DIMENSION(:), POINTER :: tau
2750 INTEGER, INTENT(IN) :: nrow, ncol, first_row, first_col
2751
2752 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_pdgeqpf'
2753
2754 INTEGER :: handle
2755 INTEGER :: info, lwork
2756 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipiv
2757 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2758 REAL(kind=dp), DIMENSION(:), POINTER :: work
2759#if defined(__parallel)
2760 INTEGER, DIMENSION(9) :: descc
2761#else
2762 INTEGER :: lda
2763#endif
2764
2765 CALL timeset(routinen, handle)
2766
2767 a => matrix%local_data
2768 lwork = -1
2769 ALLOCATE (work(2*nrow))
2770 ALLOCATE (ipiv(ncol))
2771 info = 0
2772
2773#if defined(__parallel)
2774 descc(:) = matrix%matrix_struct%descriptor(:)
2775 ! Call SCALAPACK routine to get optimal work dimension
2776 CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2777 lwork = int(work(1))
2778 DEALLOCATE (work)
2779 ALLOCATE (work(lwork))
2780 tau = 0.0_dp
2781 ipiv = 0
2782
2783 ! Call SCALAPACK routine to get QR decomposition of CTs
2784 CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2785#else
2786 cpassert(first_row == 1 .AND. first_col == 1)
2787 lda = SIZE(a, 1)
2788 CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2789 lwork = int(work(1))
2790 DEALLOCATE (work)
2791 ALLOCATE (work(lwork))
2792 tau = 0.0_dp
2793 ipiv = 0
2794 CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2795#endif
2796 cpassert(info == 0)
2797
2798 DEALLOCATE (work)
2799 DEALLOCATE (ipiv)
2800
2801 CALL timestop(handle)
2802
2803 END SUBROUTINE cp_fm_pdgeqpf
2804
2805! **************************************************************************************************
2806!> \brief generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1)
2807!> with orthonormal columns, which is defined as the first N columns of a product of K
2808!> elementary reflectors of order M
2809!> \param matrix : On entry, the j-th column must contain the vector which defines the elementary reflector
2810!> as returned from PDGEQRF
2811!> On exit it contains the M-by-N distributed matrix Q
2812!> \param tau : contains the scalar factors TAU of elementary reflectors as returned by PDGEQRF
2813!> \param nrow ...
2814!> \param first_row ...
2815!> \param first_col ...
2816!> \author MI
2817! **************************************************************************************************
2818 SUBROUTINE cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
2819
2820 TYPE(cp_fm_type), INTENT(IN) :: matrix
2821 REAL(kind=dp), DIMENSION(:), POINTER :: tau
2822 INTEGER, INTENT(IN) :: nrow, first_row, first_col
2823
2824 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_pdorgqr'
2825
2826 INTEGER :: handle
2827 INTEGER :: info, lwork
2828 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2829 REAL(kind=dp), DIMENSION(:), POINTER :: work
2830#if defined(__parallel)
2831 INTEGER, DIMENSION(9) :: descc
2832#else
2833 INTEGER :: lda
2834#endif
2835
2836 CALL timeset(routinen, handle)
2837
2838 a => matrix%local_data
2839 lwork = -1
2840 ALLOCATE (work(2*nrow))
2841 info = 0
2842
2843#if defined(__parallel)
2844 descc(:) = matrix%matrix_struct%descriptor(:)
2845
2846 CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2847 cpassert(info == 0)
2848 lwork = int(work(1))
2849 DEALLOCATE (work)
2850 ALLOCATE (work(lwork))
2851
2852 ! Call SCALAPACK routine to get Q
2853 CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2854#else
2855 cpassert(first_row == 1 .AND. first_col == 1)
2856 lda = SIZE(a, 1)
2857 CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2858 lwork = int(work(1))
2859 DEALLOCATE (work)
2860 ALLOCATE (work(lwork))
2861 CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2862#endif
2863 cpassert(info == 0)
2864
2865 DEALLOCATE (work)
2866 CALL timestop(handle)
2867
2868 END SUBROUTINE cp_fm_pdorgqr
2869
2870! **************************************************************************************************
2871!> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
2872!> \param cs cosine of the rotation angle
2873!> \param sn sinus of the rotation angle
2874!> \param irow ...
2875!> \param jrow ...
2876!> \author Ole Schuett
2877! **************************************************************************************************
2878 SUBROUTINE cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
2879 TYPE(cp_fm_type), INTENT(IN) :: matrix
2880 INTEGER, INTENT(IN) :: irow, jrow
2881 REAL(dp), INTENT(IN) :: cs, sn
2882
2883 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_rot_rows'
2884 INTEGER :: handle, nrow, ncol
2885
2886#if defined(__parallel)
2887 INTEGER :: info, lwork
2888 INTEGER, DIMENSION(9) :: desc
2889 REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2890#endif
2891
2892 CALL timeset(routinen, handle)
2893 CALL cp_fm_get_info(matrix, nrow_global=nrow, ncol_global=ncol)
2894
2895#if defined(__parallel)
2896 lwork = 2*ncol + 1
2897 ALLOCATE (work(lwork))
2898 desc(:) = matrix%matrix_struct%descriptor(:)
2899 CALL pdrot(ncol, &
2900 matrix%local_data(1, 1), irow, 1, desc, ncol, &
2901 matrix%local_data(1, 1), jrow, 1, desc, ncol, &
2902 cs, sn, work, lwork, info)
2903 cpassert(info == 0)
2904 DEALLOCATE (work)
2905#else
2906 CALL drot(ncol, matrix%local_data(irow, 1), ncol, matrix%local_data(jrow, 1), ncol, cs, sn)
2907#endif
2908
2909 CALL timestop(handle)
2910 END SUBROUTINE cp_fm_rot_rows
2911
2912! **************************************************************************************************
2913!> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
2914!> \param cs cosine of the rotation angle
2915!> \param sn sinus of the rotation angle
2916!> \param icol ...
2917!> \param jcol ...
2918!> \author Ole Schuett
2919! **************************************************************************************************
2920 SUBROUTINE cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
2921 TYPE(cp_fm_type), INTENT(IN) :: matrix
2922 INTEGER, INTENT(IN) :: icol, jcol
2923 REAL(dp), INTENT(IN) :: cs, sn
2924
2925 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_rot_cols'
2926 INTEGER :: handle, nrow, ncol
2927
2928#if defined(__parallel)
2929 INTEGER :: info, lwork
2930 INTEGER, DIMENSION(9) :: desc
2931 REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2932#endif
2933
2934 CALL timeset(routinen, handle)
2935 CALL cp_fm_get_info(matrix, nrow_global=nrow, ncol_global=ncol)
2936
2937#if defined(__parallel)
2938 lwork = 2*nrow + 1
2939 ALLOCATE (work(lwork))
2940 desc(:) = matrix%matrix_struct%descriptor(:)
2941 CALL pdrot(nrow, &
2942 matrix%local_data(1, 1), 1, icol, desc, 1, &
2943 matrix%local_data(1, 1), 1, jcol, desc, 1, &
2944 cs, sn, work, lwork, info)
2945 cpassert(info == 0)
2946 DEALLOCATE (work)
2947#else
2948 CALL drot(nrow, matrix%local_data(1, icol), 1, matrix%local_data(1, jcol), 1, cs, sn)
2949#endif
2950
2951 CALL timestop(handle)
2952 END SUBROUTINE cp_fm_rot_cols
2953
2954! **************************************************************************************************
2955!> \brief Orthonormalizes selected rows and columns of a full matrix, matrix_a
2956!> \param matrix_a ...
2957!> \param B ...
2958!> \param nrows number of rows of matrix_a, optional, defaults to size(matrix_a,1)
2959!> \param ncols number of columns of matrix_a, optional, defaults to size(matrix_a, 2)
2960!> \param start_row starting index of rows, optional, defaults to 1
2961!> \param start_col starting index of columns, optional, defaults to 1
2962!> \param do_norm ...
2963!> \param do_print ...
2964! **************************************************************************************************
2965 SUBROUTINE cp_fm_gram_schmidt_orthonorm(matrix_a, B, nrows, ncols, start_row, start_col, &
2966 do_norm, do_print)
2967
2968 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
2969 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: b
2970 INTEGER, INTENT(IN), OPTIONAL :: nrows, ncols, start_row, start_col
2971 LOGICAL, INTENT(IN), OPTIONAL :: do_norm, do_print
2972
2973 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_Gram_Schmidt_orthonorm'
2974
2975 INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
2976 j_col, ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, &
2977 start_col_local, start_row_global, start_row_local, this_col, unit_nr
2978 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
2979 LOGICAL :: my_do_norm, my_do_print
2980 REAL(kind=dp) :: norm
2981 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2982
2983 CALL timeset(routinen, handle)
2984
2985 my_do_norm = .true.
2986 IF (PRESENT(do_norm)) my_do_norm = do_norm
2987
2988 my_do_print = .false.
2989 IF (PRESENT(do_print) .AND. (my_do_norm)) my_do_print = do_print
2990
2991 unit_nr = -1
2992 IF (my_do_print) THEN
2994 IF (unit_nr < 1) my_do_print = .false.
2995 END IF
2996
2997 IF (SIZE(b) /= 0) THEN
2998 IF (PRESENT(nrows)) THEN
2999 nrow_global = nrows
3000 ELSE
3001 nrow_global = SIZE(b, 1)
3002 END IF
3003
3004 IF (PRESENT(ncols)) THEN
3005 ncol_global = ncols
3006 ELSE
3007 ncol_global = SIZE(b, 2)
3008 END IF
3009
3010 IF (PRESENT(start_row)) THEN
3011 start_row_global = start_row
3012 ELSE
3013 start_row_global = 1
3014 END IF
3015
3016 IF (PRESENT(start_col)) THEN
3017 start_col_global = start_col
3018 ELSE
3019 start_col_global = 1
3020 END IF
3021
3022 end_row_global = start_row_global + nrow_global - 1
3023 end_col_global = start_col_global + ncol_global - 1
3024
3025 CALL cp_fm_get_info(matrix=matrix_a, &
3026 nrow_global=nrow_global, ncol_global=ncol_global, &
3027 nrow_local=nrow_local, ncol_local=ncol_local, &
3028 row_indices=row_indices, col_indices=col_indices)
3029 IF (end_row_global > nrow_global) THEN
3030 end_row_global = nrow_global
3031 END IF
3032 IF (end_col_global > ncol_global) THEN
3033 end_col_global = ncol_global
3034 END IF
3035
3036 ! find out row/column indices of locally stored matrix elements that
3037 ! needs to be copied.
3038 ! Arrays row_indices and col_indices are assumed to be sorted in
3039 ! ascending order
3040 DO start_row_local = 1, nrow_local
3041 IF (row_indices(start_row_local) >= start_row_global) EXIT
3042 END DO
3043
3044 DO end_row_local = start_row_local, nrow_local
3045 IF (row_indices(end_row_local) > end_row_global) EXIT
3046 END DO
3047 end_row_local = end_row_local - 1
3048
3049 DO start_col_local = 1, ncol_local
3050 IF (col_indices(start_col_local) >= start_col_global) EXIT
3051 END DO
3052
3053 DO end_col_local = start_col_local, ncol_local
3054 IF (col_indices(end_col_local) > end_col_global) EXIT
3055 END DO
3056 end_col_local = end_col_local - 1
3057
3058 a => matrix_a%local_data
3059
3060 this_col = col_indices(start_col_local) - start_col_global + 1
3061
3062 b(:, this_col) = a(:, start_col_local)
3063
3064 IF (my_do_norm) THEN
3065 norm = sqrt(accurate_dot_product(b(:, this_col), b(:, this_col)))
3066 b(:, this_col) = b(:, this_col)/norm
3067 IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
3068 END IF
3069
3070 DO i = start_col_local + 1, end_col_local
3071 this_col = col_indices(i) - start_col_global + 1
3072 b(:, this_col) = a(:, i)
3073 DO j = start_col_local, i - 1
3074 j_col = col_indices(j) - start_col_global + 1
3075 b(:, this_col) = b(:, this_col) - &
3076 accurate_dot_product(b(:, j_col), b(:, this_col))* &
3077 b(:, j_col)/accurate_dot_product(b(:, j_col), b(:, j_col))
3078 END DO
3079
3080 IF (my_do_norm) THEN
3081 norm = sqrt(accurate_dot_product(b(:, this_col), b(:, this_col)))
3082 b(:, this_col) = b(:, this_col)/norm
3083 IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
3084 END IF
3085
3086 END DO
3087 CALL matrix_a%matrix_struct%para_env%sum(b)
3088 END IF
3089
3090 CALL timestop(handle)
3091
3092 END SUBROUTINE cp_fm_gram_schmidt_orthonorm
3093
3094! **************************************************************************************************
3095!> \brief Cholesky decomposition
3096!> \param fm_matrix ...
3097!> \param n ...
3098!> \param uplo triangular format; defaults to 'U'
3099! **************************************************************************************************
3100 SUBROUTINE cp_fm_potrf(fm_matrix, n, uplo)
3101 TYPE(cp_fm_type) :: fm_matrix
3102 INTEGER, INTENT(IN) :: n
3103 CHARACTER, INTENT(IN), OPTIONAL :: uplo
3104
3105 CHARACTER :: myuplo
3106 INTEGER :: info
3107 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
3108 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
3109#if defined(__parallel)
3110 INTEGER, DIMENSION(9) :: desca
3111#endif
3112
3113 myuplo = 'U'
3114 IF (PRESENT(uplo)) myuplo = uplo
3115
3116 a => fm_matrix%local_data
3117 a_sp => fm_matrix%local_data_sp
3118#if defined(__parallel)
3119 desca(:) = fm_matrix%matrix_struct%descriptor(:)
3120 IF (fm_matrix%use_sp) THEN
3121 CALL pspotrf(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
3122 ELSE
3123 CALL pdpotrf(myuplo, n, a(1, 1), 1, 1, desca, info)
3124 END IF
3125#else
3126 IF (fm_matrix%use_sp) THEN
3127 CALL spotrf(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
3128 ELSE
3129 CALL dpotrf(myuplo, n, a(1, 1), SIZE(a, 1), info)
3130 END IF
3131#endif
3132 IF (info /= 0) &
3133 cpabort("Cholesky decomposition failed. Matrix ill-conditioned?")
3134
3135 END SUBROUTINE cp_fm_potrf
3136
3137! **************************************************************************************************
3138!> \brief Invert trianguar matrix
3139!> \param fm_matrix the matrix to invert (triangular matrix according to uplo)
3140!> \param n size of the matrix to invert
3141!> \param uplo triangular format; defaults to 'U'
3142! **************************************************************************************************
3143 SUBROUTINE cp_fm_potri(fm_matrix, n, uplo)
3144 TYPE(cp_fm_type) :: fm_matrix
3145 INTEGER, INTENT(IN) :: n
3146 CHARACTER, INTENT(IN), OPTIONAL :: uplo
3147
3148 CHARACTER :: myuplo
3149 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
3150 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
3151 INTEGER :: info
3152#if defined(__parallel)
3153 INTEGER, DIMENSION(9) :: desca
3154#endif
3155
3156 myuplo = 'U'
3157 IF (PRESENT(uplo)) myuplo = uplo
3158
3159 a => fm_matrix%local_data
3160 a_sp => fm_matrix%local_data_sp
3161#if defined(__parallel)
3162 desca(:) = fm_matrix%matrix_struct%descriptor(:)
3163 IF (fm_matrix%use_sp) THEN
3164 CALL pspotri(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
3165 ELSE
3166 CALL pdpotri(myuplo, n, a(1, 1), 1, 1, desca, info)
3167 END IF
3168#else
3169 IF (fm_matrix%use_sp) THEN
3170 CALL spotri(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
3171 ELSE
3172 CALL dpotri(myuplo, n, a(1, 1), SIZE(a, 1), info)
3173 END IF
3174#endif
3175 cpassert(info == 0)
3176 END SUBROUTINE cp_fm_potri
3177
3178! **************************************************************************************************
3179!> \brief ...
3180!> \param fm_matrix ...
3181!> \param neig ...
3182!> \param fm_matrixb ...
3183!> \param fm_matrixout ...
3184!> \param op ...
3185!> \param pos ...
3186!> \param transa ...
3187! **************************************************************************************************
3188 SUBROUTINE cp_fm_cholesky_restore(fm_matrix, neig, fm_matrixb, fm_matrixout, op, pos, transa)
3189 TYPE(cp_fm_type) :: fm_matrix
3190 TYPE(cp_fm_type) :: fm_matrixb
3191 TYPE(cp_fm_type) :: fm_matrixout
3192 INTEGER, INTENT(IN) :: neig
3193 CHARACTER(LEN=*), INTENT(IN) :: op
3194 CHARACTER(LEN=*), INTENT(IN) :: pos
3195 CHARACTER(LEN=*), INTENT(IN) :: transa
3196
3197 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, outm
3198 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp, outm_sp
3199 INTEGER :: n, itype
3200 REAL(kind=dp) :: alpha
3201#if defined(__parallel)
3202 INTEGER :: i
3203 INTEGER, DIMENSION(9) :: desca, descb, descout
3204#endif
3205
3206 ! notice b is the cholesky guy
3207 a => fm_matrix%local_data
3208 b => fm_matrixb%local_data
3209 outm => fm_matrixout%local_data
3210 a_sp => fm_matrix%local_data_sp
3211 b_sp => fm_matrixb%local_data_sp
3212 outm_sp => fm_matrixout%local_data_sp
3213
3214 n = fm_matrix%matrix_struct%nrow_global
3215 itype = 1
3216
3217#if defined(__parallel)
3218 desca(:) = fm_matrix%matrix_struct%descriptor(:)
3219 descb(:) = fm_matrixb%matrix_struct%descriptor(:)
3220 descout(:) = fm_matrixout%matrix_struct%descriptor(:)
3221 alpha = 1.0_dp
3222 DO i = 1, neig
3223 IF (fm_matrix%use_sp) THEN
3224 CALL pscopy(n, a_sp(1, 1), 1, i, desca, 1, outm_sp(1, 1), 1, i, descout, 1)
3225 ELSE
3226 CALL pdcopy(n, a(1, 1), 1, i, desca, 1, outm(1, 1), 1, i, descout, 1)
3227 END IF
3228 END DO
3229 IF (op .EQ. "SOLVE") THEN
3230 IF (fm_matrix%use_sp) THEN
3231 CALL pstrsm(pos, 'U', transa, 'N', n, neig, real(alpha, sp), b_sp(1, 1), 1, 1, descb, &
3232 outm_sp(1, 1), 1, 1, descout)
3233 ELSE
3234 CALL pdtrsm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), 1, 1, descb, outm(1, 1), 1, 1, descout)
3235 END IF
3236 ELSE
3237 IF (fm_matrix%use_sp) THEN
3238 CALL pstrmm(pos, 'U', transa, 'N', n, neig, real(alpha, sp), b_sp(1, 1), 1, 1, descb, &
3239 outm_sp(1, 1), 1, 1, descout)
3240 ELSE
3241 CALL pdtrmm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), 1, 1, descb, outm(1, 1), 1, 1, descout)
3242 END IF
3243 END IF
3244#else
3245 alpha = 1.0_dp
3246 IF (fm_matrix%use_sp) THEN
3247 CALL scopy(neig*n, a_sp(1, 1), 1, outm_sp(1, 1), 1)
3248 ELSE
3249 CALL dcopy(neig*n, a(1, 1), 1, outm(1, 1), 1)
3250 END IF
3251 IF (op .EQ. "SOLVE") THEN
3252 IF (fm_matrix%use_sp) THEN
3253 CALL strsm(pos, 'U', transa, 'N', n, neig, real(alpha, sp), b_sp(1, 1), SIZE(b_sp, 1), outm_sp(1, 1), n)
3254 ELSE
3255 CALL dtrsm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), SIZE(b, 1), outm(1, 1), n)
3256 END IF
3257 ELSE
3258 IF (fm_matrix%use_sp) THEN
3259 CALL strmm(pos, 'U', transa, 'N', n, neig, real(alpha, sp), b_sp(1, 1), n, outm_sp(1, 1), n)
3260 ELSE
3261 CALL dtrmm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), n, outm(1, 1), n)
3262 END IF
3263 END IF
3264#endif
3265
3266 END SUBROUTINE cp_fm_cholesky_restore
3267
3268! **************************************************************************************************
3269!> \brief Calculates
3270!> yv = alpha*amat*xv + beta*yv
3271!> where amat: fm matrix
3272!> xv : vector replicated
3273!> yv : vector replicated
3274!> Defaults: alpha = 1, beta = 0
3275! **************************************************************************************************
3276 SUBROUTINE cp_fm_matvec(amat, xv, yv, alpha, beta)
3277 TYPE(cp_fm_type), INTENT(IN) :: amat
3278 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: xv
3279 REAL(kind=dp), DIMENSION(:), INTENT(INOUT) :: yv
3280 REAL(kind=dp), OPTIONAL, INTENT(IN) :: alpha, beta
3281
3282 INTEGER :: na, nc, nx, ny
3283 REAL(kind=dp) :: aval, bval
3284#if defined(__parallel)
3285 INTEGER :: nrl, ncl, ic, ir
3286 INTEGER, DIMENSION(:), POINTER :: rind, cind
3287 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: xvl, yvl, yvm
3288#endif
3289
3290 IF (amat%use_sp) THEN
3291 cpabort("cp_fm_matvec: SP option not available")
3292 END IF
3293 aval = 1.0_dp
3294 IF (PRESENT(alpha)) aval = alpha
3295 bval = 0.0_dp
3296 IF (PRESENT(beta)) bval = beta
3297
3298 CALL cp_fm_get_info(amat, nrow_global=na, ncol_global=nc)
3299 nx = SIZE(xv)
3300 ny = SIZE(yv)
3301 IF ((nx /= ny) .OR. (nc /= nx)) THEN
3302 cpabort("cp_fm_matvec: incompatible dimensions")
3303 END IF
3304#if defined(__parallel)
3305 CALL cp_fm_get_info(amat, nrow_local=nrl, ncol_local=ncl, &
3306 row_indices=rind, col_indices=cind)
3307 ALLOCATE (xvl(ncl), yvl(nrl), yvm(ny))
3308 DO ic = 1, ncl
3309 xvl(ic) = xv(cind(ic))
3310 END DO
3311 yvl(1:nrl) = matmul(amat%local_data, xvl(1:ncl))
3312 yvm = 0.0_dp
3313 DO ir = 1, nrl
3314 yvm(rind(ir)) = yvl(ir)
3315 END DO
3316 CALL amat%matrix_struct%para_env%sum(yvm)
3317 IF (bval == 0.0_dp) THEN
3318 yv = aval*yvm
3319 ELSE
3320 yv = bval*yv + aval*yvm
3321 END IF
3322#else
3323 IF (bval == 0.0_dp) THEN
3324 yv = aval*matmul(amat%local_data, xv)
3325 ELSE
3326 yv = bval*yv + aval*matmul(amat%local_data, xv)
3327 END IF
3328#endif
3329
3330 END SUBROUTINE cp_fm_matvec
3331
3332END MODULE cp_fm_basic_linalg
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
methods related to the blacs parallel environment
basic linear algebra operations for full matrices
subroutine, public cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
subroutine, public cp_fm_row_scale(matrixa, scaling)
scales row i of matrix a with scaling(i)
subroutine, public cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
subroutine, public cp_fm_column_scale(matrixa, scaling)
scales column i of matrix a with scaling(i)
subroutine, public cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
subroutine, public cp_fm_solve(matrix_a, general_a)
computes the the solution to A*b=A_general using lu decomposition
subroutine, public cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
compute a QR factorization with column pivoting of a M-by-N distributed matrix sub( A ) = A(IA:IA+M-1...
real(kind=dp) function, public cp_fm_frobenius_norm(matrix_a)
computes the Frobenius norm of matrix_a
subroutine, public cp_fm_det(matrix_a, det_a)
Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix.
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col, uplo)
performs a QR factorization of the input rectangular matrix A or of a submatrix of A the computed tri...
subroutine, public cp_fm_gram_schmidt_orthonorm(matrix_a, b, nrows, ncols, start_row, start_col, do_norm, do_print)
Orthonormalizes selected rows and columns of a full matrix, matrix_a.
subroutine, public cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
performs a rank-k update of a symmetric matrix_c matrix_c = beta * matrix_c + alpha * matrix_a * tran...
subroutine, public cp_fm_potrf(fm_matrix, n, uplo)
Cholesky decomposition.
subroutine, public cp_fm_potri(fm_matrix, n, uplo)
Invert trianguar matrix.
subroutine, public cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
interface to BLACS geadd: matrix_b = beta*matrix_b + alpha*opt(matrix_a) where opt(matrix_a) can be e...
subroutine, public cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
computes the schur product of two matrices c_ij = a_ij * b_ij
real(kind=dp) function, public cp_fm_norm(matrix, mode)
norm of matrix using (p)dlange
subroutine, public cp_fm_cholesky_restore(fm_matrix, neig, fm_matrixb, fm_matrixout, op, pos, transa)
...
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_uplo_to_full(matrix, work, uplo)
given a triangular matrix according to uplo, computes the corresponding full matrix
subroutine, public cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix.
subroutine, public cp_complex_fm_gemm(transa, transb, m, n, k, alpha, a_re, a_im, b_re, b_im, beta, c_re, c_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
Convenience function. Computes the matrix multiplications needed for the multiplication of complex ma...
subroutine, public cp_fm_scale(alpha, matrix_a)
scales a matrix matrix_a = alpha * matrix_b
subroutine, public cp_fm_triangular_invert(matrix_a, uplo_tr)
inverts a triangular matrix
subroutine, public cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
computes matrix_c = beta * matrix_c + alpha * matrix_a * matrix_b computes matrix_c = beta * matrix_c...
subroutine, public cp_fm_matvec(amat, xv, yv, alpha, beta)
Calculates yv = alpha*amat*xv + beta*yv where amat: fm matrix xv : vector replicated yv : vector repl...
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
subroutine, public cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1) with orthonormal column...
represent the structure of a full matrix
logical function, public cp_fm_struct_equivalent(fmstruct1, fmstruct2)
returns true if the two matrix structures are equivalent, false otherwise.
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_get_diag(matrix, diag)
returns the diagonal elements of a fm
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_set_element(matrix, irow_global, icol_global, alpha)
sets an element of a matrix
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Definition kahan_sum.F:29
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public int_8
Definition kinds.F:54
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public sp
Definition kinds.F:33
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
subroutine, public m_memory(mem)
Returns the total amount of memory [bytes] in use, if known, zero otherwise.
Definition machine.F:440
Collection of simple mathematical functions and subroutines.
Definition mathlib.F:15
subroutine, public get_pseudo_inverse_svd(a, a_pinverse, rskip, determinant, sval)
returns the pseudoinverse of a real, square matrix using singular value decomposition
Definition mathlib.F:938
subroutine, public diag(n, a, d, v)
Diagonalize matrix a. The eigenvalues are returned in vector d and the eigenvectors are returned in m...
Definition mathlib.F:1496
Interface to the message passing library MPI.
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
just to build arrays of pointers to matrices
represent a full matrix