(git:b17b328)
Loading...
Searching...
No Matches
cp_fm_basic_linalg.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Basic linear algebra operations for full matrices.
10!> \par History
11!> 08.2002 split out of qs_blacs [fawzi]
12!> \author Fawzi Mohamed
13! **************************************************************************************************
17 USE cp_fm_types, ONLY: &
25 USE kinds, ONLY: dp, &
26 int_8, &
27 sp
28 USE machine, ONLY: m_memory
32#include "../base/base_uses.f90"
33
34 IMPLICIT NONE
35 PRIVATE
36
37 LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .true.
38 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_basic_linalg'
39
40 PUBLIC :: cp_fm_scale, & ! scale a matrix
41 cp_fm_scale_and_add, & ! scale and add two matrices
42 cp_fm_geadd, & ! general addition
43 cp_fm_column_scale, & ! scale columns of a matrix
44 cp_fm_row_scale, & ! scale rows of a matrix
45 cp_fm_trace, & ! trace of the transpose(A)*B
46 cp_fm_contracted_trace, & ! sum_{i,...,k} Tr [A(i,...,k)^T * B(i,...,k)]
47 cp_fm_norm, & ! different norms of A
48 cp_fm_schur_product, & ! schur product
49 cp_fm_transpose, & ! transpose a matrix
50 cp_fm_uplo_to_full, & ! symmetrise a triangular matrix
51 cp_fm_syrk, & ! rank k update
52 cp_fm_triangular_multiply, & ! triangular matrix multiply / solve
53 cp_fm_symm, & ! multiply a symmetric with a non-symmetric matrix
54 cp_fm_gemm, & ! multiply two matrices
55 cp_complex_fm_gemm, & ! multiply two complex matrices, represented by non_complex fm matrices
56 cp_fm_invert, & ! computes the inverse and determinant
57 cp_fm_frobenius_norm, & ! frobenius norm
58 cp_fm_triangular_invert, & ! compute the reciprocal of a triangular matrix
59 cp_fm_qr_factorization, & ! compute the QR factorization of a rectangular matrix
60 cp_fm_solve, & ! solves the equation A*B=C A and C are input
61 cp_fm_pdgeqpf, & ! compute a QR factorization with column pivoting of a M-by-N distributed matrix
62 cp_fm_pdorgqr, & ! generates an M-by-N as first N columns of a product of K elementary reflectors
63 cp_fm_potrf, & ! Cholesky decomposition
64 cp_fm_potri, & ! Invert triangular matrix
65 cp_fm_rot_rows, & ! rotates two rows
66 cp_fm_rot_cols, & ! rotates two columns
67 cp_fm_gram_schmidt_orthonorm, & ! Gram-Schmidt orthonormalization of columns of a full matrix, &
68 cp_fm_det, & ! determinant of a real matrix with correct sign
69 cp_fm_matvec ! matrix-vector multiplication (vector replicated)
70
71 REAL(KIND=dp), EXTERNAL :: dlange, pdlange, pdlatra
72 REAL(KIND=sp), EXTERNAL :: slange, pslange, pslatra
73
74 INTERFACE cp_fm_trace
75 MODULE PROCEDURE cp_fm_trace_a0b0t0
76 MODULE PROCEDURE cp_fm_trace_a1b0t1_a
77 MODULE PROCEDURE cp_fm_trace_a1b0t1_p
78 MODULE PROCEDURE cp_fm_trace_a1b1t1_aa
79 MODULE PROCEDURE cp_fm_trace_a1b1t1_ap
80 MODULE PROCEDURE cp_fm_trace_a1b1t1_pa
81 MODULE PROCEDURE cp_fm_trace_a1b1t1_pp
82 END INTERFACE cp_fm_trace
83
85 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_aa
86 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_ap
87 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pa
88 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pp
89 END INTERFACE cp_fm_contracted_trace
90CONTAINS
91
92! **************************************************************************************************
93!> \brief Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix
94!> \author A. Sinyavskiy (andrey.sinyavskiy@chem.uzh.ch)
95! **************************************************************************************************
96 SUBROUTINE cp_fm_det(matrix_a, det_a)
97
98 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
99 REAL(kind=dp), INTENT(OUT) :: det_a
100 REAL(kind=dp) :: determinant
101 TYPE(cp_fm_type) :: matrix_lu
102 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
103 INTEGER :: n, i, info, p
104 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
105 REAL(kind=dp), DIMENSION(:), POINTER :: diag
106
107#if defined(__parallel)
108 INTEGER :: myprow, nprow, npcol, nrow_local, nrow_block, irow_local
109 INTEGER, DIMENSION(9) :: desca
110#endif
111
112 CALL cp_fm_create(matrix=matrix_lu, &
113 matrix_struct=matrix_a%matrix_struct, &
114 name="A_lu"//trim(adjustl(cp_to_string(1)))//"MATRIX")
115 CALL cp_fm_to_fm(matrix_a, matrix_lu)
116
117 a => matrix_lu%local_data
118 n = matrix_lu%matrix_struct%nrow_global
119 ALLOCATE (ipivot(n))
120 ipivot(:) = 0
121 p = 0
122 ALLOCATE (diag(n))
123 diag(:) = 0.0_dp
124#if defined(__parallel)
125 ! Use LU decomposition
126 desca(:) = matrix_lu%matrix_struct%descriptor(:)
127 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
128 CALL cp_fm_get_diag(matrix_lu, diag)
129 determinant = product(diag)
130 myprow = matrix_lu%matrix_struct%context%mepos(1)
131 nprow = matrix_lu%matrix_struct%context%num_pe(1)
132 npcol = matrix_lu%matrix_struct%context%num_pe(2)
133 nrow_local = matrix_lu%matrix_struct%nrow_locals(myprow)
134 nrow_block = matrix_lu%matrix_struct%nrow_block
135 DO irow_local = 1, nrow_local
136 i = matrix_lu%matrix_struct%row_indices(irow_local)
137 IF (ipivot(irow_local) /= i) p = p + 1
138 END DO
139 CALL matrix_lu%matrix_struct%para_env%sum(p)
140 ! very important fix
141 p = p/npcol
142#else
143 CALL dgetrf(n, n, a, n, ipivot, info)
144 CALL cp_fm_get_diag(matrix_lu, diag)
145 determinant = product(diag)
146 DO i = 1, n
147 IF (ipivot(i) /= i) p = p + 1
148 END DO
149#endif
150 DEALLOCATE (ipivot)
151 DEALLOCATE (diag)
152 CALL cp_fm_release(matrix_lu)
153 det_a = determinant*(-2*mod(p, 2) + 1.0_dp)
154 END SUBROUTINE cp_fm_det
155
156! **************************************************************************************************
157!> \brief calc A <- alpha*A + beta*B
158!> optimized for alpha == 1.0 (just add beta*B) and beta == 0.0 (just
159!> scale A)
160!> \param alpha ...
161!> \param matrix_a ...
162!> \param beta ...
163!> \param matrix_b ...
164! **************************************************************************************************
165 SUBROUTINE cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
166
167 REAL(kind=dp), INTENT(IN) :: alpha
168 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
169 REAL(kind=dp), INTENT(IN), OPTIONAL :: beta
170 TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: matrix_b
171
172 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_scale_and_add'
173
174 INTEGER :: handle, size_a, size_b
175 REAL(kind=dp) :: my_beta
176 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
177 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
178
179 CALL timeset(routinen, handle)
180
181 my_beta = 0.0_dp
182 IF (PRESENT(matrix_b)) my_beta = 1.0_dp
183 IF (PRESENT(beta)) my_beta = beta
184 NULLIFY (a, b)
185
186 IF (PRESENT(beta)) THEN
187 cpassert(PRESENT(matrix_b))
188 IF (ASSOCIATED(matrix_a%local_data, matrix_b%local_data)) THEN
189 cpwarn("Bad use of routine. Call cp_fm_scale instead")
190 CALL cp_fm_scale(alpha + beta, matrix_a)
191 CALL timestop(handle)
192 RETURN
193 END IF
194 END IF
195
196 a => matrix_a%local_data
197 a_sp => matrix_a%local_data_sp
198
199 IF (matrix_a%use_sp) THEN
200 size_a = SIZE(a_sp, 1)*SIZE(a_sp, 2)
201 ELSE
202 size_a = SIZE(a, 1)*SIZE(a, 2)
203 END IF
204
205 IF (alpha .NE. 1.0_dp) THEN
206 IF (matrix_a%use_sp) THEN
207 CALL sscal(size_a, real(alpha, sp), a_sp, 1)
208 ELSE
209 CALL dscal(size_a, alpha, a, 1)
210 END IF
211 END IF
212 IF (my_beta .NE. 0.0_dp) THEN
213 IF (matrix_a%matrix_struct%context .NE. matrix_b%matrix_struct%context) &
214 cpabort("Matrices must be in the same blacs context")
215
216 IF (cp_fm_struct_equivalent(matrix_a%matrix_struct, &
217 matrix_b%matrix_struct)) THEN
218
219 b => matrix_b%local_data
220 b_sp => matrix_b%local_data_sp
221 IF (matrix_b%use_sp) THEN
222 size_b = SIZE(b_sp, 1)*SIZE(b_sp, 2)
223 ELSE
224 size_b = SIZE(b, 1)*SIZE(b, 2)
225 END IF
226 IF (size_a .NE. size_b) &
227 cpabort("Matrices must have same local sizes")
228
229 IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
230 CALL saxpy(size_a, real(my_beta, sp), b_sp, 1, a_sp, 1)
231 ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
232 CALL saxpy(size_a, real(my_beta, sp), real(b, sp), 1, a_sp, 1)
233 ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
234 CALL daxpy(size_a, my_beta, real(b_sp, dp), 1, a, 1)
235 ELSE
236 CALL daxpy(size_a, my_beta, b, 1, a, 1)
237 END IF
238
239 ELSE
240#ifdef __parallel
241 cpabort("to do (pdscal,pdcopy,pdaxpy)")
242#else
243 cpabort("")
244#endif
245 END IF
246
247 END IF
248
249 CALL timestop(handle)
250
251 END SUBROUTINE cp_fm_scale_and_add
252
253! **************************************************************************************************
254!> \brief interface to BLACS geadd:
255!> matrix_b = beta*matrix_b + alpha*opt(matrix_a)
256!> where opt(matrix_a) can be either:
257!> 'N': matrix_a
258!> 'T': matrix_a^T
259!> 'C': matrix_a^H (Hermitian conjugate)
260!> note that this is a level three routine, use cp_fm_scale_and_add if that
261!> is sufficient for your needs
262!> \param alpha : complex scalar
263!> \param trans : 'N' normal, 'T' transposed
264!> \param matrix_a : input matrix_a
265!> \param beta : complex scalar
266!> \param matrix_b : input matrix_b, upon out put the updated matrix_b
267!> \author Lianheng Tong
268! **************************************************************************************************
269 SUBROUTINE cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
270 REAL(kind=dp), INTENT(IN) :: alpha, beta
271 CHARACTER, INTENT(IN) :: trans
272 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
273
274 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_geadd'
275
276 INTEGER :: nrow_global, ncol_global, handle
277 REAL(kind=dp), DIMENSION(:, :), POINTER :: aa, bb
278#if defined(__parallel)
279 INTEGER, DIMENSION(9) :: desca, descb
280#elif !defined(__MKL)
281 INTEGER :: ii, jj
282#endif
283
284 CALL timeset(routinen, handle)
285
286 nrow_global = matrix_a%matrix_struct%nrow_global
287 ncol_global = matrix_a%matrix_struct%ncol_global
288 cpassert(nrow_global .EQ. matrix_b%matrix_struct%nrow_global)
289 cpassert(ncol_global .EQ. matrix_b%matrix_struct%ncol_global)
290
291 aa => matrix_a%local_data
292 bb => matrix_b%local_data
293
294#if defined(__parallel)
295 desca = matrix_a%matrix_struct%descriptor
296 descb = matrix_b%matrix_struct%descriptor
297 CALL pdgeadd(trans, &
298 nrow_global, &
299 ncol_global, &
300 alpha, &
301 aa, &
302 1, 1, &
303 desca, &
304 beta, &
305 bb, &
306 1, 1, &
307 descb)
308#elif defined(__MKL)
309 CALL mkl_domatadd('C', trans, 'N', nrow_global, ncol_global, &
310 alpha, aa, nrow_global, beta, bb, nrow_global, bb, nrow_global)
311#else
312 ! dgeadd is not a standard BLAS function, although it is implemented
313 ! in some libraries like OpenBLAS, so not going to use it here
314 SELECT CASE (trans)
315 CASE ('T')
316 DO jj = 1, ncol_global
317 DO ii = 1, nrow_global
318 bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(jj, ii)
319 END DO
320 END DO
321 CASE DEFAULT
322 DO jj = 1, ncol_global
323 DO ii = 1, nrow_global
324 bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(ii, jj)
325 END DO
326 END DO
327 END SELECT
328#endif
329
330 CALL timestop(handle)
331
332 END SUBROUTINE cp_fm_geadd
333
334! **************************************************************************************************
335!> \brief Computes the LU-decomposition of the matrix, and the determinant of the matrix
336!> IMPORTANT : the sign of the determinant is not defined correctly yet ....
337!> \param matrix_a ...
338!> \param almost_determinant ...
339!> \param correct_sign ...
340!> \par History
341!> added correct_sign 02.07 (fschiff)
342!> \author Joost VandeVondele
343!> \note
344!> - matrix_a is overwritten
345!> - the sign of the determinant might be wrong
346!> - SERIOUS WARNING (KNOWN BUG) : the sign of the determinant depends on ipivot
347!> - one should be able to find out if ipivot is an even or an odd permutation...
348!> if you need the correct sign, just add correct_sign==.TRUE. (fschiff)
349!> - Use cp_fm_get_diag instead of n times cp_fm_get_element (A. Bussy)
350! **************************************************************************************************
351 SUBROUTINE cp_fm_lu_decompose(matrix_a, almost_determinant, correct_sign)
352 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
353 REAL(kind=dp), INTENT(OUT) :: almost_determinant
354 LOGICAL, INTENT(IN), OPTIONAL :: correct_sign
355
356 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_lu_decompose'
357
358 INTEGER :: handle, i, info, n
359 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
360 REAL(kind=dp) :: determinant
361 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
362#if defined(__parallel)
363 INTEGER, DIMENSION(9) :: desca
364 REAL(kind=dp), DIMENSION(:), POINTER :: diag
365#else
366 INTEGER :: lda
367#endif
368
369 CALL timeset(routinen, handle)
370
371 a => matrix_a%local_data
372 n = matrix_a%matrix_struct%nrow_global
373 ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
374
375#if defined(__parallel)
376 mark_used(correct_sign)
377 desca(:) = matrix_a%matrix_struct%descriptor(:)
378 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
379
380 ALLOCATE (diag(n))
381 diag(:) = 0.0_dp
382 CALL cp_fm_get_diag(matrix_a, diag)
383 determinant = 1.0_dp
384 DO i = 1, n
385 determinant = determinant*diag(i)
386 END DO
387 DEALLOCATE (diag)
388#else
389 lda = SIZE(a, 1)
390 CALL dgetrf(n, n, a, lda, ipivot, info)
391 determinant = 1.0_dp
392 IF (correct_sign) THEN
393 DO i = 1, n
394 IF (ipivot(i) .NE. i) THEN
395 determinant = -determinant*a(i, i)
396 ELSE
397 determinant = determinant*a(i, i)
398 END IF
399 END DO
400 ELSE
401 DO i = 1, n
402 determinant = determinant*a(i, i)
403 END DO
404 END IF
405#endif
406 ! info is allowed to be zero
407 ! this does just signal a zero diagonal element
408 DEALLOCATE (ipivot)
409 almost_determinant = determinant ! notice that the sign is random
410 CALL timestop(handle)
411 END SUBROUTINE cp_fm_lu_decompose
412
413! **************************************************************************************************
414!> \brief computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
415!> \param transa : 'N' -> normal 'T' -> transpose
416!> alpha,beta :: can be 0.0_dp and 1.0_dp
417!> \param transb ...
418!> \param m ...
419!> \param n ...
420!> \param k ...
421!> \param alpha ...
422!> \param matrix_a : m x k matrix ( ! for transa = 'N')
423!> \param matrix_b : k x n matrix ( ! for transb = 'N')
424!> \param beta ...
425!> \param matrix_c : m x n matrix
426!> \param a_first_col ...
427!> \param a_first_row ...
428!> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
429!> \param b_first_row ...
430!> \param c_first_col ...
431!> \param c_first_row ...
432!> \author Matthias Krack
433!> \note
434!> matrix_c should have no overlap with matrix_a, matrix_b
435! **************************************************************************************************
436 SUBROUTINE cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, &
437 matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, &
438 c_first_col, c_first_row)
439
440 CHARACTER(LEN=1), INTENT(IN) :: transa, transb
441 INTEGER, INTENT(IN) :: m, n, k
442 REAL(kind=dp), INTENT(IN) :: alpha
443 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
444 REAL(kind=dp), INTENT(IN) :: beta
445 TYPE(cp_fm_type), INTENT(IN) :: matrix_c
446 INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, &
447 b_first_col, b_first_row, &
448 c_first_col, c_first_row
449
450 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_gemm'
451
452 INTEGER :: handle, i_a, i_b, i_c, j_a, &
453 j_b, j_c
454 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, c
455 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp, c_sp
456#if defined(__parallel)
457 INTEGER, DIMENSION(9) :: desca, descb, descc
458#else
459 INTEGER :: lda, ldb, ldc
460#endif
461
462 CALL timeset(routinen, handle)
463
464 !sample peak memory
465 CALL m_memory()
466
467 a => matrix_a%local_data
468 b => matrix_b%local_data
469 c => matrix_c%local_data
470
471 a_sp => matrix_a%local_data_sp
472 b_sp => matrix_b%local_data_sp
473 c_sp => matrix_c%local_data_sp
474
475 i_a = 1
476 IF (PRESENT(a_first_row)) i_a = a_first_row
477
478 j_a = 1
479 IF (PRESENT(a_first_col)) j_a = a_first_col
480
481 i_b = 1
482 IF (PRESENT(b_first_row)) i_b = b_first_row
483
484 j_b = 1
485 IF (PRESENT(b_first_col)) j_b = b_first_col
486
487 i_c = 1
488 IF (PRESENT(c_first_row)) i_c = c_first_row
489
490 j_c = 1
491 IF (PRESENT(c_first_col)) j_c = c_first_col
492
493#if defined(__parallel)
494
495 desca(:) = matrix_a%matrix_struct%descriptor(:)
496 descb(:) = matrix_b%matrix_struct%descriptor(:)
497 descc(:) = matrix_c%matrix_struct%descriptor(:)
498
499 IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
500
501 CALL psgemm(transa, transb, m, n, k, real(alpha, sp), a_sp(1, 1), i_a, j_a, desca, b_sp(1, 1), i_b, j_b, &
502 descb, real(beta, sp), c_sp(1, 1), i_c, j_c, descc)
503
504 ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
505
506 CALL pdgemm(transa, transb, m, n, k, alpha, a, i_a, j_a, desca, b, i_b, j_b, &
507 descb, beta, c, i_c, j_c, descc)
508
509 ELSE
510 cpabort("Mixed precision gemm NYI")
511 END IF
512#else
513
514 IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
515
516 lda = SIZE(a_sp, 1)
517 ldb = SIZE(b_sp, 1)
518 ldc = SIZE(c_sp, 1)
519
520 CALL sgemm(transa, transb, m, n, k, real(alpha, sp), a_sp(i_a, j_a), lda, b_sp(i_b, j_b), ldb, &
521 REAL(beta, sp), c_sp(i_c, j_c), ldc)
522
523 ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
524
525 lda = SIZE(a, 1)
526 ldb = SIZE(b, 1)
527 ldc = SIZE(c, 1)
528
529 CALL dgemm(transa, transb, m, n, k, alpha, a(i_a, j_a), lda, b(i_b, j_b), ldb, beta, c(i_c, j_c), ldc)
530
531 ELSE
532 cpabort("Mixed precision gemm NYI")
533 END IF
534
535#endif
536 CALL timestop(handle)
537
538 END SUBROUTINE cp_fm_gemm
539
540! **************************************************************************************************
541!> \brief computes matrix_c = beta * matrix_c + alpha * matrix_a * matrix_b
542!> computes matrix_c = beta * matrix_c + alpha * matrix_b * matrix_a
543!> where matrix_a is symmetric
544!> \param side : 'L' -> matrix_a is on the left 'R' -> matrix_a is on the right
545!> alpha,beta :: can be 0.0_dp and 1.0_dp
546!> \param uplo triangular format
547!> \param m ...
548!> \param n ...
549!> \param alpha ...
550!> \param matrix_a : m x m matrix
551!> \param matrix_b : m x n matrix
552!> \param beta ...
553!> \param matrix_c : m x n matrix
554!> \author Matthias Krack
555!> \note
556!> matrix_c should have no overlap with matrix_a, matrix_b
557!> all matrices in QS are triangular according to uplo
558!> matrix_a is always an m x m matrix
559!> typically slower than cp_fm_gemm (especially in parallel easily 50 percent)
560! **************************************************************************************************
561 SUBROUTINE cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
562
563 CHARACTER(LEN=1), INTENT(IN) :: side, uplo
564 INTEGER, INTENT(IN) :: m, n
565 REAL(kind=dp), INTENT(IN) :: alpha
566 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
567 REAL(kind=dp), INTENT(IN) :: beta
568 TYPE(cp_fm_type), INTENT(IN) :: matrix_c
569
570 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_symm'
571
572 INTEGER :: handle
573 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, c
574#if defined(__parallel)
575 INTEGER, DIMENSION(9) :: desca, descb, descc
576#else
577 INTEGER :: lda, ldb, ldc
578#endif
579
580 CALL timeset(routinen, handle)
581
582 a => matrix_a%local_data
583 b => matrix_b%local_data
584 c => matrix_c%local_data
585
586#if defined(__parallel)
587
588 desca(:) = matrix_a%matrix_struct%descriptor(:)
589 descb(:) = matrix_b%matrix_struct%descriptor(:)
590 descc(:) = matrix_c%matrix_struct%descriptor(:)
591
592 CALL pdsymm(side, uplo, m, n, alpha, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, beta, c(1, 1), 1, 1, descc)
593
594#else
595
596 lda = matrix_a%matrix_struct%local_leading_dimension
597 ldb = matrix_b%matrix_struct%local_leading_dimension
598 ldc = matrix_c%matrix_struct%local_leading_dimension
599
600 CALL dsymm(side, uplo, m, n, alpha, a(1, 1), lda, b(1, 1), ldb, beta, c(1, 1), ldc)
601
602#endif
603 CALL timestop(handle)
604
605 END SUBROUTINE cp_fm_symm
606
607! **************************************************************************************************
608!> \brief computes the Frobenius norm of matrix_a
609!> \brief computes the Frobenius norm of matrix_a
610!> \param matrix_a : m x n matrix
611!> \return ...
612!> \author VW
613! **************************************************************************************************
614 FUNCTION cp_fm_frobenius_norm(matrix_a) RESULT(norm)
615 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
616 REAL(kind=dp) :: norm
617
618 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_frobenius_norm'
619
620 INTEGER :: handle, size_a
621 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
622 REAL(kind=dp), EXTERNAL :: ddot
623#if defined(__parallel)
624 TYPE(mp_comm_type) :: group
625#endif
626
627 CALL timeset(routinen, handle)
628
629 norm = 0.0_dp
630 a => matrix_a%local_data
631 size_a = SIZE(a, 1)*SIZE(a, 2)
632 norm = ddot(size_a, a(1, 1), 1, a(1, 1), 1)
633#if defined(__parallel)
634 group = matrix_a%matrix_struct%para_env
635 CALL group%sum(norm)
636#endif
637 norm = sqrt(norm)
638
639 CALL timestop(handle)
640
641 END FUNCTION cp_fm_frobenius_norm
642
643! **************************************************************************************************
644!> \brief performs a rank-k update of a symmetric matrix_c
645!> matrix_c = beta * matrix_c + alpha * matrix_a * transpose ( matrix_a )
646!> \param uplo : 'U' ('L')
647!> \param trans : 'N' ('T')
648!> \param k : number of cols to use in matrix_a
649!> ia,ja :: 1,1 (could be used for selecting subblock of a)
650!> \param alpha ...
651!> \param matrix_a ...
652!> \param ia ...
653!> \param ja ...
654!> \param beta ...
655!> \param matrix_c ...
656!> \author Matthias Krack
657! **************************************************************************************************
658 SUBROUTINE cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
659 CHARACTER(LEN=1), INTENT(IN) :: uplo, trans
660 INTEGER, INTENT(IN) :: k
661 REAL(kind=dp), INTENT(IN) :: alpha
662 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
663 INTEGER, INTENT(IN) :: ia, ja
664 REAL(kind=dp), INTENT(IN) :: beta
665 TYPE(cp_fm_type), INTENT(IN) :: matrix_c
666
667 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_syrk'
668
669 INTEGER :: handle, n
670 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, c
671#if defined(__parallel)
672 INTEGER, DIMENSION(9) :: desca, descc
673#else
674 INTEGER :: lda, ldc
675#endif
676
677 CALL timeset(routinen, handle)
678
679 n = matrix_c%matrix_struct%nrow_global
680
681 a => matrix_a%local_data
682 c => matrix_c%local_data
683
684#if defined(__parallel)
685
686 desca(:) = matrix_a%matrix_struct%descriptor(:)
687 descc(:) = matrix_c%matrix_struct%descriptor(:)
688
689 CALL pdsyrk(uplo, trans, n, k, alpha, a(1, 1), ia, ja, desca, beta, c(1, 1), 1, 1, descc)
690
691#else
692
693 lda = SIZE(a, 1)
694 ldc = SIZE(c, 1)
695
696 CALL dsyrk(uplo, trans, n, k, alpha, a(ia, ja), lda, beta, c(1, 1), ldc)
697
698#endif
699 CALL timestop(handle)
700
701 END SUBROUTINE cp_fm_syrk
702
703! **************************************************************************************************
704!> \brief computes the schur product of two matrices
705!> c_ij = a_ij * b_ij
706!> \param matrix_a ...
707!> \param matrix_b ...
708!> \param matrix_c ...
709!> \author Joost VandeVondele
710! **************************************************************************************************
711 SUBROUTINE cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
712
713 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b, matrix_c
714
715 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_schur_product'
716
717 INTEGER :: handle, icol_local, irow_local, mypcol, &
718 myprow, ncol_local, &
719 nrow_local
720 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, c
721 TYPE(cp_blacs_env_type), POINTER :: context
722
723 CALL timeset(routinen, handle)
724
725 context => matrix_a%matrix_struct%context
726 myprow = context%mepos(1)
727 mypcol = context%mepos(2)
728
729 a => matrix_a%local_data
730 b => matrix_b%local_data
731 c => matrix_c%local_data
732
733 nrow_local = matrix_a%matrix_struct%nrow_locals(myprow)
734 ncol_local = matrix_a%matrix_struct%ncol_locals(mypcol)
735
736 DO icol_local = 1, ncol_local
737 DO irow_local = 1, nrow_local
738 c(irow_local, icol_local) = a(irow_local, icol_local)*b(irow_local, icol_local)
739 END DO
740 END DO
741
742 CALL timestop(handle)
743
744 END SUBROUTINE cp_fm_schur_product
745
746! **************************************************************************************************
747!> \brief returns the trace of matrix_a^T matrix_b, i.e
748!> sum_{i,j}(matrix_a(i,j)*matrix_b(i,j))
749!> \param matrix_a a matrix
750!> \param matrix_b another matrix
751!> \param trace ...
752!> \par History
753!> 11.06.2001 Creation (Matthias Krack)
754!> 12.2002 added doc [fawzi]
755!> \author Matthias Krack
756!> \note
757!> note the transposition of matrix_a!
758! **************************************************************************************************
759 SUBROUTINE cp_fm_trace_a0b0t0(matrix_a, matrix_b, trace)
760
761 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
762 REAL(KIND=dp), INTENT(OUT) :: trace
763
764 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a0b0t0'
765
766 INTEGER :: handle, mypcol, myprow, ncol_local, &
767 nrow_local
768 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
769 REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
770 TYPE(cp_blacs_env_type), POINTER :: context
771 TYPE(mp_comm_type) :: group
772
773 CALL timeset(routinen, handle)
774
775 context => matrix_a%matrix_struct%context
776 myprow = context%mepos(1)
777 mypcol = context%mepos(2)
778
779 group = matrix_a%matrix_struct%para_env
780
781 a => matrix_a%local_data
782 b => matrix_b%local_data
783
784 a_sp => matrix_a%local_data_sp
785 b_sp => matrix_b%local_data_sp
786
787 nrow_local = min(matrix_a%matrix_struct%nrow_locals(myprow), matrix_b%matrix_struct%nrow_locals(myprow))
788 ncol_local = min(matrix_a%matrix_struct%ncol_locals(mypcol), matrix_b%matrix_struct%ncol_locals(mypcol))
789
790 ! cries for an accurate_dot_product
791 IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
792 trace = accurate_sum(real(a_sp(1:nrow_local, 1:ncol_local)* &
793 b_sp(1:nrow_local, 1:ncol_local), dp))
794 ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
795 trace = accurate_sum(real(a_sp(1:nrow_local, 1:ncol_local), dp)* &
796 b(1:nrow_local, 1:ncol_local))
797 ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
798 trace = accurate_sum(a(1:nrow_local, 1:ncol_local)* &
799 REAL(b_sp(1:nrow_local, 1:ncol_local), dp))
800 ELSE
801 trace = accurate_dot_product(a(1:nrow_local, 1:ncol_local), &
802 b(1:nrow_local, 1:ncol_local))
803 END IF
804
805 CALL group%sum(trace)
806
807 CALL timestop(handle)
808
809 END SUBROUTINE cp_fm_trace_a0b0t0
810
811
812! **************************************************************************************************
813!> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b) for each pair of matrices A_k and B.
814!> \param matrix_a list of A matrices
815!> \param matrix_b B matrix
816!> \param trace computed traces
817!> \par History
818!> * 08.2018 forked from cp_fm_trace() [Sergey Chulkov]
819!> \note \parblock
820!> Computing the trace requires collective communication between involved MPI processes
821!> that implies a synchronisation point between them. The aim of this subroutine is to reduce
822!> the amount of time wasted in such synchronisation by performing one large collective
823!> operation which involves all the matrices in question.
824!>
825!> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b0t1' means that
826!> the dummy variables 'matrix_a' and 'trace' are 1-dimensional arrays, while the variable
827!> 'matrix_b' is a single matrix.
828!> \endparblock
829! **************************************************************************************************
830 SUBROUTINE cp_fm_trace_a1b0t1_a (matrix_a, matrix_b, trace)
831 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_a
832 TYPE(cp_fm_type), INTENT(IN) :: matrix_b
833 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
834
835 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_a'
836
837 INTEGER :: handle, imatrix, n_matrices, &
838 ncols_local, nrows_local
839 LOGICAL :: use_sp_a, use_sp_b
840 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
841 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
842 TYPE(mp_comm_type) :: group
843
844 CALL timeset(routinen, handle)
845
846 n_matrices = SIZE(trace)
847 cpassert(SIZE(matrix_a) == n_matrices)
848
849 CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
850 use_sp_b = matrix_b%use_sp
851
852 IF (use_sp_b) THEN
853 ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
854 ELSE
855 ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
856 END IF
857
858!$OMP PARALLEL DO DEFAULT(NONE), &
859!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
860!$OMP SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
861!$OMP SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
862
863 DO imatrix = 1, n_matrices
864
865 use_sp_a = matrix_a(imatrix) %use_sp
866
867 ! assume that the matrices A(i) and B have identical shapes and distribution schemes
868 IF (use_sp_a .AND. use_sp_b) THEN
869 ldata_a_sp => matrix_a(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
870 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
871 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
872 ldata_a => matrix_a(imatrix) %local_data(1:nrows_local, 1:ncols_local)
873 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
874 ELSE
875 cpabort("Matrices A and B are of different types")
876 END IF
877 END DO
878!$OMP END PARALLEL DO
879
880 group = matrix_b%matrix_struct%para_env
881 CALL group%sum(trace)
882
883 CALL timestop(handle)
884 END SUBROUTINE cp_fm_trace_a1b0t1_a
885 SUBROUTINE cp_fm_trace_a1b0t1_p (matrix_a, matrix_b, trace)
886 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_a
887 TYPE(cp_fm_type), INTENT(IN) :: matrix_b
888 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
889
890 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_p'
891
892 INTEGER :: handle, imatrix, n_matrices, &
893 ncols_local, nrows_local
894 LOGICAL :: use_sp_a, use_sp_b
895 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
896 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
897 TYPE(mp_comm_type) :: group
898
899 CALL timeset(routinen, handle)
900
901 n_matrices = SIZE(trace)
902 cpassert(SIZE(matrix_a) == n_matrices)
903
904 CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
905 use_sp_b = matrix_b%use_sp
906
907 IF (use_sp_b) THEN
908 ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
909 ELSE
910 ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
911 END IF
912
913!$OMP PARALLEL DO DEFAULT(NONE), &
914!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
915!$OMP SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
916!$OMP SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
917
918 DO imatrix = 1, n_matrices
919
920 use_sp_a = matrix_a(imatrix) %matrix%use_sp
921
922 ! assume that the matrices A(i) and B have identical shapes and distribution schemes
923 IF (use_sp_a .AND. use_sp_b) THEN
924 ldata_a_sp => matrix_a(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
925 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
926 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
927 ldata_a => matrix_a(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
928 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
929 ELSE
930 cpabort("Matrices A and B are of different types")
931 END IF
932 END DO
933!$OMP END PARALLEL DO
934
935 group = matrix_b%matrix_struct%para_env
936 CALL group%sum(trace)
937
938 CALL timestop(handle)
939 END SUBROUTINE cp_fm_trace_a1b0t1_p
940
941! **************************************************************************************************
942!> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b(k)) for each pair of matrices A_k and B_k.
943!> \param matrix_a list of A matrices
944!> \param matrix_b list of B matrices
945!> \param trace computed traces
946!> \param accurate ...
947!> \par History
948!> * 11.2016 forked from cp_fm_trace() [Sergey Chulkov]
949!> \note \parblock
950!> Computing the trace requires collective communication between involved MPI processes
951!> that implies a synchronisation point between them. The aim of this subroutine is to reduce
952!> the amount of time wasted in such synchronisation by performing one large collective
953!> operation which involves all the matrices in question.
954!>
955!> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b1t1' means that
956!> all dummy variables (matrix_a, matrix_b, and trace) are 1-dimensional arrays.
957!> \endparblock
958! **************************************************************************************************
959 SUBROUTINE cp_fm_trace_a1b1t1_aa (matrix_a, matrix_b, trace, accurate)
960 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_a
961 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_b
962 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
963 LOGICAL, INTENT(IN), OPTIONAL :: accurate
964
965 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_aa'
966
967 INTEGER :: handle, imatrix, n_matrices, &
968 ncols_local, nrows_local
969 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
970 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
971 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
972 TYPE(mp_comm_type) :: group
973
974 CALL timeset(routinen, handle)
975
976 n_matrices = SIZE(trace)
977 cpassert(SIZE(matrix_a) == n_matrices)
978 cpassert(SIZE(matrix_b) == n_matrices)
979
980 use_accurate_sum = .true.
981 IF (PRESENT(accurate)) use_accurate_sum = accurate
982
983!$OMP PARALLEL DO DEFAULT(NONE), &
984!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
985!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
986!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
987 DO imatrix = 1, n_matrices
988 CALL cp_fm_get_info(matrix_a(imatrix) , nrow_local=nrows_local, ncol_local=ncols_local)
989
990 use_sp_a = matrix_a(imatrix) %use_sp
991 use_sp_b = matrix_b(imatrix) %use_sp
992
993 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
994 IF (use_sp_a .AND. use_sp_b) THEN
995 ldata_a_sp => matrix_a(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
996 ldata_b_sp => matrix_b(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
997 IF (use_accurate_sum) THEN
998 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
999 ELSE
1000 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1001 END IF
1002 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1003 ldata_a => matrix_a(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1004 ldata_b => matrix_b(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1005 IF (use_accurate_sum) THEN
1006 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1007 ELSE
1008 trace(imatrix) = sum(ldata_a*ldata_b)
1009 END IF
1010 ELSE
1011 cpabort("Matrices A and B are of different types")
1012 END IF
1013 END DO
1014!$OMP END PARALLEL DO
1015
1016 group = matrix_a(1) %matrix_struct%para_env
1017 CALL group%sum(trace)
1018
1019 CALL timestop(handle)
1020 END SUBROUTINE cp_fm_trace_a1b1t1_aa
1021 SUBROUTINE cp_fm_trace_a1b1t1_ap (matrix_a, matrix_b, trace, accurate)
1022 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_a
1023 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_b
1024 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
1025 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1026
1027 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_ap'
1028
1029 INTEGER :: handle, imatrix, n_matrices, &
1030 ncols_local, nrows_local
1031 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1032 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1033 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1034 TYPE(mp_comm_type) :: group
1035
1036 CALL timeset(routinen, handle)
1037
1038 n_matrices = SIZE(trace)
1039 cpassert(SIZE(matrix_a) == n_matrices)
1040 cpassert(SIZE(matrix_b) == n_matrices)
1041
1042 use_accurate_sum = .true.
1043 IF (PRESENT(accurate)) use_accurate_sum = accurate
1044
1045!$OMP PARALLEL DO DEFAULT(NONE), &
1046!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1047!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1048!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1049 DO imatrix = 1, n_matrices
1050 CALL cp_fm_get_info(matrix_a(imatrix) , nrow_local=nrows_local, ncol_local=ncols_local)
1051
1052 use_sp_a = matrix_a(imatrix) %use_sp
1053 use_sp_b = matrix_b(imatrix) %matrix%use_sp
1054
1055 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1056 IF (use_sp_a .AND. use_sp_b) THEN
1057 ldata_a_sp => matrix_a(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1058 ldata_b_sp => matrix_b(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1059 IF (use_accurate_sum) THEN
1060 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1061 ELSE
1062 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1063 END IF
1064 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1065 ldata_a => matrix_a(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1066 ldata_b => matrix_b(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1067 IF (use_accurate_sum) THEN
1068 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1069 ELSE
1070 trace(imatrix) = sum(ldata_a*ldata_b)
1071 END IF
1072 ELSE
1073 cpabort("Matrices A and B are of different types")
1074 END IF
1075 END DO
1076!$OMP END PARALLEL DO
1077
1078 group = matrix_a(1) %matrix_struct%para_env
1079 CALL group%sum(trace)
1080
1081 CALL timestop(handle)
1082 END SUBROUTINE cp_fm_trace_a1b1t1_ap
1083 SUBROUTINE cp_fm_trace_a1b1t1_pa (matrix_a, matrix_b, trace, accurate)
1084 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_a
1085 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_b
1086 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
1087 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1088
1089 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_pa'
1090
1091 INTEGER :: handle, imatrix, n_matrices, &
1092 ncols_local, nrows_local
1093 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1094 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1095 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1096 TYPE(mp_comm_type) :: group
1097
1098 CALL timeset(routinen, handle)
1099
1100 n_matrices = SIZE(trace)
1101 cpassert(SIZE(matrix_a) == n_matrices)
1102 cpassert(SIZE(matrix_b) == n_matrices)
1103
1104 use_accurate_sum = .true.
1105 IF (PRESENT(accurate)) use_accurate_sum = accurate
1106
1107!$OMP PARALLEL DO DEFAULT(NONE), &
1108!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1109!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1110!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1111 DO imatrix = 1, n_matrices
1112 CALL cp_fm_get_info(matrix_a(imatrix) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1113
1114 use_sp_a = matrix_a(imatrix) %matrix%use_sp
1115 use_sp_b = matrix_b(imatrix) %use_sp
1116
1117 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1118 IF (use_sp_a .AND. use_sp_b) THEN
1119 ldata_a_sp => matrix_a(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1120 ldata_b_sp => matrix_b(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1121 IF (use_accurate_sum) THEN
1122 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1123 ELSE
1124 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1125 END IF
1126 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1127 ldata_a => matrix_a(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1128 ldata_b => matrix_b(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1129 IF (use_accurate_sum) THEN
1130 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1131 ELSE
1132 trace(imatrix) = sum(ldata_a*ldata_b)
1133 END IF
1134 ELSE
1135 cpabort("Matrices A and B are of different types")
1136 END IF
1137 END DO
1138!$OMP END PARALLEL DO
1139
1140 group = matrix_a(1) %matrix%matrix_struct%para_env
1141 CALL group%sum(trace)
1142
1143 CALL timestop(handle)
1144 END SUBROUTINE cp_fm_trace_a1b1t1_pa
1145 SUBROUTINE cp_fm_trace_a1b1t1_pp (matrix_a, matrix_b, trace, accurate)
1146 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_a
1147 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_b
1148 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
1149 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1150
1151 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_pp'
1152
1153 INTEGER :: handle, imatrix, n_matrices, &
1154 ncols_local, nrows_local
1155 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1156 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1157 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1158 TYPE(mp_comm_type) :: group
1159
1160 CALL timeset(routinen, handle)
1161
1162 n_matrices = SIZE(trace)
1163 cpassert(SIZE(matrix_a) == n_matrices)
1164 cpassert(SIZE(matrix_b) == n_matrices)
1165
1166 use_accurate_sum = .true.
1167 IF (PRESENT(accurate)) use_accurate_sum = accurate
1168
1169!$OMP PARALLEL DO DEFAULT(NONE), &
1170!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1171!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1172!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1173 DO imatrix = 1, n_matrices
1174 CALL cp_fm_get_info(matrix_a(imatrix) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1175
1176 use_sp_a = matrix_a(imatrix) %matrix%use_sp
1177 use_sp_b = matrix_b(imatrix) %matrix%use_sp
1178
1179 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1180 IF (use_sp_a .AND. use_sp_b) THEN
1181 ldata_a_sp => matrix_a(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1182 ldata_b_sp => matrix_b(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1183 IF (use_accurate_sum) THEN
1184 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1185 ELSE
1186 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1187 END IF
1188 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1189 ldata_a => matrix_a(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1190 ldata_b => matrix_b(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1191 IF (use_accurate_sum) THEN
1192 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1193 ELSE
1194 trace(imatrix) = sum(ldata_a*ldata_b)
1195 END IF
1196 ELSE
1197 cpabort("Matrices A and B are of different types")
1198 END IF
1199 END DO
1200!$OMP END PARALLEL DO
1201
1202 group = matrix_a(1) %matrix%matrix_struct%para_env
1203 CALL group%sum(trace)
1204
1205 CALL timestop(handle)
1206 END SUBROUTINE cp_fm_trace_a1b1t1_pp
1207
1208! **************************************************************************************************
1209!> \brief Compute trace(i,j) = \sum_k Tr (matrix_a(k,i)^T matrix_b(k,j)).
1210!> \param matrix_a list of A matrices
1211!> \param matrix_b list of B matrices
1212!> \param trace computed traces
1213!> \param accurate ...
1214! **************************************************************************************************
1215 SUBROUTINE cp_fm_contracted_trace_a2b2t2_aa (matrix_a, matrix_b, trace, accurate)
1216 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: matrix_a
1217 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: matrix_b
1218 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
1219 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1220
1221 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_aa'
1222
1223 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1224 nrows_local, nz
1225 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1226 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1227 REAL(kind=dp) :: t
1228 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1229 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1230 TYPE(mp_comm_type) :: group
1231
1232 CALL timeset(routinen, handle)
1233
1234 nz = SIZE(matrix_a, 1)
1235 cpassert(SIZE(matrix_b, 1) == nz)
1236
1237 na = SIZE(matrix_a, 2)
1238 nb = SIZE(matrix_b, 2)
1239 cpassert(SIZE(trace, 1) == na)
1240 cpassert(SIZE(trace, 2) == nb)
1241
1242 use_accurate_sum = .true.
1243 IF (PRESENT(accurate)) use_accurate_sum = accurate
1244
1245 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1246 ! improve load balance between shared-memory threads
1247 ntraces = na*nb
1248 na8 = int(na, kind=int_8)
1249
1250!$OMP PARALLEL DO DEFAULT(NONE), &
1251!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1252!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1253!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1254 DO itrace = 1, ntraces
1255 ib8 = (itrace - 1)/na8
1256 ia = int(itrace - ib8*na8)
1257 ib = int(ib8) + 1
1258
1259 t = 0.0_dp
1260 DO iz = 1, nz
1261 CALL cp_fm_get_info(matrix_a(iz, ia) , nrow_local=nrows_local, ncol_local=ncols_local)
1262 use_sp_a = matrix_a(iz, ia) %use_sp
1263 use_sp_b = matrix_b(iz, ib) %use_sp
1264
1265 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1266 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1267 ldata_a => matrix_a(iz, ia) %local_data(1:nrows_local, 1:ncols_local)
1268 ldata_b => matrix_b(iz, ib) %local_data(1:nrows_local, 1:ncols_local)
1269 IF (use_accurate_sum) THEN
1270 t = t + accurate_dot_product(ldata_a, ldata_b)
1271 ELSE
1272 t = t + sum(ldata_a*ldata_b)
1273 END IF
1274 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1275 ldata_a_sp => matrix_a(iz, ia) %local_data_sp(1:nrows_local, 1:ncols_local)
1276 ldata_b_sp => matrix_b(iz, ib) %local_data_sp(1:nrows_local, 1:ncols_local)
1277 IF (use_accurate_sum) THEN
1278 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1279 ELSE
1280 t = t + sum(ldata_a_sp*ldata_b_sp)
1281 END IF
1282 ELSE
1283 cpabort("Matrices A and B are of different types")
1284 END IF
1285 END DO
1286 trace(ia, ib) = t
1287 END DO
1288!$OMP END PARALLEL DO
1289
1290 group = matrix_a(1, 1) %matrix_struct%para_env
1291 CALL group%sum(trace)
1292
1293 CALL timestop(handle)
1294 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_aa
1295 SUBROUTINE cp_fm_contracted_trace_a2b2t2_ap (matrix_a, matrix_b, trace, accurate)
1296 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: matrix_a
1297 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(IN) :: matrix_b
1298 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
1299 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1300
1301 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_ap'
1302
1303 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1304 nrows_local, nz
1305 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1306 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1307 REAL(kind=dp) :: t
1308 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1309 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1310 TYPE(mp_comm_type) :: group
1311
1312 CALL timeset(routinen, handle)
1313
1314 nz = SIZE(matrix_a, 1)
1315 cpassert(SIZE(matrix_b, 1) == nz)
1316
1317 na = SIZE(matrix_a, 2)
1318 nb = SIZE(matrix_b, 2)
1319 cpassert(SIZE(trace, 1) == na)
1320 cpassert(SIZE(trace, 2) == nb)
1321
1322 use_accurate_sum = .true.
1323 IF (PRESENT(accurate)) use_accurate_sum = accurate
1324
1325 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1326 ! improve load balance between shared-memory threads
1327 ntraces = na*nb
1328 na8 = int(na, kind=int_8)
1329
1330!$OMP PARALLEL DO DEFAULT(NONE), &
1331!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1332!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1333!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1334 DO itrace = 1, ntraces
1335 ib8 = (itrace - 1)/na8
1336 ia = int(itrace - ib8*na8)
1337 ib = int(ib8) + 1
1338
1339 t = 0.0_dp
1340 DO iz = 1, nz
1341 CALL cp_fm_get_info(matrix_a(iz, ia) , nrow_local=nrows_local, ncol_local=ncols_local)
1342 use_sp_a = matrix_a(iz, ia) %use_sp
1343 use_sp_b = matrix_b(iz, ib) %matrix%use_sp
1344
1345 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1346 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1347 ldata_a => matrix_a(iz, ia) %local_data(1:nrows_local, 1:ncols_local)
1348 ldata_b => matrix_b(iz, ib) %matrix%local_data(1:nrows_local, 1:ncols_local)
1349 IF (use_accurate_sum) THEN
1350 t = t + accurate_dot_product(ldata_a, ldata_b)
1351 ELSE
1352 t = t + sum(ldata_a*ldata_b)
1353 END IF
1354 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1355 ldata_a_sp => matrix_a(iz, ia) %local_data_sp(1:nrows_local, 1:ncols_local)
1356 ldata_b_sp => matrix_b(iz, ib) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1357 IF (use_accurate_sum) THEN
1358 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1359 ELSE
1360 t = t + sum(ldata_a_sp*ldata_b_sp)
1361 END IF
1362 ELSE
1363 cpabort("Matrices A and B are of different types")
1364 END IF
1365 END DO
1366 trace(ia, ib) = t
1367 END DO
1368!$OMP END PARALLEL DO
1369
1370 group = matrix_a(1, 1) %matrix_struct%para_env
1371 CALL group%sum(trace)
1372
1373 CALL timestop(handle)
1374 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_ap
1375 SUBROUTINE cp_fm_contracted_trace_a2b2t2_pa (matrix_a, matrix_b, trace, accurate)
1376 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(IN) :: matrix_a
1377 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: matrix_b
1378 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
1379 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1380
1381 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_pa'
1382
1383 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1384 nrows_local, nz
1385 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1386 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1387 REAL(kind=dp) :: t
1388 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1389 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1390 TYPE(mp_comm_type) :: group
1391
1392 CALL timeset(routinen, handle)
1393
1394 nz = SIZE(matrix_a, 1)
1395 cpassert(SIZE(matrix_b, 1) == nz)
1396
1397 na = SIZE(matrix_a, 2)
1398 nb = SIZE(matrix_b, 2)
1399 cpassert(SIZE(trace, 1) == na)
1400 cpassert(SIZE(trace, 2) == nb)
1401
1402 use_accurate_sum = .true.
1403 IF (PRESENT(accurate)) use_accurate_sum = accurate
1404
1405 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1406 ! improve load balance between shared-memory threads
1407 ntraces = na*nb
1408 na8 = int(na, kind=int_8)
1409
1410!$OMP PARALLEL DO DEFAULT(NONE), &
1411!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1412!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1413!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1414 DO itrace = 1, ntraces
1415 ib8 = (itrace - 1)/na8
1416 ia = int(itrace - ib8*na8)
1417 ib = int(ib8) + 1
1418
1419 t = 0.0_dp
1420 DO iz = 1, nz
1421 CALL cp_fm_get_info(matrix_a(iz, ia) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1422 use_sp_a = matrix_a(iz, ia) %matrix%use_sp
1423 use_sp_b = matrix_b(iz, ib) %use_sp
1424
1425 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1426 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1427 ldata_a => matrix_a(iz, ia) %matrix%local_data(1:nrows_local, 1:ncols_local)
1428 ldata_b => matrix_b(iz, ib) %local_data(1:nrows_local, 1:ncols_local)
1429 IF (use_accurate_sum) THEN
1430 t = t + accurate_dot_product(ldata_a, ldata_b)
1431 ELSE
1432 t = t + sum(ldata_a*ldata_b)
1433 END IF
1434 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1435 ldata_a_sp => matrix_a(iz, ia) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1436 ldata_b_sp => matrix_b(iz, ib) %local_data_sp(1:nrows_local, 1:ncols_local)
1437 IF (use_accurate_sum) THEN
1438 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1439 ELSE
1440 t = t + sum(ldata_a_sp*ldata_b_sp)
1441 END IF
1442 ELSE
1443 cpabort("Matrices A and B are of different types")
1444 END IF
1445 END DO
1446 trace(ia, ib) = t
1447 END DO
1448!$OMP END PARALLEL DO
1449
1450 group = matrix_a(1, 1) %matrix%matrix_struct%para_env
1451 CALL group%sum(trace)
1452
1453 CALL timestop(handle)
1454 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_pa
1455 SUBROUTINE cp_fm_contracted_trace_a2b2t2_pp (matrix_a, matrix_b, trace, accurate)
1456 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(IN) :: matrix_a
1457 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(IN) :: matrix_b
1458 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
1459 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1460
1461 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_pp'
1462
1463 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1464 nrows_local, nz
1465 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1466 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1467 REAL(kind=dp) :: t
1468 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1469 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1470 TYPE(mp_comm_type) :: group
1471
1472 CALL timeset(routinen, handle)
1473
1474 nz = SIZE(matrix_a, 1)
1475 cpassert(SIZE(matrix_b, 1) == nz)
1476
1477 na = SIZE(matrix_a, 2)
1478 nb = SIZE(matrix_b, 2)
1479 cpassert(SIZE(trace, 1) == na)
1480 cpassert(SIZE(trace, 2) == nb)
1481
1482 use_accurate_sum = .true.
1483 IF (PRESENT(accurate)) use_accurate_sum = accurate
1484
1485 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1486 ! improve load balance between shared-memory threads
1487 ntraces = na*nb
1488 na8 = int(na, kind=int_8)
1489
1490!$OMP PARALLEL DO DEFAULT(NONE), &
1491!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1492!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1493!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1494 DO itrace = 1, ntraces
1495 ib8 = (itrace - 1)/na8
1496 ia = int(itrace - ib8*na8)
1497 ib = int(ib8) + 1
1498
1499 t = 0.0_dp
1500 DO iz = 1, nz
1501 CALL cp_fm_get_info(matrix_a(iz, ia) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1502 use_sp_a = matrix_a(iz, ia) %matrix%use_sp
1503 use_sp_b = matrix_b(iz, ib) %matrix%use_sp
1504
1505 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1506 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1507 ldata_a => matrix_a(iz, ia) %matrix%local_data(1:nrows_local, 1:ncols_local)
1508 ldata_b => matrix_b(iz, ib) %matrix%local_data(1:nrows_local, 1:ncols_local)
1509 IF (use_accurate_sum) THEN
1510 t = t + accurate_dot_product(ldata_a, ldata_b)
1511 ELSE
1512 t = t + sum(ldata_a*ldata_b)
1513 END IF
1514 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1515 ldata_a_sp => matrix_a(iz, ia) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1516 ldata_b_sp => matrix_b(iz, ib) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1517 IF (use_accurate_sum) THEN
1518 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1519 ELSE
1520 t = t + sum(ldata_a_sp*ldata_b_sp)
1521 END IF
1522 ELSE
1523 cpabort("Matrices A and B are of different types")
1524 END IF
1525 END DO
1526 trace(ia, ib) = t
1527 END DO
1528!$OMP END PARALLEL DO
1529
1530 group = matrix_a(1, 1) %matrix%matrix_struct%para_env
1531 CALL group%sum(trace)
1532
1533 CALL timestop(handle)
1534 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_pp
1535
1536! **************************************************************************************************
1537!> \brief multiplies in place by a triangular matrix:
1538!> matrix_b = alpha op(triangular_matrix) matrix_b
1539!> or (if side='R')
1540!> matrix_b = alpha matrix_b op(triangular_matrix)
1541!> op(triangular_matrix) is:
1542!> triangular_matrix (if transpose_tr=.false. and invert_tr=.false.)
1543!> triangular_matrix^T (if transpose_tr=.true. and invert_tr=.false.)
1544!> triangular_matrix^(-1) (if transpose_tr=.false. and invert_tr=.true.)
1545!> triangular_matrix^(-T) (if transpose_tr=.true. and invert_tr=.true.)
1546!> \param triangular_matrix the triangular matrix that multiplies the other
1547!> \param matrix_b the matrix that gets multiplied and stores the result
1548!> \param side on which side of matrix_b stays op(triangular_matrix)
1549!> (defaults to 'L')
1550!> \param transpose_tr if the triangular matrix should be transposed
1551!> (defaults to false)
1552!> \param invert_tr if the triangular matrix should be inverted
1553!> (defaults to false)
1554!> \param uplo_tr if triangular_matrix is stored in the upper ('U') or
1555!> lower ('L') triangle (defaults to 'U')
1556!> \param unit_diag_tr if the diagonal elements of triangular_matrix should
1557!> be assumed to be 1 (defaults to false)
1558!> \param n_rows the number of rows of the result (defaults to
1559!> size(matrix_b,1))
1560!> \param n_cols the number of columns of the result (defaults to
1561!> size(matrix_b,2))
1562!> \param alpha ...
1563!> \par History
1564!> 08.2002 created [fawzi]
1565!> \author Fawzi Mohamed
1566!> \note
1567!> needs an mpi env
1568! **************************************************************************************************
1569 SUBROUTINE cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, &
1570 transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, &
1571 alpha)
1572 TYPE(cp_fm_type), INTENT(IN) :: triangular_matrix, matrix_b
1573 CHARACTER, INTENT(IN), OPTIONAL :: side
1574 LOGICAL, INTENT(IN), OPTIONAL :: transpose_tr, invert_tr
1575 CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
1576 LOGICAL, INTENT(IN), OPTIONAL :: unit_diag_tr
1577 INTEGER, INTENT(IN), OPTIONAL :: n_rows, n_cols
1578 REAL(kind=dp), INTENT(IN), OPTIONAL :: alpha
1579
1580 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_triangular_multiply'
1581
1582 CHARACTER :: side_char, transa, unit_diag, uplo
1583 INTEGER :: handle, mdim, m, n
1584 LOGICAL :: invert
1585 REAL(kind=dp) :: al
1586
1587 CALL timeset(routinen, handle)
1588 side_char = 'L'
1589 unit_diag = 'N'
1590 uplo = 'U'
1591 transa = 'N'
1592 invert = .false.
1593 al = 1.0_dp
1594 CALL cp_fm_get_info(matrix_b, nrow_global=m, ncol_global=n)
1595 IF (PRESENT(side)) side_char = side
1596 mdim = merge(1, 2, 'L' .EQ. side_char)
1597 IF (PRESENT(invert_tr)) invert = invert_tr
1598 IF (PRESENT(uplo_tr)) uplo = uplo_tr
1599 IF (PRESENT(unit_diag_tr)) THEN
1600 IF (unit_diag_tr) THEN
1601 unit_diag = 'U'
1602 ELSE
1603 unit_diag = 'N'
1604 END IF
1605 END IF
1606 IF (PRESENT(transpose_tr)) THEN
1607 IF (transpose_tr) THEN
1608 transa = 'T'
1609 ELSE
1610 transa = 'N'
1611 END IF
1612 END IF
1613 IF (PRESENT(alpha)) al = alpha
1614 IF (PRESENT(n_rows)) m = n_rows
1615 IF (PRESENT(n_cols)) n = n_cols
1616
1617 IF (invert) THEN
1618
1619#if defined(__parallel)
1620 CALL pdtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1621 triangular_matrix%local_data(1, 1), 1, 1, &
1622 triangular_matrix%matrix_struct%descriptor, &
1623 matrix_b%local_data(1, 1), 1, 1, &
1624 matrix_b%matrix_struct%descriptor(1))
1625#else
1626 CALL dtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1627 triangular_matrix%local_data(1, 1), &
1628 SIZE(triangular_matrix%local_data, mdim), &
1629 matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1630#endif
1631
1632 ELSE
1633
1634#if defined(__parallel)
1635 CALL pdtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1636 triangular_matrix%local_data(1, 1), 1, 1, &
1637 triangular_matrix%matrix_struct%descriptor, &
1638 matrix_b%local_data(1, 1), 1, 1, &
1639 matrix_b%matrix_struct%descriptor(1))
1640#else
1641 CALL dtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1642 triangular_matrix%local_data(1, 1), &
1643 SIZE(triangular_matrix%local_data, mdim), &
1644 matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1645#endif
1646
1647 END IF
1648
1649 CALL timestop(handle)
1650 END SUBROUTINE cp_fm_triangular_multiply
1651
1652! **************************************************************************************************
1653!> \brief scales a matrix
1654!> matrix_a = alpha * matrix_b
1655!> \param alpha ...
1656!> \param matrix_a ...
1657!> \note
1658!> use cp_fm_set_all to zero (avoids problems with nan)
1659! **************************************************************************************************
1660 SUBROUTINE cp_fm_scale(alpha, matrix_a)
1661 REAL(kind=dp), INTENT(IN) :: alpha
1662 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
1663
1664 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_scale'
1665
1666 INTEGER :: handle, size_a
1667 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1668
1669 CALL timeset(routinen, handle)
1670
1671 NULLIFY (a)
1672
1673 a => matrix_a%local_data
1674 size_a = SIZE(a, 1)*SIZE(a, 2)
1675
1676 CALL dscal(size_a, alpha, a, 1)
1677
1678 CALL timestop(handle)
1679
1680 END SUBROUTINE cp_fm_scale
1681
1682! **************************************************************************************************
1683!> \brief transposes a matrix
1684!> matrixt = matrix ^ T
1685!> \param matrix ...
1686!> \param matrixt ...
1687!> \note
1688!> all matrix elements are transposed (see cp_fm_uplo_to_full to symmetrise a matrix)
1689! **************************************************************************************************
1690 SUBROUTINE cp_fm_transpose(matrix, matrixt)
1691 TYPE(cp_fm_type), INTENT(IN) :: matrix, matrixt
1692
1693 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_transpose'
1694
1695 INTEGER :: handle, ncol_global, &
1696 nrow_global, ncol_globalt, nrow_globalt
1697 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, c
1698#if defined(__parallel)
1699 INTEGER, DIMENSION(9) :: desca, descc
1700#elif !defined(__MKL)
1701 INTEGER :: i, j
1702#endif
1703
1704 nrow_global = matrix%matrix_struct%nrow_global
1705 ncol_global = matrix%matrix_struct%ncol_global
1706 nrow_globalt = matrixt%matrix_struct%nrow_global
1707 ncol_globalt = matrixt%matrix_struct%ncol_global
1708 cpassert(nrow_global == ncol_globalt)
1709 cpassert(nrow_globalt == ncol_global)
1710
1711 CALL timeset(routinen, handle)
1712
1713 a => matrix%local_data
1714 c => matrixt%local_data
1715
1716#if defined(__parallel)
1717 desca(:) = matrix%matrix_struct%descriptor(:)
1718 descc(:) = matrixt%matrix_struct%descriptor(:)
1719 CALL pdtran(ncol_global, nrow_global, 1.0_dp, a(1, 1), 1, 1, desca, 0.0_dp, c(1, 1), 1, 1, descc)
1720#elif defined(__MKL)
1721 CALL mkl_domatcopy('C', 'T', nrow_global, ncol_global, 1.0_dp, a(1, 1), nrow_global, c(1, 1), ncol_global)
1722#else
1723 DO j = 1, ncol_global
1724 DO i = 1, nrow_global
1725 c(j, i) = a(i, j)
1726 END DO
1727 END DO
1728#endif
1729 CALL timestop(handle)
1730
1731 END SUBROUTINE cp_fm_transpose
1732
1733! **************************************************************************************************
1734!> \brief given a triangular matrix according to uplo, computes the corresponding full matrix
1735!> \param matrix the triangular matrix as input, the full matrix as output
1736!> \param work a matrix of the same size as matrix
1737!> \param uplo triangular format; defaults to 'U'
1738!> \author Matthias Krack
1739!> \note
1740!> the opposite triangular part is irrelevant
1741! **************************************************************************************************
1742 SUBROUTINE cp_fm_uplo_to_full(matrix, work, uplo)
1743
1744 TYPE(cp_fm_type), INTENT(IN) :: matrix, work
1745 CHARACTER, INTENT(IN), OPTIONAL :: uplo
1746
1747 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_uplo_to_full'
1748
1749 CHARACTER :: myuplo
1750 INTEGER :: handle, icol_global, irow_global, &
1751 mypcol, myprow, ncol_global, &
1752 nrow_global
1753 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1754 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
1755 TYPE(cp_blacs_env_type), POINTER :: context
1756
1757#if defined(__parallel)
1758 INTEGER :: icol_local, irow_local, &
1759 ncol_block, ncol_local, &
1760 nrow_block, nrow_local
1761 INTEGER, DIMENSION(9) :: desca, descc
1762 REAL(kind=dp), DIMENSION(:, :), POINTER :: c
1763 REAL(kind=sp), DIMENSION(:, :), POINTER :: c_sp
1764#endif
1765
1766 myuplo = 'U'
1767 IF (PRESENT(uplo)) myuplo = uplo
1768
1769 nrow_global = matrix%matrix_struct%nrow_global
1770 ncol_global = matrix%matrix_struct%ncol_global
1771 cpassert(nrow_global == ncol_global)
1772 nrow_global = work%matrix_struct%nrow_global
1773 ncol_global = work%matrix_struct%ncol_global
1774 cpassert(nrow_global == ncol_global)
1775 cpassert(matrix%use_sp .EQV. work%use_sp)
1776
1777 CALL timeset(routinen, handle)
1778
1779 context => matrix%matrix_struct%context
1780 myprow = context%mepos(1)
1781 mypcol = context%mepos(2)
1782
1783#if defined(__parallel)
1784
1785 nrow_block = matrix%matrix_struct%nrow_block
1786 ncol_block = matrix%matrix_struct%ncol_block
1787
1788 nrow_local = matrix%matrix_struct%nrow_locals(myprow)
1789 ncol_local = matrix%matrix_struct%ncol_locals(mypcol)
1790
1791 a => work%local_data
1792 a_sp => work%local_data_sp
1793 desca(:) = work%matrix_struct%descriptor(:)
1794 c => matrix%local_data
1795 c_sp => matrix%local_data_sp
1796 descc(:) = matrix%matrix_struct%descriptor(:)
1797
1798 DO icol_local = 1, ncol_local
1799 icol_global = matrix%matrix_struct%col_indices(icol_local)
1800 DO irow_local = 1, nrow_local
1801 irow_global = matrix%matrix_struct%row_indices(irow_local)
1802 IF (merge(irow_global > icol_global, irow_global < icol_global, (myuplo == "U") .OR. (myuplo == "u"))) THEN
1803 IF (matrix%use_sp) THEN
1804 c_sp(irow_local, icol_local) = 0.0_sp
1805 ELSE
1806 c(irow_local, icol_local) = 0.0_dp
1807 END IF
1808 ELSE IF (irow_global == icol_global) THEN
1809 IF (matrix%use_sp) THEN
1810 c_sp(irow_local, icol_local) = 0.5_sp*c_sp(irow_local, icol_local)
1811 ELSE
1812 c(irow_local, icol_local) = 0.5_dp*c(irow_local, icol_local)
1813 END IF
1814 END IF
1815 END DO
1816 END DO
1817
1818 DO icol_local = 1, ncol_local
1819 DO irow_local = 1, nrow_local
1820 IF (matrix%use_sp) THEN
1821 a_sp(irow_local, icol_local) = c_sp(irow_local, icol_local)
1822 ELSE
1823 a(irow_local, icol_local) = c(irow_local, icol_local)
1824 END IF
1825 END DO
1826 END DO
1827
1828 IF (matrix%use_sp) THEN
1829 CALL pstran(nrow_global, ncol_global, 1.0_sp, a_sp(1, 1), 1, 1, desca, 1.0_sp, c_sp(1, 1), 1, 1, descc)
1830 ELSE
1831 CALL pdtran(nrow_global, ncol_global, 1.0_dp, a(1, 1), 1, 1, desca, 1.0_dp, c(1, 1), 1, 1, descc)
1832 END IF
1833
1834#else
1835
1836 a => matrix%local_data
1837 a_sp => matrix%local_data_sp
1838
1839 IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
1840 DO irow_global = 1, nrow_global
1841 DO icol_global = irow_global + 1, ncol_global
1842 IF (matrix%use_sp) THEN
1843 a_sp(icol_global, irow_global) = a_sp(irow_global, icol_global)
1844 ELSE
1845 a(icol_global, irow_global) = a(irow_global, icol_global)
1846 END IF
1847 END DO
1848 END DO
1849 ELSE
1850 DO icol_global = 1, ncol_global
1851 DO irow_global = icol_global + 1, nrow_global
1852 IF (matrix%use_sp) THEN
1853 a_sp(irow_global, icol_global) = a_sp(icol_global, irow_global)
1854 ELSE
1855 a(irow_global, icol_global) = a(icol_global, irow_global)
1856 END IF
1857 END DO
1858 END DO
1859 END IF
1860
1861#endif
1862 CALL timestop(handle)
1863
1864 END SUBROUTINE cp_fm_uplo_to_full
1865
1866! **************************************************************************************************
1867!> \brief scales column i of matrix a with scaling(i)
1868!> \param matrixa ...
1869!> \param scaling : an array used for scaling the columns,
1870!> SIZE(scaling) determines the number of columns to be scaled
1871!> \author Joost VandeVondele
1872!> \note
1873!> this is very useful as a first step in the computation of C = sum_i alpha_i A_i transpose (A_i)
1874!> that is a rank-k update (cp_fm_syrk , cp_sm_plus_fm_fm_t)
1875!> this procedure can be up to 20 times faster than calling cp_fm_syrk n times
1876!> where every vector has a different prefactor
1877! **************************************************************************************************
1878 SUBROUTINE cp_fm_column_scale(matrixa, scaling)
1879 TYPE(cp_fm_type), INTENT(IN) :: matrixa
1880 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: scaling
1881
1882 INTEGER :: k, mypcol, myprow, n, ncol_global, &
1883 npcol, nprow
1884 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1885 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
1886#if defined(__parallel)
1887 INTEGER :: icol_global, icol_local, &
1888 ipcol, iprow, irow_local
1889#else
1890 INTEGER :: i
1891#endif
1892
1893 myprow = matrixa%matrix_struct%context%mepos(1)
1894 mypcol = matrixa%matrix_struct%context%mepos(2)
1895 nprow = matrixa%matrix_struct%context%num_pe(1)
1896 npcol = matrixa%matrix_struct%context%num_pe(2)
1897
1898 ncol_global = matrixa%matrix_struct%ncol_global
1899
1900 a => matrixa%local_data
1901 a_sp => matrixa%local_data_sp
1902 IF (matrixa%use_sp) THEN
1903 n = SIZE(a_sp, 1)
1904 ELSE
1905 n = SIZE(a, 1)
1906 END IF
1907 k = min(SIZE(scaling), ncol_global)
1908
1909#if defined(__parallel)
1910
1911 DO icol_global = 1, k
1912 CALL infog2l(1, icol_global, matrixa%matrix_struct%descriptor, &
1913 nprow, npcol, myprow, mypcol, &
1914 irow_local, icol_local, iprow, ipcol)
1915 IF ((ipcol == mypcol)) THEN
1916 IF (matrixa%use_sp) THEN
1917 CALL sscal(n, real(scaling(icol_global), sp), a_sp(:, icol_local), 1)
1918 ELSE
1919 CALL dscal(n, scaling(icol_global), a(:, icol_local), 1)
1920 END IF
1921 END IF
1922 END DO
1923#else
1924 DO i = 1, k
1925 IF (matrixa%use_sp) THEN
1926 CALL sscal(n, real(scaling(i), sp), a_sp(:, i), 1)
1927 ELSE
1928 CALL dscal(n, scaling(i), a(:, i), 1)
1929 END IF
1930 END DO
1931#endif
1932 END SUBROUTINE cp_fm_column_scale
1933
1934! **************************************************************************************************
1935!> \brief scales row i of matrix a with scaling(i)
1936!> \param matrixa ...
1937!> \param scaling : an array used for scaling the columns,
1938!> \author JGH
1939!> \note
1940! **************************************************************************************************
1941 SUBROUTINE cp_fm_row_scale(matrixa, scaling)
1942 TYPE(cp_fm_type), INTENT(IN) :: matrixa
1943 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: scaling
1944
1945 INTEGER :: n, m, nrow_global, nrow_local, ncol_local
1946 INTEGER, DIMENSION(:), POINTER :: row_indices
1947 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1948 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
1949#if defined(__parallel)
1950 INTEGER :: irow_global, icol, irow
1951#else
1952 INTEGER :: j
1953#endif
1954
1955 CALL cp_fm_get_info(matrixa, row_indices=row_indices, nrow_global=nrow_global, &
1956 nrow_local=nrow_local, ncol_local=ncol_local)
1957 cpassert(SIZE(scaling) == nrow_global)
1958
1959 a => matrixa%local_data
1960 a_sp => matrixa%local_data_sp
1961 IF (matrixa%use_sp) THEN
1962 n = SIZE(a_sp, 1)
1963 m = SIZE(a_sp, 2)
1964 ELSE
1965 n = SIZE(a, 1)
1966 m = SIZE(a, 2)
1967 END IF
1968
1969#if defined(__parallel)
1970 DO icol = 1, ncol_local
1971 IF (matrixa%use_sp) THEN
1972 DO irow = 1, nrow_local
1973 irow_global = row_indices(irow)
1974 a(irow, icol) = real(scaling(irow_global), dp)*a(irow, icol)
1975 END DO
1976 ELSE
1977 DO irow = 1, nrow_local
1978 irow_global = row_indices(irow)
1979 a(irow, icol) = scaling(irow_global)*a(irow, icol)
1980 END DO
1981 END IF
1982 END DO
1983#else
1984 IF (matrixa%use_sp) THEN
1985 DO j = 1, m
1986 a_sp(1:n, j) = real(scaling(1:n), sp)*a_sp(1:n, j)
1987 END DO
1988 ELSE
1989 DO j = 1, m
1990 a(1:n, j) = scaling(1:n)*a(1:n, j)
1991 END DO
1992 END IF
1993#endif
1994 END SUBROUTINE cp_fm_row_scale
1995
1996! **************************************************************************************************
1997!> \brief Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix
1998!> \param matrix_a the matrix to invert
1999!> \param matrix_inverse the inverse of matrix_a
2000!> \param det_a the determinant of matrix_a
2001!> \param eps_svd optional parameter to active SVD based inversion, singular values below eps_svd
2002!> are screened
2003!> \param eigval optionally return matrix eigenvalues/singular values
2004!> \par History
2005!> note of Jan Wilhelm (12.2015)
2006!> - computation of determinant corrected
2007!> - determinant only computed if det_a is present
2008!> 12.2016 added option to use SVD instead of LU [Nico Holmberg]
2009!> - Use cp_fm_get diag instead of n times cp_fm_get_element (A. Bussy)
2010!> \author Florian Schiffmann(02.2007)
2011! **************************************************************************************************
2012 SUBROUTINE cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
2013
2014 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_inverse
2015 REAL(kind=dp), INTENT(OUT), OPTIONAL :: det_a
2016 REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_svd
2017 REAL(kind=dp), DIMENSION(:), POINTER, &
2018 INTENT(INOUT), OPTIONAL :: eigval
2019
2020 INTEGER :: n
2021 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
2022 REAL(kind=dp) :: determinant, my_eps_svd
2023 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2024 TYPE(cp_fm_type) :: matrix_lu
2025
2026#if defined(__parallel)
2027 TYPE(cp_fm_type) :: u, vt, sigma, inv_sigma_ut
2028 TYPE(mp_comm_type) :: group
2029 INTEGER :: i, info, liwork, lwork, exponent_of_minus_one
2030 INTEGER, DIMENSION(9) :: desca
2031 LOGICAL :: quenched
2032 REAL(kind=dp) :: alpha, beta
2033 REAL(kind=dp), DIMENSION(:), POINTER :: diag
2034 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: work
2035#else
2036 LOGICAL :: sign
2037 REAL(kind=dp) :: eps1
2038#endif
2039
2040 my_eps_svd = 0.0_dp
2041 IF (PRESENT(eps_svd)) my_eps_svd = eps_svd
2042
2043 CALL cp_fm_create(matrix=matrix_lu, &
2044 matrix_struct=matrix_a%matrix_struct, &
2045 name="A_lu"//trim(adjustl(cp_to_string(1)))//"MATRIX")
2046 CALL cp_fm_to_fm(matrix_a, matrix_lu)
2047
2048 a => matrix_lu%local_data
2049 n = matrix_lu%matrix_struct%nrow_global
2050 ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
2051 ipivot(:) = 0
2052#if defined(__parallel)
2053 IF (my_eps_svd .EQ. 0.0_dp) THEN
2054 ! Use LU decomposition
2055 lwork = 3*n
2056 liwork = 3*n
2057 desca(:) = matrix_lu%matrix_struct%descriptor(:)
2058 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
2059
2060 IF (PRESENT(det_a) .OR. PRESENT(eigval)) THEN
2061
2062 ALLOCATE (diag(n))
2063 diag(:) = 0.0_dp
2064 CALL cp_fm_get_diag(matrix_lu, diag)
2065
2066 exponent_of_minus_one = 0
2067 determinant = 1.0_dp
2068 DO i = 1, n
2069 determinant = determinant*diag(i)
2070 IF (ipivot(i) .NE. i) THEN
2071 exponent_of_minus_one = exponent_of_minus_one + 1
2072 END IF
2073 END DO
2074 IF (PRESENT(eigval)) THEN
2075 cpassert(.NOT. ASSOCIATED(eigval))
2076 ALLOCATE (eigval(n))
2077 eigval(:) = diag
2078 END IF
2079 DEALLOCATE (diag)
2080
2081 group = matrix_lu%matrix_struct%para_env
2082 CALL group%sum(exponent_of_minus_one)
2083
2084 determinant = determinant*(-1.0_dp)**exponent_of_minus_one
2085
2086 END IF
2087
2088 alpha = 0.0_dp
2089 beta = 1.0_dp
2090 CALL cp_fm_set_all(matrix_inverse, alpha, beta)
2091 CALL pdgetrs('N', n, n, matrix_lu%local_data, 1, 1, desca, ipivot, matrix_inverse%local_data, 1, 1, desca, info)
2092 ELSE
2093 ! Use singular value decomposition
2094 CALL cp_fm_create(matrix=u, &
2095 matrix_struct=matrix_a%matrix_struct, &
2096 name="LEFT_SINGULAR_MATRIX")
2097 CALL cp_fm_set_all(u, alpha=0.0_dp)
2098 CALL cp_fm_create(matrix=vt, &
2099 matrix_struct=matrix_a%matrix_struct, &
2100 name="RIGHT_SINGULAR_MATRIX")
2101 CALL cp_fm_set_all(vt, alpha=0.0_dp)
2102 ALLOCATE (diag(n))
2103 diag(:) = 0.0_dp
2104 desca(:) = matrix_lu%matrix_struct%descriptor(:)
2105 ALLOCATE (work(1))
2106 ! Workspace query
2107 lwork = -1
2108 CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
2109 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
2110 lwork = int(work(1))
2111 DEALLOCATE (work)
2112 ALLOCATE (work(lwork))
2113 ! SVD
2114 CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
2115 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
2116 ! info == n+1 implies homogeneity error when the number of procs is large
2117 ! this likely isnt a problem, but maybe we should handle it separately
2118 IF (info /= 0 .AND. info /= n + 1) &
2119 cpabort("Singular value decomposition of matrix failed.")
2120 ! (Pseudo)inverse and (pseudo)determinant
2121 CALL cp_fm_create(matrix=sigma, &
2122 matrix_struct=matrix_a%matrix_struct, &
2123 name="SINGULAR_VALUE_MATRIX")
2124 CALL cp_fm_set_all(sigma, alpha=0.0_dp)
2125 determinant = 1.0_dp
2126 quenched = .false.
2127 IF (PRESENT(eigval)) THEN
2128 cpassert(.NOT. ASSOCIATED(eigval))
2129 ALLOCATE (eigval(n))
2130 eigval(:) = diag
2131 END IF
2132 DO i = 1, n
2133 IF (diag(i) < my_eps_svd) THEN
2134 diag(i) = 0.0_dp
2135 quenched = .true.
2136 ELSE
2137 determinant = determinant*diag(i)
2138 diag(i) = 1.0_dp/diag(i)
2139 END IF
2140 CALL cp_fm_set_element(sigma, i, i, diag(i))
2141 END DO
2142 DEALLOCATE (diag)
2143 IF (quenched) &
2144 CALL cp_warn(__location__, &
2145 "Linear dependencies were detected in the SVD inversion of matrix "//trim(adjustl(matrix_a%name))// &
2146 ". At least one singular value has been quenched.")
2147 ! Sigma^-1 * U^T
2148 CALL cp_fm_create(matrix=inv_sigma_ut, &
2149 matrix_struct=matrix_a%matrix_struct, &
2150 name="SINGULAR_VALUE_MATRIX")
2151 CALL cp_fm_set_all(inv_sigma_ut, alpha=0.0_dp)
2152 CALL pdgemm('N', 'T', n, n, n, 1.0_dp, sigma%local_data, 1, 1, desca, &
2153 u%local_data, 1, 1, desca, 0.0_dp, inv_sigma_ut%local_data, 1, 1, desca)
2154 ! A^-1 = V * (Sigma^-1 * U^T)
2155 CALL cp_fm_set_all(matrix_inverse, alpha=0.0_dp)
2156 CALL pdgemm('T', 'N', n, n, n, 1.0_dp, vt%local_data, 1, 1, desca, &
2157 inv_sigma_ut%local_data, 1, 1, desca, 0.0_dp, matrix_inverse%local_data, 1, 1, desca)
2158 ! Clean up
2159 DEALLOCATE (work)
2160 CALL cp_fm_release(u)
2161 CALL cp_fm_release(vt)
2162 CALL cp_fm_release(sigma)
2163 CALL cp_fm_release(inv_sigma_ut)
2164 END IF
2165#else
2166 IF (my_eps_svd .EQ. 0.0_dp) THEN
2167 sign = .true.
2168 CALL invert_matrix(matrix_a%local_data, matrix_inverse%local_data, &
2169 eval_error=eps1)
2170 CALL cp_fm_lu_decompose(matrix_lu, determinant, correct_sign=sign)
2171 IF (PRESENT(eigval)) &
2172 CALL cp_abort(__location__, &
2173 "NYI. Eigenvalues not available for return without SCALAPACK.")
2174 ELSE
2175 CALL get_pseudo_inverse_svd(matrix_a%local_data, matrix_inverse%local_data, eps_svd, &
2176 determinant, eigval)
2177 END IF
2178#endif
2179 CALL cp_fm_release(matrix_lu)
2180 DEALLOCATE (ipivot)
2181 IF (PRESENT(det_a)) det_a = determinant
2182 END SUBROUTINE cp_fm_invert
2183
2184! **************************************************************************************************
2185!> \brief inverts a triangular matrix
2186!> \param matrix_a ...
2187!> \param uplo_tr triangular format; defaults to 'U'
2188!> \author MI
2189! **************************************************************************************************
2190 SUBROUTINE cp_fm_triangular_invert(matrix_a, uplo_tr)
2191
2192 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
2193 CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
2194
2195 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_triangular_invert'
2196
2197 CHARACTER :: unit_diag, uplo
2198 INTEGER :: handle, info, ncol_global
2199 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2200#if defined(__parallel)
2201 INTEGER, DIMENSION(9) :: desca
2202#endif
2203
2204 CALL timeset(routinen, handle)
2205
2206 unit_diag = 'N'
2207 uplo = 'U'
2208 IF (PRESENT(uplo_tr)) uplo = uplo_tr
2209
2210 ncol_global = matrix_a%matrix_struct%ncol_global
2211
2212 a => matrix_a%local_data
2213
2214#if defined(__parallel)
2215 desca(:) = matrix_a%matrix_struct%descriptor(:)
2216
2217 CALL pdtrtri(uplo, unit_diag, ncol_global, a(1, 1), 1, 1, desca, info)
2218
2219#else
2220 CALL dtrtri(uplo, unit_diag, ncol_global, a(1, 1), ncol_global, info)
2221#endif
2222
2223 CALL timestop(handle)
2224 END SUBROUTINE cp_fm_triangular_invert
2225
2226! **************************************************************************************************
2227!> \brief performs a QR factorization of the input rectangular matrix A or of a submatrix of A
2228!> the computed triangular matrix R is in output of the submatrix sub(A) of size NxN
2229!> M and M give the dimension of the submatrix that has to be factorized (MxN) with M>N
2230!> \param matrix_a ...
2231!> \param matrix_r ...
2232!> \param nrow_fact ...
2233!> \param ncol_fact ...
2234!> \param first_row ...
2235!> \param first_col ...
2236!> \author MI
2237! **************************************************************************************************
2238 SUBROUTINE cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col, uplo)
2239 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_r
2240 INTEGER, INTENT(IN), OPTIONAL :: nrow_fact, ncol_fact, &
2241 first_row, first_col
2242 CHARACTER, INTENT(IN), OPTIONAL :: uplo
2243
2244 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_qr_factorization'
2245
2246 CHARACTER :: myuplo
2247 INTEGER :: handle, i, icol, info, irow, &
2248 j, lda, lwork, ncol, &
2249 ndim, nrow
2250 REAL(dp), ALLOCATABLE, DIMENSION(:) :: tau, work
2251 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: r_mat
2252 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2253#if defined(__parallel)
2254 INTEGER, DIMENSION(9) :: desca
2255#endif
2256
2257 CALL timeset(routinen, handle)
2258
2259 myuplo = 'U'
2260 IF (PRESENT(uplo)) myuplo = uplo
2261
2262 ncol = matrix_a%matrix_struct%ncol_global
2263 nrow = matrix_a%matrix_struct%nrow_global
2264 lda = nrow
2265
2266 a => matrix_a%local_data
2267
2268 IF (PRESENT(nrow_fact)) nrow = nrow_fact
2269 IF (PRESENT(ncol_fact)) ncol = ncol_fact
2270 irow = 1
2271 IF (PRESENT(first_row)) irow = first_row
2272 icol = 1
2273 IF (PRESENT(first_col)) icol = first_col
2274
2275 cpassert(nrow >= ncol)
2276 ndim = SIZE(a, 2)
2277 ALLOCATE (tau(ndim))
2278
2279#if defined(__parallel)
2280
2281 desca(:) = matrix_a%matrix_struct%descriptor(:)
2282
2283 lwork = -1
2284 ALLOCATE (work(2*ndim))
2285 CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
2286 lwork = int(work(1))
2287 DEALLOCATE (work)
2288 ALLOCATE (work(lwork))
2289 CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
2290
2291#else
2292 lwork = -1
2293 ALLOCATE (work(2*ndim))
2294 CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
2295 lwork = int(work(1))
2296 DEALLOCATE (work)
2297 ALLOCATE (work(lwork))
2298 CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
2299
2300#endif
2301
2302 ALLOCATE (r_mat(ncol, ncol))
2303 CALL cp_fm_get_submatrix(matrix_a, r_mat, 1, 1, ncol, ncol)
2304 IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
2305 DO i = 1, ncol
2306 DO j = i + 1, ncol
2307 r_mat(j, i) = 0.0_dp
2308 END DO
2309 END DO
2310 ELSE
2311 DO j = 1, ncol
2312 DO i = j + 1, ncol
2313 r_mat(i, j) = 0.0_dp
2314 END DO
2315 END DO
2316 END IF
2317 CALL cp_fm_set_submatrix(matrix_r, r_mat, 1, 1, ncol, ncol)
2318
2319 DEALLOCATE (tau, work, r_mat)
2320
2321 CALL timestop(handle)
2322
2323 END SUBROUTINE cp_fm_qr_factorization
2324
2325! **************************************************************************************************
2326!> \brief computes the the solution to A*b=A_general using lu decomposition
2327!> \param matrix_a input matrix; will be overwritten
2328!> \param general_a contains the result
2329!> \author Florian Schiffmann
2330! **************************************************************************************************
2331 SUBROUTINE cp_fm_solve(matrix_a, general_a)
2332 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, general_a
2333
2334 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_solve'
2335
2336 INTEGER :: handle, info, n, nrhs
2337 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
2338 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, a_general
2339#if defined(__parallel)
2340 INTEGER, DIMENSION(9) :: desca, descb
2341#else
2342 INTEGER :: lda, ldb
2343#endif
2344
2345 CALL timeset(routinen, handle)
2346
2347 a => matrix_a%local_data
2348 a_general => general_a%local_data
2349 n = matrix_a%matrix_struct%nrow_global
2350 nrhs = general_a%matrix_struct%ncol_global
2351 ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
2352
2353#if defined(__parallel)
2354 desca(:) = matrix_a%matrix_struct%descriptor(:)
2355 descb(:) = general_a%matrix_struct%descriptor(:)
2356 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
2357 CALL pdgetrs("N", n, nrhs, a, 1, 1, desca, ipivot, a_general, &
2358 1, 1, descb, info)
2359
2360#else
2361 lda = SIZE(a, 1)
2362 ldb = SIZE(a_general, 1)
2363 CALL dgetrf(n, n, a, lda, ipivot, info)
2364 CALL dgetrs("N", n, nrhs, a, lda, ipivot, a_general, ldb, info)
2365
2366#endif
2367 ! info is allowed to be zero
2368 ! this does just signal a zero diagonal element
2369 DEALLOCATE (ipivot)
2370 CALL timestop(handle)
2371 END SUBROUTINE
2372
2373! **************************************************************************************************
2374!> \brief Convenience function. Computes the matrix multiplications needed
2375!> for the multiplication of complex matrices.
2376!> C = beta * C + alpha * ( A ** transa ) * ( B ** transb )
2377!> \param transa : 'N' -> normal 'T' -> transpose
2378!> alpha,beta :: can be 0.0_dp and 1.0_dp
2379!> \param transb ...
2380!> \param m ...
2381!> \param n ...
2382!> \param k ...
2383!> \param alpha ...
2384!> \param A_re m x k matrix ( ! for transa = 'N'), real part
2385!> \param A_im m x k matrix ( ! for transa = 'N'), imaginary part
2386!> \param B_re k x n matrix ( ! for transa = 'N'), real part
2387!> \param B_im k x n matrix ( ! for transa = 'N'), imaginary part
2388!> \param beta ...
2389!> \param C_re m x n matrix, real part
2390!> \param C_im m x n matrix, imaginary part
2391!> \param a_first_col ...
2392!> \param a_first_row ...
2393!> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
2394!> \param b_first_row ...
2395!> \param c_first_col ...
2396!> \param c_first_row ...
2397!> \author Samuel Andermatt
2398!> \note
2399!> C should have no overlap with A, B
2400! **************************************************************************************************
2401 SUBROUTINE cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, &
2402 C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, &
2403 c_first_row)
2404 CHARACTER(LEN=1), INTENT(IN) :: transa, transb
2405 INTEGER, INTENT(IN) :: m, n, k
2406 REAL(kind=dp), INTENT(IN) :: alpha
2407 TYPE(cp_fm_type), INTENT(IN) :: a_re, a_im, b_re, b_im
2408 REAL(kind=dp), INTENT(IN) :: beta
2409 TYPE(cp_fm_type), INTENT(IN) :: c_re, c_im
2410 INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, b_first_col, &
2411 b_first_row, c_first_col, c_first_row
2412
2413 CHARACTER(len=*), PARAMETER :: routinen = 'cp_complex_fm_gemm'
2414
2415 INTEGER :: handle
2416
2417 CALL timeset(routinen, handle)
2418
2419 CALL cp_fm_gemm(transa, transb, m, n, k, alpha, a_re, b_re, beta, c_re, &
2420 a_first_col=a_first_col, &
2421 a_first_row=a_first_row, &
2422 b_first_col=b_first_col, &
2423 b_first_row=b_first_row, &
2424 c_first_col=c_first_col, &
2425 c_first_row=c_first_row)
2426 CALL cp_fm_gemm(transa, transb, m, n, k, -alpha, a_im, b_im, 1.0_dp, c_re, &
2427 a_first_col=a_first_col, &
2428 a_first_row=a_first_row, &
2429 b_first_col=b_first_col, &
2430 b_first_row=b_first_row, &
2431 c_first_col=c_first_col, &
2432 c_first_row=c_first_row)
2433 CALL cp_fm_gemm(transa, transb, m, n, k, alpha, a_re, b_im, beta, c_im, &
2434 a_first_col=a_first_col, &
2435 a_first_row=a_first_row, &
2436 b_first_col=b_first_col, &
2437 b_first_row=b_first_row, &
2438 c_first_col=c_first_col, &
2439 c_first_row=c_first_row)
2440 CALL cp_fm_gemm(transa, transb, m, n, k, alpha, a_im, b_re, 1.0_dp, c_im, &
2441 a_first_col=a_first_col, &
2442 a_first_row=a_first_row, &
2443 b_first_col=b_first_col, &
2444 b_first_row=b_first_row, &
2445 c_first_col=c_first_col, &
2446 c_first_row=c_first_row)
2447
2448 CALL timestop(handle)
2449
2450 END SUBROUTINE cp_complex_fm_gemm
2451
2452! **************************************************************************************************
2453!> \brief inverts a matrix using LU decomposition
2454!> the input matrix will be overwritten
2455!> \param matrix : input a general square non-singular matrix, outputs its inverse
2456!> \param info_out : optional, if present outputs the info from (p)zgetri
2457!> \author Lianheng Tong
2458! **************************************************************************************************
2459 SUBROUTINE cp_fm_lu_invert(matrix, info_out)
2460 TYPE(cp_fm_type), INTENT(IN) :: matrix
2461 INTEGER, INTENT(OUT), OPTIONAL :: info_out
2462
2463 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_lu_invert'
2464
2465 INTEGER :: nrows_global, handle, info, lwork
2466 INTEGER, DIMENSION(:), ALLOCATABLE :: ipivot
2467 REAL(kind=dp), DIMENSION(:, :), POINTER :: mat
2468 REAL(kind=sp), DIMENSION(:, :), POINTER :: mat_sp
2469 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: work
2470 REAL(kind=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2471#if defined(__parallel)
2472 INTEGER :: liwork
2473 INTEGER, DIMENSION(9) :: desca
2474 INTEGER, DIMENSION(:), ALLOCATABLE :: iwork
2475#else
2476 INTEGER :: lda
2477#endif
2478
2479 CALL timeset(routinen, handle)
2480
2481 mat => matrix%local_data
2482 mat_sp => matrix%local_data_sp
2483 nrows_global = matrix%matrix_struct%nrow_global
2484 cpassert(nrows_global .EQ. matrix%matrix_struct%ncol_global)
2485 ALLOCATE (ipivot(nrows_global))
2486 ! do LU decomposition
2487#if defined(__parallel)
2488 desca = matrix%matrix_struct%descriptor
2489 IF (matrix%use_sp) THEN
2490 CALL psgetrf(nrows_global, nrows_global, &
2491 mat_sp, 1, 1, desca, ipivot, info)
2492 ELSE
2493 CALL pdgetrf(nrows_global, nrows_global, &
2494 mat, 1, 1, desca, ipivot, info)
2495 END IF
2496#else
2497 lda = SIZE(mat, 1)
2498 IF (matrix%use_sp) THEN
2499 CALL sgetrf(nrows_global, nrows_global, &
2500 mat_sp, lda, ipivot, info)
2501 ELSE
2502 CALL dgetrf(nrows_global, nrows_global, &
2503 mat, lda, ipivot, info)
2504 END IF
2505#endif
2506 IF (info /= 0) THEN
2507 CALL cp_abort(__location__, "LU decomposition has failed")
2508 END IF
2509 ! do inversion
2510 IF (matrix%use_sp) THEN
2511 ALLOCATE (work(1))
2512 ELSE
2513 ALLOCATE (work_sp(1))
2514 END IF
2515#if defined(__parallel)
2516 ALLOCATE (iwork(1))
2517 IF (matrix%use_sp) THEN
2518 CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2519 ipivot, work_sp, -1, iwork, -1, info)
2520 lwork = int(work_sp(1))
2521 DEALLOCATE (work_sp)
2522 ALLOCATE (work_sp(lwork))
2523 ELSE
2524 CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2525 ipivot, work, -1, iwork, -1, info)
2526 lwork = int(work(1))
2527 DEALLOCATE (work)
2528 ALLOCATE (work(lwork))
2529 END IF
2530 liwork = int(iwork(1))
2531 DEALLOCATE (iwork)
2532 ALLOCATE (iwork(liwork))
2533 IF (matrix%use_sp) THEN
2534 CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2535 ipivot, work_sp, lwork, iwork, liwork, info)
2536 ELSE
2537 CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2538 ipivot, work, lwork, iwork, liwork, info)
2539 END IF
2540 DEALLOCATE (iwork)
2541#else
2542 IF (matrix%use_sp) THEN
2543 CALL sgetri(nrows_global, mat_sp, lda, &
2544 ipivot, work_sp, -1, info)
2545 lwork = int(work_sp(1))
2546 DEALLOCATE (work_sp)
2547 ALLOCATE (work_sp(lwork))
2548 CALL sgetri(nrows_global, mat_sp, lda, &
2549 ipivot, work_sp, lwork, info)
2550 ELSE
2551 CALL dgetri(nrows_global, mat, lda, &
2552 ipivot, work, -1, info)
2553 lwork = int(work(1))
2554 DEALLOCATE (work)
2555 ALLOCATE (work(lwork))
2556 CALL dgetri(nrows_global, mat, lda, &
2557 ipivot, work, lwork, info)
2558 END IF
2559#endif
2560 IF (matrix%use_sp) THEN
2561 DEALLOCATE (work_sp)
2562 ELSE
2563 DEALLOCATE (work)
2564 END IF
2565 DEALLOCATE (ipivot)
2566
2567 IF (PRESENT(info_out)) THEN
2568 info_out = info
2569 ELSE
2570 IF (info /= 0) &
2571 CALL cp_abort(__location__, "LU inversion has failed")
2572 END IF
2573
2574 CALL timestop(handle)
2575
2576 END SUBROUTINE cp_fm_lu_invert
2577
2578! **************************************************************************************************
2579!> \brief norm of matrix using (p)dlange
2580!> \param matrix : input a general matrix
2581!> \param mode : 'M' max abs element value,
2582!> '1' or 'O' one norm, i.e. maximum column sum
2583!> 'I' infinity norm, i.e. maximum row sum
2584!> 'F' or 'E' Frobenius norm, i.e. sqrt of sum of all squares of elements
2585!> \return : the norm according to mode
2586!> \author Lianheng Tong
2587! **************************************************************************************************
2588 FUNCTION cp_fm_norm(matrix, mode) RESULT(res)
2589 TYPE(cp_fm_type), INTENT(IN) :: matrix
2590 CHARACTER, INTENT(IN) :: mode
2591 REAL(kind=dp) :: res
2592
2593 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_norm'
2594
2595 INTEGER :: nrows, ncols, handle, lwork, nrows_local, ncols_local
2596 REAL(kind=sp) :: res_sp
2597 REAL(kind=dp), DIMENSION(:, :), POINTER :: aa
2598 REAL(kind=sp), DIMENSION(:, :), POINTER :: aa_sp
2599 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: work
2600 REAL(kind=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2601#if defined(__parallel)
2602 INTEGER, DIMENSION(9) :: desca
2603#else
2604 INTEGER :: lda
2605#endif
2606
2607 CALL timeset(routinen, handle)
2608
2609 CALL cp_fm_get_info(matrix=matrix, &
2610 nrow_global=nrows, &
2611 ncol_global=ncols, &
2612 nrow_local=nrows_local, &
2613 ncol_local=ncols_local)
2614 aa => matrix%local_data
2615 aa_sp => matrix%local_data_sp
2616
2617#if defined(__parallel)
2618 desca = matrix%matrix_struct%descriptor
2619 SELECT CASE (mode)
2620 CASE ('M', 'm')
2621 lwork = 1
2622 CASE ('1', 'O', 'o')
2623 lwork = ncols_local
2624 CASE ('I', 'i')
2625 lwork = nrows_local
2626 CASE ('F', 'f', 'E', 'e')
2627 lwork = 1
2628 CASE DEFAULT
2629 cpabort("mode input is not valid")
2630 END SELECT
2631 IF (matrix%use_sp) THEN
2632 ALLOCATE (work_sp(lwork))
2633 res_sp = pslange(mode, nrows, ncols, aa_sp, 1, 1, desca, work_sp)
2634 DEALLOCATE (work_sp)
2635 res = real(res_sp, kind=dp)
2636 ELSE
2637 ALLOCATE (work(lwork))
2638 res = pdlange(mode, nrows, ncols, aa, 1, 1, desca, work)
2639 DEALLOCATE (work)
2640 END IF
2641#else
2642 SELECT CASE (mode)
2643 CASE ('M', 'm')
2644 lwork = 1
2645 CASE ('1', 'O', 'o')
2646 lwork = 1
2647 CASE ('I', 'i')
2648 lwork = nrows
2649 CASE ('F', 'f', 'E', 'e')
2650 lwork = 1
2651 CASE DEFAULT
2652 cpabort("mode input is not valid")
2653 END SELECT
2654 IF (matrix%use_sp) THEN
2655 ALLOCATE (work_sp(lwork))
2656 lda = SIZE(aa_sp, 1)
2657 res_sp = slange(mode, nrows, ncols, aa_sp, lda, work_sp)
2658 DEALLOCATE (work_sp)
2659 res = real(res_sp, kind=dp)
2660 ELSE
2661 ALLOCATE (work(lwork))
2662 lda = SIZE(aa, 1)
2663 res = dlange(mode, nrows, ncols, aa, lda, work)
2664 DEALLOCATE (work)
2665 END IF
2666#endif
2667
2668 CALL timestop(handle)
2669
2670 END FUNCTION cp_fm_norm
2671
2672! **************************************************************************************************
2673!> \brief trace of a matrix using pdlatra
2674!> \param matrix : input a square matrix
2675!> \return : the trace
2676!> \author Lianheng Tong
2677! **************************************************************************************************
2678 FUNCTION cp_fm_latra(matrix) RESULT(res)
2679 TYPE(cp_fm_type), INTENT(IN) :: matrix
2680 REAL(kind=dp) :: res
2681
2682 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_latra'
2683
2684 INTEGER :: nrows, ncols, handle
2685 REAL(kind=sp) :: res_sp
2686 REAL(kind=dp), DIMENSION(:, :), POINTER :: aa
2687 REAL(kind=sp), DIMENSION(:, :), POINTER :: aa_sp
2688#if defined(__parallel)
2689 INTEGER, DIMENSION(9) :: desca
2690#else
2691 INTEGER :: ii
2692#endif
2693
2694 CALL timeset(routinen, handle)
2695
2696 nrows = matrix%matrix_struct%nrow_global
2697 ncols = matrix%matrix_struct%ncol_global
2698 cpassert(nrows .EQ. ncols)
2699 aa => matrix%local_data
2700 aa_sp => matrix%local_data_sp
2701
2702#if defined(__parallel)
2703 desca = matrix%matrix_struct%descriptor
2704 IF (matrix%use_sp) THEN
2705 res_sp = pslatra(nrows, aa_sp, 1, 1, desca)
2706 res = real(res_sp, kind=dp)
2707 ELSE
2708 res = pdlatra(nrows, aa, 1, 1, desca)
2709 END IF
2710#else
2711 IF (matrix%use_sp) THEN
2712 res_sp = 0.0_sp
2713 DO ii = 1, nrows
2714 res_sp = res_sp + aa_sp(ii, ii)
2715 END DO
2716 res = real(res_sp, kind=dp)
2717 ELSE
2718 res = 0.0_dp
2719 DO ii = 1, nrows
2720 res = res + aa(ii, ii)
2721 END DO
2722 END IF
2723#endif
2724
2725 CALL timestop(handle)
2726
2727 END FUNCTION cp_fm_latra
2728
2729! **************************************************************************************************
2730!> \brief compute a QR factorization with column pivoting of a M-by-N distributed matrix
2731!> sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
2732!> \param matrix : input M-by-N distributed matrix sub( A ) which is to be factored
2733!> \param tau : scalar factors TAU of the elementary reflectors. TAU is tied to the distributed matrix A
2734!> \param nrow ...
2735!> \param ncol ...
2736!> \param first_row ...
2737!> \param first_col ...
2738!> \author MI
2739! **************************************************************************************************
2740 SUBROUTINE cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
2741
2742 TYPE(cp_fm_type), INTENT(IN) :: matrix
2743 REAL(kind=dp), DIMENSION(:), POINTER :: tau
2744 INTEGER, INTENT(IN) :: nrow, ncol, first_row, first_col
2745
2746 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_pdgeqpf'
2747
2748 INTEGER :: handle
2749 INTEGER :: info, lwork
2750 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipiv
2751 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2752 REAL(kind=dp), DIMENSION(:), POINTER :: work
2753#if defined(__parallel)
2754 INTEGER, DIMENSION(9) :: descc
2755#else
2756 INTEGER :: lda
2757#endif
2758
2759 CALL timeset(routinen, handle)
2760
2761 a => matrix%local_data
2762 lwork = -1
2763 ALLOCATE (work(2*nrow))
2764 ALLOCATE (ipiv(ncol))
2765 info = 0
2766
2767#if defined(__parallel)
2768 descc(:) = matrix%matrix_struct%descriptor(:)
2769 ! Call SCALAPACK routine to get optimal work dimension
2770 CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2771 lwork = int(work(1))
2772 DEALLOCATE (work)
2773 ALLOCATE (work(lwork))
2774 tau = 0.0_dp
2775 ipiv = 0
2776
2777 ! Call SCALAPACK routine to get QR decomposition of CTs
2778 CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2779#else
2780 cpassert(first_row == 1 .AND. first_col == 1)
2781 lda = SIZE(a, 1)
2782 CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2783 lwork = int(work(1))
2784 DEALLOCATE (work)
2785 ALLOCATE (work(lwork))
2786 tau = 0.0_dp
2787 ipiv = 0
2788 CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2789#endif
2790 cpassert(info == 0)
2791
2792 DEALLOCATE (work)
2793 DEALLOCATE (ipiv)
2794
2795 CALL timestop(handle)
2796
2797 END SUBROUTINE cp_fm_pdgeqpf
2798
2799! **************************************************************************************************
2800!> \brief generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1)
2801!> with orthonormal columns, which is defined as the first N columns of a product of K
2802!> elementary reflectors of order M
2803!> \param matrix : On entry, the j-th column must contain the vector which defines the elementary reflector
2804!> as returned from PDGEQRF
2805!> On exit it contains the M-by-N distributed matrix Q
2806!> \param tau : contains the scalar factors TAU of elementary reflectors as returned by PDGEQRF
2807!> \param nrow ...
2808!> \param first_row ...
2809!> \param first_col ...
2810!> \author MI
2811! **************************************************************************************************
2812 SUBROUTINE cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
2813
2814 TYPE(cp_fm_type), INTENT(IN) :: matrix
2815 REAL(kind=dp), DIMENSION(:), POINTER :: tau
2816 INTEGER, INTENT(IN) :: nrow, first_row, first_col
2817
2818 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_pdorgqr'
2819
2820 INTEGER :: handle
2821 INTEGER :: info, lwork
2822 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2823 REAL(kind=dp), DIMENSION(:), POINTER :: work
2824#if defined(__parallel)
2825 INTEGER, DIMENSION(9) :: descc
2826#else
2827 INTEGER :: lda
2828#endif
2829
2830 CALL timeset(routinen, handle)
2831
2832 a => matrix%local_data
2833 lwork = -1
2834 ALLOCATE (work(2*nrow))
2835 info = 0
2836
2837#if defined(__parallel)
2838 descc(:) = matrix%matrix_struct%descriptor(:)
2839
2840 CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2841 cpassert(info == 0)
2842 lwork = int(work(1))
2843 DEALLOCATE (work)
2844 ALLOCATE (work(lwork))
2845
2846 ! Call SCALAPACK routine to get Q
2847 CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2848#else
2849 cpassert(first_row == 1 .AND. first_col == 1)
2850 lda = SIZE(a, 1)
2851 CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2852 lwork = int(work(1))
2853 DEALLOCATE (work)
2854 ALLOCATE (work(lwork))
2855 CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2856#endif
2857 cpassert(info == 0)
2858
2859 DEALLOCATE (work)
2860 CALL timestop(handle)
2861
2862 END SUBROUTINE cp_fm_pdorgqr
2863
2864! **************************************************************************************************
2865!> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
2866!> \param matrix ...
2867!> \param irow ...
2868!> \param jrow ...
2869!> \param cs cosine of the rotation angle
2870!> \param sn sinus of the rotation angle
2871!> \author Ole Schuett
2872! **************************************************************************************************
2873 SUBROUTINE cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
2874 TYPE(cp_fm_type), INTENT(IN) :: matrix
2875 INTEGER, INTENT(IN) :: irow, jrow
2876 REAL(dp), INTENT(IN) :: cs, sn
2877
2878 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_rot_rows'
2879 INTEGER :: handle, ncol
2880
2881#if defined(__parallel)
2882 INTEGER :: info, lwork
2883 INTEGER, DIMENSION(9) :: desc
2884 REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2885#endif
2886 CALL timeset(routinen, handle)
2887 CALL cp_fm_get_info(matrix, ncol_global=ncol)
2888#if defined(__parallel)
2889 IF (1 /= matrix%matrix_struct%context%n_pid) THEN
2890 lwork = 2*ncol + 1
2891 ALLOCATE (work(lwork))
2892 desc(:) = matrix%matrix_struct%descriptor(:)
2893 CALL pdrot(ncol, &
2894 matrix%local_data(1, 1), irow, 1, desc, ncol, &
2895 matrix%local_data(1, 1), jrow, 1, desc, ncol, &
2896 cs, sn, work, lwork, info)
2897 cpassert(info == 0)
2898 DEALLOCATE (work)
2899 ELSE
2900#endif
2901 CALL drot(ncol, matrix%local_data(irow, 1), ncol, matrix%local_data(jrow, 1), ncol, cs, sn)
2902#if defined(__parallel)
2903 END IF
2904#endif
2905 CALL timestop(handle)
2906 END SUBROUTINE cp_fm_rot_rows
2907
2908! **************************************************************************************************
2909!> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
2910!> \param matrix ...
2911!> \param icol ...
2912!> \param jcol ...
2913!> \param cs cosine of the rotation angle
2914!> \param sn sinus of the rotation angle
2915!> \author Ole Schuett
2916! **************************************************************************************************
2917 SUBROUTINE cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
2918 TYPE(cp_fm_type), INTENT(IN) :: matrix
2919 INTEGER, INTENT(IN) :: icol, jcol
2920 REAL(dp), INTENT(IN) :: cs, sn
2921
2922 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_rot_cols'
2923 INTEGER :: handle, nrow
2924
2925#if defined(__parallel)
2926 INTEGER :: info, lwork
2927 INTEGER, DIMENSION(9) :: desc
2928 REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2929#endif
2930 CALL timeset(routinen, handle)
2931 CALL cp_fm_get_info(matrix, nrow_global=nrow)
2932#if defined(__parallel)
2933 IF (1 /= matrix%matrix_struct%context%n_pid) THEN
2934 lwork = 2*nrow + 1
2935 ALLOCATE (work(lwork))
2936 desc(:) = matrix%matrix_struct%descriptor(:)
2937 CALL pdrot(nrow, &
2938 matrix%local_data(1, 1), 1, icol, desc, 1, &
2939 matrix%local_data(1, 1), 1, jcol, desc, 1, &
2940 cs, sn, work, lwork, info)
2941 cpassert(info == 0)
2942 DEALLOCATE (work)
2943 ELSE
2944#endif
2945 CALL drot(nrow, matrix%local_data(1, icol), 1, matrix%local_data(1, jcol), 1, cs, sn)
2946#if defined(__parallel)
2947 END IF
2948#endif
2949 CALL timestop(handle)
2950 END SUBROUTINE cp_fm_rot_cols
2951
2952! **************************************************************************************************
2953!> \brief Orthonormalizes selected rows and columns of a full matrix, matrix_a
2954!> \param matrix_a ...
2955!> \param B ...
2956!> \param nrows number of rows of matrix_a, optional, defaults to size(matrix_a,1)
2957!> \param ncols number of columns of matrix_a, optional, defaults to size(matrix_a, 2)
2958!> \param start_row starting index of rows, optional, defaults to 1
2959!> \param start_col starting index of columns, optional, defaults to 1
2960!> \param do_norm ...
2961!> \param do_print ...
2962! **************************************************************************************************
2963 SUBROUTINE cp_fm_gram_schmidt_orthonorm(matrix_a, B, nrows, ncols, start_row, start_col, &
2964 do_norm, do_print)
2965
2966 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
2967 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: b
2968 INTEGER, INTENT(IN), OPTIONAL :: nrows, ncols, start_row, start_col
2969 LOGICAL, INTENT(IN), OPTIONAL :: do_norm, do_print
2970
2971 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_Gram_Schmidt_orthonorm'
2972
2973 INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
2974 j_col, ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, &
2975 start_col_local, start_row_global, start_row_local, this_col, unit_nr
2976 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
2977 LOGICAL :: my_do_norm, my_do_print
2978 REAL(kind=dp) :: norm
2979 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2980
2981 CALL timeset(routinen, handle)
2982
2983 my_do_norm = .true.
2984 IF (PRESENT(do_norm)) my_do_norm = do_norm
2985
2986 my_do_print = .false.
2987 IF (PRESENT(do_print) .AND. (my_do_norm)) my_do_print = do_print
2988
2989 unit_nr = -1
2990 IF (my_do_print) THEN
2992 IF (unit_nr < 1) my_do_print = .false.
2993 END IF
2994
2995 IF (SIZE(b) /= 0) THEN
2996 IF (PRESENT(nrows)) THEN
2997 nrow_global = nrows
2998 ELSE
2999 nrow_global = SIZE(b, 1)
3000 END IF
3001
3002 IF (PRESENT(ncols)) THEN
3003 ncol_global = ncols
3004 ELSE
3005 ncol_global = SIZE(b, 2)
3006 END IF
3007
3008 IF (PRESENT(start_row)) THEN
3009 start_row_global = start_row
3010 ELSE
3011 start_row_global = 1
3012 END IF
3013
3014 IF (PRESENT(start_col)) THEN
3015 start_col_global = start_col
3016 ELSE
3017 start_col_global = 1
3018 END IF
3019
3020 end_row_global = start_row_global + nrow_global - 1
3021 end_col_global = start_col_global + ncol_global - 1
3022
3023 CALL cp_fm_get_info(matrix=matrix_a, &
3024 nrow_global=nrow_global, ncol_global=ncol_global, &
3025 nrow_local=nrow_local, ncol_local=ncol_local, &
3026 row_indices=row_indices, col_indices=col_indices)
3027 IF (end_row_global > nrow_global) THEN
3028 end_row_global = nrow_global
3029 END IF
3030 IF (end_col_global > ncol_global) THEN
3031 end_col_global = ncol_global
3032 END IF
3033
3034 ! find out row/column indices of locally stored matrix elements that
3035 ! needs to be copied.
3036 ! Arrays row_indices and col_indices are assumed to be sorted in
3037 ! ascending order
3038 DO start_row_local = 1, nrow_local
3039 IF (row_indices(start_row_local) >= start_row_global) EXIT
3040 END DO
3041
3042 DO end_row_local = start_row_local, nrow_local
3043 IF (row_indices(end_row_local) > end_row_global) EXIT
3044 END DO
3045 end_row_local = end_row_local - 1
3046
3047 DO start_col_local = 1, ncol_local
3048 IF (col_indices(start_col_local) >= start_col_global) EXIT
3049 END DO
3050
3051 DO end_col_local = start_col_local, ncol_local
3052 IF (col_indices(end_col_local) > end_col_global) EXIT
3053 END DO
3054 end_col_local = end_col_local - 1
3055
3056 a => matrix_a%local_data
3057
3058 this_col = col_indices(start_col_local) - start_col_global + 1
3059
3060 b(:, this_col) = a(:, start_col_local)
3061
3062 IF (my_do_norm) THEN
3063 norm = sqrt(accurate_dot_product(b(:, this_col), b(:, this_col)))
3064 b(:, this_col) = b(:, this_col)/norm
3065 IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
3066 END IF
3067
3068 DO i = start_col_local + 1, end_col_local
3069 this_col = col_indices(i) - start_col_global + 1
3070 b(:, this_col) = a(:, i)
3071 DO j = start_col_local, i - 1
3072 j_col = col_indices(j) - start_col_global + 1
3073 b(:, this_col) = b(:, this_col) - &
3074 accurate_dot_product(b(:, j_col), b(:, this_col))* &
3075 b(:, j_col)/accurate_dot_product(b(:, j_col), b(:, j_col))
3076 END DO
3077
3078 IF (my_do_norm) THEN
3079 norm = sqrt(accurate_dot_product(b(:, this_col), b(:, this_col)))
3080 b(:, this_col) = b(:, this_col)/norm
3081 IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
3082 END IF
3083
3084 END DO
3085 CALL matrix_a%matrix_struct%para_env%sum(b)
3086 END IF
3087
3088 CALL timestop(handle)
3089
3090 END SUBROUTINE cp_fm_gram_schmidt_orthonorm
3091
3092! **************************************************************************************************
3093!> \brief Cholesky decomposition
3094!> \param fm_matrix ...
3095!> \param n ...
3096!> \param uplo triangular format; defaults to 'U'
3097! **************************************************************************************************
3098 SUBROUTINE cp_fm_potrf(fm_matrix, n, uplo)
3099 TYPE(cp_fm_type) :: fm_matrix
3100 INTEGER, INTENT(IN) :: n
3101 CHARACTER, INTENT(IN), OPTIONAL :: uplo
3102
3103 CHARACTER :: myuplo
3104 INTEGER :: info
3105 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
3106 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
3107#if defined(__parallel)
3108 INTEGER, DIMENSION(9) :: desca
3109#endif
3110
3111 myuplo = 'U'
3112 IF (PRESENT(uplo)) myuplo = uplo
3113
3114 a => fm_matrix%local_data
3115 a_sp => fm_matrix%local_data_sp
3116#if defined(__parallel)
3117 desca(:) = fm_matrix%matrix_struct%descriptor(:)
3118 IF (fm_matrix%use_sp) THEN
3119 CALL pspotrf(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
3120 ELSE
3121 CALL pdpotrf(myuplo, n, a(1, 1), 1, 1, desca, info)
3122 END IF
3123#else
3124 IF (fm_matrix%use_sp) THEN
3125 CALL spotrf(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
3126 ELSE
3127 CALL dpotrf(myuplo, n, a(1, 1), SIZE(a, 1), info)
3128 END IF
3129#endif
3130 IF (info /= 0) &
3131 cpabort("Cholesky decomposition failed. Matrix ill-conditioned?")
3132
3133 END SUBROUTINE cp_fm_potrf
3134
3135! **************************************************************************************************
3136!> \brief Invert trianguar matrix
3137!> \param fm_matrix the matrix to invert (triangular matrix according to uplo)
3138!> \param n size of the matrix to invert
3139!> \param uplo triangular format; defaults to 'U'
3140! **************************************************************************************************
3141 SUBROUTINE cp_fm_potri(fm_matrix, n, uplo)
3142 TYPE(cp_fm_type) :: fm_matrix
3143 INTEGER, INTENT(IN) :: n
3144 CHARACTER, INTENT(IN), OPTIONAL :: uplo
3145
3146 CHARACTER :: myuplo
3147 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
3148 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
3149 INTEGER :: info
3150#if defined(__parallel)
3151 INTEGER, DIMENSION(9) :: desca
3152#endif
3153
3154 myuplo = 'U'
3155 IF (PRESENT(uplo)) myuplo = uplo
3156
3157 a => fm_matrix%local_data
3158 a_sp => fm_matrix%local_data_sp
3159#if defined(__parallel)
3160 desca(:) = fm_matrix%matrix_struct%descriptor(:)
3161 IF (fm_matrix%use_sp) THEN
3162 CALL pspotri(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
3163 ELSE
3164 CALL pdpotri(myuplo, n, a(1, 1), 1, 1, desca, info)
3165 END IF
3166#else
3167 IF (fm_matrix%use_sp) THEN
3168 CALL spotri(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
3169 ELSE
3170 CALL dpotri(myuplo, n, a(1, 1), SIZE(a, 1), info)
3171 END IF
3172#endif
3173 cpassert(info == 0)
3174 END SUBROUTINE cp_fm_potri
3175
3176! **************************************************************************************************
3177!> \brief Calculates
3178!> yv = alpha*amat*xv + beta*yv
3179!> where amat: fm matrix
3180!> xv : vector replicated
3181!> yv : vector replicated
3182!> Defaults: alpha = 1, beta = 0
3183! **************************************************************************************************
3184 SUBROUTINE cp_fm_matvec(amat, xv, yv, alpha, beta)
3185 TYPE(cp_fm_type), INTENT(IN) :: amat
3186 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: xv
3187 REAL(kind=dp), DIMENSION(:), INTENT(INOUT) :: yv
3188 REAL(kind=dp), OPTIONAL, INTENT(IN) :: alpha, beta
3189
3190 INTEGER :: na, nc, nx, ny
3191 REAL(kind=dp) :: aval, bval
3192#if defined(__parallel)
3193 INTEGER :: nrl, ncl, ic, ir
3194 INTEGER, DIMENSION(:), POINTER :: rind, cind
3195 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: xvl, yvl, yvm
3196#endif
3197
3198 IF (amat%use_sp) THEN
3199 cpabort("cp_fm_matvec: SP option not available")
3200 END IF
3201 aval = 1.0_dp
3202 IF (PRESENT(alpha)) aval = alpha
3203 bval = 0.0_dp
3204 IF (PRESENT(beta)) bval = beta
3205
3206 CALL cp_fm_get_info(amat, nrow_global=na, ncol_global=nc)
3207 nx = SIZE(xv)
3208 ny = SIZE(yv)
3209 IF ((nx /= ny) .OR. (nc /= nx)) THEN
3210 cpabort("cp_fm_matvec: incompatible dimensions")
3211 END IF
3212#if defined(__parallel)
3213 CALL cp_fm_get_info(amat, nrow_local=nrl, ncol_local=ncl, &
3214 row_indices=rind, col_indices=cind)
3215 ALLOCATE (xvl(ncl), yvl(nrl), yvm(ny))
3216 DO ic = 1, ncl
3217 xvl(ic) = xv(cind(ic))
3218 END DO
3219 yvl(1:nrl) = matmul(amat%local_data, xvl(1:ncl))
3220 yvm = 0.0_dp
3221 DO ir = 1, nrl
3222 yvm(rind(ir)) = yvl(ir)
3223 END DO
3224 CALL amat%matrix_struct%para_env%sum(yvm)
3225 IF (bval == 0.0_dp) THEN
3226 yv = aval*yvm
3227 ELSE
3228 yv = bval*yv + aval*yvm
3229 END IF
3230#else
3231 IF (bval == 0.0_dp) THEN
3232 yv = aval*matmul(amat%local_data, xv)
3233 ELSE
3234 yv = bval*yv + aval*matmul(amat%local_data, xv)
3235 END IF
3236#endif
3237
3238 END SUBROUTINE cp_fm_matvec
3239
3240END MODULE cp_fm_basic_linalg
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
methods related to the blacs parallel environment
Basic linear algebra operations for full matrices.
subroutine, public cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
subroutine, public cp_fm_row_scale(matrixa, scaling)
scales row i of matrix a with scaling(i)
subroutine, public cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
subroutine, public cp_fm_column_scale(matrixa, scaling)
scales column i of matrix a with scaling(i)
subroutine, public cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
subroutine, public cp_fm_solve(matrix_a, general_a)
computes the the solution to A*b=A_general using lu decomposition
subroutine, public cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
compute a QR factorization with column pivoting of a M-by-N distributed matrix sub( A ) = A(IA:IA+M-1...
real(kind=dp) function, public cp_fm_frobenius_norm(matrix_a)
computes the Frobenius norm of matrix_a
subroutine, public cp_fm_det(matrix_a, det_a)
Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix.
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col, uplo)
performs a QR factorization of the input rectangular matrix A or of a submatrix of A the computed tri...
subroutine, public cp_fm_gram_schmidt_orthonorm(matrix_a, b, nrows, ncols, start_row, start_col, do_norm, do_print)
Orthonormalizes selected rows and columns of a full matrix, matrix_a.
subroutine, public cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
performs a rank-k update of a symmetric matrix_c matrix_c = beta * matrix_c + alpha * matrix_a * tran...
subroutine, public cp_fm_potrf(fm_matrix, n, uplo)
Cholesky decomposition.
subroutine, public cp_fm_potri(fm_matrix, n, uplo)
Invert trianguar matrix.
subroutine, public cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
interface to BLACS geadd: matrix_b = beta*matrix_b + alpha*opt(matrix_a) where opt(matrix_a) can be e...
subroutine, public cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
computes the schur product of two matrices c_ij = a_ij * b_ij
real(kind=dp) function, public cp_fm_norm(matrix, mode)
norm of matrix using (p)dlange
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_uplo_to_full(matrix, work, uplo)
given a triangular matrix according to uplo, computes the corresponding full matrix
subroutine, public cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix.
subroutine, public cp_complex_fm_gemm(transa, transb, m, n, k, alpha, a_re, a_im, b_re, b_im, beta, c_re, c_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
Convenience function. Computes the matrix multiplications needed for the multiplication of complex ma...
subroutine, public cp_fm_scale(alpha, matrix_a)
scales a matrix matrix_a = alpha * matrix_b
subroutine, public cp_fm_triangular_invert(matrix_a, uplo_tr)
inverts a triangular matrix
subroutine, public cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
computes matrix_c = beta * matrix_c + alpha * matrix_a * matrix_b computes matrix_c = beta * matrix_c...
subroutine, public cp_fm_matvec(amat, xv, yv, alpha, beta)
Calculates yv = alpha*amat*xv + beta*yv where amat: fm matrix xv : vector replicated yv : vector repl...
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
subroutine, public cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1) with orthonormal column...
represent the structure of a full matrix
logical function, public cp_fm_struct_equivalent(fmstruct1, fmstruct2)
returns true if the two matrix structures are equivalent, false otherwise.
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_get_diag(matrix, diag)
returns the diagonal elements of a fm
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_set_element(matrix, irow_global, icol_global, alpha)
sets an element of a matrix
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Definition kahan_sum.F:29
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public int_8
Definition kinds.F:54
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public sp
Definition kinds.F:33
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
subroutine, public m_memory(mem)
Returns the total amount of memory [bytes] in use, if known, zero otherwise.
Definition machine.F:443
Collection of simple mathematical functions and subroutines.
Definition mathlib.F:15
subroutine, public get_pseudo_inverse_svd(a, a_pinverse, rskip, determinant, sval)
returns the pseudoinverse of a real, square matrix using singular value decomposition
Definition mathlib.F:938
subroutine, public diag(n, a, d, v)
Diagonalize matrix a. The eigenvalues are returned in vector d and the eigenvectors are returned in m...
Definition mathlib.F:1496
Interface to the message passing library MPI.
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
just to build arrays of pointers to matrices
represent a full matrix