(git:374b731)
Loading...
Searching...
No Matches
cp_fm_basic_linalg.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief basic linear algebra operations for full matrices
10!> \par History
11!> 08.2002 split out of qs_blacs [fawzi]
12!> \author Fawzi Mohamed
13! **************************************************************************************************
17 USE cp_fm_types, ONLY: &
25 USE kinds, ONLY: dp, &
26 int_8, &
27 sp
28 USE machine, ONLY: m_memory
32#include "../base/base_uses.f90"
33
34 IMPLICIT NONE
35 PRIVATE
36
37 LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .true.
38 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_basic_linalg'
39
40 PUBLIC :: cp_fm_scale, & ! scale a matrix
41 cp_fm_scale_and_add, & ! scale and add two matrices
42 cp_fm_geadd, & ! general addition
43 cp_fm_column_scale, & ! scale columns of a matrix
44 cp_fm_row_scale, & ! scale rows of a matrix
45 cp_fm_trace, & ! trace of the transpose(A)*B
46 cp_fm_contracted_trace, & ! sum_{i,...,k} Tr [A(i,...,k)^T * B(i,...,k)]
47 cp_fm_norm, & ! different norms of A
48 cp_fm_schur_product, & ! schur product
49 cp_fm_transpose, & ! transpose a matrix
50 cp_fm_upper_to_full, & ! symmetrise an upper symmetric matrix
51 cp_fm_syrk, & ! rank k update
52 cp_fm_triangular_multiply, & ! triangular matrix multiply / solve
53 cp_fm_symm, & ! multiply a symmetric with a non-symmetric matrix
54 cp_fm_gemm, & ! multiply two matrices
55 cp_complex_fm_gemm, & ! multiply two complex matrices, represented by non_complex fm matrices
56 cp_fm_invert, & ! computes the inverse and determinant
57 cp_fm_frobenius_norm, & ! frobenius norm
58 cp_fm_triangular_invert, & ! compute the reciprocal of a triangular matrix
59 cp_fm_qr_factorization, & ! compute the QR factorization of a rectangular matrix
60 cp_fm_solve, & ! solves the equation A*B=C A and C are input
61 cp_fm_pdgeqpf, & ! compute a QR factorization with column pivoting of a M-by-N distributed matrix
62 cp_fm_pdorgqr, & ! generates an M-by-N as first N columns of a product of K elementary reflectors
63 cp_fm_potrf, & ! Cholesky decomposition
64 cp_fm_potri, & ! Invert triangular matrix
65 cp_fm_rot_rows, & ! rotates two rows
66 cp_fm_rot_cols, & ! rotates two columns
67 cp_fm_cholesky_restore, & ! apply Cholesky decomposition
68 cp_fm_gram_schmidt_orthonorm, & ! Gram-Schmidt orthonormalization of columns of a full matrix, &
69 cp_fm_det ! determinant of a real matrix with correct sign
70
71 REAL(KIND=dp), EXTERNAL :: dlange, pdlange, pdlatra
72 REAL(KIND=sp), EXTERNAL :: slange, pslange, pslatra
73
74 INTERFACE cp_fm_trace
75 MODULE PROCEDURE cp_fm_trace_a0b0t0
76 MODULE PROCEDURE cp_fm_trace_a1b0t1_a
77 MODULE PROCEDURE cp_fm_trace_a1b0t1_p
78 MODULE PROCEDURE cp_fm_trace_a1b1t1_aa
79 MODULE PROCEDURE cp_fm_trace_a1b1t1_ap
80 MODULE PROCEDURE cp_fm_trace_a1b1t1_pa
81 MODULE PROCEDURE cp_fm_trace_a1b1t1_pp
82 END INTERFACE cp_fm_trace
83
85 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_aa
86 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_ap
87 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pa
88 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pp
89 END INTERFACE cp_fm_contracted_trace
90CONTAINS
91
92! **************************************************************************************************
93!> \brief Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix
94!> \author A. Sinyavskiy (andrey.sinyavskiy@chem.uzh.ch)
95! **************************************************************************************************
96 SUBROUTINE cp_fm_det(matrix_a, det_a)
97
98 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
99 REAL(kind=dp), INTENT(OUT) :: det_a
100 REAL(kind=dp) :: determinant
101 TYPE(cp_fm_type) :: matrix_lu
102 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
103 INTEGER :: n, i, info, p
104 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
105 REAL(kind=dp), DIMENSION(:), POINTER :: diag
106
107#if defined(__SCALAPACK)
108 INTEGER, EXTERNAL :: indxl2g
109 INTEGER :: myprow, nprow, npcol, nrow_local, nrow_block, irow_local
110 INTEGER, DIMENSION(9) :: desca
111#endif
112
113 CALL cp_fm_create(matrix=matrix_lu, &
114 matrix_struct=matrix_a%matrix_struct, &
115 name="A_lu"//trim(adjustl(cp_to_string(1)))//"MATRIX")
116 CALL cp_fm_to_fm(matrix_a, matrix_lu)
117
118 a => matrix_lu%local_data
119 n = matrix_lu%matrix_struct%nrow_global
120 ALLOCATE (ipivot(n))
121 ipivot(:) = 0
122 p = 0
123 ALLOCATE (diag(n))
124 diag(:) = 0.0_dp
125#if defined(__SCALAPACK)
126 ! Use LU decomposition
127 desca(:) = matrix_lu%matrix_struct%descriptor(:)
128 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
129 CALL cp_fm_get_diag(matrix_lu, diag)
130 determinant = product(diag)
131 myprow = matrix_lu%matrix_struct%context%mepos(1)
132 nprow = matrix_lu%matrix_struct%context%num_pe(1)
133 npcol = matrix_lu%matrix_struct%context%num_pe(2)
134 nrow_local = matrix_lu%matrix_struct%nrow_locals(myprow)
135 nrow_block = matrix_lu%matrix_struct%nrow_block
136 DO irow_local = 1, nrow_local
137 i = indxl2g(irow_local, nrow_block, myprow, matrix_lu%matrix_struct%first_p_pos(1), nprow)
138 IF (ipivot(irow_local) /= i) p = p + 1
139 END DO
140 CALL matrix_lu%matrix_struct%para_env%sum(p)
141 ! very important fix
142 p = p/npcol
143#else
144 CALL dgetrf(n, n, a, n, ipivot, info)
145 CALL cp_fm_get_diag(matrix_lu, diag)
146 determinant = product(diag)
147 DO i = 1, n
148 IF (ipivot(i) /= i) p = p + 1
149 END DO
150#endif
151 DEALLOCATE (ipivot)
152 DEALLOCATE (diag)
153 CALL cp_fm_release(matrix_lu)
154 det_a = determinant*(-2*mod(p, 2) + 1.0_dp)
155 END SUBROUTINE cp_fm_det
156
157! **************************************************************************************************
158!> \brief calc A <- alpha*A + beta*B
159!> optimized for alpha == 1.0 (just add beta*B) and beta == 0.0 (just
160!> scale A)
161!> \param alpha ...
162!> \param matrix_a ...
163!> \param beta ...
164!> \param matrix_b ...
165! **************************************************************************************************
166 SUBROUTINE cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
167
168 REAL(kind=dp), INTENT(IN) :: alpha
169 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
170 REAL(kind=dp), INTENT(in), OPTIONAL :: beta
171 TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: matrix_b
172
173 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_scale_and_add'
174
175 INTEGER :: handle, size_a, size_b
176 REAL(kind=dp) :: my_beta
177 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
178 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
179
180 CALL timeset(routinen, handle)
181
182 IF (PRESENT(matrix_b)) THEN
183 my_beta = 1.0_dp
184 ELSE
185 my_beta = 0.0_dp
186 END IF
187 IF (PRESENT(beta)) my_beta = beta
188 NULLIFY (a, b)
189
190 IF (PRESENT(beta)) THEN
191 cpassert(PRESENT(matrix_b))
192 IF (ASSOCIATED(matrix_a%local_data, matrix_b%local_data)) THEN
193 cpwarn("Bad use of routine. Call cp_fm_scale instead")
194 CALL cp_fm_scale(alpha + beta, matrix_a)
195 CALL timestop(handle)
196 RETURN
197 END IF
198 END IF
199
200 a => matrix_a%local_data
201 a_sp => matrix_a%local_data_sp
202
203 IF (matrix_a%use_sp) THEN
204 size_a = SIZE(a_sp, 1)*SIZE(a_sp, 2)
205 ELSE
206 size_a = SIZE(a, 1)*SIZE(a, 2)
207 END IF
208
209 IF (alpha /= 1.0_dp) THEN
210 IF (matrix_a%use_sp) THEN
211 CALL sscal(size_a, real(alpha, sp), a_sp, 1)
212 ELSE
213 CALL dscal(size_a, alpha, a, 1)
214 END IF
215 END IF
216 IF (my_beta .NE. 0.0_dp) THEN
217 IF (matrix_a%matrix_struct%context /= matrix_b%matrix_struct%context) &
218 cpabort("matrixes must be in the same blacs context")
219
220 IF (cp_fm_struct_equivalent(matrix_a%matrix_struct, &
221 matrix_b%matrix_struct)) THEN
222
223 b => matrix_b%local_data
224 b_sp => matrix_b%local_data_sp
225 IF (matrix_b%use_sp) THEN
226 size_b = SIZE(b_sp, 1)*SIZE(b_sp, 2)
227 ELSE
228 size_b = SIZE(b, 1)*SIZE(b, 2)
229 END IF
230 IF (size_a /= size_b) &
231 cpabort("Matrixes must have same locale sizes")
232
233 IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
234 CALL saxpy(size_a, real(my_beta, sp), b_sp, 1, a_sp, 1)
235 ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
236 CALL saxpy(size_a, real(my_beta, sp), real(b, sp), 1, a_sp, 1)
237 ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
238 CALL daxpy(size_a, my_beta, real(b_sp, dp), 1, a, 1)
239 ELSE
240 CALL daxpy(size_a, my_beta, b, 1, a, 1)
241 END IF
242
243 ELSE
244#ifdef __SCALAPACK
245 cpabort("to do (pdscal,pdcopy,pdaxpy)")
246#else
247 cpabort("")
248#endif
249 END IF
250
251 END IF
252
253 CALL timestop(handle)
254
255 END SUBROUTINE cp_fm_scale_and_add
256
257! **************************************************************************************************
258!> \brief interface to BLACS geadd:
259!> matrix_b = beta*matrix_b + alpha*opt(matrix_a)
260!> where opt(matrix_a) can be either:
261!> 'N': matrix_a
262!> 'T': matrix_a^T
263!> 'C': matrix_a^H (Hermitian conjugate)
264!> note that this is a level three routine, use cp_fm_scale_and_add if that
265!> is sufficient for your needs
266!> \param alpha : complex scalar
267!> \param trans : 'N' normal, 'T' transposed
268!> \param matrix_a : input matrix_a
269!> \param beta : complex scalar
270!> \param matrix_b : input matrix_b, upon out put the updated matrix_b
271!> \author Lianheng Tong
272! **************************************************************************************************
273 SUBROUTINE cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
274 REAL(kind=dp), INTENT(IN) :: alpha, beta
275 CHARACTER, INTENT(IN) :: trans
276 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
277
278 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_geadd'
279
280 INTEGER :: nrow_global, ncol_global, handle
281 REAL(kind=dp), DIMENSION(:, :), POINTER :: aa, bb
282#if defined(__SCALAPACK)
283 INTEGER, DIMENSION(9) :: desca, descb
284#else
285 INTEGER :: ii, jj
286#endif
287
288 CALL timeset(routinen, handle)
289
290 nrow_global = matrix_a%matrix_struct%nrow_global
291 ncol_global = matrix_a%matrix_struct%ncol_global
292 cpassert(nrow_global .EQ. matrix_b%matrix_struct%nrow_global)
293 cpassert(ncol_global .EQ. matrix_b%matrix_struct%ncol_global)
294
295 aa => matrix_a%local_data
296 bb => matrix_b%local_data
297
298#if defined(__SCALAPACK)
299 desca = matrix_a%matrix_struct%descriptor
300 descb = matrix_b%matrix_struct%descriptor
301 CALL pdgeadd(trans, &
302 nrow_global, &
303 ncol_global, &
304 alpha, &
305 aa, &
306 1, 1, &
307 desca, &
308 beta, &
309 bb, &
310 1, 1, &
311 descb)
312#else
313 ! dgeadd is not a standard BLAS function, although is implemented
314 ! in some libraries like OpenBLAS, so not going to use it here
315 SELECT CASE (trans)
316 CASE ('T')
317 DO jj = 1, ncol_global
318 DO ii = 1, nrow_global
319 bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(jj, ii)
320 END DO
321 END DO
322 CASE DEFAULT
323 DO jj = 1, ncol_global
324 DO ii = 1, nrow_global
325 bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(ii, jj)
326 END DO
327 END DO
328 END SELECT
329#endif
330
331 CALL timestop(handle)
332
333 END SUBROUTINE cp_fm_geadd
334
335! **************************************************************************************************
336!> \brief Computes the LU-decomposition of the matrix, and the determinant of the matrix
337!> IMPORTANT : the sign of the determinant is not defined correctly yet ....
338!> \param matrix_a ...
339!> \param almost_determinant ...
340!> \param correct_sign ...
341!> \par History
342!> added correct_sign 02.07 (fschiff)
343!> \author Joost VandeVondele
344!> \note
345!> - matrix_a is overwritten
346!> - the sign of the determinant might be wrong
347!> - SERIOUS WARNING (KNOWN BUG) : the sign of the determinant depends on ipivot
348!> - one should be able to find out if ipivot is an even or an odd permutation...
349!> if you need the correct sign, just add correct_sign==.TRUE. (fschiff)
350!> - Use cp_fm_get_diag instead of n times cp_fm_get_element (A. Bussy)
351! **************************************************************************************************
352 SUBROUTINE cp_fm_lu_decompose(matrix_a, almost_determinant, correct_sign)
353 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
354 REAL(kind=dp), INTENT(OUT) :: almost_determinant
355 LOGICAL, INTENT(IN), OPTIONAL :: correct_sign
356
357 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_lu_decompose'
358
359 INTEGER :: handle, i, info, n
360 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
361 REAL(kind=dp) :: determinant
362 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
363#if defined(__SCALAPACK)
364 INTEGER, DIMENSION(9) :: desca
365 REAL(kind=dp), DIMENSION(:), POINTER :: diag
366#else
367 INTEGER :: lda
368#endif
369
370 CALL timeset(routinen, handle)
371
372 a => matrix_a%local_data
373 n = matrix_a%matrix_struct%nrow_global
374 ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
375
376#if defined(__SCALAPACK)
377 mark_used(correct_sign)
378 desca(:) = matrix_a%matrix_struct%descriptor(:)
379 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
380
381 ALLOCATE (diag(n))
382 diag(:) = 0.0_dp
383 CALL cp_fm_get_diag(matrix_a, diag)
384 determinant = 1.0_dp
385 DO i = 1, n
386 determinant = determinant*diag(i)
387 END DO
388 DEALLOCATE (diag)
389#else
390 lda = SIZE(a, 1)
391 CALL dgetrf(n, n, a, lda, ipivot, info)
392 determinant = 1.0_dp
393 IF (correct_sign) THEN
394 DO i = 1, n
395 IF (ipivot(i) .NE. i) THEN
396 determinant = -determinant*a(i, i)
397 ELSE
398 determinant = determinant*a(i, i)
399 END IF
400 END DO
401 ELSE
402 DO i = 1, n
403 determinant = determinant*a(i, i)
404 END DO
405 END IF
406#endif
407 ! info is allowed to be zero
408 ! this does just signal a zero diagonal element
409 DEALLOCATE (ipivot)
410 almost_determinant = determinant ! notice that the sign is random
411 CALL timestop(handle)
412 END SUBROUTINE
413
414! **************************************************************************************************
415!> \brief computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
416!> \param transa : 'N' -> normal 'T' -> transpose
417!> alpha,beta :: can be 0.0_dp and 1.0_dp
418!> \param transb ...
419!> \param m ...
420!> \param n ...
421!> \param k ...
422!> \param alpha ...
423!> \param matrix_a : m x k matrix ( ! for transa = 'N')
424!> \param matrix_b : k x n matrix ( ! for transb = 'N')
425!> \param beta ...
426!> \param matrix_c : m x n matrix
427!> \param a_first_col ...
428!> \param a_first_row ...
429!> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
430!> \param b_first_row ...
431!> \param c_first_col ...
432!> \param c_first_row ...
433!> \author Matthias Krack
434!> \note
435!> matrix_c should have no overlap with matrix_a, matrix_b
436! **************************************************************************************************
437 SUBROUTINE cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, &
438 matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, &
439 c_first_col, c_first_row)
440
441 CHARACTER(LEN=1), INTENT(IN) :: transa, transb
442 INTEGER, INTENT(IN) :: m, n, k
443 REAL(kind=dp), INTENT(IN) :: alpha
444 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
445 REAL(kind=dp), INTENT(IN) :: beta
446 TYPE(cp_fm_type), INTENT(IN) :: matrix_c
447 INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, &
448 b_first_col, b_first_row, &
449 c_first_col, c_first_row
450
451 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_gemm'
452
453 INTEGER :: handle, i_a, i_b, i_c, j_a, &
454 j_b, j_c
455 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, c
456 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp, c_sp
457#if defined(__SCALAPACK)
458 INTEGER, DIMENSION(9) :: desca, descb, descc
459#else
460 INTEGER :: lda, ldb, ldc
461#endif
462
463 CALL timeset(routinen, handle)
464
465 !sample peak memory
466 CALL m_memory()
467
468 a => matrix_a%local_data
469 b => matrix_b%local_data
470 c => matrix_c%local_data
471
472 a_sp => matrix_a%local_data_sp
473 b_sp => matrix_b%local_data_sp
474 c_sp => matrix_c%local_data_sp
475
476 IF (PRESENT(a_first_row)) THEN
477 i_a = a_first_row
478 ELSE
479 i_a = 1
480 END IF
481 IF (PRESENT(a_first_col)) THEN
482 j_a = a_first_col
483 ELSE
484 j_a = 1
485 END IF
486 IF (PRESENT(b_first_row)) THEN
487 i_b = b_first_row
488 ELSE
489 i_b = 1
490 END IF
491 IF (PRESENT(b_first_col)) THEN
492 j_b = b_first_col
493 ELSE
494 j_b = 1
495 END IF
496 IF (PRESENT(c_first_row)) THEN
497 i_c = c_first_row
498 ELSE
499 i_c = 1
500 END IF
501 IF (PRESENT(c_first_col)) THEN
502 j_c = c_first_col
503 ELSE
504 j_c = 1
505 END IF
506
507#if defined(__SCALAPACK)
508
509 desca(:) = matrix_a%matrix_struct%descriptor(:)
510 descb(:) = matrix_b%matrix_struct%descriptor(:)
511 descc(:) = matrix_c%matrix_struct%descriptor(:)
512
513 IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
514
515 CALL psgemm(transa, transb, m, n, k, real(alpha, sp), a_sp(1, 1), i_a, j_a, desca, b_sp(1, 1), i_b, j_b, &
516 descb, real(beta, sp), c_sp(1, 1), i_c, j_c, descc)
517
518 ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
519
520 CALL pdgemm(transa, transb, m, n, k, alpha, a, i_a, j_a, desca, b, i_b, j_b, &
521 descb, beta, c, i_c, j_c, descc)
522
523 ELSE
524 cpabort("Mixed precision gemm NYI")
525 END IF
526#else
527
528 IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
529
530 lda = SIZE(a_sp, 1)
531 ldb = SIZE(b_sp, 1)
532 ldc = SIZE(c_sp, 1)
533
534 CALL sgemm(transa, transb, m, n, k, real(alpha, sp), a_sp(i_a, j_a), lda, b_sp(i_b, j_b), ldb, &
535 REAL(beta, sp), c_sp(i_c, j_c), ldc)
536
537 ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
538
539 lda = SIZE(a, 1)
540 ldb = SIZE(b, 1)
541 ldc = SIZE(c, 1)
542
543 CALL dgemm(transa, transb, m, n, k, alpha, a(i_a, j_a), lda, b(i_b, j_b), ldb, beta, c(i_c, j_c), ldc)
544
545 ELSE
546 cpabort("Mixed precision gemm NYI")
547 END IF
548
549#endif
550 CALL timestop(handle)
551
552 END SUBROUTINE cp_fm_gemm
553
554! **************************************************************************************************
555!> \brief computes matrix_c = beta * matrix_c + alpha * matrix_a * matrix_b
556!> computes matrix_c = beta * matrix_c + alpha * matrix_b * matrix_a
557!> where matrix_a is symmetric
558!> \param side : 'L' -> matrix_a is on the left 'R' -> matrix_a is on the right
559!> alpha,beta :: can be 0.0_dp and 1.0_dp
560!> \param uplo ...
561!> \param m ...
562!> \param n ...
563!> \param alpha ...
564!> \param matrix_a : m x m matrix
565!> \param matrix_b : m x n matrix
566!> \param beta ...
567!> \param matrix_c : m x n matrix
568!> \author Matthias Krack
569!> \note
570!> matrix_c should have no overlap with matrix_a, matrix_b
571!> all matrices in QS are upper triangular, so uplo should be 'U' always
572!> matrix_a is always an m x m matrix
573!> it is typically slower to do cp_fm_symm than cp_fm_gemm (especially in parallel easily 50 percent !)
574! **************************************************************************************************
575 SUBROUTINE cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
576
577 CHARACTER(LEN=1), INTENT(IN) :: side, uplo
578 INTEGER, INTENT(IN) :: m, n
579 REAL(kind=dp), INTENT(IN) :: alpha
580 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
581 REAL(kind=dp), INTENT(IN) :: beta
582 TYPE(cp_fm_type), INTENT(IN) :: matrix_c
583
584 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_symm'
585
586 INTEGER :: handle
587 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, c
588#if defined(__SCALAPACK)
589 INTEGER, DIMENSION(9) :: desca, descb, descc
590#else
591 INTEGER :: lda, ldb, ldc
592#endif
593
594 CALL timeset(routinen, handle)
595
596 a => matrix_a%local_data
597 b => matrix_b%local_data
598 c => matrix_c%local_data
599
600#if defined(__SCALAPACK)
601
602 desca(:) = matrix_a%matrix_struct%descriptor(:)
603 descb(:) = matrix_b%matrix_struct%descriptor(:)
604 descc(:) = matrix_c%matrix_struct%descriptor(:)
605
606 CALL pdsymm(side, uplo, m, n, alpha, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, beta, c(1, 1), 1, 1, descc)
607
608#else
609
610 lda = matrix_a%matrix_struct%local_leading_dimension
611 ldb = matrix_b%matrix_struct%local_leading_dimension
612 ldc = matrix_c%matrix_struct%local_leading_dimension
613
614 CALL dsymm(side, uplo, m, n, alpha, a(1, 1), lda, b(1, 1), ldb, beta, c(1, 1), ldc)
615
616#endif
617 CALL timestop(handle)
618
619 END SUBROUTINE cp_fm_symm
620
621! **************************************************************************************************
622!> \brief computes the Frobenius norm of matrix_a
623!> \brief computes the Frobenius norm of matrix_a
624!> \param matrix_a : m x n matrix
625!> \return ...
626!> \author VW
627! **************************************************************************************************
628 FUNCTION cp_fm_frobenius_norm(matrix_a) RESULT(norm)
629 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
630 REAL(kind=dp) :: norm
631
632 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_frobenius_norm'
633
634 INTEGER :: handle, size_a
635 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
636 REAL(kind=dp), EXTERNAL :: ddot
637#if defined(__SCALAPACK)
638 TYPE(mp_comm_type) :: group
639#endif
640
641 CALL timeset(routinen, handle)
642
643 norm = 0.0_dp
644 a => matrix_a%local_data
645 size_a = SIZE(a, 1)*SIZE(a, 2)
646 norm = ddot(size_a, a(1, 1), 1, a(1, 1), 1)
647#if defined(__SCALAPACK)
648 group = matrix_a%matrix_struct%para_env
649 CALL group%sum(norm)
650#endif
651 norm = sqrt(norm)
652
653 CALL timestop(handle)
654
655 END FUNCTION cp_fm_frobenius_norm
656
657! **************************************************************************************************
658!> \brief performs a rank-k update of a symmetric matrix_c
659!> matrix_c = beta * matrix_c + alpha * matrix_a * transpose ( matrix_a )
660!> \param uplo : 'U' ('L')
661!> \param trans : 'N' ('T')
662!> \param k : number of cols to use in matrix_a
663!> ia,ja :: 1,1 (could be used for selecting subblock of a)
664!> \param alpha ...
665!> \param matrix_a ...
666!> \param ia ...
667!> \param ja ...
668!> \param beta ...
669!> \param matrix_c ...
670!> \author Matthias Krack
671!> \note
672!> In QS uplo should 'U' (upper part updated)
673! **************************************************************************************************
674 SUBROUTINE cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
675 CHARACTER(LEN=1), INTENT(IN) :: uplo, trans
676 INTEGER, INTENT(IN) :: k
677 REAL(kind=dp), INTENT(IN) :: alpha
678 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
679 INTEGER, INTENT(IN) :: ia, ja
680 REAL(kind=dp), INTENT(IN) :: beta
681 TYPE(cp_fm_type), INTENT(IN) :: matrix_c
682
683 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_syrk'
684
685 INTEGER :: handle, n
686 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, c
687#if defined(__SCALAPACK)
688 INTEGER, DIMENSION(9) :: desca, descc
689#else
690 INTEGER :: lda, ldc
691#endif
692
693 CALL timeset(routinen, handle)
694
695 n = matrix_c%matrix_struct%nrow_global
696
697 a => matrix_a%local_data
698 c => matrix_c%local_data
699
700#if defined(__SCALAPACK)
701
702 desca(:) = matrix_a%matrix_struct%descriptor(:)
703 descc(:) = matrix_c%matrix_struct%descriptor(:)
704
705 CALL pdsyrk(uplo, trans, n, k, alpha, a(1, 1), ia, ja, desca, beta, c(1, 1), 1, 1, descc)
706
707#else
708
709 lda = SIZE(a, 1)
710 ldc = SIZE(c, 1)
711
712 CALL dsyrk(uplo, trans, n, k, alpha, a(ia, ja), lda, beta, c(1, 1), ldc)
713
714#endif
715 CALL timestop(handle)
716
717 END SUBROUTINE cp_fm_syrk
718
719! **************************************************************************************************
720!> \brief computes the schur product of two matrices
721!> c_ij = a_ij * b_ij
722!> \param matrix_a ...
723!> \param matrix_b ...
724!> \param matrix_c ...
725!> \author Joost VandeVondele
726! **************************************************************************************************
727 SUBROUTINE cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
728
729 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b, matrix_c
730
731 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_schur_product'
732
733 INTEGER :: handle, icol_local, irow_local, mypcol, &
734 myprow, ncol_local, npcol, nprow, &
735 nrow_local
736 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, c
737 TYPE(cp_blacs_env_type), POINTER :: context
738
739 CALL timeset(routinen, handle)
740
741 context => matrix_a%matrix_struct%context
742 myprow = context%mepos(1)
743 mypcol = context%mepos(2)
744 nprow = context%num_pe(1)
745 npcol = context%num_pe(2)
746
747 a => matrix_a%local_data
748 b => matrix_b%local_data
749 c => matrix_c%local_data
750
751 nrow_local = matrix_a%matrix_struct%nrow_locals(myprow)
752 ncol_local = matrix_a%matrix_struct%ncol_locals(mypcol)
753
754 DO icol_local = 1, ncol_local
755 DO irow_local = 1, nrow_local
756 c(irow_local, icol_local) = a(irow_local, icol_local)*b(irow_local, icol_local)
757 END DO
758 END DO
759
760 CALL timestop(handle)
761
762 END SUBROUTINE cp_fm_schur_product
763
764! **************************************************************************************************
765!> \brief returns the trace of matrix_a^T matrix_b, i.e
766!> sum_{i,j}(matrix_a(i,j)*matrix_b(i,j))
767!> \param matrix_a a matrix
768!> \param matrix_b another matrix
769!> \param trace ...
770!> \par History
771!> 11.06.2001 Creation (Matthias Krack)
772!> 12.2002 added doc [fawzi]
773!> \author Matthias Krack
774!> \note
775!> note the transposition of matrix_a!
776! **************************************************************************************************
777 SUBROUTINE cp_fm_trace_a0b0t0(matrix_a, matrix_b, trace)
778
779 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
780 REAL(KIND=dp), INTENT(OUT) :: trace
781
782 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a0b0t0'
783
784 INTEGER :: handle, mypcol, myprow, ncol_local, &
785 npcol, nprow, nrow_local
786 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
787 REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
788 TYPE(cp_blacs_env_type), POINTER :: context
789 TYPE(mp_comm_type) :: group
790
791 CALL timeset(routinen, handle)
792
793 context => matrix_a%matrix_struct%context
794 myprow = context%mepos(1)
795 mypcol = context%mepos(2)
796 nprow = context%num_pe(1)
797 npcol = context%num_pe(2)
798
799 group = matrix_a%matrix_struct%para_env
800
801 a => matrix_a%local_data
802 b => matrix_b%local_data
803
804 a_sp => matrix_a%local_data_sp
805 b_sp => matrix_b%local_data_sp
806
807 nrow_local = min(matrix_a%matrix_struct%nrow_locals(myprow), matrix_b%matrix_struct%nrow_locals(myprow))
808 ncol_local = min(matrix_a%matrix_struct%ncol_locals(mypcol), matrix_b%matrix_struct%ncol_locals(mypcol))
809
810 ! cries for an accurate_dot_product
811 IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
812 trace = accurate_sum(real(a_sp(1:nrow_local, 1:ncol_local)* &
813 b_sp(1:nrow_local, 1:ncol_local), dp))
814 ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
815 trace = accurate_sum(real(a_sp(1:nrow_local, 1:ncol_local), dp)* &
816 b(1:nrow_local, 1:ncol_local))
817 ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
818 trace = accurate_sum(a(1:nrow_local, 1:ncol_local)* &
819 REAL(b_sp(1:nrow_local, 1:ncol_local), dp))
820 ELSE
821 trace = accurate_dot_product(a(1:nrow_local, 1:ncol_local), &
822 b(1:nrow_local, 1:ncol_local))
823 END IF
824
825 CALL group%sum(trace)
826
827 CALL timestop(handle)
828
829 END SUBROUTINE cp_fm_trace_a0b0t0
830
831
832! **************************************************************************************************
833!> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b) for each pair of matrices A_k and B.
834!> \param matrix_a list of A matrices
835!> \param matrix_b B matrix
836!> \param trace computed traces
837!> \par History
838!> * 08.2018 forked from cp_fm_trace() [Sergey Chulkov]
839!> \note \parblock
840!> Computing the trace requires collective communication between involved MPI processes
841!> that implies a synchronisation point between them. The aim of this subroutine is to reduce
842!> the amount of time wasted in such synchronisation by performing one large collective
843!> operation which involves all the matrices in question.
844!>
845!> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b0t1' means that
846!> the dummy variables 'matrix_a' and 'trace' are 1-dimensional arrays, while the variable
847!> 'matrix_b' is a single matrix.
848!> \endparblock
849! **************************************************************************************************
850 SUBROUTINE cp_fm_trace_a1b0t1_a (matrix_a, matrix_b, trace)
851 TYPE(cp_fm_type), DIMENSION(:), INTENT(in) :: matrix_a
852 TYPE(cp_fm_type), INTENT(IN) :: matrix_b
853 REAL(kind=dp), DIMENSION(:), INTENT(out) :: trace
854
855 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_a'
856
857 INTEGER :: handle, imatrix, n_matrices, &
858 ncols_local, nrows_local
859 LOGICAL :: use_sp_a, use_sp_b
860 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
861 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
862 TYPE(mp_comm_type) :: group
863
864 CALL timeset(routinen, handle)
865
866 n_matrices = SIZE(trace)
867 cpassert(SIZE(matrix_a) == n_matrices)
868
869 CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
870 use_sp_b = matrix_b%use_sp
871
872 IF (use_sp_b) THEN
873 ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
874 ELSE
875 ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
876 END IF
877
878!$OMP PARALLEL DO DEFAULT(NONE), &
879!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
880!$OMP SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
881!$OMP SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
882
883 DO imatrix = 1, n_matrices
884
885 use_sp_a = matrix_a(imatrix) %use_sp
886
887 ! assume that the matrices A(i) and B have identical shapes and distribution schemes
888 IF (use_sp_a .AND. use_sp_b) THEN
889 ldata_a_sp => matrix_a(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
890 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
891 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
892 ldata_a => matrix_a(imatrix) %local_data(1:nrows_local, 1:ncols_local)
893 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
894 ELSE
895 cpabort("Matrices A and B are of different types")
896 END IF
897 END DO
898!$OMP END PARALLEL DO
899
900 group = matrix_b%matrix_struct%para_env
901 CALL group%sum(trace)
902
903 CALL timestop(handle)
904 END SUBROUTINE cp_fm_trace_a1b0t1_a
905 SUBROUTINE cp_fm_trace_a1b0t1_p (matrix_a, matrix_b, trace)
906 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(in) :: matrix_a
907 TYPE(cp_fm_type), INTENT(IN) :: matrix_b
908 REAL(kind=dp), DIMENSION(:), INTENT(out) :: trace
909
910 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_p'
911
912 INTEGER :: handle, imatrix, n_matrices, &
913 ncols_local, nrows_local
914 LOGICAL :: use_sp_a, use_sp_b
915 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
916 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
917 TYPE(mp_comm_type) :: group
918
919 CALL timeset(routinen, handle)
920
921 n_matrices = SIZE(trace)
922 cpassert(SIZE(matrix_a) == n_matrices)
923
924 CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
925 use_sp_b = matrix_b%use_sp
926
927 IF (use_sp_b) THEN
928 ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
929 ELSE
930 ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
931 END IF
932
933!$OMP PARALLEL DO DEFAULT(NONE), &
934!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
935!$OMP SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
936!$OMP SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
937
938 DO imatrix = 1, n_matrices
939
940 use_sp_a = matrix_a(imatrix) %matrix%use_sp
941
942 ! assume that the matrices A(i) and B have identical shapes and distribution schemes
943 IF (use_sp_a .AND. use_sp_b) THEN
944 ldata_a_sp => matrix_a(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
945 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
946 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
947 ldata_a => matrix_a(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
948 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
949 ELSE
950 cpabort("Matrices A and B are of different types")
951 END IF
952 END DO
953!$OMP END PARALLEL DO
954
955 group = matrix_b%matrix_struct%para_env
956 CALL group%sum(trace)
957
958 CALL timestop(handle)
959 END SUBROUTINE cp_fm_trace_a1b0t1_p
960
961! **************************************************************************************************
962!> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b(k)) for each pair of matrices A_k and B_k.
963!> \param matrix_a list of A matrices
964!> \param matrix_b list of B matrices
965!> \param trace computed traces
966!> \param accurate ...
967!> \par History
968!> * 11.2016 forked from cp_fm_trace() [Sergey Chulkov]
969!> \note \parblock
970!> Computing the trace requires collective communication between involved MPI processes
971!> that implies a synchronisation point between them. The aim of this subroutine is to reduce
972!> the amount of time wasted in such synchronisation by performing one large collective
973!> operation which involves all the matrices in question.
974!>
975!> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b1t1' means that
976!> all dummy variables (matrix_a, matrix_b, and trace) are 1-dimensional arrays.
977!> \endparblock
978! **************************************************************************************************
979 SUBROUTINE cp_fm_trace_a1b1t1_aa (matrix_a, matrix_b, trace, accurate)
980 TYPE(cp_fm_type), DIMENSION(:), INTENT(in) :: matrix_a
981 TYPE(cp_fm_type), DIMENSION(:), INTENT(in) :: matrix_b
982 REAL(kind=dp), DIMENSION(:), INTENT(out) :: trace
983 LOGICAL, INTENT(IN), OPTIONAL :: accurate
984
985 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_aa'
986
987 INTEGER :: handle, imatrix, n_matrices, &
988 ncols_local, nrows_local
989 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
990 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
991 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
992 TYPE(mp_comm_type) :: group
993
994 CALL timeset(routinen, handle)
995
996 n_matrices = SIZE(trace)
997 cpassert(SIZE(matrix_a) == n_matrices)
998 cpassert(SIZE(matrix_b) == n_matrices)
999
1000 use_accurate_sum = .true.
1001 IF (PRESENT(accurate)) use_accurate_sum = accurate
1002
1003!$OMP PARALLEL DO DEFAULT(NONE), &
1004!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1005!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1006!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1007 DO imatrix = 1, n_matrices
1008 CALL cp_fm_get_info(matrix_a(imatrix) , nrow_local=nrows_local, ncol_local=ncols_local)
1009
1010 use_sp_a = matrix_a(imatrix) %use_sp
1011 use_sp_b = matrix_b(imatrix) %use_sp
1012
1013 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1014 IF (use_sp_a .AND. use_sp_b) THEN
1015 ldata_a_sp => matrix_a(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1016 ldata_b_sp => matrix_b(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1017 IF (use_accurate_sum) THEN
1018 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1019 ELSE
1020 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1021 END IF
1022 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1023 ldata_a => matrix_a(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1024 ldata_b => matrix_b(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1025 IF (use_accurate_sum) THEN
1026 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1027 ELSE
1028 trace(imatrix) = sum(ldata_a*ldata_b)
1029 END IF
1030 ELSE
1031 cpabort("Matrices A and B are of different types")
1032 END IF
1033 END DO
1034!$OMP END PARALLEL DO
1035
1036 group = matrix_a(1) %matrix_struct%para_env
1037 CALL group%sum(trace)
1038
1039 CALL timestop(handle)
1040 END SUBROUTINE cp_fm_trace_a1b1t1_aa
1041 SUBROUTINE cp_fm_trace_a1b1t1_ap (matrix_a, matrix_b, trace, accurate)
1042 TYPE(cp_fm_type), DIMENSION(:), INTENT(in) :: matrix_a
1043 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(in) :: matrix_b
1044 REAL(kind=dp), DIMENSION(:), INTENT(out) :: trace
1045 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1046
1047 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_ap'
1048
1049 INTEGER :: handle, imatrix, n_matrices, &
1050 ncols_local, nrows_local
1051 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1052 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1053 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1054 TYPE(mp_comm_type) :: group
1055
1056 CALL timeset(routinen, handle)
1057
1058 n_matrices = SIZE(trace)
1059 cpassert(SIZE(matrix_a) == n_matrices)
1060 cpassert(SIZE(matrix_b) == n_matrices)
1061
1062 use_accurate_sum = .true.
1063 IF (PRESENT(accurate)) use_accurate_sum = accurate
1064
1065!$OMP PARALLEL DO DEFAULT(NONE), &
1066!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1067!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1068!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1069 DO imatrix = 1, n_matrices
1070 CALL cp_fm_get_info(matrix_a(imatrix) , nrow_local=nrows_local, ncol_local=ncols_local)
1071
1072 use_sp_a = matrix_a(imatrix) %use_sp
1073 use_sp_b = matrix_b(imatrix) %matrix%use_sp
1074
1075 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1076 IF (use_sp_a .AND. use_sp_b) THEN
1077 ldata_a_sp => matrix_a(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1078 ldata_b_sp => matrix_b(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1079 IF (use_accurate_sum) THEN
1080 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1081 ELSE
1082 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1083 END IF
1084 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1085 ldata_a => matrix_a(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1086 ldata_b => matrix_b(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1087 IF (use_accurate_sum) THEN
1088 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1089 ELSE
1090 trace(imatrix) = sum(ldata_a*ldata_b)
1091 END IF
1092 ELSE
1093 cpabort("Matrices A and B are of different types")
1094 END IF
1095 END DO
1096!$OMP END PARALLEL DO
1097
1098 group = matrix_a(1) %matrix_struct%para_env
1099 CALL group%sum(trace)
1100
1101 CALL timestop(handle)
1102 END SUBROUTINE cp_fm_trace_a1b1t1_ap
1103 SUBROUTINE cp_fm_trace_a1b1t1_pa (matrix_a, matrix_b, trace, accurate)
1104 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(in) :: matrix_a
1105 TYPE(cp_fm_type), DIMENSION(:), INTENT(in) :: matrix_b
1106 REAL(kind=dp), DIMENSION(:), INTENT(out) :: trace
1107 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1108
1109 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_pa'
1110
1111 INTEGER :: handle, imatrix, n_matrices, &
1112 ncols_local, nrows_local
1113 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1114 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1115 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1116 TYPE(mp_comm_type) :: group
1117
1118 CALL timeset(routinen, handle)
1119
1120 n_matrices = SIZE(trace)
1121 cpassert(SIZE(matrix_a) == n_matrices)
1122 cpassert(SIZE(matrix_b) == n_matrices)
1123
1124 use_accurate_sum = .true.
1125 IF (PRESENT(accurate)) use_accurate_sum = accurate
1126
1127!$OMP PARALLEL DO DEFAULT(NONE), &
1128!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1129!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1130!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1131 DO imatrix = 1, n_matrices
1132 CALL cp_fm_get_info(matrix_a(imatrix) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1133
1134 use_sp_a = matrix_a(imatrix) %matrix%use_sp
1135 use_sp_b = matrix_b(imatrix) %use_sp
1136
1137 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1138 IF (use_sp_a .AND. use_sp_b) THEN
1139 ldata_a_sp => matrix_a(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1140 ldata_b_sp => matrix_b(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1141 IF (use_accurate_sum) THEN
1142 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1143 ELSE
1144 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1145 END IF
1146 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1147 ldata_a => matrix_a(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1148 ldata_b => matrix_b(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1149 IF (use_accurate_sum) THEN
1150 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1151 ELSE
1152 trace(imatrix) = sum(ldata_a*ldata_b)
1153 END IF
1154 ELSE
1155 cpabort("Matrices A and B are of different types")
1156 END IF
1157 END DO
1158!$OMP END PARALLEL DO
1159
1160 group = matrix_a(1) %matrix%matrix_struct%para_env
1161 CALL group%sum(trace)
1162
1163 CALL timestop(handle)
1164 END SUBROUTINE cp_fm_trace_a1b1t1_pa
1165 SUBROUTINE cp_fm_trace_a1b1t1_pp (matrix_a, matrix_b, trace, accurate)
1166 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(in) :: matrix_a
1167 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(in) :: matrix_b
1168 REAL(kind=dp), DIMENSION(:), INTENT(out) :: trace
1169 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1170
1171 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_pp'
1172
1173 INTEGER :: handle, imatrix, n_matrices, &
1174 ncols_local, nrows_local
1175 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1176 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1177 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1178 TYPE(mp_comm_type) :: group
1179
1180 CALL timeset(routinen, handle)
1181
1182 n_matrices = SIZE(trace)
1183 cpassert(SIZE(matrix_a) == n_matrices)
1184 cpassert(SIZE(matrix_b) == n_matrices)
1185
1186 use_accurate_sum = .true.
1187 IF (PRESENT(accurate)) use_accurate_sum = accurate
1188
1189!$OMP PARALLEL DO DEFAULT(NONE), &
1190!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1191!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1192!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1193 DO imatrix = 1, n_matrices
1194 CALL cp_fm_get_info(matrix_a(imatrix) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1195
1196 use_sp_a = matrix_a(imatrix) %matrix%use_sp
1197 use_sp_b = matrix_b(imatrix) %matrix%use_sp
1198
1199 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1200 IF (use_sp_a .AND. use_sp_b) THEN
1201 ldata_a_sp => matrix_a(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1202 ldata_b_sp => matrix_b(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1203 IF (use_accurate_sum) THEN
1204 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1205 ELSE
1206 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1207 END IF
1208 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1209 ldata_a => matrix_a(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1210 ldata_b => matrix_b(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1211 IF (use_accurate_sum) THEN
1212 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1213 ELSE
1214 trace(imatrix) = sum(ldata_a*ldata_b)
1215 END IF
1216 ELSE
1217 cpabort("Matrices A and B are of different types")
1218 END IF
1219 END DO
1220!$OMP END PARALLEL DO
1221
1222 group = matrix_a(1) %matrix%matrix_struct%para_env
1223 CALL group%sum(trace)
1224
1225 CALL timestop(handle)
1226 END SUBROUTINE cp_fm_trace_a1b1t1_pp
1227
1228! **************************************************************************************************
1229!> \brief Compute trace(i,j) = \sum_k Tr (matrix_a(k,i)^T matrix_b(k,j)).
1230!> \param matrix_a list of A matrices
1231!> \param matrix_b list of B matrices
1232!> \param trace computed traces
1233!> \param accurate ...
1234! **************************************************************************************************
1235 SUBROUTINE cp_fm_contracted_trace_a2b2t2_aa (matrix_a, matrix_b, trace, accurate)
1236 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(in) :: matrix_a
1237 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(in) :: matrix_b
1238 REAL(kind=dp), DIMENSION(:, :), INTENT(out) :: trace
1239 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1240
1241 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_aa'
1242
1243 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1244 nrows_local, nz
1245 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1246 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1247 REAL(kind=dp) :: t
1248 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1249 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1250 TYPE(mp_comm_type) :: group
1251
1252 CALL timeset(routinen, handle)
1253
1254 nz = SIZE(matrix_a, 1)
1255 cpassert(SIZE(matrix_b, 1) == nz)
1256
1257 na = SIZE(matrix_a, 2)
1258 nb = SIZE(matrix_b, 2)
1259 cpassert(SIZE(trace, 1) == na)
1260 cpassert(SIZE(trace, 2) == nb)
1261
1262 use_accurate_sum = .true.
1263 IF (PRESENT(accurate)) use_accurate_sum = accurate
1264
1265 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1266 ! improve load balance between shared-memory threads
1267 ntraces = na*nb
1268 na8 = int(na, kind=int_8)
1269
1270!$OMP PARALLEL DO DEFAULT(NONE), &
1271!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1272!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1273!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1274 DO itrace = 1, ntraces
1275 ib8 = (itrace - 1)/na8
1276 ia = int(itrace - ib8*na8)
1277 ib = int(ib8) + 1
1278
1279 t = 0.0_dp
1280 DO iz = 1, nz
1281 CALL cp_fm_get_info(matrix_a(iz, ia) , nrow_local=nrows_local, ncol_local=ncols_local)
1282 use_sp_a = matrix_a(iz, ia) %use_sp
1283 use_sp_b = matrix_b(iz, ib) %use_sp
1284
1285 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1286 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1287 ldata_a => matrix_a(iz, ia) %local_data(1:nrows_local, 1:ncols_local)
1288 ldata_b => matrix_b(iz, ib) %local_data(1:nrows_local, 1:ncols_local)
1289 IF (use_accurate_sum) THEN
1290 t = t + accurate_dot_product(ldata_a, ldata_b)
1291 ELSE
1292 t = t + sum(ldata_a*ldata_b)
1293 END IF
1294 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1295 ldata_a_sp => matrix_a(iz, ia) %local_data_sp(1:nrows_local, 1:ncols_local)
1296 ldata_b_sp => matrix_b(iz, ib) %local_data_sp(1:nrows_local, 1:ncols_local)
1297 IF (use_accurate_sum) THEN
1298 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1299 ELSE
1300 t = t + sum(ldata_a_sp*ldata_b_sp)
1301 END IF
1302 ELSE
1303 cpabort("Matrices A and B are of different types")
1304 END IF
1305 END DO
1306 trace(ia, ib) = t
1307 END DO
1308!$OMP END PARALLEL DO
1309
1310 group = matrix_a(1, 1) %matrix_struct%para_env
1311 CALL group%sum(trace)
1312
1313 CALL timestop(handle)
1314 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_aa
1315 SUBROUTINE cp_fm_contracted_trace_a2b2t2_ap (matrix_a, matrix_b, trace, accurate)
1316 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(in) :: matrix_a
1317 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(in) :: matrix_b
1318 REAL(kind=dp), DIMENSION(:, :), INTENT(out) :: trace
1319 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1320
1321 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_ap'
1322
1323 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1324 nrows_local, nz
1325 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1326 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1327 REAL(kind=dp) :: t
1328 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1329 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1330 TYPE(mp_comm_type) :: group
1331
1332 CALL timeset(routinen, handle)
1333
1334 nz = SIZE(matrix_a, 1)
1335 cpassert(SIZE(matrix_b, 1) == nz)
1336
1337 na = SIZE(matrix_a, 2)
1338 nb = SIZE(matrix_b, 2)
1339 cpassert(SIZE(trace, 1) == na)
1340 cpassert(SIZE(trace, 2) == nb)
1341
1342 use_accurate_sum = .true.
1343 IF (PRESENT(accurate)) use_accurate_sum = accurate
1344
1345 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1346 ! improve load balance between shared-memory threads
1347 ntraces = na*nb
1348 na8 = int(na, kind=int_8)
1349
1350!$OMP PARALLEL DO DEFAULT(NONE), &
1351!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1352!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1353!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1354 DO itrace = 1, ntraces
1355 ib8 = (itrace - 1)/na8
1356 ia = int(itrace - ib8*na8)
1357 ib = int(ib8) + 1
1358
1359 t = 0.0_dp
1360 DO iz = 1, nz
1361 CALL cp_fm_get_info(matrix_a(iz, ia) , nrow_local=nrows_local, ncol_local=ncols_local)
1362 use_sp_a = matrix_a(iz, ia) %use_sp
1363 use_sp_b = matrix_b(iz, ib) %matrix%use_sp
1364
1365 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1366 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1367 ldata_a => matrix_a(iz, ia) %local_data(1:nrows_local, 1:ncols_local)
1368 ldata_b => matrix_b(iz, ib) %matrix%local_data(1:nrows_local, 1:ncols_local)
1369 IF (use_accurate_sum) THEN
1370 t = t + accurate_dot_product(ldata_a, ldata_b)
1371 ELSE
1372 t = t + sum(ldata_a*ldata_b)
1373 END IF
1374 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1375 ldata_a_sp => matrix_a(iz, ia) %local_data_sp(1:nrows_local, 1:ncols_local)
1376 ldata_b_sp => matrix_b(iz, ib) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1377 IF (use_accurate_sum) THEN
1378 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1379 ELSE
1380 t = t + sum(ldata_a_sp*ldata_b_sp)
1381 END IF
1382 ELSE
1383 cpabort("Matrices A and B are of different types")
1384 END IF
1385 END DO
1386 trace(ia, ib) = t
1387 END DO
1388!$OMP END PARALLEL DO
1389
1390 group = matrix_a(1, 1) %matrix_struct%para_env
1391 CALL group%sum(trace)
1392
1393 CALL timestop(handle)
1394 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_ap
1395 SUBROUTINE cp_fm_contracted_trace_a2b2t2_pa (matrix_a, matrix_b, trace, accurate)
1396 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(in) :: matrix_a
1397 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(in) :: matrix_b
1398 REAL(kind=dp), DIMENSION(:, :), INTENT(out) :: trace
1399 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1400
1401 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_pa'
1402
1403 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1404 nrows_local, nz
1405 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1406 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1407 REAL(kind=dp) :: t
1408 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1409 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1410 TYPE(mp_comm_type) :: group
1411
1412 CALL timeset(routinen, handle)
1413
1414 nz = SIZE(matrix_a, 1)
1415 cpassert(SIZE(matrix_b, 1) == nz)
1416
1417 na = SIZE(matrix_a, 2)
1418 nb = SIZE(matrix_b, 2)
1419 cpassert(SIZE(trace, 1) == na)
1420 cpassert(SIZE(trace, 2) == nb)
1421
1422 use_accurate_sum = .true.
1423 IF (PRESENT(accurate)) use_accurate_sum = accurate
1424
1425 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1426 ! improve load balance between shared-memory threads
1427 ntraces = na*nb
1428 na8 = int(na, kind=int_8)
1429
1430!$OMP PARALLEL DO DEFAULT(NONE), &
1431!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1432!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1433!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1434 DO itrace = 1, ntraces
1435 ib8 = (itrace - 1)/na8
1436 ia = int(itrace - ib8*na8)
1437 ib = int(ib8) + 1
1438
1439 t = 0.0_dp
1440 DO iz = 1, nz
1441 CALL cp_fm_get_info(matrix_a(iz, ia) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1442 use_sp_a = matrix_a(iz, ia) %matrix%use_sp
1443 use_sp_b = matrix_b(iz, ib) %use_sp
1444
1445 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1446 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1447 ldata_a => matrix_a(iz, ia) %matrix%local_data(1:nrows_local, 1:ncols_local)
1448 ldata_b => matrix_b(iz, ib) %local_data(1:nrows_local, 1:ncols_local)
1449 IF (use_accurate_sum) THEN
1450 t = t + accurate_dot_product(ldata_a, ldata_b)
1451 ELSE
1452 t = t + sum(ldata_a*ldata_b)
1453 END IF
1454 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1455 ldata_a_sp => matrix_a(iz, ia) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1456 ldata_b_sp => matrix_b(iz, ib) %local_data_sp(1:nrows_local, 1:ncols_local)
1457 IF (use_accurate_sum) THEN
1458 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1459 ELSE
1460 t = t + sum(ldata_a_sp*ldata_b_sp)
1461 END IF
1462 ELSE
1463 cpabort("Matrices A and B are of different types")
1464 END IF
1465 END DO
1466 trace(ia, ib) = t
1467 END DO
1468!$OMP END PARALLEL DO
1469
1470 group = matrix_a(1, 1) %matrix%matrix_struct%para_env
1471 CALL group%sum(trace)
1472
1473 CALL timestop(handle)
1474 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_pa
1475 SUBROUTINE cp_fm_contracted_trace_a2b2t2_pp (matrix_a, matrix_b, trace, accurate)
1476 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(in) :: matrix_a
1477 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(in) :: matrix_b
1478 REAL(kind=dp), DIMENSION(:, :), INTENT(out) :: trace
1479 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1480
1481 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_pp'
1482
1483 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1484 nrows_local, nz
1485 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1486 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1487 REAL(kind=dp) :: t
1488 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1489 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1490 TYPE(mp_comm_type) :: group
1491
1492 CALL timeset(routinen, handle)
1493
1494 nz = SIZE(matrix_a, 1)
1495 cpassert(SIZE(matrix_b, 1) == nz)
1496
1497 na = SIZE(matrix_a, 2)
1498 nb = SIZE(matrix_b, 2)
1499 cpassert(SIZE(trace, 1) == na)
1500 cpassert(SIZE(trace, 2) == nb)
1501
1502 use_accurate_sum = .true.
1503 IF (PRESENT(accurate)) use_accurate_sum = accurate
1504
1505 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1506 ! improve load balance between shared-memory threads
1507 ntraces = na*nb
1508 na8 = int(na, kind=int_8)
1509
1510!$OMP PARALLEL DO DEFAULT(NONE), &
1511!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1512!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1513!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1514 DO itrace = 1, ntraces
1515 ib8 = (itrace - 1)/na8
1516 ia = int(itrace - ib8*na8)
1517 ib = int(ib8) + 1
1518
1519 t = 0.0_dp
1520 DO iz = 1, nz
1521 CALL cp_fm_get_info(matrix_a(iz, ia) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1522 use_sp_a = matrix_a(iz, ia) %matrix%use_sp
1523 use_sp_b = matrix_b(iz, ib) %matrix%use_sp
1524
1525 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1526 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1527 ldata_a => matrix_a(iz, ia) %matrix%local_data(1:nrows_local, 1:ncols_local)
1528 ldata_b => matrix_b(iz, ib) %matrix%local_data(1:nrows_local, 1:ncols_local)
1529 IF (use_accurate_sum) THEN
1530 t = t + accurate_dot_product(ldata_a, ldata_b)
1531 ELSE
1532 t = t + sum(ldata_a*ldata_b)
1533 END IF
1534 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1535 ldata_a_sp => matrix_a(iz, ia) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1536 ldata_b_sp => matrix_b(iz, ib) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1537 IF (use_accurate_sum) THEN
1538 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1539 ELSE
1540 t = t + sum(ldata_a_sp*ldata_b_sp)
1541 END IF
1542 ELSE
1543 cpabort("Matrices A and B are of different types")
1544 END IF
1545 END DO
1546 trace(ia, ib) = t
1547 END DO
1548!$OMP END PARALLEL DO
1549
1550 group = matrix_a(1, 1) %matrix%matrix_struct%para_env
1551 CALL group%sum(trace)
1552
1553 CALL timestop(handle)
1554 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_pp
1555
1556! **************************************************************************************************
1557!> \brief multiplies in place by a triangular matrix:
1558!> matrix_b = alpha op(triangular_matrix) matrix_b
1559!> or (if side='R')
1560!> matrix_b = alpha matrix_b op(triangular_matrix)
1561!> op(triangular_matrix) is:
1562!> triangular_matrix (if transpose_tr=.false. and invert_tr=.false.)
1563!> triangular_matrix^T (if transpose_tr=.true. and invert_tr=.false.)
1564!> triangular_matrix^(-1) (if transpose_tr=.false. and invert_tr=.true.)
1565!> triangular_matrix^(-T) (if transpose_tr=.true. and invert_tr=.true.)
1566!> \param triangular_matrix the triangular matrix that multiplies the other
1567!> \param matrix_b the matrix that gets multiplied and stores the result
1568!> \param side on which side of matrix_b stays op(triangular_matrix)
1569!> (defaults to 'L')
1570!> \param transpose_tr if the triangular matrix should be transposed
1571!> (defaults to false)
1572!> \param invert_tr if the triangular matrix should be inverted
1573!> (defaults to false)
1574!> \param uplo_tr if triangular_matrix is stored in the upper ('U') or
1575!> lower ('L') triangle (defaults to 'U')
1576!> \param unit_diag_tr if the diagonal elements of triangular_matrix should
1577!> be assumed to be 1 (defaults to false)
1578!> \param n_rows the number of rows of the result (defaults to
1579!> size(matrix_b,1))
1580!> \param n_cols the number of columns of the result (defaults to
1581!> size(matrix_b,2))
1582!> \param alpha ...
1583!> \par History
1584!> 08.2002 created [fawzi]
1585!> \author Fawzi Mohamed
1586!> \note
1587!> needs an mpi env
1588! **************************************************************************************************
1589 SUBROUTINE cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, &
1590 transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, &
1591 alpha)
1592 TYPE(cp_fm_type), INTENT(IN) :: triangular_matrix, matrix_b
1593 CHARACTER, INTENT(in), OPTIONAL :: side
1594 LOGICAL, INTENT(in), OPTIONAL :: transpose_tr, invert_tr
1595 CHARACTER, INTENT(in), OPTIONAL :: uplo_tr
1596 LOGICAL, INTENT(in), OPTIONAL :: unit_diag_tr
1597 INTEGER, INTENT(in), OPTIONAL :: n_rows, n_cols
1598 REAL(kind=dp), INTENT(in), OPTIONAL :: alpha
1599
1600 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_triangular_multiply'
1601
1602 CHARACTER :: side_char, transa, unit_diag, uplo
1603 INTEGER :: handle, m, n
1604 LOGICAL :: invert
1605 REAL(kind=dp) :: al
1606
1607 CALL timeset(routinen, handle)
1608 side_char = 'L'
1609 unit_diag = 'N'
1610 uplo = 'U'
1611 transa = 'N'
1612 invert = .false.
1613 al = 1.0_dp
1614 CALL cp_fm_get_info(matrix_b, nrow_global=m, ncol_global=n)
1615 IF (PRESENT(side)) side_char = side
1616 IF (PRESENT(invert_tr)) invert = invert_tr
1617 IF (PRESENT(uplo_tr)) uplo = uplo_tr
1618 IF (PRESENT(unit_diag_tr)) THEN
1619 IF (unit_diag_tr) THEN
1620 unit_diag = 'U'
1621 ELSE
1622 unit_diag = 'N'
1623 END IF
1624 END IF
1625 IF (PRESENT(transpose_tr)) THEN
1626 IF (transpose_tr) THEN
1627 transa = 'T'
1628 ELSE
1629 transa = 'N'
1630 END IF
1631 END IF
1632 IF (PRESENT(alpha)) al = alpha
1633 IF (PRESENT(n_rows)) m = n_rows
1634 IF (PRESENT(n_cols)) n = n_cols
1635
1636 IF (invert) THEN
1637
1638#if defined(__SCALAPACK)
1639 CALL pdtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1640 triangular_matrix%local_data(1, 1), 1, 1, &
1641 triangular_matrix%matrix_struct%descriptor, &
1642 matrix_b%local_data(1, 1), 1, 1, &
1643 matrix_b%matrix_struct%descriptor(1))
1644#else
1645 CALL dtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1646 triangular_matrix%local_data(1, 1), &
1647 SIZE(triangular_matrix%local_data, 1), &
1648 matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1649#endif
1650
1651 ELSE
1652
1653#if defined(__SCALAPACK)
1654 CALL pdtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1655 triangular_matrix%local_data(1, 1), 1, 1, &
1656 triangular_matrix%matrix_struct%descriptor, &
1657 matrix_b%local_data(1, 1), 1, 1, &
1658 matrix_b%matrix_struct%descriptor(1))
1659#else
1660 CALL dtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1661 triangular_matrix%local_data(1, 1), &
1662 SIZE(triangular_matrix%local_data, 1), &
1663 matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1664#endif
1665
1666 END IF
1667
1668 CALL timestop(handle)
1669 END SUBROUTINE cp_fm_triangular_multiply
1670
1671! **************************************************************************************************
1672!> \brief scales a matrix
1673!> matrix_a = alpha * matrix_b
1674!> \param alpha ...
1675!> \param matrix_a ...
1676!> \note
1677!> use cp_fm_set_all to zero (avoids problems with nan)
1678! **************************************************************************************************
1679 SUBROUTINE cp_fm_scale(alpha, matrix_a)
1680 REAL(kind=dp), INTENT(IN) :: alpha
1681 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
1682
1683 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_scale'
1684
1685 INTEGER :: handle, size_a
1686 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1687
1688 CALL timeset(routinen, handle)
1689
1690 NULLIFY (a)
1691
1692 a => matrix_a%local_data
1693 size_a = SIZE(a, 1)*SIZE(a, 2)
1694
1695 CALL dscal(size_a, alpha, a, 1)
1696
1697 CALL timestop(handle)
1698
1699 END SUBROUTINE cp_fm_scale
1700
1701! **************************************************************************************************
1702!> \brief transposes a matrix
1703!> matrixt = matrix ^ T
1704!> \param matrix ...
1705!> \param matrixt ...
1706!> \note
1707!> all matrix elements are transposed (see cp_fm_upper_to_half to symmetrise a matrix)
1708! **************************************************************************************************
1709 SUBROUTINE cp_fm_transpose(matrix, matrixt)
1710 TYPE(cp_fm_type), INTENT(IN) :: matrix, matrixt
1711
1712 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_transpose'
1713
1714 INTEGER :: handle, ncol_global, &
1715 nrow_global, ncol_globalt, nrow_globalt
1716 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, c
1717#if defined(__SCALAPACK)
1718 INTEGER, DIMENSION(9) :: desca, descc
1719#else
1720 INTEGER :: i, j
1721#endif
1722
1723 nrow_global = matrix%matrix_struct%nrow_global
1724 ncol_global = matrix%matrix_struct%ncol_global
1725 nrow_globalt = matrixt%matrix_struct%nrow_global
1726 ncol_globalt = matrixt%matrix_struct%ncol_global
1727 cpassert(nrow_global == ncol_globalt)
1728 cpassert(nrow_globalt == ncol_global)
1729
1730 CALL timeset(routinen, handle)
1731
1732 a => matrix%local_data
1733 c => matrixt%local_data
1734
1735#if defined(__SCALAPACK)
1736 desca(:) = matrix%matrix_struct%descriptor(:)
1737 descc(:) = matrixt%matrix_struct%descriptor(:)
1738 CALL pdtran(ncol_global, nrow_global, 1.0_dp, a(1, 1), 1, 1, desca, 0.0_dp, c(1, 1), 1, 1, descc)
1739#else
1740 DO j = 1, ncol_global
1741 DO i = 1, nrow_global
1742 c(j, i) = a(i, j)
1743 END DO
1744 END DO
1745#endif
1746 CALL timestop(handle)
1747
1748 END SUBROUTINE cp_fm_transpose
1749
1750! **************************************************************************************************
1751!> \brief given an upper triangular matrix computes the corresponding full matrix
1752!> \param matrix the upper triangular matrix as input, the full matrix as output
1753!> \param work a matrix of the same size as matrix
1754!> \author Matthias Krack
1755!> \note
1756!> the lower triangular part is irrelevant
1757! **************************************************************************************************
1758 SUBROUTINE cp_fm_upper_to_full(matrix, work)
1759
1760 TYPE(cp_fm_type), INTENT(IN) :: matrix, work
1761
1762 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_upper_to_full'
1763
1764 INTEGER :: handle, icol_global, irow_global, &
1765 mypcol, myprow, ncol_global, &
1766 npcol, nprow, nrow_global
1767 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1768 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
1769 TYPE(cp_blacs_env_type), POINTER :: context
1770
1771#if defined(__SCALAPACK)
1772 INTEGER :: icol_local, irow_local, &
1773 ncol_block, ncol_local, &
1774 nrow_block, nrow_local
1775 INTEGER, DIMENSION(9) :: desca, descc
1776 INTEGER, EXTERNAL :: indxl2g
1777 REAL(kind=dp), DIMENSION(:, :), POINTER :: c
1778 REAL(kind=sp), DIMENSION(:, :), POINTER :: c_sp
1779#endif
1780
1781 nrow_global = matrix%matrix_struct%nrow_global
1782 ncol_global = matrix%matrix_struct%ncol_global
1783 cpassert(nrow_global == ncol_global)
1784 nrow_global = work%matrix_struct%nrow_global
1785 ncol_global = work%matrix_struct%ncol_global
1786 cpassert(nrow_global == ncol_global)
1787 cpassert(matrix%use_sp .EQV. work%use_sp)
1788
1789 CALL timeset(routinen, handle)
1790
1791 context => matrix%matrix_struct%context
1792 myprow = context%mepos(1)
1793 mypcol = context%mepos(2)
1794 nprow = context%num_pe(1)
1795 npcol = context%num_pe(2)
1796
1797#if defined(__SCALAPACK)
1798
1799 nrow_block = matrix%matrix_struct%nrow_block
1800 ncol_block = matrix%matrix_struct%ncol_block
1801
1802 nrow_local = matrix%matrix_struct%nrow_locals(myprow)
1803 ncol_local = matrix%matrix_struct%ncol_locals(mypcol)
1804
1805 a => work%local_data
1806 a_sp => work%local_data_sp
1807 desca(:) = work%matrix_struct%descriptor(:)
1808 c => matrix%local_data
1809 c_sp => matrix%local_data_sp
1810 descc(:) = matrix%matrix_struct%descriptor(:)
1811
1812 DO icol_local = 1, ncol_local
1813 icol_global = indxl2g(icol_local, ncol_block, mypcol, &
1814 matrix%matrix_struct%first_p_pos(2), npcol)
1815 DO irow_local = 1, nrow_local
1816 irow_global = indxl2g(irow_local, nrow_block, myprow, &
1817 matrix%matrix_struct%first_p_pos(1), nprow)
1818 IF (irow_global > icol_global) THEN
1819 IF (matrix%use_sp) THEN
1820 c_sp(irow_local, icol_local) = 0.0_sp
1821 ELSE
1822 c(irow_local, icol_local) = 0.0_dp
1823 END IF
1824 ELSE IF (irow_global == icol_global) THEN
1825 IF (matrix%use_sp) THEN
1826 c_sp(irow_local, icol_local) = 0.5_sp*c_sp(irow_local, icol_local)
1827 ELSE
1828 c(irow_local, icol_local) = 0.5_dp*c(irow_local, icol_local)
1829 END IF
1830 END IF
1831 END DO
1832 END DO
1833
1834 DO icol_local = 1, ncol_local
1835 DO irow_local = 1, nrow_local
1836 IF (matrix%use_sp) THEN
1837 a_sp(irow_local, icol_local) = c_sp(irow_local, icol_local)
1838 ELSE
1839 a(irow_local, icol_local) = c(irow_local, icol_local)
1840 END IF
1841 END DO
1842 END DO
1843
1844 IF (matrix%use_sp) THEN
1845 CALL pstran(nrow_global, ncol_global, 1.0_sp, a_sp(1, 1), 1, 1, desca, 1.0_sp, c_sp(1, 1), 1, 1, descc)
1846 ELSE
1847 CALL pdtran(nrow_global, ncol_global, 1.0_dp, a(1, 1), 1, 1, desca, 1.0_dp, c(1, 1), 1, 1, descc)
1848 END IF
1849
1850#else
1851
1852 a => matrix%local_data
1853 a_sp => matrix%local_data_sp
1854 DO irow_global = 1, nrow_global
1855 DO icol_global = irow_global + 1, ncol_global
1856 IF (matrix%use_sp) THEN
1857 a_sp(icol_global, irow_global) = a_sp(irow_global, icol_global)
1858 ELSE
1859 a(icol_global, irow_global) = a(irow_global, icol_global)
1860 END IF
1861 END DO
1862 END DO
1863
1864#endif
1865 CALL timestop(handle)
1866
1867 END SUBROUTINE cp_fm_upper_to_full
1868
1869! **************************************************************************************************
1870!> \brief scales column i of matrix a with scaling(i)
1871!> \param matrixa ...
1872!> \param scaling : an array used for scaling the columns,
1873!> SIZE(scaling) determines the number of columns to be scaled
1874!> \author Joost VandeVondele
1875!> \note
1876!> this is very useful as a first step in the computation of C = sum_i alpha_i A_i transpose (A_i)
1877!> that is a rank-k update (cp_fm_syrk , cp_sm_plus_fm_fm_t)
1878!> this procedure can be up to 20 times faster than calling cp_fm_syrk n times
1879!> where every vector has a different prefactor
1880! **************************************************************************************************
1881 SUBROUTINE cp_fm_column_scale(matrixa, scaling)
1882 TYPE(cp_fm_type), INTENT(IN) :: matrixa
1883 REAL(kind=dp), DIMENSION(:), INTENT(in) :: scaling
1884
1885 INTEGER :: k, mypcol, myprow, n, ncol_global, &
1886 npcol, nprow
1887 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1888 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
1889#if defined(__SCALAPACK)
1890 INTEGER :: icol_global, icol_local, &
1891 ipcol, iprow, irow_local
1892#else
1893 INTEGER :: i
1894#endif
1895
1896 myprow = matrixa%matrix_struct%context%mepos(1)
1897 mypcol = matrixa%matrix_struct%context%mepos(2)
1898 nprow = matrixa%matrix_struct%context%num_pe(1)
1899 npcol = matrixa%matrix_struct%context%num_pe(2)
1900
1901 ncol_global = matrixa%matrix_struct%ncol_global
1902
1903 a => matrixa%local_data
1904 a_sp => matrixa%local_data_sp
1905 IF (matrixa%use_sp) THEN
1906 n = SIZE(a_sp, 1)
1907 ELSE
1908 n = SIZE(a, 1)
1909 END IF
1910 k = min(SIZE(scaling), ncol_global)
1911
1912#if defined(__SCALAPACK)
1913
1914 DO icol_global = 1, k
1915 CALL infog2l(1, icol_global, matrixa%matrix_struct%descriptor, &
1916 nprow, npcol, myprow, mypcol, &
1917 irow_local, icol_local, iprow, ipcol)
1918 IF ((ipcol == mypcol)) THEN
1919 IF (matrixa%use_sp) THEN
1920 CALL sscal(n, real(scaling(icol_global), sp), a_sp(:, icol_local), 1)
1921 ELSE
1922 CALL dscal(n, scaling(icol_global), a(:, icol_local), 1)
1923 END IF
1924 END IF
1925 END DO
1926#else
1927 DO i = 1, k
1928 IF (matrixa%use_sp) THEN
1929 CALL sscal(n, real(scaling(i), sp), a_sp(:, i), 1)
1930 ELSE
1931 CALL dscal(n, scaling(i), a(:, i), 1)
1932 END IF
1933 END DO
1934#endif
1935 END SUBROUTINE cp_fm_column_scale
1936
1937! **************************************************************************************************
1938!> \brief scales row i of matrix a with scaling(i)
1939!> \param matrixa ...
1940!> \param scaling : an array used for scaling the columns,
1941!> \author JGH
1942!> \note
1943! **************************************************************************************************
1944 SUBROUTINE cp_fm_row_scale(matrixa, scaling)
1945 TYPE(cp_fm_type), INTENT(IN) :: matrixa
1946 REAL(kind=dp), DIMENSION(:), INTENT(in) :: scaling
1947
1948 INTEGER :: n, m, nrow_global, nrow_local, ncol_local
1949 INTEGER, DIMENSION(:), POINTER :: row_indices
1950 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1951 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
1952#if defined(__SCALAPACK)
1953 INTEGER :: irow_global, icol, irow
1954#else
1955 INTEGER :: j
1956#endif
1957
1958 CALL cp_fm_get_info(matrixa, row_indices=row_indices, nrow_global=nrow_global, &
1959 nrow_local=nrow_local, ncol_local=ncol_local)
1960 cpassert(SIZE(scaling) == nrow_global)
1961
1962 a => matrixa%local_data
1963 a_sp => matrixa%local_data_sp
1964 IF (matrixa%use_sp) THEN
1965 n = SIZE(a_sp, 1)
1966 m = SIZE(a_sp, 2)
1967 ELSE
1968 n = SIZE(a, 1)
1969 m = SIZE(a, 2)
1970 END IF
1971
1972#if defined(__SCALAPACK)
1973 DO icol = 1, ncol_local
1974 IF (matrixa%use_sp) THEN
1975 DO irow = 1, nrow_local
1976 irow_global = row_indices(irow)
1977 a(irow, icol) = real(scaling(irow_global), dp)*a(irow, icol)
1978 END DO
1979 ELSE
1980 DO irow = 1, nrow_local
1981 irow_global = row_indices(irow)
1982 a(irow, icol) = scaling(irow_global)*a(irow, icol)
1983 END DO
1984 END IF
1985 END DO
1986#else
1987 IF (matrixa%use_sp) THEN
1988 DO j = 1, m
1989 a_sp(1:n, j) = real(scaling(1:n), sp)*a_sp(1:n, j)
1990 END DO
1991 ELSE
1992 DO j = 1, m
1993 a(1:n, j) = scaling(1:n)*a(1:n, j)
1994 END DO
1995 END IF
1996#endif
1997 END SUBROUTINE cp_fm_row_scale
1998! **************************************************************************************************
1999!> \brief Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix
2000!> \param matrix_a the matrix to invert
2001!> \param matrix_inverse the inverse of matrix_a
2002!> \param det_a the determinant of matrix_a
2003!> \param eps_svd optional parameter to active SVD based inversion, singular values below eps_svd
2004!> are screened
2005!> \param eigval optionally return matrix eigenvalues/singular values
2006!> \par History
2007!> note of Jan Wilhelm (12.2015)
2008!> - computation of determinant corrected
2009!> - determinant only computed if det_a is present
2010!> 12.2016 added option to use SVD instead of LU [Nico Holmberg]
2011!> - Use cp_fm_get diag instead of n times cp_fm_get_element (A. Bussy)
2012!> \author Florian Schiffmann(02.2007)
2013! **************************************************************************************************
2014 SUBROUTINE cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
2015
2016 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_inverse
2017 REAL(kind=dp), INTENT(OUT), OPTIONAL :: det_a
2018 REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_svd
2019 REAL(kind=dp), DIMENSION(:), POINTER, &
2020 INTENT(INOUT), OPTIONAL :: eigval
2021
2022 INTEGER :: n
2023 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
2024 REAL(kind=dp) :: determinant, my_eps_svd
2025 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2026 TYPE(cp_fm_type) :: matrix_lu
2027
2028#if defined(__SCALAPACK)
2029 TYPE(cp_fm_type) :: u, vt, sigma, inv_sigma_ut
2030 TYPE(mp_comm_type) :: group
2031 INTEGER :: i, info, liwork, lwork, exponent_of_minus_one
2032 INTEGER, DIMENSION(9) :: desca
2033 LOGICAL :: quenched
2034 REAL(kind=dp) :: alpha, beta
2035 REAL(kind=dp), DIMENSION(:), POINTER :: diag
2036 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: work
2037#else
2038 LOGICAL :: sign
2039 REAL(kind=dp) :: eps1
2040#endif
2041
2042 my_eps_svd = 0.0_dp
2043 IF (PRESENT(eps_svd)) my_eps_svd = eps_svd
2044
2045 CALL cp_fm_create(matrix=matrix_lu, &
2046 matrix_struct=matrix_a%matrix_struct, &
2047 name="A_lu"//trim(adjustl(cp_to_string(1)))//"MATRIX")
2048 CALL cp_fm_to_fm(matrix_a, matrix_lu)
2049
2050 a => matrix_lu%local_data
2051 n = matrix_lu%matrix_struct%nrow_global
2052 ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
2053 ipivot(:) = 0
2054#if defined(__SCALAPACK)
2055 IF (my_eps_svd .EQ. 0.0_dp) THEN
2056 ! Use LU decomposition
2057 lwork = 3*n
2058 liwork = 3*n
2059 desca(:) = matrix_lu%matrix_struct%descriptor(:)
2060 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
2061
2062 IF (PRESENT(det_a) .OR. PRESENT(eigval)) THEN
2063
2064 ALLOCATE (diag(n))
2065 diag(:) = 0.0_dp
2066 CALL cp_fm_get_diag(matrix_lu, diag)
2067
2068 exponent_of_minus_one = 0
2069 determinant = 1.0_dp
2070 DO i = 1, n
2071 determinant = determinant*diag(i)
2072 IF (ipivot(i) .NE. i) THEN
2073 exponent_of_minus_one = exponent_of_minus_one + 1
2074 END IF
2075 END DO
2076 IF (PRESENT(eigval)) THEN
2077 cpassert(.NOT. ASSOCIATED(eigval))
2078 ALLOCATE (eigval(n))
2079 eigval(:) = diag
2080 END IF
2081 DEALLOCATE (diag)
2082
2083 group = matrix_lu%matrix_struct%para_env
2084 CALL group%sum(exponent_of_minus_one)
2085
2086 determinant = determinant*(-1.0_dp)**exponent_of_minus_one
2087
2088 END IF
2089
2090 alpha = 0.0_dp
2091 beta = 1.0_dp
2092 CALL cp_fm_set_all(matrix_inverse, alpha, beta)
2093 CALL pdgetrs('N', n, n, matrix_lu%local_data, 1, 1, desca, ipivot, matrix_inverse%local_data, 1, 1, desca, info)
2094 ELSE
2095 ! Use singular value decomposition
2096 CALL cp_fm_create(matrix=u, &
2097 matrix_struct=matrix_a%matrix_struct, &
2098 name="LEFT_SINGULAR_MATRIX")
2099 CALL cp_fm_set_all(u, alpha=0.0_dp)
2100 CALL cp_fm_create(matrix=vt, &
2101 matrix_struct=matrix_a%matrix_struct, &
2102 name="RIGHT_SINGULAR_MATRIX")
2103 CALL cp_fm_set_all(vt, alpha=0.0_dp)
2104 ALLOCATE (diag(n))
2105 diag(:) = 0.0_dp
2106 desca(:) = matrix_lu%matrix_struct%descriptor(:)
2107 ALLOCATE (work(1))
2108 ! Workspace query
2109 lwork = -1
2110 CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
2111 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
2112 lwork = int(work(1))
2113 DEALLOCATE (work)
2114 ALLOCATE (work(lwork))
2115 ! SVD
2116 CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
2117 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
2118 ! info == n+1 implies homogeneity error when the number of procs is large
2119 ! this likely isnt a problem, but maybe we should handle it separately
2120 IF (info /= 0 .AND. info /= n + 1) &
2121 cpabort("Singular value decomposition of matrix failed.")
2122 ! (Pseudo)inverse and (pseudo)determinant
2123 CALL cp_fm_create(matrix=sigma, &
2124 matrix_struct=matrix_a%matrix_struct, &
2125 name="SINGULAR_VALUE_MATRIX")
2126 CALL cp_fm_set_all(sigma, alpha=0.0_dp)
2127 determinant = 1.0_dp
2128 quenched = .false.
2129 IF (PRESENT(eigval)) THEN
2130 cpassert(.NOT. ASSOCIATED(eigval))
2131 ALLOCATE (eigval(n))
2132 eigval(:) = diag
2133 END IF
2134 DO i = 1, n
2135 IF (diag(i) < my_eps_svd) THEN
2136 diag(i) = 0.0_dp
2137 quenched = .true.
2138 ELSE
2139 determinant = determinant*diag(i)
2140 diag(i) = 1.0_dp/diag(i)
2141 END IF
2142 CALL cp_fm_set_element(sigma, i, i, diag(i))
2143 END DO
2144 DEALLOCATE (diag)
2145 IF (quenched) &
2146 CALL cp_warn(__location__, &
2147 "Linear dependencies were detected in the SVD inversion of matrix "//trim(adjustl(matrix_a%name))// &
2148 ". At least one singular value has been quenched.")
2149 ! Sigma^-1 * U^T
2150 CALL cp_fm_create(matrix=inv_sigma_ut, &
2151 matrix_struct=matrix_a%matrix_struct, &
2152 name="SINGULAR_VALUE_MATRIX")
2153 CALL cp_fm_set_all(inv_sigma_ut, alpha=0.0_dp)
2154 CALL pdgemm('N', 'T', n, n, n, 1.0_dp, sigma%local_data, 1, 1, desca, &
2155 u%local_data, 1, 1, desca, 0.0_dp, inv_sigma_ut%local_data, 1, 1, desca)
2156 ! A^-1 = V * (Sigma^-1 * U^T)
2157 CALL cp_fm_set_all(matrix_inverse, alpha=0.0_dp)
2158 CALL pdgemm('T', 'N', n, n, n, 1.0_dp, vt%local_data, 1, 1, desca, &
2159 inv_sigma_ut%local_data, 1, 1, desca, 0.0_dp, matrix_inverse%local_data, 1, 1, desca)
2160 ! Clean up
2161 DEALLOCATE (work)
2162 CALL cp_fm_release(u)
2163 CALL cp_fm_release(vt)
2164 CALL cp_fm_release(sigma)
2165 CALL cp_fm_release(inv_sigma_ut)
2166 END IF
2167#else
2168 IF (my_eps_svd .EQ. 0.0_dp) THEN
2169 sign = .true.
2170 CALL invert_matrix(matrix_a%local_data, matrix_inverse%local_data, &
2171 eval_error=eps1)
2172 CALL cp_fm_lu_decompose(matrix_lu, determinant, correct_sign=sign)
2173 IF (PRESENT(eigval)) &
2174 CALL cp_abort(__location__, &
2175 "NYI. Eigenvalues not available for return without SCALAPACK.")
2176 ELSE
2177 CALL get_pseudo_inverse_svd(matrix_a%local_data, matrix_inverse%local_data, eps_svd, &
2178 determinant, eigval)
2179 END IF
2180#endif
2181 CALL cp_fm_release(matrix_lu)
2182 DEALLOCATE (ipivot)
2183 IF (PRESENT(det_a)) det_a = determinant
2184 END SUBROUTINE cp_fm_invert
2185
2186! **************************************************************************************************
2187!> \brief inverts a triangular matrix
2188!> \param matrix_a ...
2189!> \param uplo_tr ...
2190!> \author MI
2191! **************************************************************************************************
2192 SUBROUTINE cp_fm_triangular_invert(matrix_a, uplo_tr)
2193
2194 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
2195 CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
2196
2197 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_triangular_invert'
2198
2199 CHARACTER :: unit_diag, uplo
2200 INTEGER :: handle, info, ncol_global
2201 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2202#if defined(__SCALAPACK)
2203 INTEGER, DIMENSION(9) :: desca
2204#endif
2205
2206 CALL timeset(routinen, handle)
2207
2208 unit_diag = 'N'
2209 uplo = 'U'
2210 IF (PRESENT(uplo_tr)) uplo = uplo_tr
2211
2212 ncol_global = matrix_a%matrix_struct%ncol_global
2213
2214 a => matrix_a%local_data
2215
2216#if defined(__SCALAPACK)
2217
2218 desca(:) = matrix_a%matrix_struct%descriptor(:)
2219
2220 CALL pdtrtri(uplo, unit_diag, ncol_global, a(1, 1), 1, 1, desca, info)
2221
2222#else
2223 CALL dtrtri(uplo, unit_diag, ncol_global, a(1, 1), ncol_global, info)
2224#endif
2225
2226 CALL timestop(handle)
2227 END SUBROUTINE cp_fm_triangular_invert
2228
2229! **************************************************************************************************
2230!> \brief performs a QR factorization of the input rectangular matrix A or of a submatrix of A
2231!> the computed upper triangular matrix R is in output in the submatrix sub(A) of size NxN
2232!> M and M give the dimension of the submatrix that has to be factorized (MxN) with M>N
2233!> \param matrix_a ...
2234!> \param matrix_r ...
2235!> \param nrow_fact ...
2236!> \param ncol_fact ...
2237!> \param first_row ...
2238!> \param first_col ...
2239!> \author MI
2240! **************************************************************************************************
2241 SUBROUTINE cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col)
2242 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_r
2243 INTEGER, INTENT(IN), OPTIONAL :: nrow_fact, ncol_fact, &
2244 first_row, first_col
2245
2246 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_qr_factorization'
2247
2248 INTEGER :: handle, i, icol, info, irow, &
2249 j, lda, lwork, ncol, &
2250 ndim, nrow
2251 REAL(dp), ALLOCATABLE, DIMENSION(:) :: tau, work
2252 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: r_mat
2253 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2254#if defined(__SCALAPACK)
2255 INTEGER, DIMENSION(9) :: desca
2256#endif
2257
2258 CALL timeset(routinen, handle)
2259
2260 ncol = matrix_a%matrix_struct%ncol_global
2261 nrow = matrix_a%matrix_struct%nrow_global
2262 lda = nrow
2263
2264 a => matrix_a%local_data
2265
2266 IF (PRESENT(nrow_fact)) nrow = nrow_fact
2267 IF (PRESENT(ncol_fact)) ncol = ncol_fact
2268 irow = 1
2269 IF (PRESENT(first_row)) irow = first_row
2270 icol = 1
2271 IF (PRESENT(first_col)) icol = first_col
2272
2273 cpassert(nrow >= ncol)
2274 ndim = SIZE(a, 2)
2275! ALLOCATE(ipiv(ndim))
2276 ALLOCATE (tau(ndim))
2277
2278#if defined(__SCALAPACK)
2279
2280 desca(:) = matrix_a%matrix_struct%descriptor(:)
2281
2282 lwork = -1
2283 ALLOCATE (work(2*ndim))
2284 CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
2285 lwork = int(work(1))
2286 DEALLOCATE (work)
2287 ALLOCATE (work(lwork))
2288 CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
2289
2290#else
2291 lwork = -1
2292 ALLOCATE (work(2*ndim))
2293 CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
2294 lwork = int(work(1))
2295 DEALLOCATE (work)
2296 ALLOCATE (work(lwork))
2297 CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
2298
2299#endif
2300
2301 ALLOCATE (r_mat(ncol, ncol))
2302 CALL cp_fm_get_submatrix(matrix_a, r_mat, 1, 1, ncol, ncol)
2303 DO i = 1, ncol
2304 DO j = i + 1, ncol
2305 r_mat(j, i) = 0.0_dp
2306 END DO
2307 END DO
2308 CALL cp_fm_set_submatrix(matrix_r, r_mat, 1, 1, ncol, ncol)
2309
2310 DEALLOCATE (tau, work, r_mat)
2311
2312 CALL timestop(handle)
2313
2314 END SUBROUTINE cp_fm_qr_factorization
2315
2316! **************************************************************************************************
2317!> \brief computes the the solution to A*b=A_general using lu decomposition
2318!> pay attention, both matrices are overwritten, a_general contais the result
2319!> \param matrix_a ...
2320!> \param general_a ...
2321!> \author Florian Schiffmann
2322! **************************************************************************************************
2323 SUBROUTINE cp_fm_solve(matrix_a, general_a)
2324 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, general_a
2325
2326 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_solve'
2327
2328 INTEGER :: handle, info, n
2329 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
2330 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, a_general
2331#if defined(__SCALAPACK)
2332 INTEGER, DIMENSION(9) :: desca, descb
2333#else
2334 INTEGER :: lda, ldb
2335#endif
2336
2337 CALL timeset(routinen, handle)
2338
2339 a => matrix_a%local_data
2340 a_general => general_a%local_data
2341 n = matrix_a%matrix_struct%nrow_global
2342 ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
2343
2344#if defined(__SCALAPACK)
2345 desca(:) = matrix_a%matrix_struct%descriptor(:)
2346 descb(:) = general_a%matrix_struct%descriptor(:)
2347 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
2348 CALL pdgetrs("N", n, n, a, 1, 1, desca, ipivot, a_general, &
2349 1, 1, descb, info)
2350
2351#else
2352 lda = SIZE(a, 1)
2353 ldb = SIZE(a_general, 1)
2354 CALL dgetrf(n, n, a, lda, ipivot, info)
2355 CALL dgetrs("N", n, n, a, lda, ipivot, a_general, ldb, info)
2356
2357#endif
2358 ! info is allowed to be zero
2359 ! this does just signal a zero diagonal element
2360 DEALLOCATE (ipivot)
2361 CALL timestop(handle)
2362 END SUBROUTINE
2363
2364! **************************************************************************************************
2365!> \brief Convenience function. Computes the matrix multiplications needed
2366!> for the multiplication of complex matrices.
2367!> C = beta * C + alpha * ( A ** transa ) * ( B ** transb )
2368!> \param transa : 'N' -> normal 'T' -> transpose
2369!> alpha,beta :: can be 0.0_dp and 1.0_dp
2370!> \param transb ...
2371!> \param m ...
2372!> \param n ...
2373!> \param k ...
2374!> \param alpha ...
2375!> \param A_re m x k matrix ( ! for transa = 'N'), real part
2376!> \param A_im m x k matrix ( ! for transa = 'N'), imaginary part
2377!> \param B_re k x n matrix ( ! for transa = 'N'), real part
2378!> \param B_im k x n matrix ( ! for transa = 'N'), imaginary part
2379!> \param beta ...
2380!> \param C_re m x n matrix, real part
2381!> \param C_im m x n matrix, imaginary part
2382!> \param a_first_col ...
2383!> \param a_first_row ...
2384!> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
2385!> \param b_first_row ...
2386!> \param c_first_col ...
2387!> \param c_first_row ...
2388!> \author Samuel Andermatt
2389!> \note
2390!> C should have no overlap with A, B
2391! **************************************************************************************************
2392 SUBROUTINE cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, &
2393 C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, &
2394 c_first_row)
2395 CHARACTER(LEN=1), INTENT(IN) :: transa, transb
2396 INTEGER, INTENT(IN) :: m, n, k
2397 REAL(kind=dp), INTENT(IN) :: alpha
2398 TYPE(cp_fm_type), INTENT(IN) :: a_re, a_im, b_re, b_im
2399 REAL(kind=dp), INTENT(IN) :: beta
2400 TYPE(cp_fm_type), INTENT(IN) :: c_re, c_im
2401 INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, b_first_col, &
2402 b_first_row, c_first_col, c_first_row
2403
2404 CHARACTER(len=*), PARAMETER :: routinen = 'cp_complex_fm_gemm'
2405
2406 INTEGER :: handle
2407
2408 CALL timeset(routinen, handle)
2409
2410 CALL cp_fm_gemm(transa, transb, m, n, k, alpha, a_re, b_re, beta, c_re, &
2411 a_first_col=a_first_col, &
2412 a_first_row=a_first_row, &
2413 b_first_col=b_first_col, &
2414 b_first_row=b_first_row, &
2415 c_first_col=c_first_col, &
2416 c_first_row=c_first_row)
2417 CALL cp_fm_gemm(transa, transb, m, n, k, -alpha, a_im, b_im, 1.0_dp, c_re, &
2418 a_first_col=a_first_col, &
2419 a_first_row=a_first_row, &
2420 b_first_col=b_first_col, &
2421 b_first_row=b_first_row, &
2422 c_first_col=c_first_col, &
2423 c_first_row=c_first_row)
2424 CALL cp_fm_gemm(transa, transb, m, n, k, alpha, a_re, b_im, beta, c_im, &
2425 a_first_col=a_first_col, &
2426 a_first_row=a_first_row, &
2427 b_first_col=b_first_col, &
2428 b_first_row=b_first_row, &
2429 c_first_col=c_first_col, &
2430 c_first_row=c_first_row)
2431 CALL cp_fm_gemm(transa, transb, m, n, k, alpha, a_im, b_re, 1.0_dp, c_im, &
2432 a_first_col=a_first_col, &
2433 a_first_row=a_first_row, &
2434 b_first_col=b_first_col, &
2435 b_first_row=b_first_row, &
2436 c_first_col=c_first_col, &
2437 c_first_row=c_first_row)
2438
2439 CALL timestop(handle)
2440
2441 END SUBROUTINE cp_complex_fm_gemm
2442
2443! **************************************************************************************************
2444!> \brief inverts a matrix using LU decomposition
2445!> the input matrix will be overwritten
2446!> \param matrix : input a general square non-singular matrix, outputs its inverse
2447!> \param info_out : optional, if present outputs the info from (p)zgetri
2448!> \author Lianheng Tong
2449! **************************************************************************************************
2450 SUBROUTINE cp_fm_lu_invert(matrix, info_out)
2451 TYPE(cp_fm_type), INTENT(IN) :: matrix
2452 INTEGER, INTENT(OUT), OPTIONAL :: info_out
2453
2454 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_lu_invert'
2455
2456 INTEGER :: nrows_global, handle, info, lwork
2457 INTEGER, DIMENSION(:), ALLOCATABLE :: ipivot
2458 REAL(kind=dp), DIMENSION(:, :), POINTER :: mat
2459 REAL(kind=sp), DIMENSION(:, :), POINTER :: mat_sp
2460 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: work
2461 REAL(kind=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2462#if defined(__SCALAPACK)
2463 INTEGER :: liwork
2464 INTEGER, DIMENSION(9) :: desca
2465 INTEGER, DIMENSION(:), ALLOCATABLE :: iwork
2466#else
2467 INTEGER :: lda
2468#endif
2469
2470 CALL timeset(routinen, handle)
2471
2472 mat => matrix%local_data
2473 mat_sp => matrix%local_data_sp
2474 nrows_global = matrix%matrix_struct%nrow_global
2475 cpassert(nrows_global .EQ. matrix%matrix_struct%ncol_global)
2476 ALLOCATE (ipivot(nrows_global))
2477 ! do LU decomposition
2478#if defined(__SCALAPACK)
2479 desca = matrix%matrix_struct%descriptor
2480 IF (matrix%use_sp) THEN
2481 CALL psgetrf(nrows_global, nrows_global, &
2482 mat_sp, 1, 1, desca, ipivot, info)
2483 ELSE
2484 CALL pdgetrf(nrows_global, nrows_global, &
2485 mat, 1, 1, desca, ipivot, info)
2486 END IF
2487#else
2488 lda = SIZE(mat, 1)
2489 IF (matrix%use_sp) THEN
2490 CALL sgetrf(nrows_global, nrows_global, &
2491 mat_sp, lda, ipivot, info)
2492 ELSE
2493 CALL dgetrf(nrows_global, nrows_global, &
2494 mat, lda, ipivot, info)
2495 END IF
2496#endif
2497 IF (info /= 0) THEN
2498 CALL cp_abort(__location__, "LU decomposition has failed")
2499 END IF
2500 ! do inversion
2501 IF (matrix%use_sp) THEN
2502 ALLOCATE (work(1))
2503 ELSE
2504 ALLOCATE (work_sp(1))
2505 END IF
2506#if defined(__SCALAPACK)
2507 ALLOCATE (iwork(1))
2508 IF (matrix%use_sp) THEN
2509 CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2510 ipivot, work_sp, -1, iwork, -1, info)
2511 lwork = int(work_sp(1))
2512 DEALLOCATE (work_sp)
2513 ALLOCATE (work_sp(lwork))
2514 ELSE
2515 CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2516 ipivot, work, -1, iwork, -1, info)
2517 lwork = int(work(1))
2518 DEALLOCATE (work)
2519 ALLOCATE (work(lwork))
2520 END IF
2521 liwork = int(iwork(1))
2522 DEALLOCATE (iwork)
2523 ALLOCATE (iwork(liwork))
2524 IF (matrix%use_sp) THEN
2525 CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2526 ipivot, work_sp, lwork, iwork, liwork, info)
2527 ELSE
2528 CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2529 ipivot, work, lwork, iwork, liwork, info)
2530 END IF
2531 DEALLOCATE (iwork)
2532#else
2533 IF (matrix%use_sp) THEN
2534 CALL sgetri(nrows_global, mat_sp, lda, &
2535 ipivot, work_sp, -1, info)
2536 lwork = int(work_sp(1))
2537 DEALLOCATE (work_sp)
2538 ALLOCATE (work_sp(lwork))
2539 CALL sgetri(nrows_global, mat_sp, lda, &
2540 ipivot, work_sp, lwork, info)
2541 ELSE
2542 CALL dgetri(nrows_global, mat, lda, &
2543 ipivot, work, -1, info)
2544 lwork = int(work(1))
2545 DEALLOCATE (work)
2546 ALLOCATE (work(lwork))
2547 CALL dgetri(nrows_global, mat, lda, &
2548 ipivot, work, lwork, info)
2549 END IF
2550#endif
2551 IF (matrix%use_sp) THEN
2552 DEALLOCATE (work_sp)
2553 ELSE
2554 DEALLOCATE (work)
2555 END IF
2556 DEALLOCATE (ipivot)
2557
2558 IF (PRESENT(info_out)) THEN
2559 info_out = info
2560 ELSE
2561 IF (info /= 0) &
2562 CALL cp_abort(__location__, "LU inversion has failed")
2563 END IF
2564
2565 CALL timestop(handle)
2566
2567 END SUBROUTINE cp_fm_lu_invert
2568
2569! **************************************************************************************************
2570!> \brief norm of matrix using (p)dlange
2571!> \param matrix : input a general matrix
2572!> \param mode : 'M' max abs element value,
2573!> '1' or 'O' one norm, i.e. maximum column sum
2574!> 'I' infinity norm, i.e. maximum row sum
2575!> 'F' or 'E' Frobenius norm, i.e. sqrt of sum of all squares of elements
2576!> \return : the norm according to mode
2577!> \author Lianheng Tong
2578! **************************************************************************************************
2579 FUNCTION cp_fm_norm(matrix, mode) RESULT(res)
2580 TYPE(cp_fm_type), INTENT(IN) :: matrix
2581 CHARACTER, INTENT(IN) :: mode
2582 REAL(kind=dp) :: res
2583
2584 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_norm'
2585
2586 INTEGER :: nrows, ncols, handle, lwork, nrows_local, ncols_local
2587 REAL(kind=sp) :: res_sp
2588 REAL(kind=dp), DIMENSION(:, :), POINTER :: aa
2589 REAL(kind=sp), DIMENSION(:, :), POINTER :: aa_sp
2590 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: work
2591 REAL(kind=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2592#if defined(__SCALAPACK)
2593 INTEGER, DIMENSION(9) :: desca
2594#else
2595 INTEGER :: lda
2596#endif
2597
2598 CALL timeset(routinen, handle)
2599
2600 CALL cp_fm_get_info(matrix=matrix, &
2601 nrow_global=nrows, &
2602 ncol_global=ncols, &
2603 nrow_local=nrows_local, &
2604 ncol_local=ncols_local)
2605 aa => matrix%local_data
2606 aa_sp => matrix%local_data_sp
2607
2608#if defined(__SCALAPACK)
2609 desca = matrix%matrix_struct%descriptor
2610 SELECT CASE (mode)
2611 CASE ('M', 'm')
2612 lwork = 1
2613 CASE ('1', 'O', 'o')
2614 lwork = ncols_local
2615 CASE ('I', 'i')
2616 lwork = nrows_local
2617 CASE ('F', 'f', 'E', 'e')
2618 lwork = 1
2619 CASE DEFAULT
2620 cpabort("mode input is not valid")
2621 END SELECT
2622 IF (matrix%use_sp) THEN
2623 ALLOCATE (work_sp(lwork))
2624 res_sp = pslange(mode, nrows, ncols, aa_sp, 1, 1, desca, work_sp)
2625 DEALLOCATE (work_sp)
2626 res = real(res_sp, kind=dp)
2627 ELSE
2628 ALLOCATE (work(lwork))
2629 res = pdlange(mode, nrows, ncols, aa, 1, 1, desca, work)
2630 DEALLOCATE (work)
2631 END IF
2632#else
2633 SELECT CASE (mode)
2634 CASE ('M', 'm')
2635 lwork = 1
2636 CASE ('1', 'O', 'o')
2637 lwork = 1
2638 CASE ('I', 'i')
2639 lwork = nrows
2640 CASE ('F', 'f', 'E', 'e')
2641 lwork = 1
2642 CASE DEFAULT
2643 cpabort("mode input is not valid")
2644 END SELECT
2645 IF (matrix%use_sp) THEN
2646 ALLOCATE (work_sp(lwork))
2647 lda = SIZE(aa_sp, 1)
2648 res_sp = slange(mode, nrows, ncols, aa_sp, lda, work_sp)
2649 DEALLOCATE (work_sp)
2650 res = real(res_sp, kind=dp)
2651 ELSE
2652 ALLOCATE (work(lwork))
2653 lda = SIZE(aa, 1)
2654 res = dlange(mode, nrows, ncols, aa, lda, work)
2655 DEALLOCATE (work)
2656 END IF
2657#endif
2658
2659 CALL timestop(handle)
2660
2661 END FUNCTION cp_fm_norm
2662
2663! **************************************************************************************************
2664!> \brief trace of a matrix using pdlatra
2665!> \param matrix : input a square matrix
2666!> \return : the trace
2667!> \author Lianheng Tong
2668! **************************************************************************************************
2669 FUNCTION cp_fm_latra(matrix) RESULT(res)
2670 TYPE(cp_fm_type), INTENT(IN) :: matrix
2671 REAL(kind=dp) :: res
2672
2673 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_latra'
2674
2675 INTEGER :: nrows, ncols, handle
2676 REAL(kind=sp) :: res_sp
2677 REAL(kind=dp), DIMENSION(:, :), POINTER :: aa
2678 REAL(kind=sp), DIMENSION(:, :), POINTER :: aa_sp
2679#if defined(__SCALAPACK)
2680 INTEGER, DIMENSION(9) :: desca
2681#else
2682 INTEGER :: ii
2683#endif
2684
2685 CALL timeset(routinen, handle)
2686
2687 nrows = matrix%matrix_struct%nrow_global
2688 ncols = matrix%matrix_struct%ncol_global
2689 cpassert(nrows .EQ. ncols)
2690 aa => matrix%local_data
2691 aa_sp => matrix%local_data_sp
2692
2693#if defined(__SCALAPACK)
2694 desca = matrix%matrix_struct%descriptor
2695 IF (matrix%use_sp) THEN
2696 res_sp = pslatra(nrows, aa_sp, 1, 1, desca)
2697 res = real(res_sp, kind=dp)
2698 ELSE
2699 res = pdlatra(nrows, aa, 1, 1, desca)
2700 END IF
2701#else
2702 IF (matrix%use_sp) THEN
2703 res_sp = 0.0_sp
2704 DO ii = 1, nrows
2705 res_sp = res_sp + aa_sp(ii, ii)
2706 END DO
2707 res = real(res_sp, kind=dp)
2708 ELSE
2709 res = 0.0_dp
2710 DO ii = 1, nrows
2711 res = res + aa(ii, ii)
2712 END DO
2713 END IF
2714#endif
2715
2716 CALL timestop(handle)
2717
2718 END FUNCTION cp_fm_latra
2719
2720! **************************************************************************************************
2721!> \brief compute a QR factorization with column pivoting of a M-by-N distributed matrix
2722!> sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
2723!> \param matrix : input M-by-N distributed matrix sub( A ) which is to be factored
2724!> \param tau : scalar factors TAU of the elementary reflectors. TAU is tied to the distributed matrix A
2725!> \param nrow ...
2726!> \param ncol ...
2727!> \param first_row ...
2728!> \param first_col ...
2729!> \author MI
2730! **************************************************************************************************
2731 SUBROUTINE cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
2732
2733 TYPE(cp_fm_type), INTENT(IN) :: matrix
2734 REAL(kind=dp), DIMENSION(:), POINTER :: tau
2735 INTEGER, INTENT(IN) :: nrow, ncol, first_row, first_col
2736
2737 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_pdgeqpf'
2738
2739 INTEGER :: handle
2740 INTEGER :: info, lwork
2741 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipiv
2742 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2743 REAL(kind=dp), DIMENSION(:), POINTER :: work
2744#if defined(__SCALAPACK)
2745 INTEGER, DIMENSION(9) :: descc
2746#else
2747 INTEGER :: lda
2748#endif
2749
2750 CALL timeset(routinen, handle)
2751
2752 a => matrix%local_data
2753 lwork = -1
2754 ALLOCATE (work(2*nrow))
2755 ALLOCATE (ipiv(ncol))
2756 info = 0
2757
2758#if defined(__SCALAPACK)
2759 descc(:) = matrix%matrix_struct%descriptor(:)
2760 ! Call SCALAPACK routine to get optimal work dimension
2761 CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2762 lwork = int(work(1))
2763 DEALLOCATE (work)
2764 ALLOCATE (work(lwork))
2765 tau = 0.0_dp
2766 ipiv = 0
2767
2768 ! Call SCALAPACK routine to get QR decomposition of CTs
2769 CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2770#else
2771 cpassert(first_row == 1 .AND. first_col == 1)
2772 lda = SIZE(a, 1)
2773 CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2774 lwork = int(work(1))
2775 DEALLOCATE (work)
2776 ALLOCATE (work(lwork))
2777 tau = 0.0_dp
2778 ipiv = 0
2779 CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2780#endif
2781 cpassert(info == 0)
2782
2783 DEALLOCATE (work)
2784 DEALLOCATE (ipiv)
2785
2786 CALL timestop(handle)
2787
2788 END SUBROUTINE cp_fm_pdgeqpf
2789
2790! **************************************************************************************************
2791!> \brief generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1)
2792!> with orthonormal columns, which is defined as the first N columns of a product of K
2793!> elementary reflectors of order M
2794!> \param matrix : On entry, the j-th column must contain the vector which defines the elementary reflector
2795!> as returned from PDGEQRF
2796!> On exit it contains the M-by-N distributed matrix Q
2797!> \param tau : contains the scalar factors TAU of elementary reflectors as returned by PDGEQRF
2798!> \param nrow ...
2799!> \param first_row ...
2800!> \param first_col ...
2801!> \author MI
2802! **************************************************************************************************
2803 SUBROUTINE cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
2804
2805 TYPE(cp_fm_type), INTENT(IN) :: matrix
2806 REAL(kind=dp), DIMENSION(:), POINTER :: tau
2807 INTEGER, INTENT(IN) :: nrow, first_row, first_col
2808
2809 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_pdorgqr'
2810
2811 INTEGER :: handle
2812 INTEGER :: info, lwork
2813 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2814 REAL(kind=dp), DIMENSION(:), POINTER :: work
2815#if defined(__SCALAPACK)
2816 INTEGER, DIMENSION(9) :: descc
2817#else
2818 INTEGER :: lda
2819#endif
2820
2821 CALL timeset(routinen, handle)
2822
2823 a => matrix%local_data
2824 lwork = -1
2825 ALLOCATE (work(2*nrow))
2826 info = 0
2827
2828#if defined(__SCALAPACK)
2829 descc(:) = matrix%matrix_struct%descriptor(:)
2830
2831 CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2832 cpassert(info == 0)
2833 lwork = int(work(1))
2834 DEALLOCATE (work)
2835 ALLOCATE (work(lwork))
2836
2837 ! Call SCALAPACK routine to get Q
2838 CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2839#else
2840 cpassert(first_row == 1 .AND. first_col == 1)
2841 lda = SIZE(a, 1)
2842 CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2843 lwork = int(work(1))
2844 DEALLOCATE (work)
2845 ALLOCATE (work(lwork))
2846 CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2847#endif
2848 cpassert(info == 0)
2849
2850 DEALLOCATE (work)
2851 CALL timestop(handle)
2852
2853 END SUBROUTINE cp_fm_pdorgqr
2854
2855! **************************************************************************************************
2856!> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
2857!> \param cs cosine of the rotation angle
2858!> \param sn sinus of the rotation angle
2859!> \param irow ...
2860!> \param jrow ...
2861!> \author Ole Schuett
2862! **************************************************************************************************
2863 SUBROUTINE cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
2864 TYPE(cp_fm_type), INTENT(IN) :: matrix
2865 INTEGER, INTENT(IN) :: irow, jrow
2866 REAL(dp), INTENT(IN) :: cs, sn
2867
2868 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_rot_rows'
2869 INTEGER :: handle, nrow, ncol
2870
2871#if defined(__SCALAPACK)
2872 INTEGER :: info, lwork
2873 INTEGER, DIMENSION(9) :: desc
2874 REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2875#endif
2876
2877 CALL timeset(routinen, handle)
2878 CALL cp_fm_get_info(matrix, nrow_global=nrow, ncol_global=ncol)
2879
2880#if defined(__SCALAPACK)
2881 lwork = 2*ncol + 1
2882 ALLOCATE (work(lwork))
2883 desc(:) = matrix%matrix_struct%descriptor(:)
2884 CALL pdrot(ncol, &
2885 matrix%local_data(1, 1), irow, 1, desc, ncol, &
2886 matrix%local_data(1, 1), jrow, 1, desc, ncol, &
2887 cs, sn, work, lwork, info)
2888 cpassert(info == 0)
2889 DEALLOCATE (work)
2890#else
2891 CALL drot(ncol, matrix%local_data(irow, 1), ncol, matrix%local_data(jrow, 1), ncol, cs, sn)
2892#endif
2893
2894 CALL timestop(handle)
2895 END SUBROUTINE cp_fm_rot_rows
2896
2897! **************************************************************************************************
2898!> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
2899!> \param cs cosine of the rotation angle
2900!> \param sn sinus of the rotation angle
2901!> \param icol ...
2902!> \param jcol ...
2903!> \author Ole Schuett
2904! **************************************************************************************************
2905 SUBROUTINE cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
2906 TYPE(cp_fm_type), INTENT(IN) :: matrix
2907 INTEGER, INTENT(IN) :: icol, jcol
2908 REAL(dp), INTENT(IN) :: cs, sn
2909
2910 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_rot_cols'
2911 INTEGER :: handle, nrow, ncol
2912
2913#if defined(__SCALAPACK)
2914 INTEGER :: info, lwork
2915 INTEGER, DIMENSION(9) :: desc
2916 REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2917#endif
2918
2919 CALL timeset(routinen, handle)
2920 CALL cp_fm_get_info(matrix, nrow_global=nrow, ncol_global=ncol)
2921
2922#if defined(__SCALAPACK)
2923 lwork = 2*nrow + 1
2924 ALLOCATE (work(lwork))
2925 desc(:) = matrix%matrix_struct%descriptor(:)
2926 CALL pdrot(nrow, &
2927 matrix%local_data(1, 1), 1, icol, desc, 1, &
2928 matrix%local_data(1, 1), 1, jcol, desc, 1, &
2929 cs, sn, work, lwork, info)
2930 cpassert(info == 0)
2931 DEALLOCATE (work)
2932#else
2933 CALL drot(nrow, matrix%local_data(1, icol), 1, matrix%local_data(1, jcol), 1, cs, sn)
2934#endif
2935
2936 CALL timestop(handle)
2937 END SUBROUTINE cp_fm_rot_cols
2938
2939! **************************************************************************************************
2940!> \brief Orthonormalizes selected rows and columns of a full matrix, matrix_a
2941!> \param matrix_a ...
2942!> \param B ...
2943!> \param nrows number of rows of matrix_a, optional, defaults to size(matrix_a,1)
2944!> \param ncols number of columns of matrix_a, optional, defaults to size(matrix_a, 2)
2945!> \param start_row starting index of rows, optional, defaults to 1
2946!> \param start_col starting index of columns, optional, defaults to 1
2947!> \param do_norm ...
2948!> \param do_print ...
2949! **************************************************************************************************
2950 SUBROUTINE cp_fm_gram_schmidt_orthonorm(matrix_a, B, nrows, ncols, start_row, start_col, &
2951 do_norm, do_print)
2952
2953 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
2954 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: b
2955 INTEGER, INTENT(IN), OPTIONAL :: nrows, ncols, start_row, start_col
2956 LOGICAL, INTENT(IN), OPTIONAL :: do_norm, do_print
2957
2958 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_Gram_Schmidt_orthonorm'
2959
2960 INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
2961 j_col, ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, &
2962 start_col_local, start_row_global, start_row_local, this_col, unit_nr
2963 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
2964 LOGICAL :: my_do_norm, my_do_print
2965 REAL(kind=dp) :: norm
2966 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2967
2968 CALL timeset(routinen, handle)
2969
2970 my_do_norm = .true.
2971 IF (PRESENT(do_norm)) my_do_norm = do_norm
2972
2973 my_do_print = .false.
2974 IF (PRESENT(do_print) .AND. (my_do_norm)) my_do_print = do_print
2975
2976 unit_nr = -1
2977 IF (my_do_print) THEN
2979 IF (unit_nr < 1) my_do_print = .false.
2980 END IF
2981
2982 IF (SIZE(b) /= 0) THEN
2983 IF (PRESENT(nrows)) THEN
2984 nrow_global = nrows
2985 ELSE
2986 nrow_global = SIZE(b, 1)
2987 END IF
2988
2989 IF (PRESENT(ncols)) THEN
2990 ncol_global = ncols
2991 ELSE
2992 ncol_global = SIZE(b, 2)
2993 END IF
2994
2995 IF (PRESENT(start_row)) THEN
2996 start_row_global = start_row
2997 ELSE
2998 start_row_global = 1
2999 END IF
3000
3001 IF (PRESENT(start_col)) THEN
3002 start_col_global = start_col
3003 ELSE
3004 start_col_global = 1
3005 END IF
3006
3007 end_row_global = start_row_global + nrow_global - 1
3008 end_col_global = start_col_global + ncol_global - 1
3009
3010 CALL cp_fm_get_info(matrix=matrix_a, &
3011 nrow_global=nrow_global, ncol_global=ncol_global, &
3012 nrow_local=nrow_local, ncol_local=ncol_local, &
3013 row_indices=row_indices, col_indices=col_indices)
3014 IF (end_row_global > nrow_global) THEN
3015 end_row_global = nrow_global
3016 END IF
3017 IF (end_col_global > ncol_global) THEN
3018 end_col_global = ncol_global
3019 END IF
3020
3021 ! find out row/column indices of locally stored matrix elements that
3022 ! needs to be copied.
3023 ! Arrays row_indices and col_indices are assumed to be sorted in
3024 ! ascending order
3025 DO start_row_local = 1, nrow_local
3026 IF (row_indices(start_row_local) >= start_row_global) EXIT
3027 END DO
3028
3029 DO end_row_local = start_row_local, nrow_local
3030 IF (row_indices(end_row_local) > end_row_global) EXIT
3031 END DO
3032 end_row_local = end_row_local - 1
3033
3034 DO start_col_local = 1, ncol_local
3035 IF (col_indices(start_col_local) >= start_col_global) EXIT
3036 END DO
3037
3038 DO end_col_local = start_col_local, ncol_local
3039 IF (col_indices(end_col_local) > end_col_global) EXIT
3040 END DO
3041 end_col_local = end_col_local - 1
3042
3043 a => matrix_a%local_data
3044
3045 this_col = col_indices(start_col_local) - start_col_global + 1
3046
3047 b(:, this_col) = a(:, start_col_local)
3048
3049 IF (my_do_norm) THEN
3050 norm = sqrt(accurate_dot_product(b(:, this_col), b(:, this_col)))
3051 b(:, this_col) = b(:, this_col)/norm
3052 IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
3053 END IF
3054
3055 DO i = start_col_local + 1, end_col_local
3056 this_col = col_indices(i) - start_col_global + 1
3057 b(:, this_col) = a(:, i)
3058 DO j = start_col_local, i - 1
3059 j_col = col_indices(j) - start_col_global + 1
3060 b(:, this_col) = b(:, this_col) - &
3061 accurate_dot_product(b(:, j_col), b(:, this_col))* &
3062 b(:, j_col)/accurate_dot_product(b(:, j_col), b(:, j_col))
3063 END DO
3064
3065 IF (my_do_norm) THEN
3066 norm = sqrt(accurate_dot_product(b(:, this_col), b(:, this_col)))
3067 b(:, this_col) = b(:, this_col)/norm
3068 IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
3069 END IF
3070
3071 END DO
3072 CALL matrix_a%matrix_struct%para_env%sum(b)
3073 END IF
3074
3075 CALL timestop(handle)
3076
3077 END SUBROUTINE cp_fm_gram_schmidt_orthonorm
3078
3079! **************************************************************************************************
3080!> \brief Cholesky decomposition
3081!> \param fm_matrix ...
3082!> \param n ...
3083! **************************************************************************************************
3084 SUBROUTINE cp_fm_potrf(fm_matrix, n)
3085 TYPE(cp_fm_type) :: fm_matrix
3086 INTEGER, INTENT(in) :: n
3087
3088 INTEGER :: info
3089 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
3090 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
3091#if defined(__SCALAPACK)
3092 INTEGER, DIMENSION(9) :: desca
3093#endif
3094
3095 a => fm_matrix%local_data
3096 a_sp => fm_matrix%local_data_sp
3097#if defined(__SCALAPACK)
3098 desca(:) = fm_matrix%matrix_struct%descriptor(:)
3099 IF (fm_matrix%use_sp) THEN
3100 CALL pspotrf('U', n, a_sp(1, 1), 1, 1, desca, info)
3101 ELSE
3102 CALL pdpotrf('U', n, a(1, 1), 1, 1, desca, info)
3103 END IF
3104#else
3105 IF (fm_matrix%use_sp) THEN
3106 CALL spotrf('U', n, a_sp(1, 1), SIZE(a_sp, 1), info)
3107 ELSE
3108 CALL dpotrf('U', n, a(1, 1), SIZE(a, 1), info)
3109 END IF
3110#endif
3111 IF (info /= 0) &
3112 cpabort("Cholesky decomposition failed. Matrix ill conditioned ?")
3113
3114 END SUBROUTINE cp_fm_potrf
3115
3116! **************************************************************************************************
3117!> \brief Invert trianguar matrix
3118!> \param fm_matrix the matrix to invert (must be an upper triangular matrix)
3119!> \param n size of the matrix to invert
3120! **************************************************************************************************
3121 SUBROUTINE cp_fm_potri(fm_matrix, n)
3122 TYPE(cp_fm_type) :: fm_matrix
3123 INTEGER, INTENT(in) :: n
3124
3125 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
3126 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
3127 INTEGER :: info
3128#if defined(__SCALAPACK)
3129 INTEGER, DIMENSION(9) :: desca
3130#endif
3131
3132 a => fm_matrix%local_data
3133 a_sp => fm_matrix%local_data_sp
3134#if defined(__SCALAPACK)
3135 desca(:) = fm_matrix%matrix_struct%descriptor(:)
3136 IF (fm_matrix%use_sp) THEN
3137 CALL pspotri('U', n, a_sp(1, 1), 1, 1, desca, info)
3138 ELSE
3139 CALL pdpotri('U', n, a(1, 1), 1, 1, desca, info)
3140 END IF
3141#else
3142 IF (fm_matrix%use_sp) THEN
3143 CALL spotri('U', n, a_sp(1, 1), SIZE(a_sp, 1), info)
3144 ELSE
3145 CALL dpotri('U', n, a(1, 1), SIZE(a, 1), info)
3146 END IF
3147#endif
3148 cpassert(info == 0)
3149 END SUBROUTINE cp_fm_potri
3150
3151! **************************************************************************************************
3152!> \brief ...
3153!> \param fm_matrix ...
3154!> \param neig ...
3155!> \param fm_matrixb ...
3156!> \param fm_matrixout ...
3157!> \param op ...
3158!> \param pos ...
3159!> \param transa ...
3160! **************************************************************************************************
3161 SUBROUTINE cp_fm_cholesky_restore(fm_matrix, neig, fm_matrixb, fm_matrixout, op, pos, transa)
3162 TYPE(cp_fm_type) :: fm_matrix
3163 TYPE(cp_fm_type) :: fm_matrixb
3164 TYPE(cp_fm_type) :: fm_matrixout
3165 INTEGER, INTENT(IN) :: neig
3166 CHARACTER(LEN=*), INTENT(IN) :: op
3167 CHARACTER(LEN=*), INTENT(IN) :: pos
3168 CHARACTER(LEN=*), INTENT(IN) :: transa
3169
3170 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, outm
3171 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp, outm_sp
3172 INTEGER :: n, itype
3173 REAL(kind=dp) :: alpha
3174#if defined(__SCALAPACK)
3175 INTEGER :: i
3176 INTEGER, DIMENSION(9) :: desca, descb, descout
3177#endif
3178
3179 ! notice b is the cholesky guy
3180 a => fm_matrix%local_data
3181 b => fm_matrixb%local_data
3182 outm => fm_matrixout%local_data
3183 a_sp => fm_matrix%local_data_sp
3184 b_sp => fm_matrixb%local_data_sp
3185 outm_sp => fm_matrixout%local_data_sp
3186
3187 n = fm_matrix%matrix_struct%nrow_global
3188 itype = 1
3189
3190#if defined(__SCALAPACK)
3191 desca(:) = fm_matrix%matrix_struct%descriptor(:)
3192 descb(:) = fm_matrixb%matrix_struct%descriptor(:)
3193 descout(:) = fm_matrixout%matrix_struct%descriptor(:)
3194 alpha = 1.0_dp
3195 DO i = 1, neig
3196 IF (fm_matrix%use_sp) THEN
3197 CALL pscopy(n, a_sp(1, 1), 1, i, desca, 1, outm_sp(1, 1), 1, i, descout, 1)
3198 ELSE
3199 CALL pdcopy(n, a(1, 1), 1, i, desca, 1, outm(1, 1), 1, i, descout, 1)
3200 END IF
3201 END DO
3202 IF (op .EQ. "SOLVE") THEN
3203 IF (fm_matrix%use_sp) THEN
3204 CALL pstrsm(pos, 'U', transa, 'N', n, neig, real(alpha, sp), b_sp(1, 1), 1, 1, descb, &
3205 outm_sp(1, 1), 1, 1, descout)
3206 ELSE
3207 CALL pdtrsm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), 1, 1, descb, outm(1, 1), 1, 1, descout)
3208 END IF
3209 ELSE
3210 IF (fm_matrix%use_sp) THEN
3211 CALL pstrmm(pos, 'U', transa, 'N', n, neig, real(alpha, sp), b_sp(1, 1), 1, 1, descb, &
3212 outm_sp(1, 1), 1, 1, descout)
3213 ELSE
3214 CALL pdtrmm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), 1, 1, descb, outm(1, 1), 1, 1, descout)
3215 END IF
3216 END IF
3217#else
3218 alpha = 1.0_dp
3219 IF (fm_matrix%use_sp) THEN
3220 CALL scopy(neig*n, a_sp(1, 1), 1, outm_sp(1, 1), 1)
3221 ELSE
3222 CALL dcopy(neig*n, a(1, 1), 1, outm(1, 1), 1)
3223 END IF
3224 IF (op .EQ. "SOLVE") THEN
3225 IF (fm_matrix%use_sp) THEN
3226 CALL strsm(pos, 'U', transa, 'N', n, neig, real(alpha, sp), b_sp(1, 1), SIZE(b_sp, 1), outm_sp(1, 1), n)
3227 ELSE
3228 CALL dtrsm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), SIZE(b, 1), outm(1, 1), n)
3229 END IF
3230 ELSE
3231 IF (fm_matrix%use_sp) THEN
3232 CALL strmm(pos, 'U', transa, 'N', n, neig, real(alpha, sp), b_sp(1, 1), n, outm_sp(1, 1), n)
3233 ELSE
3234 CALL dtrmm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), n, outm(1, 1), n)
3235 END IF
3236 END IF
3237#endif
3238
3239 END SUBROUTINE cp_fm_cholesky_restore
3240
3241END MODULE cp_fm_basic_linalg
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
methods related to the blacs parallel environment
basic linear algebra operations for full matrices
subroutine, public cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
subroutine, public cp_fm_row_scale(matrixa, scaling)
scales row i of matrix a with scaling(i)
subroutine, public cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
subroutine, public cp_fm_column_scale(matrixa, scaling)
scales column i of matrix a with scaling(i)
subroutine, public cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
subroutine, public cp_fm_potrf(fm_matrix, n)
Cholesky decomposition.
subroutine, public cp_fm_solve(matrix_a, general_a)
computes the the solution to A*b=A_general using lu decomposition pay attention, both matrices are ov...
subroutine, public cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
compute a QR factorization with column pivoting of a M-by-N distributed matrix sub( A ) = A(IA:IA+M-1...
real(kind=dp) function, public cp_fm_frobenius_norm(matrix_a)
computes the Frobenius norm of matrix_a
subroutine, public cp_fm_det(matrix_a, det_a)
Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix.
subroutine, public cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col)
performs a QR factorization of the input rectangular matrix A or of a submatrix of A the computed upp...
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_gram_schmidt_orthonorm(matrix_a, b, nrows, ncols, start_row, start_col, do_norm, do_print)
Orthonormalizes selected rows and columns of a full matrix, matrix_a.
subroutine, public cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
performs a rank-k update of a symmetric matrix_c matrix_c = beta * matrix_c + alpha * matrix_a * tran...
subroutine, public cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
interface to BLACS geadd: matrix_b = beta*matrix_b + alpha*opt(matrix_a) where opt(matrix_a) can be e...
subroutine, public cp_fm_upper_to_full(matrix, work)
given an upper triangular matrix computes the corresponding full matrix
subroutine, public cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
computes the schur product of two matrices c_ij = a_ij * b_ij
real(kind=dp) function, public cp_fm_norm(matrix, mode)
norm of matrix using (p)dlange
subroutine, public cp_fm_cholesky_restore(fm_matrix, neig, fm_matrixb, fm_matrixout, op, pos, transa)
...
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix.
subroutine, public cp_complex_fm_gemm(transa, transb, m, n, k, alpha, a_re, a_im, b_re, b_im, beta, c_re, c_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
Convenience function. Computes the matrix multiplications needed for the multiplication of complex ma...
subroutine, public cp_fm_scale(alpha, matrix_a)
scales a matrix matrix_a = alpha * matrix_b
subroutine, public cp_fm_triangular_invert(matrix_a, uplo_tr)
inverts a triangular matrix
subroutine, public cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
computes matrix_c = beta * matrix_c + alpha * matrix_a * matrix_b computes matrix_c = beta * matrix_c...
subroutine, public cp_fm_potri(fm_matrix, n)
Invert trianguar matrix.
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
subroutine, public cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1) with orthonormal column...
represent the structure of a full matrix
logical function, public cp_fm_struct_equivalent(fmstruct1, fmstruct2)
returns true if the two matrix structures are equivalent, false otherwise.
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_get_diag(matrix, diag)
returns the diagonal elements of a fm
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_set_element(matrix, irow_global, icol_global, alpha)
sets an element of a matrix
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Definition kahan_sum.F:29
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public int_8
Definition kinds.F:54
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public sp
Definition kinds.F:33
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
subroutine, public m_memory(mem)
Returns the total amount of memory [bytes] in use, if known, zero otherwise.
Definition machine.F:332
Collection of simple mathematical functions and subroutines.
Definition mathlib.F:15
subroutine, public get_pseudo_inverse_svd(a, a_pinverse, rskip, determinant, sval)
returns the pseudoinverse of a real, square matrix using singular value decomposition
Definition mathlib.F:949
subroutine, public diag(n, a, d, v)
Diagonalize matrix a. The eigenvalues are returned in vector d and the eigenvectors are returned in m...
Definition mathlib.F:1507
Interface to the message passing library MPI.
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
just to build arrays of pointers to matrices
represent a full matrix