(git:9ea9339)
Loading...
Searching...
No Matches
cp_fm_basic_linalg.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Basic linear algebra operations for full matrices.
10!> \par History
11!> 08.2002 split out of qs_blacs [fawzi]
12!> \author Fawzi Mohamed
13! **************************************************************************************************
17 USE cp_fm_types, ONLY: &
25 USE kinds, ONLY: dp, &
26 int_8, &
27 sp
28 USE machine, ONLY: m_memory
32#if defined (__HAS_IEEE_EXCEPTIONS)
33 USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
34 ieee_set_halting_mode, &
35 ieee_all
36#endif
37#include "../base/base_uses.f90"
38
39 IMPLICIT NONE
40 PRIVATE
41
42 LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .true.
43 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_basic_linalg'
44
45 PUBLIC :: cp_fm_scale, & ! scale a matrix
46 cp_fm_scale_and_add, & ! scale and add two matrices
47 cp_fm_geadd, & ! general addition
48 cp_fm_add_columns, & ! daxpy type column add
49 cp_fm_column_scale, & ! scale columns of a matrix
50 cp_fm_row_scale, & ! scale rows of a matrix
51 cp_fm_trace, & ! trace of the transpose(A)*B
52 cp_fm_contracted_trace, & ! sum_{i,...,k} Tr [A(i,...,k)^T * B(i,...,k)]
53 cp_fm_norm, & ! different norms of A
54 cp_fm_schur_product, & ! schur product
55 cp_fm_transpose, & ! transpose a matrix
56 cp_fm_uplo_to_full, & ! symmetrise a triangular matrix
57 cp_fm_syrk, & ! rank k update
58 cp_fm_triangular_multiply, & ! triangular matrix multiply / solve
59 cp_fm_symm, & ! multiply a symmetric with a non-symmetric matrix
60 cp_fm_gemm, & ! multiply two matrices
61 cp_complex_fm_gemm, & ! multiply two complex matrices, represented by non_complex fm matrices
62 cp_fm_invert, & ! computes the inverse and determinant
63 cp_fm_frobenius_norm, & ! frobenius norm
64 cp_fm_triangular_invert, & ! compute the reciprocal of a triangular matrix
65 cp_fm_qr_factorization, & ! compute the QR factorization of a rectangular matrix
66 cp_fm_solve, & ! solves the equation A*B=C A and C are input
67 cp_fm_pdgeqpf, & ! compute a QR factorization with column pivoting of a M-by-N distributed matrix
68 cp_fm_pdorgqr, & ! generates an M-by-N as first N columns of a product of K elementary reflectors
69 cp_fm_potrf, & ! Cholesky decomposition
70 cp_fm_potri, & ! Invert triangular matrix
71 cp_fm_rot_rows, & ! rotates two rows
72 cp_fm_rot_cols, & ! rotates two columns
73 cp_fm_gram_schmidt_orthonorm, & ! Gram-Schmidt orthonormalization of columns of a full matrix, &
74 cp_fm_det, & ! determinant of a real matrix with correct sign
75 cp_fm_matvec ! matrix-vector multiplication (vector replicated)
76
77 REAL(KIND=dp), EXTERNAL :: dlange, pdlange, pdlatra
78 REAL(KIND=sp), EXTERNAL :: slange, pslange, pslatra
79
80 INTERFACE cp_fm_trace
81 MODULE PROCEDURE cp_fm_trace_a0b0t0
82 MODULE PROCEDURE cp_fm_trace_a1b0t1_a
83 MODULE PROCEDURE cp_fm_trace_a1b0t1_p
84 MODULE PROCEDURE cp_fm_trace_a1b1t1_aa
85 MODULE PROCEDURE cp_fm_trace_a1b1t1_ap
86 MODULE PROCEDURE cp_fm_trace_a1b1t1_pa
87 MODULE PROCEDURE cp_fm_trace_a1b1t1_pp
88 END INTERFACE cp_fm_trace
89
91 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_aa
92 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_ap
93 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pa
94 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pp
95 END INTERFACE cp_fm_contracted_trace
96CONTAINS
97
98! **************************************************************************************************
99!> \brief Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix
100!> \author A. Sinyavskiy (andrey.sinyavskiy@chem.uzh.ch)
101! **************************************************************************************************
102 SUBROUTINE cp_fm_det(matrix_a, det_a)
103
104 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
105 REAL(kind=dp), INTENT(OUT) :: det_a
106 REAL(kind=dp) :: determinant
107 TYPE(cp_fm_type) :: matrix_lu
108 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
109 INTEGER :: n, i, info, p
110 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
111 REAL(kind=dp), DIMENSION(:), POINTER :: diag
112
113#if defined(__parallel)
114 INTEGER :: myprow, nprow, npcol, nrow_local, nrow_block, irow_local
115 INTEGER, DIMENSION(9) :: desca
116#endif
117
118 CALL cp_fm_create(matrix=matrix_lu, &
119 matrix_struct=matrix_a%matrix_struct, &
120 name="A_lu"//trim(adjustl(cp_to_string(1)))//"MATRIX")
121 CALL cp_fm_to_fm(matrix_a, matrix_lu)
122
123 a => matrix_lu%local_data
124 n = matrix_lu%matrix_struct%nrow_global
125 ALLOCATE (ipivot(n))
126 ipivot(:) = 0
127 p = 0
128 ALLOCATE (diag(n))
129 diag(:) = 0.0_dp
130#if defined(__parallel)
131 ! Use LU decomposition
132 desca(:) = matrix_lu%matrix_struct%descriptor(:)
133 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
134 CALL cp_fm_get_diag(matrix_lu, diag)
135 determinant = product(diag)
136 myprow = matrix_lu%matrix_struct%context%mepos(1)
137 nprow = matrix_lu%matrix_struct%context%num_pe(1)
138 npcol = matrix_lu%matrix_struct%context%num_pe(2)
139 nrow_local = matrix_lu%matrix_struct%nrow_locals(myprow)
140 nrow_block = matrix_lu%matrix_struct%nrow_block
141 DO irow_local = 1, nrow_local
142 i = matrix_lu%matrix_struct%row_indices(irow_local)
143 IF (ipivot(irow_local) /= i) p = p + 1
144 END DO
145 CALL matrix_lu%matrix_struct%para_env%sum(p)
146 ! very important fix
147 p = p/npcol
148#else
149 CALL dgetrf(n, n, a, n, ipivot, info)
150 CALL cp_fm_get_diag(matrix_lu, diag)
151 determinant = product(diag)
152 DO i = 1, n
153 IF (ipivot(i) /= i) p = p + 1
154 END DO
155#endif
156 DEALLOCATE (ipivot)
157 DEALLOCATE (diag)
158 CALL cp_fm_release(matrix_lu)
159 det_a = determinant*(-2*mod(p, 2) + 1.0_dp)
160 END SUBROUTINE cp_fm_det
161
162! **************************************************************************************************
163!> \brief calc A <- alpha*A + beta*B
164!> optimized for alpha == 1.0 (just add beta*B) and beta == 0.0 (just
165!> scale A)
166!> \param alpha ...
167!> \param matrix_a ...
168!> \param beta ...
169!> \param matrix_b ...
170! **************************************************************************************************
171 SUBROUTINE cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
172
173 REAL(kind=dp), INTENT(IN) :: alpha
174 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
175 REAL(kind=dp), INTENT(IN), OPTIONAL :: beta
176 TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: matrix_b
177
178 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_scale_and_add'
179
180 INTEGER :: handle, size_a, size_b
181 REAL(kind=dp) :: my_beta
182 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
183 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
184
185 CALL timeset(routinen, handle)
186
187 my_beta = 0.0_dp
188 IF (PRESENT(matrix_b)) my_beta = 1.0_dp
189 IF (PRESENT(beta)) my_beta = beta
190 NULLIFY (a, b)
191
192 IF (PRESENT(beta)) THEN
193 cpassert(PRESENT(matrix_b))
194 IF (ASSOCIATED(matrix_a%local_data, matrix_b%local_data)) THEN
195 cpwarn("Bad use of routine. Call cp_fm_scale instead")
196 CALL cp_fm_scale(alpha + beta, matrix_a)
197 CALL timestop(handle)
198 RETURN
199 END IF
200 END IF
201
202 a => matrix_a%local_data
203 a_sp => matrix_a%local_data_sp
204
205 IF (matrix_a%use_sp) THEN
206 size_a = SIZE(a_sp, 1)*SIZE(a_sp, 2)
207 ELSE
208 size_a = SIZE(a, 1)*SIZE(a, 2)
209 END IF
210
211 IF (alpha /= 1.0_dp) THEN
212 IF (matrix_a%use_sp) THEN
213 CALL sscal(size_a, real(alpha, sp), a_sp, 1)
214 ELSE
215 CALL dscal(size_a, alpha, a, 1)
216 END IF
217 END IF
218 IF (my_beta /= 0.0_dp) THEN
219 IF (matrix_a%matrix_struct%context /= matrix_b%matrix_struct%context) &
220 cpabort("Matrices must be in the same blacs context")
221
222 IF (cp_fm_struct_equivalent(matrix_a%matrix_struct, &
223 matrix_b%matrix_struct)) THEN
224
225 b => matrix_b%local_data
226 b_sp => matrix_b%local_data_sp
227 IF (matrix_b%use_sp) THEN
228 size_b = SIZE(b_sp, 1)*SIZE(b_sp, 2)
229 ELSE
230 size_b = SIZE(b, 1)*SIZE(b, 2)
231 END IF
232 IF (size_a /= size_b) &
233 cpabort("Matrices must have same local sizes")
234
235 IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
236 CALL saxpy(size_a, real(my_beta, sp), b_sp, 1, a_sp, 1)
237 ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
238 CALL saxpy(size_a, real(my_beta, sp), real(b, sp), 1, a_sp, 1)
239 ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
240 CALL daxpy(size_a, my_beta, real(b_sp, dp), 1, a, 1)
241 ELSE
242 CALL daxpy(size_a, my_beta, b, 1, a, 1)
243 END IF
244
245 ELSE
246#ifdef __parallel
247 cpabort("to do (pdscal,pdcopy,pdaxpy)")
248#else
249 cpabort("")
250#endif
251 END IF
252
253 END IF
254
255 CALL timestop(handle)
256
257 END SUBROUTINE cp_fm_scale_and_add
258
259! **************************************************************************************************
260!> \brief interface to BLACS geadd:
261!> matrix_b = beta*matrix_b + alpha*opt(matrix_a)
262!> where opt(matrix_a) can be either:
263!> 'N': matrix_a
264!> 'T': matrix_a^T
265!> 'C': matrix_a^H (Hermitian conjugate)
266!> note that this is a level three routine, use cp_fm_scale_and_add if that
267!> is sufficient for your needs
268!> \param alpha : complex scalar
269!> \param trans : 'N' normal, 'T' transposed
270!> \param matrix_a : input matrix_a
271!> \param beta : complex scalar
272!> \param matrix_b : input matrix_b, upon out put the updated matrix_b
273!> \author Lianheng Tong
274! **************************************************************************************************
275 SUBROUTINE cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
276 REAL(kind=dp), INTENT(IN) :: alpha, beta
277 CHARACTER, INTENT(IN) :: trans
278 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
279
280 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_geadd'
281
282 INTEGER :: nrow_global, ncol_global, handle
283 REAL(kind=dp), DIMENSION(:, :), POINTER :: aa, bb
284#if defined(__parallel)
285 INTEGER, DIMENSION(9) :: desca, descb
286#elif !defined(__MKL)
287 INTEGER :: ii, jj
288#endif
289
290 CALL timeset(routinen, handle)
291
292 nrow_global = matrix_a%matrix_struct%nrow_global
293 ncol_global = matrix_a%matrix_struct%ncol_global
294 cpassert(nrow_global == matrix_b%matrix_struct%nrow_global)
295 cpassert(ncol_global == matrix_b%matrix_struct%ncol_global)
296
297 aa => matrix_a%local_data
298 bb => matrix_b%local_data
299
300#if defined(__parallel)
301 desca = matrix_a%matrix_struct%descriptor
302 descb = matrix_b%matrix_struct%descriptor
303 CALL pdgeadd(trans, &
304 nrow_global, &
305 ncol_global, &
306 alpha, &
307 aa, &
308 1, 1, &
309 desca, &
310 beta, &
311 bb, &
312 1, 1, &
313 descb)
314#elif defined(__MKL)
315 CALL mkl_domatadd('C', trans, 'N', nrow_global, ncol_global, &
316 alpha, aa, nrow_global, beta, bb, nrow_global, bb, nrow_global)
317#else
318 ! dgeadd is not a standard BLAS function, although it is implemented
319 ! in some libraries like OpenBLAS, so not going to use it here
320 SELECT CASE (trans)
321 CASE ('T')
322 DO jj = 1, ncol_global
323 DO ii = 1, nrow_global
324 bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(jj, ii)
325 END DO
326 END DO
327 CASE DEFAULT
328 DO jj = 1, ncol_global
329 DO ii = 1, nrow_global
330 bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(ii, jj)
331 END DO
332 END DO
333 END SELECT
334#endif
335
336 CALL timestop(handle)
337
338 END SUBROUTINE cp_fm_geadd
339
340! **************************************************************************************************
341!> \brief Add (and scale) a subset of columns of a fm to a fm
342!> b = alpha*a + b
343!> \param msource ...
344!> \param mtarget ...
345!> \param ncol ...
346!> \param alpha ...
347!> \param source_start ...
348!> \param target_start ...
349! **************************************************************************************************
350 SUBROUTINE cp_fm_add_columns(msource, mtarget, ncol, alpha, source_start, target_start)
351
352 TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
353 INTEGER, INTENT(IN) :: ncol
354 REAL(kind=dp), INTENT(IN), OPTIONAL :: alpha
355 INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start
356
357 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_add_columns'
358
359 INTEGER :: handle, n, ss, ts, i
360 REAL(kind=dp) :: fscale
361 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
362#if defined(__parallel)
363 INTEGER, DIMENSION(9) :: desca, descb
364#endif
365
366 CALL timeset(routinen, handle)
367
368 ss = 1
369 ts = 1
370
371 IF (PRESENT(source_start)) ss = source_start
372 IF (PRESENT(target_start)) ts = target_start
373
374 fscale = 1.0_dp
375 IF (PRESENT(alpha)) fscale = alpha
376
377 n = msource%matrix_struct%nrow_global
378
379 a => msource%local_data
380 b => mtarget%local_data
381
382#if defined(__parallel)
383 mark_used(i)
384 desca(:) = msource%matrix_struct%descriptor(:)
385 descb(:) = mtarget%matrix_struct%descriptor(:)
386 CALL pdgeadd("N", n, ncol, fscale, a, 1, ss, desca, 1.0_dp, b, 1, ts, descb)
387#else
388 DO i = 0, ncol - 1
389 b(1:n, ts + i) = b(1:n, ts + i) + fscale*a(1:n, ss + i)
390 END DO
391#endif
392
393 CALL timestop(handle)
394
395 END SUBROUTINE cp_fm_add_columns
396
397! **************************************************************************************************
398!> \brief Computes the LU-decomposition of the matrix, and the determinant of the matrix
399!> IMPORTANT : the sign of the determinant is not defined correctly yet ....
400!> \param matrix_a ...
401!> \param almost_determinant ...
402!> \param correct_sign ...
403!> \par History
404!> added correct_sign 02.07 (fschiff)
405!> \author Joost VandeVondele
406!> \note
407!> - matrix_a is overwritten
408!> - the sign of the determinant might be wrong
409!> - SERIOUS WARNING (KNOWN BUG) : the sign of the determinant depends on ipivot
410!> - one should be able to find out if ipivot is an even or an odd permutation...
411!> if you need the correct sign, just add correct_sign==.TRUE. (fschiff)
412!> - Use cp_fm_get_diag instead of n times cp_fm_get_element (A. Bussy)
413! **************************************************************************************************
414 SUBROUTINE cp_fm_lu_decompose(matrix_a, almost_determinant, correct_sign)
415 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
416 REAL(kind=dp), INTENT(OUT) :: almost_determinant
417 LOGICAL, INTENT(IN), OPTIONAL :: correct_sign
418
419 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_lu_decompose'
420
421 INTEGER :: handle, i, info, n
422 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
423 REAL(kind=dp) :: determinant
424 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
425#if defined(__parallel)
426 INTEGER, DIMENSION(9) :: desca
427 REAL(kind=dp), DIMENSION(:), POINTER :: diag
428#else
429 INTEGER :: lda
430#endif
431
432 CALL timeset(routinen, handle)
433
434 a => matrix_a%local_data
435 n = matrix_a%matrix_struct%nrow_global
436 ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
437
438#if defined(__parallel)
439 mark_used(correct_sign)
440 desca(:) = matrix_a%matrix_struct%descriptor(:)
441 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
442
443 ALLOCATE (diag(n))
444 diag(:) = 0.0_dp
445 CALL cp_fm_get_diag(matrix_a, diag)
446 determinant = 1.0_dp
447 DO i = 1, n
448 determinant = determinant*diag(i)
449 END DO
450 DEALLOCATE (diag)
451#else
452 lda = SIZE(a, 1)
453 CALL dgetrf(n, n, a, lda, ipivot, info)
454 determinant = 1.0_dp
455 IF (correct_sign) THEN
456 DO i = 1, n
457 IF (ipivot(i) /= i) THEN
458 determinant = -determinant*a(i, i)
459 ELSE
460 determinant = determinant*a(i, i)
461 END IF
462 END DO
463 ELSE
464 DO i = 1, n
465 determinant = determinant*a(i, i)
466 END DO
467 END IF
468#endif
469 ! info is allowed to be zero
470 ! this does just signal a zero diagonal element
471 DEALLOCATE (ipivot)
472 almost_determinant = determinant ! notice that the sign is random
473 CALL timestop(handle)
474 END SUBROUTINE cp_fm_lu_decompose
475
476! **************************************************************************************************
477!> \brief computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
478!> \param transa : 'N' -> normal 'T' -> transpose
479!> alpha,beta :: can be 0.0_dp and 1.0_dp
480!> \param transb ...
481!> \param m ...
482!> \param n ...
483!> \param k ...
484!> \param alpha ...
485!> \param matrix_a : m x k matrix ( ! for transa = 'N')
486!> \param matrix_b : k x n matrix ( ! for transb = 'N')
487!> \param beta ...
488!> \param matrix_c : m x n matrix
489!> \param a_first_col ...
490!> \param a_first_row ...
491!> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
492!> \param b_first_row ...
493!> \param c_first_col ...
494!> \param c_first_row ...
495!> \author Matthias Krack
496!> \note
497!> matrix_c should have no overlap with matrix_a, matrix_b
498! **************************************************************************************************
499 SUBROUTINE cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, &
500 matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, &
501 c_first_col, c_first_row)
502
503 CHARACTER(LEN=1), INTENT(IN) :: transa, transb
504 INTEGER, INTENT(IN) :: m, n, k
505 REAL(kind=dp), INTENT(IN) :: alpha
506 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
507 REAL(kind=dp), INTENT(IN) :: beta
508 TYPE(cp_fm_type), INTENT(IN) :: matrix_c
509 INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, &
510 b_first_col, b_first_row, &
511 c_first_col, c_first_row
512
513 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_gemm'
514
515 INTEGER :: handle, i_a, i_b, i_c, j_a, &
516 j_b, j_c
517 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, c
518 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp, c_sp
519#if defined(__parallel)
520 INTEGER, DIMENSION(9) :: desca, descb, descc
521#else
522 INTEGER :: lda, ldb, ldc
523#endif
524
525 CALL timeset(routinen, handle)
526
527 !sample peak memory
528 CALL m_memory()
529
530 a => matrix_a%local_data
531 b => matrix_b%local_data
532 c => matrix_c%local_data
533
534 a_sp => matrix_a%local_data_sp
535 b_sp => matrix_b%local_data_sp
536 c_sp => matrix_c%local_data_sp
537
538 i_a = 1
539 IF (PRESENT(a_first_row)) i_a = a_first_row
540
541 j_a = 1
542 IF (PRESENT(a_first_col)) j_a = a_first_col
543
544 i_b = 1
545 IF (PRESENT(b_first_row)) i_b = b_first_row
546
547 j_b = 1
548 IF (PRESENT(b_first_col)) j_b = b_first_col
549
550 i_c = 1
551 IF (PRESENT(c_first_row)) i_c = c_first_row
552
553 j_c = 1
554 IF (PRESENT(c_first_col)) j_c = c_first_col
555
556#if defined(__parallel)
557
558 desca(:) = matrix_a%matrix_struct%descriptor(:)
559 descb(:) = matrix_b%matrix_struct%descriptor(:)
560 descc(:) = matrix_c%matrix_struct%descriptor(:)
561
562 IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
563
564 CALL psgemm(transa, transb, m, n, k, real(alpha, sp), a_sp(1, 1), i_a, j_a, desca, b_sp(1, 1), i_b, j_b, &
565 descb, real(beta, sp), c_sp(1, 1), i_c, j_c, descc)
566
567 ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
568
569 CALL pdgemm(transa, transb, m, n, k, alpha, a, i_a, j_a, desca, b, i_b, j_b, &
570 descb, beta, c, i_c, j_c, descc)
571
572 ELSE
573 cpabort("Mixed precision gemm NYI")
574 END IF
575#else
576
577 IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
578
579 lda = SIZE(a_sp, 1)
580 ldb = SIZE(b_sp, 1)
581 ldc = SIZE(c_sp, 1)
582
583 CALL sgemm(transa, transb, m, n, k, real(alpha, sp), a_sp(i_a, j_a), lda, b_sp(i_b, j_b), ldb, &
584 REAL(beta, sp), c_sp(i_c, j_c), ldc)
585
586 ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
587
588 lda = SIZE(a, 1)
589 ldb = SIZE(b, 1)
590 ldc = SIZE(c, 1)
591
592 CALL dgemm(transa, transb, m, n, k, alpha, a(i_a, j_a), lda, b(i_b, j_b), ldb, beta, c(i_c, j_c), ldc)
593
594 ELSE
595 cpabort("Mixed precision gemm NYI")
596 END IF
597
598#endif
599 CALL timestop(handle)
600
601 END SUBROUTINE cp_fm_gemm
602
603! **************************************************************************************************
604!> \brief computes matrix_c = beta * matrix_c + alpha * matrix_a * matrix_b
605!> computes matrix_c = beta * matrix_c + alpha * matrix_b * matrix_a
606!> where matrix_a is symmetric
607!> \param side : 'L' -> matrix_a is on the left 'R' -> matrix_a is on the right
608!> alpha,beta :: can be 0.0_dp and 1.0_dp
609!> \param uplo triangular format
610!> \param m ...
611!> \param n ...
612!> \param alpha ...
613!> \param matrix_a : m x m matrix
614!> \param matrix_b : m x n matrix
615!> \param beta ...
616!> \param matrix_c : m x n matrix
617!> \author Matthias Krack
618!> \note
619!> matrix_c should have no overlap with matrix_a, matrix_b
620!> all matrices in QS are triangular according to uplo
621!> matrix_a is always an m x m matrix
622!> typically slower than cp_fm_gemm (especially in parallel easily 50 percent)
623! **************************************************************************************************
624 SUBROUTINE cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
625
626 CHARACTER(LEN=1), INTENT(IN) :: side, uplo
627 INTEGER, INTENT(IN) :: m, n
628 REAL(kind=dp), INTENT(IN) :: alpha
629 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
630 REAL(kind=dp), INTENT(IN) :: beta
631 TYPE(cp_fm_type), INTENT(IN) :: matrix_c
632
633 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_symm'
634
635 INTEGER :: handle
636 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, c
637#if defined(__parallel)
638 INTEGER, DIMENSION(9) :: desca, descb, descc
639#else
640 INTEGER :: lda, ldb, ldc
641#endif
642
643 CALL timeset(routinen, handle)
644
645 a => matrix_a%local_data
646 b => matrix_b%local_data
647 c => matrix_c%local_data
648
649#if defined(__parallel)
650
651 desca(:) = matrix_a%matrix_struct%descriptor(:)
652 descb(:) = matrix_b%matrix_struct%descriptor(:)
653 descc(:) = matrix_c%matrix_struct%descriptor(:)
654
655 CALL pdsymm(side, uplo, m, n, alpha, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, beta, c(1, 1), 1, 1, descc)
656
657#else
658
659 lda = matrix_a%matrix_struct%local_leading_dimension
660 ldb = matrix_b%matrix_struct%local_leading_dimension
661 ldc = matrix_c%matrix_struct%local_leading_dimension
662
663 CALL dsymm(side, uplo, m, n, alpha, a(1, 1), lda, b(1, 1), ldb, beta, c(1, 1), ldc)
664
665#endif
666 CALL timestop(handle)
667
668 END SUBROUTINE cp_fm_symm
669
670! **************************************************************************************************
671!> \brief computes the Frobenius norm of matrix_a
672!> \brief computes the Frobenius norm of matrix_a
673!> \param matrix_a : m x n matrix
674!> \return ...
675!> \author VW
676! **************************************************************************************************
677 FUNCTION cp_fm_frobenius_norm(matrix_a) RESULT(norm)
678 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
679 REAL(kind=dp) :: norm
680
681 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_frobenius_norm'
682
683 INTEGER :: handle, size_a
684 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
685 REAL(kind=dp), EXTERNAL :: ddot
686#if defined(__parallel)
687 TYPE(mp_comm_type) :: group
688#endif
689
690 CALL timeset(routinen, handle)
691
692 norm = 0.0_dp
693 a => matrix_a%local_data
694 size_a = SIZE(a, 1)*SIZE(a, 2)
695 norm = ddot(size_a, a(1, 1), 1, a(1, 1), 1)
696#if defined(__parallel)
697 group = matrix_a%matrix_struct%para_env
698 CALL group%sum(norm)
699#endif
700 norm = sqrt(norm)
701
702 CALL timestop(handle)
703
704 END FUNCTION cp_fm_frobenius_norm
705
706! **************************************************************************************************
707!> \brief performs a rank-k update of a symmetric matrix_c
708!> matrix_c = beta * matrix_c + alpha * matrix_a * transpose ( matrix_a )
709!> \param uplo : 'U' ('L')
710!> \param trans : 'N' ('T')
711!> \param k : number of cols to use in matrix_a
712!> ia,ja :: 1,1 (could be used for selecting subblock of a)
713!> \param alpha ...
714!> \param matrix_a ...
715!> \param ia ...
716!> \param ja ...
717!> \param beta ...
718!> \param matrix_c ...
719!> \author Matthias Krack
720! **************************************************************************************************
721 SUBROUTINE cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
722 CHARACTER(LEN=1), INTENT(IN) :: uplo, trans
723 INTEGER, INTENT(IN) :: k
724 REAL(kind=dp), INTENT(IN) :: alpha
725 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
726 INTEGER, INTENT(IN) :: ia, ja
727 REAL(kind=dp), INTENT(IN) :: beta
728 TYPE(cp_fm_type), INTENT(IN) :: matrix_c
729
730 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_syrk'
731
732 INTEGER :: handle, n
733 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, c
734#if defined(__parallel)
735 INTEGER, DIMENSION(9) :: desca, descc
736#else
737 INTEGER :: lda, ldc
738#endif
739#if defined (__HAS_IEEE_EXCEPTIONS)
740 LOGICAL, DIMENSION(5) :: halt
741#endif
742
743 CALL timeset(routinen, handle)
744
745 n = matrix_c%matrix_struct%nrow_global
746
747 a => matrix_a%local_data
748 c => matrix_c%local_data
749
750#if defined (__HAS_IEEE_EXCEPTIONS)
751 CALL ieee_get_halting_mode(ieee_all, halt)
752 CALL ieee_set_halting_mode(ieee_all, .false.)
753#endif
754#if defined(__parallel)
755 desca(:) = matrix_a%matrix_struct%descriptor(:)
756 descc(:) = matrix_c%matrix_struct%descriptor(:)
757
758 CALL pdsyrk(uplo, trans, n, k, alpha, a(1, 1), ia, ja, desca, beta, c(1, 1), 1, 1, descc)
759#else
760 lda = SIZE(a, 1)
761 ldc = SIZE(c, 1)
762
763 CALL dsyrk(uplo, trans, n, k, alpha, a(ia, ja), lda, beta, c(1, 1), ldc)
764#endif
765#if defined (__HAS_IEEE_EXCEPTIONS)
766 CALL ieee_set_halting_mode(ieee_all, halt)
767#endif
768 CALL timestop(handle)
769
770 END SUBROUTINE cp_fm_syrk
771
772! **************************************************************************************************
773!> \brief computes the schur product of two matrices
774!> c_ij = a_ij * b_ij
775!> \param matrix_a ...
776!> \param matrix_b ...
777!> \param matrix_c ...
778!> \author Joost VandeVondele
779! **************************************************************************************************
780 SUBROUTINE cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
781
782 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b, matrix_c
783
784 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_schur_product'
785
786 INTEGER :: handle, icol_local, irow_local, mypcol, &
787 myprow, ncol_local, &
788 nrow_local
789 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, c
790 TYPE(cp_blacs_env_type), POINTER :: context
791
792 CALL timeset(routinen, handle)
793
794 context => matrix_a%matrix_struct%context
795 myprow = context%mepos(1)
796 mypcol = context%mepos(2)
797
798 a => matrix_a%local_data
799 b => matrix_b%local_data
800 c => matrix_c%local_data
801
802 nrow_local = matrix_a%matrix_struct%nrow_locals(myprow)
803 ncol_local = matrix_a%matrix_struct%ncol_locals(mypcol)
804
805 DO icol_local = 1, ncol_local
806 DO irow_local = 1, nrow_local
807 c(irow_local, icol_local) = a(irow_local, icol_local)*b(irow_local, icol_local)
808 END DO
809 END DO
810
811 CALL timestop(handle)
812
813 END SUBROUTINE cp_fm_schur_product
814
815! **************************************************************************************************
816!> \brief returns the trace of matrix_a^T matrix_b, i.e
817!> sum_{i,j}(matrix_a(i,j)*matrix_b(i,j))
818!> \param matrix_a a matrix
819!> \param matrix_b another matrix
820!> \param trace ...
821!> \par History
822!> 11.06.2001 Creation (Matthias Krack)
823!> 12.2002 added doc [fawzi]
824!> \author Matthias Krack
825!> \note
826!> note the transposition of matrix_a!
827! **************************************************************************************************
828 SUBROUTINE cp_fm_trace_a0b0t0(matrix_a, matrix_b, trace)
829
830 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
831 REAL(KIND=dp), INTENT(OUT) :: trace
832
833 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a0b0t0'
834
835 INTEGER :: handle, mypcol, myprow, ncol_local, &
836 nrow_local
837 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
838 REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
839 TYPE(cp_blacs_env_type), POINTER :: context
840 TYPE(mp_comm_type) :: group
841
842 CALL timeset(routinen, handle)
843
844 context => matrix_a%matrix_struct%context
845 myprow = context%mepos(1)
846 mypcol = context%mepos(2)
847
848 group = matrix_a%matrix_struct%para_env
849
850 a => matrix_a%local_data
851 b => matrix_b%local_data
852
853 a_sp => matrix_a%local_data_sp
854 b_sp => matrix_b%local_data_sp
855
856 nrow_local = min(matrix_a%matrix_struct%nrow_locals(myprow), matrix_b%matrix_struct%nrow_locals(myprow))
857 ncol_local = min(matrix_a%matrix_struct%ncol_locals(mypcol), matrix_b%matrix_struct%ncol_locals(mypcol))
858
859 ! cries for an accurate_dot_product
860 IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
861 trace = accurate_sum(real(a_sp(1:nrow_local, 1:ncol_local)* &
862 b_sp(1:nrow_local, 1:ncol_local), dp))
863 ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
864 trace = accurate_sum(real(a_sp(1:nrow_local, 1:ncol_local), dp)* &
865 b(1:nrow_local, 1:ncol_local))
866 ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
867 trace = accurate_sum(a(1:nrow_local, 1:ncol_local)* &
868 REAL(b_sp(1:nrow_local, 1:ncol_local), dp))
869 ELSE
870 trace = accurate_dot_product(a(1:nrow_local, 1:ncol_local), &
871 b(1:nrow_local, 1:ncol_local))
872 END IF
873
874 CALL group%sum(trace)
875
876 CALL timestop(handle)
877
878 END SUBROUTINE cp_fm_trace_a0b0t0
879
880
881! **************************************************************************************************
882!> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b) for each pair of matrices A_k and B.
883!> \param matrix_a list of A matrices
884!> \param matrix_b B matrix
885!> \param trace computed traces
886!> \par History
887!> * 08.2018 forked from cp_fm_trace() [Sergey Chulkov]
888!> \note \parblock
889!> Computing the trace requires collective communication between involved MPI processes
890!> that implies a synchronisation point between them. The aim of this subroutine is to reduce
891!> the amount of time wasted in such synchronisation by performing one large collective
892!> operation which involves all the matrices in question.
893!>
894!> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b0t1' means that
895!> the dummy variables 'matrix_a' and 'trace' are 1-dimensional arrays, while the variable
896!> 'matrix_b' is a single matrix.
897!> \endparblock
898! **************************************************************************************************
899 SUBROUTINE cp_fm_trace_a1b0t1_a (matrix_a, matrix_b, trace)
900 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_a
901 TYPE(cp_fm_type), INTENT(IN) :: matrix_b
902 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
903
904 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_a'
905
906 INTEGER :: handle, imatrix, n_matrices, &
907 ncols_local, nrows_local
908 LOGICAL :: use_sp_a, use_sp_b
909 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
910 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
911 TYPE(mp_comm_type) :: group
912
913 CALL timeset(routinen, handle)
914
915 n_matrices = SIZE(trace)
916 cpassert(SIZE(matrix_a) == n_matrices)
917
918 CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
919 use_sp_b = matrix_b%use_sp
920
921 IF (use_sp_b) THEN
922 ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
923 ELSE
924 ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
925 END IF
926
927!$OMP PARALLEL DO DEFAULT(NONE), &
928!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
929!$OMP SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
930!$OMP SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
931
932 DO imatrix = 1, n_matrices
933
934 use_sp_a = matrix_a(imatrix) %use_sp
935
936 ! assume that the matrices A(i) and B have identical shapes and distribution schemes
937 IF (use_sp_a .AND. use_sp_b) THEN
938 ldata_a_sp => matrix_a(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
939 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
940 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
941 ldata_a => matrix_a(imatrix) %local_data(1:nrows_local, 1:ncols_local)
942 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
943 ELSE
944 cpabort("Matrices A and B are of different types")
945 END IF
946 END DO
947!$OMP END PARALLEL DO
948
949 group = matrix_b%matrix_struct%para_env
950 CALL group%sum(trace)
951
952 CALL timestop(handle)
953 END SUBROUTINE cp_fm_trace_a1b0t1_a
954 SUBROUTINE cp_fm_trace_a1b0t1_p (matrix_a, matrix_b, trace)
955 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_a
956 TYPE(cp_fm_type), INTENT(IN) :: matrix_b
957 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
958
959 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_p'
960
961 INTEGER :: handle, imatrix, n_matrices, &
962 ncols_local, nrows_local
963 LOGICAL :: use_sp_a, use_sp_b
964 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
965 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
966 TYPE(mp_comm_type) :: group
967
968 CALL timeset(routinen, handle)
969
970 n_matrices = SIZE(trace)
971 cpassert(SIZE(matrix_a) == n_matrices)
972
973 CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
974 use_sp_b = matrix_b%use_sp
975
976 IF (use_sp_b) THEN
977 ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
978 ELSE
979 ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
980 END IF
981
982!$OMP PARALLEL DO DEFAULT(NONE), &
983!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
984!$OMP SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
985!$OMP SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
986
987 DO imatrix = 1, n_matrices
988
989 use_sp_a = matrix_a(imatrix) %matrix%use_sp
990
991 ! assume that the matrices A(i) and B have identical shapes and distribution schemes
992 IF (use_sp_a .AND. use_sp_b) THEN
993 ldata_a_sp => matrix_a(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
994 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
995 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
996 ldata_a => matrix_a(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
997 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
998 ELSE
999 cpabort("Matrices A and B are of different types")
1000 END IF
1001 END DO
1002!$OMP END PARALLEL DO
1003
1004 group = matrix_b%matrix_struct%para_env
1005 CALL group%sum(trace)
1006
1007 CALL timestop(handle)
1008 END SUBROUTINE cp_fm_trace_a1b0t1_p
1009
1010! **************************************************************************************************
1011!> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b(k)) for each pair of matrices A_k and B_k.
1012!> \param matrix_a list of A matrices
1013!> \param matrix_b list of B matrices
1014!> \param trace computed traces
1015!> \param accurate ...
1016!> \par History
1017!> * 11.2016 forked from cp_fm_trace() [Sergey Chulkov]
1018!> \note \parblock
1019!> Computing the trace requires collective communication between involved MPI processes
1020!> that implies a synchronisation point between them. The aim of this subroutine is to reduce
1021!> the amount of time wasted in such synchronisation by performing one large collective
1022!> operation which involves all the matrices in question.
1023!>
1024!> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b1t1' means that
1025!> all dummy variables (matrix_a, matrix_b, and trace) are 1-dimensional arrays.
1026!> \endparblock
1027! **************************************************************************************************
1028 SUBROUTINE cp_fm_trace_a1b1t1_aa (matrix_a, matrix_b, trace, accurate)
1029 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_a
1030 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_b
1031 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
1032 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1033
1034 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_aa'
1035
1036 INTEGER :: handle, imatrix, n_matrices, &
1037 ncols_local, nrows_local
1038 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1039 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1040 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1041 TYPE(mp_comm_type) :: group
1042
1043 CALL timeset(routinen, handle)
1044
1045 n_matrices = SIZE(trace)
1046 cpassert(SIZE(matrix_a) == n_matrices)
1047 cpassert(SIZE(matrix_b) == n_matrices)
1048
1049 use_accurate_sum = .true.
1050 IF (PRESENT(accurate)) use_accurate_sum = accurate
1051
1052!$OMP PARALLEL DO DEFAULT(NONE), &
1053!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1054!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1055!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1056 DO imatrix = 1, n_matrices
1057 CALL cp_fm_get_info(matrix_a(imatrix) , nrow_local=nrows_local, ncol_local=ncols_local)
1058
1059 use_sp_a = matrix_a(imatrix) %use_sp
1060 use_sp_b = matrix_b(imatrix) %use_sp
1061
1062 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1063 IF (use_sp_a .AND. use_sp_b) THEN
1064 ldata_a_sp => matrix_a(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1065 ldata_b_sp => matrix_b(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1066 IF (use_accurate_sum) THEN
1067 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1068 ELSE
1069 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1070 END IF
1071 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1072 ldata_a => matrix_a(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1073 ldata_b => matrix_b(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1074 IF (use_accurate_sum) THEN
1075 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1076 ELSE
1077 trace(imatrix) = sum(ldata_a*ldata_b)
1078 END IF
1079 ELSE
1080 cpabort("Matrices A and B are of different types")
1081 END IF
1082 END DO
1083!$OMP END PARALLEL DO
1084
1085 group = matrix_a(1) %matrix_struct%para_env
1086 CALL group%sum(trace)
1087
1088 CALL timestop(handle)
1089 END SUBROUTINE cp_fm_trace_a1b1t1_aa
1090 SUBROUTINE cp_fm_trace_a1b1t1_ap (matrix_a, matrix_b, trace, accurate)
1091 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_a
1092 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_b
1093 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
1094 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1095
1096 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_ap'
1097
1098 INTEGER :: handle, imatrix, n_matrices, &
1099 ncols_local, nrows_local
1100 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1101 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1102 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1103 TYPE(mp_comm_type) :: group
1104
1105 CALL timeset(routinen, handle)
1106
1107 n_matrices = SIZE(trace)
1108 cpassert(SIZE(matrix_a) == n_matrices)
1109 cpassert(SIZE(matrix_b) == n_matrices)
1110
1111 use_accurate_sum = .true.
1112 IF (PRESENT(accurate)) use_accurate_sum = accurate
1113
1114!$OMP PARALLEL DO DEFAULT(NONE), &
1115!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1116!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1117!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1118 DO imatrix = 1, n_matrices
1119 CALL cp_fm_get_info(matrix_a(imatrix) , nrow_local=nrows_local, ncol_local=ncols_local)
1120
1121 use_sp_a = matrix_a(imatrix) %use_sp
1122 use_sp_b = matrix_b(imatrix) %matrix%use_sp
1123
1124 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1125 IF (use_sp_a .AND. use_sp_b) THEN
1126 ldata_a_sp => matrix_a(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1127 ldata_b_sp => matrix_b(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1128 IF (use_accurate_sum) THEN
1129 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1130 ELSE
1131 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1132 END IF
1133 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1134 ldata_a => matrix_a(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1135 ldata_b => matrix_b(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1136 IF (use_accurate_sum) THEN
1137 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1138 ELSE
1139 trace(imatrix) = sum(ldata_a*ldata_b)
1140 END IF
1141 ELSE
1142 cpabort("Matrices A and B are of different types")
1143 END IF
1144 END DO
1145!$OMP END PARALLEL DO
1146
1147 group = matrix_a(1) %matrix_struct%para_env
1148 CALL group%sum(trace)
1149
1150 CALL timestop(handle)
1151 END SUBROUTINE cp_fm_trace_a1b1t1_ap
1152 SUBROUTINE cp_fm_trace_a1b1t1_pa (matrix_a, matrix_b, trace, accurate)
1153 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_a
1154 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_b
1155 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
1156 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1157
1158 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_pa'
1159
1160 INTEGER :: handle, imatrix, n_matrices, &
1161 ncols_local, nrows_local
1162 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1163 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1164 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1165 TYPE(mp_comm_type) :: group
1166
1167 CALL timeset(routinen, handle)
1168
1169 n_matrices = SIZE(trace)
1170 cpassert(SIZE(matrix_a) == n_matrices)
1171 cpassert(SIZE(matrix_b) == n_matrices)
1172
1173 use_accurate_sum = .true.
1174 IF (PRESENT(accurate)) use_accurate_sum = accurate
1175
1176!$OMP PARALLEL DO DEFAULT(NONE), &
1177!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1178!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1179!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1180 DO imatrix = 1, n_matrices
1181 CALL cp_fm_get_info(matrix_a(imatrix) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1182
1183 use_sp_a = matrix_a(imatrix) %matrix%use_sp
1184 use_sp_b = matrix_b(imatrix) %use_sp
1185
1186 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1187 IF (use_sp_a .AND. use_sp_b) THEN
1188 ldata_a_sp => matrix_a(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1189 ldata_b_sp => matrix_b(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1190 IF (use_accurate_sum) THEN
1191 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1192 ELSE
1193 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1194 END IF
1195 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1196 ldata_a => matrix_a(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1197 ldata_b => matrix_b(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1198 IF (use_accurate_sum) THEN
1199 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1200 ELSE
1201 trace(imatrix) = sum(ldata_a*ldata_b)
1202 END IF
1203 ELSE
1204 cpabort("Matrices A and B are of different types")
1205 END IF
1206 END DO
1207!$OMP END PARALLEL DO
1208
1209 group = matrix_a(1) %matrix%matrix_struct%para_env
1210 CALL group%sum(trace)
1211
1212 CALL timestop(handle)
1213 END SUBROUTINE cp_fm_trace_a1b1t1_pa
1214 SUBROUTINE cp_fm_trace_a1b1t1_pp (matrix_a, matrix_b, trace, accurate)
1215 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_a
1216 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_b
1217 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
1218 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1219
1220 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_pp'
1221
1222 INTEGER :: handle, imatrix, n_matrices, &
1223 ncols_local, nrows_local
1224 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1225 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1226 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1227 TYPE(mp_comm_type) :: group
1228
1229 CALL timeset(routinen, handle)
1230
1231 n_matrices = SIZE(trace)
1232 cpassert(SIZE(matrix_a) == n_matrices)
1233 cpassert(SIZE(matrix_b) == n_matrices)
1234
1235 use_accurate_sum = .true.
1236 IF (PRESENT(accurate)) use_accurate_sum = accurate
1237
1238!$OMP PARALLEL DO DEFAULT(NONE), &
1239!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1240!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1241!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1242 DO imatrix = 1, n_matrices
1243 CALL cp_fm_get_info(matrix_a(imatrix) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1244
1245 use_sp_a = matrix_a(imatrix) %matrix%use_sp
1246 use_sp_b = matrix_b(imatrix) %matrix%use_sp
1247
1248 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1249 IF (use_sp_a .AND. use_sp_b) THEN
1250 ldata_a_sp => matrix_a(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1251 ldata_b_sp => matrix_b(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1252 IF (use_accurate_sum) THEN
1253 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1254 ELSE
1255 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1256 END IF
1257 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1258 ldata_a => matrix_a(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1259 ldata_b => matrix_b(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1260 IF (use_accurate_sum) THEN
1261 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1262 ELSE
1263 trace(imatrix) = sum(ldata_a*ldata_b)
1264 END IF
1265 ELSE
1266 cpabort("Matrices A and B are of different types")
1267 END IF
1268 END DO
1269!$OMP END PARALLEL DO
1270
1271 group = matrix_a(1) %matrix%matrix_struct%para_env
1272 CALL group%sum(trace)
1273
1274 CALL timestop(handle)
1275 END SUBROUTINE cp_fm_trace_a1b1t1_pp
1276
1277! **************************************************************************************************
1278!> \brief Compute trace(i,j) = \sum_k Tr (matrix_a(k,i)^T matrix_b(k,j)).
1279!> \param matrix_a list of A matrices
1280!> \param matrix_b list of B matrices
1281!> \param trace computed traces
1282!> \param accurate ...
1283! **************************************************************************************************
1284 SUBROUTINE cp_fm_contracted_trace_a2b2t2_aa (matrix_a, matrix_b, trace, accurate)
1285 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: matrix_a
1286 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: matrix_b
1287 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
1288 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1289
1290 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_aa'
1291
1292 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1293 nrows_local, nz
1294 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1295 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1296 REAL(kind=dp) :: t
1297 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1298 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1299 TYPE(mp_comm_type) :: group
1300
1301 CALL timeset(routinen, handle)
1302
1303 nz = SIZE(matrix_a, 1)
1304 cpassert(SIZE(matrix_b, 1) == nz)
1305
1306 na = SIZE(matrix_a, 2)
1307 nb = SIZE(matrix_b, 2)
1308 cpassert(SIZE(trace, 1) == na)
1309 cpassert(SIZE(trace, 2) == nb)
1310
1311 use_accurate_sum = .true.
1312 IF (PRESENT(accurate)) use_accurate_sum = accurate
1313
1314 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1315 ! improve load balance between shared-memory threads
1316 ntraces = na*nb
1317 na8 = int(na, kind=int_8)
1318
1319!$OMP PARALLEL DO DEFAULT(NONE), &
1320!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1321!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1322!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1323 DO itrace = 1, ntraces
1324 ib8 = (itrace - 1)/na8
1325 ia = int(itrace - ib8*na8)
1326 ib = int(ib8) + 1
1327
1328 t = 0.0_dp
1329 DO iz = 1, nz
1330 CALL cp_fm_get_info(matrix_a(iz, ia) , nrow_local=nrows_local, ncol_local=ncols_local)
1331 use_sp_a = matrix_a(iz, ia) %use_sp
1332 use_sp_b = matrix_b(iz, ib) %use_sp
1333
1334 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1335 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1336 ldata_a => matrix_a(iz, ia) %local_data(1:nrows_local, 1:ncols_local)
1337 ldata_b => matrix_b(iz, ib) %local_data(1:nrows_local, 1:ncols_local)
1338 IF (use_accurate_sum) THEN
1339 t = t + accurate_dot_product(ldata_a, ldata_b)
1340 ELSE
1341 t = t + sum(ldata_a*ldata_b)
1342 END IF
1343 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1344 ldata_a_sp => matrix_a(iz, ia) %local_data_sp(1:nrows_local, 1:ncols_local)
1345 ldata_b_sp => matrix_b(iz, ib) %local_data_sp(1:nrows_local, 1:ncols_local)
1346 IF (use_accurate_sum) THEN
1347 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1348 ELSE
1349 t = t + sum(ldata_a_sp*ldata_b_sp)
1350 END IF
1351 ELSE
1352 cpabort("Matrices A and B are of different types")
1353 END IF
1354 END DO
1355 trace(ia, ib) = t
1356 END DO
1357!$OMP END PARALLEL DO
1358
1359 group = matrix_a(1, 1) %matrix_struct%para_env
1360 CALL group%sum(trace)
1361
1362 CALL timestop(handle)
1363 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_aa
1364 SUBROUTINE cp_fm_contracted_trace_a2b2t2_ap (matrix_a, matrix_b, trace, accurate)
1365 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: matrix_a
1366 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(IN) :: matrix_b
1367 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
1368 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1369
1370 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_ap'
1371
1372 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1373 nrows_local, nz
1374 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1375 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1376 REAL(kind=dp) :: t
1377 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1378 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1379 TYPE(mp_comm_type) :: group
1380
1381 CALL timeset(routinen, handle)
1382
1383 nz = SIZE(matrix_a, 1)
1384 cpassert(SIZE(matrix_b, 1) == nz)
1385
1386 na = SIZE(matrix_a, 2)
1387 nb = SIZE(matrix_b, 2)
1388 cpassert(SIZE(trace, 1) == na)
1389 cpassert(SIZE(trace, 2) == nb)
1390
1391 use_accurate_sum = .true.
1392 IF (PRESENT(accurate)) use_accurate_sum = accurate
1393
1394 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1395 ! improve load balance between shared-memory threads
1396 ntraces = na*nb
1397 na8 = int(na, kind=int_8)
1398
1399!$OMP PARALLEL DO DEFAULT(NONE), &
1400!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1401!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1402!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1403 DO itrace = 1, ntraces
1404 ib8 = (itrace - 1)/na8
1405 ia = int(itrace - ib8*na8)
1406 ib = int(ib8) + 1
1407
1408 t = 0.0_dp
1409 DO iz = 1, nz
1410 CALL cp_fm_get_info(matrix_a(iz, ia) , nrow_local=nrows_local, ncol_local=ncols_local)
1411 use_sp_a = matrix_a(iz, ia) %use_sp
1412 use_sp_b = matrix_b(iz, ib) %matrix%use_sp
1413
1414 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1415 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1416 ldata_a => matrix_a(iz, ia) %local_data(1:nrows_local, 1:ncols_local)
1417 ldata_b => matrix_b(iz, ib) %matrix%local_data(1:nrows_local, 1:ncols_local)
1418 IF (use_accurate_sum) THEN
1419 t = t + accurate_dot_product(ldata_a, ldata_b)
1420 ELSE
1421 t = t + sum(ldata_a*ldata_b)
1422 END IF
1423 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1424 ldata_a_sp => matrix_a(iz, ia) %local_data_sp(1:nrows_local, 1:ncols_local)
1425 ldata_b_sp => matrix_b(iz, ib) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1426 IF (use_accurate_sum) THEN
1427 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1428 ELSE
1429 t = t + sum(ldata_a_sp*ldata_b_sp)
1430 END IF
1431 ELSE
1432 cpabort("Matrices A and B are of different types")
1433 END IF
1434 END DO
1435 trace(ia, ib) = t
1436 END DO
1437!$OMP END PARALLEL DO
1438
1439 group = matrix_a(1, 1) %matrix_struct%para_env
1440 CALL group%sum(trace)
1441
1442 CALL timestop(handle)
1443 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_ap
1444 SUBROUTINE cp_fm_contracted_trace_a2b2t2_pa (matrix_a, matrix_b, trace, accurate)
1445 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(IN) :: matrix_a
1446 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: matrix_b
1447 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
1448 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1449
1450 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_pa'
1451
1452 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1453 nrows_local, nz
1454 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1455 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1456 REAL(kind=dp) :: t
1457 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1458 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1459 TYPE(mp_comm_type) :: group
1460
1461 CALL timeset(routinen, handle)
1462
1463 nz = SIZE(matrix_a, 1)
1464 cpassert(SIZE(matrix_b, 1) == nz)
1465
1466 na = SIZE(matrix_a, 2)
1467 nb = SIZE(matrix_b, 2)
1468 cpassert(SIZE(trace, 1) == na)
1469 cpassert(SIZE(trace, 2) == nb)
1470
1471 use_accurate_sum = .true.
1472 IF (PRESENT(accurate)) use_accurate_sum = accurate
1473
1474 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1475 ! improve load balance between shared-memory threads
1476 ntraces = na*nb
1477 na8 = int(na, kind=int_8)
1478
1479!$OMP PARALLEL DO DEFAULT(NONE), &
1480!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1481!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1482!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1483 DO itrace = 1, ntraces
1484 ib8 = (itrace - 1)/na8
1485 ia = int(itrace - ib8*na8)
1486 ib = int(ib8) + 1
1487
1488 t = 0.0_dp
1489 DO iz = 1, nz
1490 CALL cp_fm_get_info(matrix_a(iz, ia) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1491 use_sp_a = matrix_a(iz, ia) %matrix%use_sp
1492 use_sp_b = matrix_b(iz, ib) %use_sp
1493
1494 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1495 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1496 ldata_a => matrix_a(iz, ia) %matrix%local_data(1:nrows_local, 1:ncols_local)
1497 ldata_b => matrix_b(iz, ib) %local_data(1:nrows_local, 1:ncols_local)
1498 IF (use_accurate_sum) THEN
1499 t = t + accurate_dot_product(ldata_a, ldata_b)
1500 ELSE
1501 t = t + sum(ldata_a*ldata_b)
1502 END IF
1503 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1504 ldata_a_sp => matrix_a(iz, ia) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1505 ldata_b_sp => matrix_b(iz, ib) %local_data_sp(1:nrows_local, 1:ncols_local)
1506 IF (use_accurate_sum) THEN
1507 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1508 ELSE
1509 t = t + sum(ldata_a_sp*ldata_b_sp)
1510 END IF
1511 ELSE
1512 cpabort("Matrices A and B are of different types")
1513 END IF
1514 END DO
1515 trace(ia, ib) = t
1516 END DO
1517!$OMP END PARALLEL DO
1518
1519 group = matrix_a(1, 1) %matrix%matrix_struct%para_env
1520 CALL group%sum(trace)
1521
1522 CALL timestop(handle)
1523 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_pa
1524 SUBROUTINE cp_fm_contracted_trace_a2b2t2_pp (matrix_a, matrix_b, trace, accurate)
1525 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(IN) :: matrix_a
1526 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(IN) :: matrix_b
1527 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
1528 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1529
1530 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_pp'
1531
1532 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1533 nrows_local, nz
1534 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1535 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1536 REAL(kind=dp) :: t
1537 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1538 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1539 TYPE(mp_comm_type) :: group
1540
1541 CALL timeset(routinen, handle)
1542
1543 nz = SIZE(matrix_a, 1)
1544 cpassert(SIZE(matrix_b, 1) == nz)
1545
1546 na = SIZE(matrix_a, 2)
1547 nb = SIZE(matrix_b, 2)
1548 cpassert(SIZE(trace, 1) == na)
1549 cpassert(SIZE(trace, 2) == nb)
1550
1551 use_accurate_sum = .true.
1552 IF (PRESENT(accurate)) use_accurate_sum = accurate
1553
1554 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1555 ! improve load balance between shared-memory threads
1556 ntraces = na*nb
1557 na8 = int(na, kind=int_8)
1558
1559!$OMP PARALLEL DO DEFAULT(NONE), &
1560!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1561!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1562!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1563 DO itrace = 1, ntraces
1564 ib8 = (itrace - 1)/na8
1565 ia = int(itrace - ib8*na8)
1566 ib = int(ib8) + 1
1567
1568 t = 0.0_dp
1569 DO iz = 1, nz
1570 CALL cp_fm_get_info(matrix_a(iz, ia) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1571 use_sp_a = matrix_a(iz, ia) %matrix%use_sp
1572 use_sp_b = matrix_b(iz, ib) %matrix%use_sp
1573
1574 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1575 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1576 ldata_a => matrix_a(iz, ia) %matrix%local_data(1:nrows_local, 1:ncols_local)
1577 ldata_b => matrix_b(iz, ib) %matrix%local_data(1:nrows_local, 1:ncols_local)
1578 IF (use_accurate_sum) THEN
1579 t = t + accurate_dot_product(ldata_a, ldata_b)
1580 ELSE
1581 t = t + sum(ldata_a*ldata_b)
1582 END IF
1583 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1584 ldata_a_sp => matrix_a(iz, ia) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1585 ldata_b_sp => matrix_b(iz, ib) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1586 IF (use_accurate_sum) THEN
1587 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1588 ELSE
1589 t = t + sum(ldata_a_sp*ldata_b_sp)
1590 END IF
1591 ELSE
1592 cpabort("Matrices A and B are of different types")
1593 END IF
1594 END DO
1595 trace(ia, ib) = t
1596 END DO
1597!$OMP END PARALLEL DO
1598
1599 group = matrix_a(1, 1) %matrix%matrix_struct%para_env
1600 CALL group%sum(trace)
1601
1602 CALL timestop(handle)
1603 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_pp
1604
1605! **************************************************************************************************
1606!> \brief multiplies in place by a triangular matrix:
1607!> matrix_b = alpha op(triangular_matrix) matrix_b
1608!> or (if side='R')
1609!> matrix_b = alpha matrix_b op(triangular_matrix)
1610!> op(triangular_matrix) is:
1611!> triangular_matrix (if transpose_tr=.false. and invert_tr=.false.)
1612!> triangular_matrix^T (if transpose_tr=.true. and invert_tr=.false.)
1613!> triangular_matrix^(-1) (if transpose_tr=.false. and invert_tr=.true.)
1614!> triangular_matrix^(-T) (if transpose_tr=.true. and invert_tr=.true.)
1615!> \param triangular_matrix the triangular matrix that multiplies the other
1616!> \param matrix_b the matrix that gets multiplied and stores the result
1617!> \param side on which side of matrix_b stays op(triangular_matrix)
1618!> (defaults to 'L')
1619!> \param transpose_tr if the triangular matrix should be transposed
1620!> (defaults to false)
1621!> \param invert_tr if the triangular matrix should be inverted
1622!> (defaults to false)
1623!> \param uplo_tr if triangular_matrix is stored in the upper ('U') or
1624!> lower ('L') triangle (defaults to 'U')
1625!> \param unit_diag_tr if the diagonal elements of triangular_matrix should
1626!> be assumed to be 1 (defaults to false)
1627!> \param n_rows the number of rows of the result (defaults to
1628!> size(matrix_b,1))
1629!> \param n_cols the number of columns of the result (defaults to
1630!> size(matrix_b,2))
1631!> \param alpha ...
1632!> \par History
1633!> 08.2002 created [fawzi]
1634!> \author Fawzi Mohamed
1635!> \note
1636!> needs an mpi env
1637! **************************************************************************************************
1638 SUBROUTINE cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, &
1639 transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, &
1640 alpha)
1641 TYPE(cp_fm_type), INTENT(IN) :: triangular_matrix, matrix_b
1642 CHARACTER, INTENT(IN), OPTIONAL :: side
1643 LOGICAL, INTENT(IN), OPTIONAL :: transpose_tr, invert_tr
1644 CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
1645 LOGICAL, INTENT(IN), OPTIONAL :: unit_diag_tr
1646 INTEGER, INTENT(IN), OPTIONAL :: n_rows, n_cols
1647 REAL(kind=dp), INTENT(IN), OPTIONAL :: alpha
1648
1649 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_triangular_multiply'
1650
1651 CHARACTER :: side_char, transa, unit_diag, uplo
1652 INTEGER :: handle, mdim, m, n
1653 LOGICAL :: invert
1654 REAL(kind=dp) :: al
1655
1656 CALL timeset(routinen, handle)
1657 side_char = 'L'
1658 unit_diag = 'N'
1659 uplo = 'U'
1660 transa = 'N'
1661 invert = .false.
1662 al = 1.0_dp
1663 CALL cp_fm_get_info(matrix_b, nrow_global=m, ncol_global=n)
1664 IF (PRESENT(side)) side_char = side
1665 mdim = merge(1, 2, 'L' == side_char)
1666 IF (PRESENT(invert_tr)) invert = invert_tr
1667 IF (PRESENT(uplo_tr)) uplo = uplo_tr
1668 IF (PRESENT(unit_diag_tr)) THEN
1669 IF (unit_diag_tr) THEN
1670 unit_diag = 'U'
1671 ELSE
1672 unit_diag = 'N'
1673 END IF
1674 END IF
1675 IF (PRESENT(transpose_tr)) THEN
1676 IF (transpose_tr) THEN
1677 transa = 'T'
1678 ELSE
1679 transa = 'N'
1680 END IF
1681 END IF
1682 IF (PRESENT(alpha)) al = alpha
1683 IF (PRESENT(n_rows)) m = n_rows
1684 IF (PRESENT(n_cols)) n = n_cols
1685
1686 IF (invert) THEN
1687
1688#if defined(__parallel)
1689 CALL pdtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1690 triangular_matrix%local_data(1, 1), 1, 1, &
1691 triangular_matrix%matrix_struct%descriptor, &
1692 matrix_b%local_data(1, 1), 1, 1, &
1693 matrix_b%matrix_struct%descriptor(1))
1694#else
1695 CALL dtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1696 triangular_matrix%local_data(1, 1), &
1697 SIZE(triangular_matrix%local_data, mdim), &
1698 matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1699#endif
1700
1701 ELSE
1702
1703#if defined(__parallel)
1704 CALL pdtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1705 triangular_matrix%local_data(1, 1), 1, 1, &
1706 triangular_matrix%matrix_struct%descriptor, &
1707 matrix_b%local_data(1, 1), 1, 1, &
1708 matrix_b%matrix_struct%descriptor(1))
1709#else
1710 CALL dtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1711 triangular_matrix%local_data(1, 1), &
1712 SIZE(triangular_matrix%local_data, mdim), &
1713 matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1714#endif
1715
1716 END IF
1717
1718 CALL timestop(handle)
1719 END SUBROUTINE cp_fm_triangular_multiply
1720
1721! **************************************************************************************************
1722!> \brief scales a matrix
1723!> matrix_a = alpha * matrix_b
1724!> \param alpha ...
1725!> \param matrix_a ...
1726!> \note
1727!> use cp_fm_set_all to zero (avoids problems with nan)
1728! **************************************************************************************************
1729 SUBROUTINE cp_fm_scale(alpha, matrix_a)
1730 REAL(kind=dp), INTENT(IN) :: alpha
1731 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
1732
1733 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_scale'
1734
1735 INTEGER :: handle, size_a
1736 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1737
1738 CALL timeset(routinen, handle)
1739
1740 NULLIFY (a)
1741
1742 a => matrix_a%local_data
1743 size_a = SIZE(a, 1)*SIZE(a, 2)
1744
1745 CALL dscal(size_a, alpha, a, 1)
1746
1747 CALL timestop(handle)
1748
1749 END SUBROUTINE cp_fm_scale
1750
1751! **************************************************************************************************
1752!> \brief transposes a matrix
1753!> matrixt = matrix ^ T
1754!> \param matrix ...
1755!> \param matrixt ...
1756!> \note
1757!> all matrix elements are transposed (see cp_fm_uplo_to_full to symmetrise a matrix)
1758! **************************************************************************************************
1759 SUBROUTINE cp_fm_transpose(matrix, matrixt)
1760 TYPE(cp_fm_type), INTENT(IN) :: matrix, matrixt
1761
1762 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_transpose'
1763
1764 INTEGER :: handle, ncol_global, &
1765 nrow_global, ncol_globalt, nrow_globalt
1766 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, c
1767#if defined(__parallel)
1768 INTEGER, DIMENSION(9) :: desca, descc
1769#elif !defined(__MKL)
1770 INTEGER :: i, j
1771#endif
1772
1773 nrow_global = matrix%matrix_struct%nrow_global
1774 ncol_global = matrix%matrix_struct%ncol_global
1775 nrow_globalt = matrixt%matrix_struct%nrow_global
1776 ncol_globalt = matrixt%matrix_struct%ncol_global
1777 cpassert(nrow_global == ncol_globalt)
1778 cpassert(nrow_globalt == ncol_global)
1779
1780 CALL timeset(routinen, handle)
1781
1782 a => matrix%local_data
1783 c => matrixt%local_data
1784
1785#if defined(__parallel)
1786 desca(:) = matrix%matrix_struct%descriptor(:)
1787 descc(:) = matrixt%matrix_struct%descriptor(:)
1788 CALL pdtran(ncol_global, nrow_global, 1.0_dp, a(1, 1), 1, 1, desca, 0.0_dp, c(1, 1), 1, 1, descc)
1789#elif defined(__MKL)
1790 CALL mkl_domatcopy('C', 'T', nrow_global, ncol_global, 1.0_dp, a(1, 1), nrow_global, c(1, 1), ncol_global)
1791#else
1792 DO j = 1, ncol_global
1793 DO i = 1, nrow_global
1794 c(j, i) = a(i, j)
1795 END DO
1796 END DO
1797#endif
1798 CALL timestop(handle)
1799
1800 END SUBROUTINE cp_fm_transpose
1801
1802! **************************************************************************************************
1803!> \brief given a triangular matrix according to uplo, computes the corresponding full matrix
1804!> \param matrix the triangular matrix as input, the full matrix as output
1805!> \param work a matrix of the same size as matrix
1806!> \param uplo triangular format; defaults to 'U'
1807!> \author Matthias Krack
1808!> \note
1809!> the opposite triangular part is irrelevant
1810! **************************************************************************************************
1811 SUBROUTINE cp_fm_uplo_to_full(matrix, work, uplo)
1812
1813 TYPE(cp_fm_type), INTENT(IN) :: matrix, work
1814 CHARACTER, INTENT(IN), OPTIONAL :: uplo
1815
1816 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_uplo_to_full'
1817
1818 CHARACTER :: myuplo
1819 INTEGER :: handle, icol_global, irow_global, &
1820 mypcol, myprow, ncol_global, &
1821 nrow_global
1822 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1823 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
1824 TYPE(cp_blacs_env_type), POINTER :: context
1825
1826#if defined(__parallel)
1827 INTEGER :: icol_local, irow_local, &
1828 ncol_block, ncol_local, &
1829 nrow_block, nrow_local
1830 INTEGER, DIMENSION(9) :: desca, descc
1831 REAL(kind=dp), DIMENSION(:, :), POINTER :: c
1832 REAL(kind=sp), DIMENSION(:, :), POINTER :: c_sp
1833#endif
1834
1835 myuplo = 'U'
1836 IF (PRESENT(uplo)) myuplo = uplo
1837
1838 nrow_global = matrix%matrix_struct%nrow_global
1839 ncol_global = matrix%matrix_struct%ncol_global
1840 cpassert(nrow_global == ncol_global)
1841 nrow_global = work%matrix_struct%nrow_global
1842 ncol_global = work%matrix_struct%ncol_global
1843 cpassert(nrow_global == ncol_global)
1844 cpassert(matrix%use_sp .EQV. work%use_sp)
1845
1846 CALL timeset(routinen, handle)
1847
1848 context => matrix%matrix_struct%context
1849 myprow = context%mepos(1)
1850 mypcol = context%mepos(2)
1851
1852#if defined(__parallel)
1853
1854 nrow_block = matrix%matrix_struct%nrow_block
1855 ncol_block = matrix%matrix_struct%ncol_block
1856
1857 nrow_local = matrix%matrix_struct%nrow_locals(myprow)
1858 ncol_local = matrix%matrix_struct%ncol_locals(mypcol)
1859
1860 a => work%local_data
1861 a_sp => work%local_data_sp
1862 desca(:) = work%matrix_struct%descriptor(:)
1863 c => matrix%local_data
1864 c_sp => matrix%local_data_sp
1865 descc(:) = matrix%matrix_struct%descriptor(:)
1866
1867 DO icol_local = 1, ncol_local
1868 icol_global = matrix%matrix_struct%col_indices(icol_local)
1869 DO irow_local = 1, nrow_local
1870 irow_global = matrix%matrix_struct%row_indices(irow_local)
1871 IF (merge(irow_global > icol_global, irow_global < icol_global, (myuplo == "U") .OR. (myuplo == "u"))) THEN
1872 IF (matrix%use_sp) THEN
1873 c_sp(irow_local, icol_local) = 0.0_sp
1874 ELSE
1875 c(irow_local, icol_local) = 0.0_dp
1876 END IF
1877 ELSE IF (irow_global == icol_global) THEN
1878 IF (matrix%use_sp) THEN
1879 c_sp(irow_local, icol_local) = 0.5_sp*c_sp(irow_local, icol_local)
1880 ELSE
1881 c(irow_local, icol_local) = 0.5_dp*c(irow_local, icol_local)
1882 END IF
1883 END IF
1884 END DO
1885 END DO
1886
1887 DO icol_local = 1, ncol_local
1888 DO irow_local = 1, nrow_local
1889 IF (matrix%use_sp) THEN
1890 a_sp(irow_local, icol_local) = c_sp(irow_local, icol_local)
1891 ELSE
1892 a(irow_local, icol_local) = c(irow_local, icol_local)
1893 END IF
1894 END DO
1895 END DO
1896
1897 IF (matrix%use_sp) THEN
1898 CALL pstran(nrow_global, ncol_global, 1.0_sp, a_sp(1, 1), 1, 1, desca, 1.0_sp, c_sp(1, 1), 1, 1, descc)
1899 ELSE
1900 CALL pdtran(nrow_global, ncol_global, 1.0_dp, a(1, 1), 1, 1, desca, 1.0_dp, c(1, 1), 1, 1, descc)
1901 END IF
1902
1903#else
1904
1905 a => matrix%local_data
1906 a_sp => matrix%local_data_sp
1907
1908 IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
1909 DO irow_global = 1, nrow_global
1910 DO icol_global = irow_global + 1, ncol_global
1911 IF (matrix%use_sp) THEN
1912 a_sp(icol_global, irow_global) = a_sp(irow_global, icol_global)
1913 ELSE
1914 a(icol_global, irow_global) = a(irow_global, icol_global)
1915 END IF
1916 END DO
1917 END DO
1918 ELSE
1919 DO icol_global = 1, ncol_global
1920 DO irow_global = icol_global + 1, nrow_global
1921 IF (matrix%use_sp) THEN
1922 a_sp(irow_global, icol_global) = a_sp(icol_global, irow_global)
1923 ELSE
1924 a(irow_global, icol_global) = a(icol_global, irow_global)
1925 END IF
1926 END DO
1927 END DO
1928 END IF
1929
1930#endif
1931 CALL timestop(handle)
1932
1933 END SUBROUTINE cp_fm_uplo_to_full
1934
1935! **************************************************************************************************
1936!> \brief scales column i of matrix a with scaling(i)
1937!> \param matrixa ...
1938!> \param scaling : an array used for scaling the columns,
1939!> SIZE(scaling) determines the number of columns to be scaled
1940!> \author Joost VandeVondele
1941!> \note
1942!> this is very useful as a first step in the computation of C = sum_i alpha_i A_i transpose (A_i)
1943!> that is a rank-k update (cp_fm_syrk , cp_sm_plus_fm_fm_t)
1944!> this procedure can be up to 20 times faster than calling cp_fm_syrk n times
1945!> where every vector has a different prefactor
1946! **************************************************************************************************
1947 SUBROUTINE cp_fm_column_scale(matrixa, scaling)
1948 TYPE(cp_fm_type), INTENT(IN) :: matrixa
1949 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: scaling
1950
1951 INTEGER :: k, mypcol, myprow, n, ncol_global, &
1952 npcol, nprow
1953 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1954 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
1955#if defined(__parallel)
1956 INTEGER :: icol_global, icol_local, &
1957 ipcol, iprow, irow_local
1958#else
1959 INTEGER :: i
1960#endif
1961
1962 myprow = matrixa%matrix_struct%context%mepos(1)
1963 mypcol = matrixa%matrix_struct%context%mepos(2)
1964 nprow = matrixa%matrix_struct%context%num_pe(1)
1965 npcol = matrixa%matrix_struct%context%num_pe(2)
1966
1967 ncol_global = matrixa%matrix_struct%ncol_global
1968
1969 a => matrixa%local_data
1970 a_sp => matrixa%local_data_sp
1971 IF (matrixa%use_sp) THEN
1972 n = SIZE(a_sp, 1)
1973 ELSE
1974 n = SIZE(a, 1)
1975 END IF
1976 k = min(SIZE(scaling), ncol_global)
1977
1978#if defined(__parallel)
1979
1980 DO icol_global = 1, k
1981 CALL infog2l(1, icol_global, matrixa%matrix_struct%descriptor, &
1982 nprow, npcol, myprow, mypcol, &
1983 irow_local, icol_local, iprow, ipcol)
1984 IF ((ipcol == mypcol)) THEN
1985 IF (matrixa%use_sp) THEN
1986 CALL sscal(n, real(scaling(icol_global), sp), a_sp(:, icol_local), 1)
1987 ELSE
1988 CALL dscal(n, scaling(icol_global), a(:, icol_local), 1)
1989 END IF
1990 END IF
1991 END DO
1992#else
1993 DO i = 1, k
1994 IF (matrixa%use_sp) THEN
1995 CALL sscal(n, real(scaling(i), sp), a_sp(:, i), 1)
1996 ELSE
1997 CALL dscal(n, scaling(i), a(:, i), 1)
1998 END IF
1999 END DO
2000#endif
2001 END SUBROUTINE cp_fm_column_scale
2002
2003! **************************************************************************************************
2004!> \brief scales row i of matrix a with scaling(i)
2005!> \param matrixa ...
2006!> \param scaling : an array used for scaling the columns,
2007!> \author JGH
2008!> \note
2009! **************************************************************************************************
2010 SUBROUTINE cp_fm_row_scale(matrixa, scaling)
2011 TYPE(cp_fm_type), INTENT(IN) :: matrixa
2012 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: scaling
2013
2014 INTEGER :: n, m, nrow_global, nrow_local, ncol_local
2015 INTEGER, DIMENSION(:), POINTER :: row_indices
2016 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2017 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
2018#if defined(__parallel)
2019 INTEGER :: irow_global, icol, irow
2020#else
2021 INTEGER :: j
2022#endif
2023
2024 CALL cp_fm_get_info(matrixa, row_indices=row_indices, nrow_global=nrow_global, &
2025 nrow_local=nrow_local, ncol_local=ncol_local)
2026 cpassert(SIZE(scaling) == nrow_global)
2027
2028 a => matrixa%local_data
2029 a_sp => matrixa%local_data_sp
2030 IF (matrixa%use_sp) THEN
2031 n = SIZE(a_sp, 1)
2032 m = SIZE(a_sp, 2)
2033 ELSE
2034 n = SIZE(a, 1)
2035 m = SIZE(a, 2)
2036 END IF
2037
2038#if defined(__parallel)
2039 DO icol = 1, ncol_local
2040 IF (matrixa%use_sp) THEN
2041 DO irow = 1, nrow_local
2042 irow_global = row_indices(irow)
2043 a(irow, icol) = real(scaling(irow_global), dp)*a(irow, icol)
2044 END DO
2045 ELSE
2046 DO irow = 1, nrow_local
2047 irow_global = row_indices(irow)
2048 a(irow, icol) = scaling(irow_global)*a(irow, icol)
2049 END DO
2050 END IF
2051 END DO
2052#else
2053 IF (matrixa%use_sp) THEN
2054 DO j = 1, m
2055 a_sp(1:n, j) = real(scaling(1:n), sp)*a_sp(1:n, j)
2056 END DO
2057 ELSE
2058 DO j = 1, m
2059 a(1:n, j) = scaling(1:n)*a(1:n, j)
2060 END DO
2061 END IF
2062#endif
2063 END SUBROUTINE cp_fm_row_scale
2064
2065! **************************************************************************************************
2066!> \brief Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix
2067!> \param matrix_a the matrix to invert
2068!> \param matrix_inverse the inverse of matrix_a
2069!> \param det_a the determinant of matrix_a
2070!> \param eps_svd optional parameter to active SVD based inversion, singular values below eps_svd
2071!> are screened
2072!> \param eigval optionally return matrix eigenvalues/singular values
2073!> \par History
2074!> note of Jan Wilhelm (12.2015)
2075!> - computation of determinant corrected
2076!> - determinant only computed if det_a is present
2077!> 12.2016 added option to use SVD instead of LU [Nico Holmberg]
2078!> - Use cp_fm_get diag instead of n times cp_fm_get_element (A. Bussy)
2079!> \author Florian Schiffmann(02.2007)
2080! **************************************************************************************************
2081 SUBROUTINE cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
2082
2083 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_inverse
2084 REAL(kind=dp), INTENT(OUT), OPTIONAL :: det_a
2085 REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_svd
2086 REAL(kind=dp), DIMENSION(:), POINTER, &
2087 INTENT(INOUT), OPTIONAL :: eigval
2088
2089 INTEGER :: n
2090 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
2091 REAL(kind=dp) :: determinant, my_eps_svd
2092 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2093 TYPE(cp_fm_type) :: matrix_lu
2094
2095#if defined(__parallel)
2096 TYPE(cp_fm_type) :: u, vt, sigma, inv_sigma_ut
2097 TYPE(mp_comm_type) :: group
2098 INTEGER :: i, info, liwork, lwork, exponent_of_minus_one
2099 INTEGER, DIMENSION(9) :: desca
2100 LOGICAL :: quenched
2101 REAL(kind=dp) :: alpha, beta
2102 REAL(kind=dp), DIMENSION(:), POINTER :: diag
2103 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: work
2104#else
2105 LOGICAL :: sign
2106 REAL(kind=dp) :: eps1
2107#endif
2108
2109 my_eps_svd = 0.0_dp
2110 IF (PRESENT(eps_svd)) my_eps_svd = eps_svd
2111
2112 CALL cp_fm_create(matrix=matrix_lu, &
2113 matrix_struct=matrix_a%matrix_struct, &
2114 name="A_lu"//trim(adjustl(cp_to_string(1)))//"MATRIX")
2115 CALL cp_fm_to_fm(matrix_a, matrix_lu)
2116
2117 a => matrix_lu%local_data
2118 n = matrix_lu%matrix_struct%nrow_global
2119 ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
2120 ipivot(:) = 0
2121#if defined(__parallel)
2122 IF (my_eps_svd == 0.0_dp) THEN
2123 ! Use LU decomposition
2124 lwork = 3*n
2125 liwork = 3*n
2126 desca(:) = matrix_lu%matrix_struct%descriptor(:)
2127 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
2128
2129 IF (PRESENT(det_a) .OR. PRESENT(eigval)) THEN
2130
2131 ALLOCATE (diag(n))
2132 diag(:) = 0.0_dp
2133 CALL cp_fm_get_diag(matrix_lu, diag)
2134
2135 exponent_of_minus_one = 0
2136 determinant = 1.0_dp
2137 DO i = 1, n
2138 determinant = determinant*diag(i)
2139 IF (ipivot(i) /= i) THEN
2140 exponent_of_minus_one = exponent_of_minus_one + 1
2141 END IF
2142 END DO
2143 IF (PRESENT(eigval)) THEN
2144 cpassert(.NOT. ASSOCIATED(eigval))
2145 ALLOCATE (eigval(n))
2146 eigval(:) = diag
2147 END IF
2148 DEALLOCATE (diag)
2149
2150 group = matrix_lu%matrix_struct%para_env
2151 CALL group%sum(exponent_of_minus_one)
2152
2153 determinant = determinant*(-1.0_dp)**exponent_of_minus_one
2154
2155 END IF
2156
2157 alpha = 0.0_dp
2158 beta = 1.0_dp
2159 CALL cp_fm_set_all(matrix_inverse, alpha, beta)
2160 CALL pdgetrs('N', n, n, matrix_lu%local_data, 1, 1, desca, ipivot, matrix_inverse%local_data, 1, 1, desca, info)
2161 ELSE
2162 ! Use singular value decomposition
2163 CALL cp_fm_create(matrix=u, &
2164 matrix_struct=matrix_a%matrix_struct, &
2165 name="LEFT_SINGULAR_MATRIX")
2166 CALL cp_fm_set_all(u, alpha=0.0_dp)
2167 CALL cp_fm_create(matrix=vt, &
2168 matrix_struct=matrix_a%matrix_struct, &
2169 name="RIGHT_SINGULAR_MATRIX")
2170 CALL cp_fm_set_all(vt, alpha=0.0_dp)
2171 ALLOCATE (diag(n))
2172 diag(:) = 0.0_dp
2173 desca(:) = matrix_lu%matrix_struct%descriptor(:)
2174 ALLOCATE (work(1))
2175 ! Workspace query
2176 lwork = -1
2177 CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
2178 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
2179 lwork = int(work(1))
2180 DEALLOCATE (work)
2181 ALLOCATE (work(lwork))
2182 ! SVD
2183 CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
2184 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
2185 ! info == n+1 implies homogeneity error when the number of procs is large
2186 ! this likely isnt a problem, but maybe we should handle it separately
2187 IF (info /= 0 .AND. info /= n + 1) &
2188 cpabort("Singular value decomposition of matrix failed.")
2189 ! (Pseudo)inverse and (pseudo)determinant
2190 CALL cp_fm_create(matrix=sigma, &
2191 matrix_struct=matrix_a%matrix_struct, &
2192 name="SINGULAR_VALUE_MATRIX")
2193 CALL cp_fm_set_all(sigma, alpha=0.0_dp)
2194 determinant = 1.0_dp
2195 quenched = .false.
2196 IF (PRESENT(eigval)) THEN
2197 cpassert(.NOT. ASSOCIATED(eigval))
2198 ALLOCATE (eigval(n))
2199 eigval(:) = diag
2200 END IF
2201 DO i = 1, n
2202 IF (diag(i) < my_eps_svd) THEN
2203 diag(i) = 0.0_dp
2204 quenched = .true.
2205 ELSE
2206 determinant = determinant*diag(i)
2207 diag(i) = 1.0_dp/diag(i)
2208 END IF
2209 CALL cp_fm_set_element(sigma, i, i, diag(i))
2210 END DO
2211 DEALLOCATE (diag)
2212 IF (quenched) &
2213 CALL cp_warn(__location__, &
2214 "Linear dependencies were detected in the SVD inversion of matrix "//trim(adjustl(matrix_a%name))// &
2215 ". At least one singular value has been quenched.")
2216 ! Sigma^-1 * U^T
2217 CALL cp_fm_create(matrix=inv_sigma_ut, &
2218 matrix_struct=matrix_a%matrix_struct, &
2219 name="SINGULAR_VALUE_MATRIX")
2220 CALL cp_fm_set_all(inv_sigma_ut, alpha=0.0_dp)
2221 CALL pdgemm('N', 'T', n, n, n, 1.0_dp, sigma%local_data, 1, 1, desca, &
2222 u%local_data, 1, 1, desca, 0.0_dp, inv_sigma_ut%local_data, 1, 1, desca)
2223 ! A^-1 = V * (Sigma^-1 * U^T)
2224 CALL cp_fm_set_all(matrix_inverse, alpha=0.0_dp)
2225 CALL pdgemm('T', 'N', n, n, n, 1.0_dp, vt%local_data, 1, 1, desca, &
2226 inv_sigma_ut%local_data, 1, 1, desca, 0.0_dp, matrix_inverse%local_data, 1, 1, desca)
2227 ! Clean up
2228 DEALLOCATE (work)
2229 CALL cp_fm_release(u)
2230 CALL cp_fm_release(vt)
2231 CALL cp_fm_release(sigma)
2232 CALL cp_fm_release(inv_sigma_ut)
2233 END IF
2234#else
2235 IF (my_eps_svd == 0.0_dp) THEN
2236 sign = .true.
2237 CALL invert_matrix(matrix_a%local_data, matrix_inverse%local_data, &
2238 eval_error=eps1)
2239 CALL cp_fm_lu_decompose(matrix_lu, determinant, correct_sign=sign)
2240 IF (PRESENT(eigval)) &
2241 CALL cp_abort(__location__, &
2242 "NYI. Eigenvalues not available for return without SCALAPACK.")
2243 ELSE
2244 CALL get_pseudo_inverse_svd(matrix_a%local_data, matrix_inverse%local_data, eps_svd, &
2245 determinant, eigval)
2246 END IF
2247#endif
2248 CALL cp_fm_release(matrix_lu)
2249 DEALLOCATE (ipivot)
2250 IF (PRESENT(det_a)) det_a = determinant
2251 END SUBROUTINE cp_fm_invert
2252
2253! **************************************************************************************************
2254!> \brief inverts a triangular matrix
2255!> \param matrix_a ...
2256!> \param uplo_tr triangular format; defaults to 'U'
2257!> \author MI
2258! **************************************************************************************************
2259 SUBROUTINE cp_fm_triangular_invert(matrix_a, uplo_tr)
2260
2261 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
2262 CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
2263
2264 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_triangular_invert'
2265
2266 CHARACTER :: unit_diag, uplo
2267 INTEGER :: handle, info, ncol_global
2268 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2269#if defined(__parallel)
2270 INTEGER, DIMENSION(9) :: desca
2271#endif
2272
2273 CALL timeset(routinen, handle)
2274
2275 unit_diag = 'N'
2276 uplo = 'U'
2277 IF (PRESENT(uplo_tr)) uplo = uplo_tr
2278
2279 ncol_global = matrix_a%matrix_struct%ncol_global
2280
2281 a => matrix_a%local_data
2282
2283#if defined(__parallel)
2284 desca(:) = matrix_a%matrix_struct%descriptor(:)
2285
2286 CALL pdtrtri(uplo, unit_diag, ncol_global, a(1, 1), 1, 1, desca, info)
2287
2288#else
2289 CALL dtrtri(uplo, unit_diag, ncol_global, a(1, 1), ncol_global, info)
2290#endif
2291
2292 CALL timestop(handle)
2293 END SUBROUTINE cp_fm_triangular_invert
2294
2295! **************************************************************************************************
2296!> \brief performs a QR factorization of the input rectangular matrix A or of a submatrix of A
2297!> the computed triangular matrix R is in output of the submatrix sub(A) of size NxN
2298!> M and M give the dimension of the submatrix that has to be factorized (MxN) with M>N
2299!> \param matrix_a ...
2300!> \param matrix_r ...
2301!> \param nrow_fact ...
2302!> \param ncol_fact ...
2303!> \param first_row ...
2304!> \param first_col ...
2305!> \author MI
2306! **************************************************************************************************
2307 SUBROUTINE cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col, uplo)
2308 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_r
2309 INTEGER, INTENT(IN), OPTIONAL :: nrow_fact, ncol_fact, &
2310 first_row, first_col
2311 CHARACTER, INTENT(IN), OPTIONAL :: uplo
2312
2313 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_qr_factorization'
2314
2315 CHARACTER :: myuplo
2316 INTEGER :: handle, i, icol, info, irow, &
2317 j, lda, lwork, ncol, &
2318 ndim, nrow
2319 REAL(dp), ALLOCATABLE, DIMENSION(:) :: tau, work
2320 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: r_mat
2321 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2322#if defined(__parallel)
2323 INTEGER, DIMENSION(9) :: desca
2324#endif
2325
2326 CALL timeset(routinen, handle)
2327
2328 myuplo = 'U'
2329 IF (PRESENT(uplo)) myuplo = uplo
2330
2331 ncol = matrix_a%matrix_struct%ncol_global
2332 nrow = matrix_a%matrix_struct%nrow_global
2333 lda = nrow
2334
2335 a => matrix_a%local_data
2336
2337 IF (PRESENT(nrow_fact)) nrow = nrow_fact
2338 IF (PRESENT(ncol_fact)) ncol = ncol_fact
2339 irow = 1
2340 IF (PRESENT(first_row)) irow = first_row
2341 icol = 1
2342 IF (PRESENT(first_col)) icol = first_col
2343
2344 cpassert(nrow >= ncol)
2345 ndim = SIZE(a, 2)
2346 ALLOCATE (tau(ndim))
2347
2348#if defined(__parallel)
2349
2350 desca(:) = matrix_a%matrix_struct%descriptor(:)
2351
2352 lwork = -1
2353 ALLOCATE (work(2*ndim))
2354 CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
2355 lwork = int(work(1))
2356 DEALLOCATE (work)
2357 ALLOCATE (work(lwork))
2358 CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
2359
2360#else
2361 lwork = -1
2362 ALLOCATE (work(2*ndim))
2363 CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
2364 lwork = int(work(1))
2365 DEALLOCATE (work)
2366 ALLOCATE (work(lwork))
2367 CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
2368
2369#endif
2370
2371 ALLOCATE (r_mat(ncol, ncol))
2372 CALL cp_fm_get_submatrix(matrix_a, r_mat, 1, 1, ncol, ncol)
2373 IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
2374 DO i = 1, ncol
2375 DO j = i + 1, ncol
2376 r_mat(j, i) = 0.0_dp
2377 END DO
2378 END DO
2379 ELSE
2380 DO j = 1, ncol
2381 DO i = j + 1, ncol
2382 r_mat(i, j) = 0.0_dp
2383 END DO
2384 END DO
2385 END IF
2386 CALL cp_fm_set_submatrix(matrix_r, r_mat, 1, 1, ncol, ncol)
2387
2388 DEALLOCATE (tau, work, r_mat)
2389
2390 CALL timestop(handle)
2391
2392 END SUBROUTINE cp_fm_qr_factorization
2393
2394! **************************************************************************************************
2395!> \brief computes the the solution to A*b=A_general using lu decomposition
2396!> \param matrix_a input matrix; will be overwritten
2397!> \param general_a contains the result
2398!> \author Florian Schiffmann
2399! **************************************************************************************************
2400 SUBROUTINE cp_fm_solve(matrix_a, general_a)
2401 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, general_a
2402
2403 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_solve'
2404
2405 INTEGER :: handle, info, n, nrhs
2406 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
2407 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, a_general
2408#if defined(__parallel)
2409 INTEGER, DIMENSION(9) :: desca, descb
2410#else
2411 INTEGER :: lda, ldb
2412#endif
2413
2414 CALL timeset(routinen, handle)
2415
2416 a => matrix_a%local_data
2417 a_general => general_a%local_data
2418 n = matrix_a%matrix_struct%nrow_global
2419 nrhs = general_a%matrix_struct%ncol_global
2420 ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
2421
2422#if defined(__parallel)
2423 desca(:) = matrix_a%matrix_struct%descriptor(:)
2424 descb(:) = general_a%matrix_struct%descriptor(:)
2425 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
2426 CALL pdgetrs("N", n, nrhs, a, 1, 1, desca, ipivot, a_general, &
2427 1, 1, descb, info)
2428
2429#else
2430 lda = SIZE(a, 1)
2431 ldb = SIZE(a_general, 1)
2432 CALL dgetrf(n, n, a, lda, ipivot, info)
2433 CALL dgetrs("N", n, nrhs, a, lda, ipivot, a_general, ldb, info)
2434
2435#endif
2436 ! info is allowed to be zero
2437 ! this does just signal a zero diagonal element
2438 DEALLOCATE (ipivot)
2439 CALL timestop(handle)
2440 END SUBROUTINE
2441
2442! **************************************************************************************************
2443!> \brief Convenience function. Computes the matrix multiplications needed
2444!> for the multiplication of complex matrices.
2445!> C = beta * C + alpha * ( A ** transa ) * ( B ** transb )
2446!> \param transa : 'N' -> normal 'T' -> transpose
2447!> alpha,beta :: can be 0.0_dp and 1.0_dp
2448!> \param transb ...
2449!> \param m ...
2450!> \param n ...
2451!> \param k ...
2452!> \param alpha ...
2453!> \param A_re m x k matrix ( ! for transa = 'N'), real part
2454!> \param A_im m x k matrix ( ! for transa = 'N'), imaginary part
2455!> \param B_re k x n matrix ( ! for transa = 'N'), real part
2456!> \param B_im k x n matrix ( ! for transa = 'N'), imaginary part
2457!> \param beta ...
2458!> \param C_re m x n matrix, real part
2459!> \param C_im m x n matrix, imaginary part
2460!> \param a_first_col ...
2461!> \param a_first_row ...
2462!> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
2463!> \param b_first_row ...
2464!> \param c_first_col ...
2465!> \param c_first_row ...
2466!> \author Samuel Andermatt
2467!> \note
2468!> C should have no overlap with A, B
2469! **************************************************************************************************
2470 SUBROUTINE cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, &
2471 C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, &
2472 c_first_row)
2473 CHARACTER(LEN=1), INTENT(IN) :: transa, transb
2474 INTEGER, INTENT(IN) :: m, n, k
2475 REAL(kind=dp), INTENT(IN) :: alpha
2476 TYPE(cp_fm_type), INTENT(IN) :: a_re, a_im, b_re, b_im
2477 REAL(kind=dp), INTENT(IN) :: beta
2478 TYPE(cp_fm_type), INTENT(IN) :: c_re, c_im
2479 INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, b_first_col, &
2480 b_first_row, c_first_col, c_first_row
2481
2482 CHARACTER(len=*), PARAMETER :: routinen = 'cp_complex_fm_gemm'
2483
2484 INTEGER :: handle
2485
2486 CALL timeset(routinen, handle)
2487
2488 CALL cp_fm_gemm(transa, transb, m, n, k, alpha, a_re, b_re, beta, c_re, &
2489 a_first_col=a_first_col, &
2490 a_first_row=a_first_row, &
2491 b_first_col=b_first_col, &
2492 b_first_row=b_first_row, &
2493 c_first_col=c_first_col, &
2494 c_first_row=c_first_row)
2495 CALL cp_fm_gemm(transa, transb, m, n, k, -alpha, a_im, b_im, 1.0_dp, c_re, &
2496 a_first_col=a_first_col, &
2497 a_first_row=a_first_row, &
2498 b_first_col=b_first_col, &
2499 b_first_row=b_first_row, &
2500 c_first_col=c_first_col, &
2501 c_first_row=c_first_row)
2502 CALL cp_fm_gemm(transa, transb, m, n, k, alpha, a_re, b_im, beta, c_im, &
2503 a_first_col=a_first_col, &
2504 a_first_row=a_first_row, &
2505 b_first_col=b_first_col, &
2506 b_first_row=b_first_row, &
2507 c_first_col=c_first_col, &
2508 c_first_row=c_first_row)
2509 CALL cp_fm_gemm(transa, transb, m, n, k, alpha, a_im, b_re, 1.0_dp, c_im, &
2510 a_first_col=a_first_col, &
2511 a_first_row=a_first_row, &
2512 b_first_col=b_first_col, &
2513 b_first_row=b_first_row, &
2514 c_first_col=c_first_col, &
2515 c_first_row=c_first_row)
2516
2517 CALL timestop(handle)
2518
2519 END SUBROUTINE cp_complex_fm_gemm
2520
2521! **************************************************************************************************
2522!> \brief inverts a matrix using LU decomposition
2523!> the input matrix will be overwritten
2524!> \param matrix : input a general square non-singular matrix, outputs its inverse
2525!> \param info_out : optional, if present outputs the info from (p)zgetri
2526!> \author Lianheng Tong
2527! **************************************************************************************************
2528 SUBROUTINE cp_fm_lu_invert(matrix, info_out)
2529 TYPE(cp_fm_type), INTENT(IN) :: matrix
2530 INTEGER, INTENT(OUT), OPTIONAL :: info_out
2531
2532 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_lu_invert'
2533
2534 INTEGER :: nrows_global, handle, info, lwork
2535 INTEGER, DIMENSION(:), ALLOCATABLE :: ipivot
2536 REAL(kind=dp), DIMENSION(:, :), POINTER :: mat
2537 REAL(kind=sp), DIMENSION(:, :), POINTER :: mat_sp
2538 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: work
2539 REAL(kind=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2540#if defined(__parallel)
2541 INTEGER :: liwork
2542 INTEGER, DIMENSION(9) :: desca
2543 INTEGER, DIMENSION(:), ALLOCATABLE :: iwork
2544#else
2545 INTEGER :: lda
2546#endif
2547
2548 CALL timeset(routinen, handle)
2549
2550 mat => matrix%local_data
2551 mat_sp => matrix%local_data_sp
2552 nrows_global = matrix%matrix_struct%nrow_global
2553 cpassert(nrows_global == matrix%matrix_struct%ncol_global)
2554 ALLOCATE (ipivot(nrows_global))
2555 ! do LU decomposition
2556#if defined(__parallel)
2557 desca = matrix%matrix_struct%descriptor
2558 IF (matrix%use_sp) THEN
2559 CALL psgetrf(nrows_global, nrows_global, &
2560 mat_sp, 1, 1, desca, ipivot, info)
2561 ELSE
2562 CALL pdgetrf(nrows_global, nrows_global, &
2563 mat, 1, 1, desca, ipivot, info)
2564 END IF
2565#else
2566 lda = SIZE(mat, 1)
2567 IF (matrix%use_sp) THEN
2568 CALL sgetrf(nrows_global, nrows_global, &
2569 mat_sp, lda, ipivot, info)
2570 ELSE
2571 CALL dgetrf(nrows_global, nrows_global, &
2572 mat, lda, ipivot, info)
2573 END IF
2574#endif
2575 IF (info /= 0) THEN
2576 CALL cp_abort(__location__, "LU decomposition has failed")
2577 END IF
2578 ! do inversion
2579 IF (matrix%use_sp) THEN
2580 ALLOCATE (work(1))
2581 ELSE
2582 ALLOCATE (work_sp(1))
2583 END IF
2584#if defined(__parallel)
2585 ALLOCATE (iwork(1))
2586 IF (matrix%use_sp) THEN
2587 CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2588 ipivot, work_sp, -1, iwork, -1, info)
2589 lwork = int(work_sp(1))
2590 DEALLOCATE (work_sp)
2591 ALLOCATE (work_sp(lwork))
2592 ELSE
2593 CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2594 ipivot, work, -1, iwork, -1, info)
2595 lwork = int(work(1))
2596 DEALLOCATE (work)
2597 ALLOCATE (work(lwork))
2598 END IF
2599 liwork = int(iwork(1))
2600 DEALLOCATE (iwork)
2601 ALLOCATE (iwork(liwork))
2602 IF (matrix%use_sp) THEN
2603 CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2604 ipivot, work_sp, lwork, iwork, liwork, info)
2605 ELSE
2606 CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2607 ipivot, work, lwork, iwork, liwork, info)
2608 END IF
2609 DEALLOCATE (iwork)
2610#else
2611 IF (matrix%use_sp) THEN
2612 CALL sgetri(nrows_global, mat_sp, lda, &
2613 ipivot, work_sp, -1, info)
2614 lwork = int(work_sp(1))
2615 DEALLOCATE (work_sp)
2616 ALLOCATE (work_sp(lwork))
2617 CALL sgetri(nrows_global, mat_sp, lda, &
2618 ipivot, work_sp, lwork, info)
2619 ELSE
2620 CALL dgetri(nrows_global, mat, lda, &
2621 ipivot, work, -1, info)
2622 lwork = int(work(1))
2623 DEALLOCATE (work)
2624 ALLOCATE (work(lwork))
2625 CALL dgetri(nrows_global, mat, lda, &
2626 ipivot, work, lwork, info)
2627 END IF
2628#endif
2629 IF (matrix%use_sp) THEN
2630 DEALLOCATE (work_sp)
2631 ELSE
2632 DEALLOCATE (work)
2633 END IF
2634 DEALLOCATE (ipivot)
2635
2636 IF (PRESENT(info_out)) THEN
2637 info_out = info
2638 ELSE
2639 IF (info /= 0) &
2640 CALL cp_abort(__location__, "LU inversion has failed")
2641 END IF
2642
2643 CALL timestop(handle)
2644
2645 END SUBROUTINE cp_fm_lu_invert
2646
2647! **************************************************************************************************
2648!> \brief norm of matrix using (p)dlange
2649!> \param matrix : input a general matrix
2650!> \param mode : 'M' max abs element value,
2651!> '1' or 'O' one norm, i.e. maximum column sum
2652!> 'I' infinity norm, i.e. maximum row sum
2653!> 'F' or 'E' Frobenius norm, i.e. sqrt of sum of all squares of elements
2654!> \return : the norm according to mode
2655!> \author Lianheng Tong
2656! **************************************************************************************************
2657 FUNCTION cp_fm_norm(matrix, mode) RESULT(res)
2658 TYPE(cp_fm_type), INTENT(IN) :: matrix
2659 CHARACTER, INTENT(IN) :: mode
2660 REAL(kind=dp) :: res
2661
2662 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_norm'
2663
2664 INTEGER :: nrows, ncols, handle, lwork, nrows_local, ncols_local
2665 REAL(kind=sp) :: res_sp
2666 REAL(kind=dp), DIMENSION(:, :), POINTER :: aa
2667 REAL(kind=sp), DIMENSION(:, :), POINTER :: aa_sp
2668 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: work
2669 REAL(kind=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2670#if defined(__parallel)
2671 INTEGER, DIMENSION(9) :: desca
2672#else
2673 INTEGER :: lda
2674#endif
2675
2676 CALL timeset(routinen, handle)
2677
2678 CALL cp_fm_get_info(matrix=matrix, &
2679 nrow_global=nrows, &
2680 ncol_global=ncols, &
2681 nrow_local=nrows_local, &
2682 ncol_local=ncols_local)
2683 aa => matrix%local_data
2684 aa_sp => matrix%local_data_sp
2685
2686#if defined(__parallel)
2687 desca = matrix%matrix_struct%descriptor
2688 SELECT CASE (mode)
2689 CASE ('M', 'm')
2690 lwork = 1
2691 CASE ('1', 'O', 'o')
2692 lwork = ncols_local
2693 CASE ('I', 'i')
2694 lwork = nrows_local
2695 CASE ('F', 'f', 'E', 'e')
2696 lwork = 1
2697 CASE DEFAULT
2698 cpabort("mode input is not valid")
2699 END SELECT
2700 IF (matrix%use_sp) THEN
2701 ALLOCATE (work_sp(lwork))
2702 res_sp = pslange(mode, nrows, ncols, aa_sp, 1, 1, desca, work_sp)
2703 DEALLOCATE (work_sp)
2704 res = real(res_sp, kind=dp)
2705 ELSE
2706 ALLOCATE (work(lwork))
2707 res = pdlange(mode, nrows, ncols, aa, 1, 1, desca, work)
2708 DEALLOCATE (work)
2709 END IF
2710#else
2711 SELECT CASE (mode)
2712 CASE ('M', 'm')
2713 lwork = 1
2714 CASE ('1', 'O', 'o')
2715 lwork = 1
2716 CASE ('I', 'i')
2717 lwork = nrows
2718 CASE ('F', 'f', 'E', 'e')
2719 lwork = 1
2720 CASE DEFAULT
2721 cpabort("mode input is not valid")
2722 END SELECT
2723 IF (matrix%use_sp) THEN
2724 ALLOCATE (work_sp(lwork))
2725 lda = SIZE(aa_sp, 1)
2726 res_sp = slange(mode, nrows, ncols, aa_sp, lda, work_sp)
2727 DEALLOCATE (work_sp)
2728 res = real(res_sp, kind=dp)
2729 ELSE
2730 ALLOCATE (work(lwork))
2731 lda = SIZE(aa, 1)
2732 res = dlange(mode, nrows, ncols, aa, lda, work)
2733 DEALLOCATE (work)
2734 END IF
2735#endif
2736
2737 CALL timestop(handle)
2738
2739 END FUNCTION cp_fm_norm
2740
2741! **************************************************************************************************
2742!> \brief trace of a matrix using pdlatra
2743!> \param matrix : input a square matrix
2744!> \return : the trace
2745!> \author Lianheng Tong
2746! **************************************************************************************************
2747 FUNCTION cp_fm_latra(matrix) RESULT(res)
2748 TYPE(cp_fm_type), INTENT(IN) :: matrix
2749 REAL(kind=dp) :: res
2750
2751 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_latra'
2752
2753 INTEGER :: nrows, ncols, handle
2754 REAL(kind=sp) :: res_sp
2755 REAL(kind=dp), DIMENSION(:, :), POINTER :: aa
2756 REAL(kind=sp), DIMENSION(:, :), POINTER :: aa_sp
2757#if defined(__parallel)
2758 INTEGER, DIMENSION(9) :: desca
2759#else
2760 INTEGER :: ii
2761#endif
2762
2763 CALL timeset(routinen, handle)
2764
2765 nrows = matrix%matrix_struct%nrow_global
2766 ncols = matrix%matrix_struct%ncol_global
2767 cpassert(nrows == ncols)
2768 aa => matrix%local_data
2769 aa_sp => matrix%local_data_sp
2770
2771#if defined(__parallel)
2772 desca = matrix%matrix_struct%descriptor
2773 IF (matrix%use_sp) THEN
2774 res_sp = pslatra(nrows, aa_sp, 1, 1, desca)
2775 res = real(res_sp, kind=dp)
2776 ELSE
2777 res = pdlatra(nrows, aa, 1, 1, desca)
2778 END IF
2779#else
2780 IF (matrix%use_sp) THEN
2781 res_sp = 0.0_sp
2782 DO ii = 1, nrows
2783 res_sp = res_sp + aa_sp(ii, ii)
2784 END DO
2785 res = real(res_sp, kind=dp)
2786 ELSE
2787 res = 0.0_dp
2788 DO ii = 1, nrows
2789 res = res + aa(ii, ii)
2790 END DO
2791 END IF
2792#endif
2793
2794 CALL timestop(handle)
2795
2796 END FUNCTION cp_fm_latra
2797
2798! **************************************************************************************************
2799!> \brief compute a QR factorization with column pivoting of a M-by-N distributed matrix
2800!> sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
2801!> \param matrix : input M-by-N distributed matrix sub( A ) which is to be factored
2802!> \param tau : scalar factors TAU of the elementary reflectors. TAU is tied to the distributed matrix A
2803!> \param nrow ...
2804!> \param ncol ...
2805!> \param first_row ...
2806!> \param first_col ...
2807!> \author MI
2808! **************************************************************************************************
2809 SUBROUTINE cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
2810
2811 TYPE(cp_fm_type), INTENT(IN) :: matrix
2812 REAL(kind=dp), DIMENSION(:), POINTER :: tau
2813 INTEGER, INTENT(IN) :: nrow, ncol, first_row, first_col
2814
2815 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_pdgeqpf'
2816
2817 INTEGER :: handle
2818 INTEGER :: info, lwork
2819 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipiv
2820 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2821 REAL(kind=dp), DIMENSION(:), POINTER :: work
2822#if defined(__parallel)
2823 INTEGER, DIMENSION(9) :: descc
2824#else
2825 INTEGER :: lda
2826#endif
2827
2828 CALL timeset(routinen, handle)
2829
2830 a => matrix%local_data
2831 lwork = -1
2832 ALLOCATE (work(2*nrow))
2833 ALLOCATE (ipiv(ncol))
2834 info = 0
2835
2836#if defined(__parallel)
2837 descc(:) = matrix%matrix_struct%descriptor(:)
2838 ! Call SCALAPACK routine to get optimal work dimension
2839 CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2840 lwork = int(work(1))
2841 DEALLOCATE (work)
2842 ALLOCATE (work(lwork))
2843 tau = 0.0_dp
2844 ipiv = 0
2845
2846 ! Call SCALAPACK routine to get QR decomposition of CTs
2847 CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2848#else
2849 cpassert(first_row == 1 .AND. first_col == 1)
2850 lda = SIZE(a, 1)
2851 CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2852 lwork = int(work(1))
2853 DEALLOCATE (work)
2854 ALLOCATE (work(lwork))
2855 tau = 0.0_dp
2856 ipiv = 0
2857 CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2858#endif
2859 cpassert(info == 0)
2860
2861 DEALLOCATE (work)
2862 DEALLOCATE (ipiv)
2863
2864 CALL timestop(handle)
2865
2866 END SUBROUTINE cp_fm_pdgeqpf
2867
2868! **************************************************************************************************
2869!> \brief generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1)
2870!> with orthonormal columns, which is defined as the first N columns of a product of K
2871!> elementary reflectors of order M
2872!> \param matrix : On entry, the j-th column must contain the vector which defines the elementary reflector
2873!> as returned from PDGEQRF
2874!> On exit it contains the M-by-N distributed matrix Q
2875!> \param tau : contains the scalar factors TAU of elementary reflectors as returned by PDGEQRF
2876!> \param nrow ...
2877!> \param first_row ...
2878!> \param first_col ...
2879!> \author MI
2880! **************************************************************************************************
2881 SUBROUTINE cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
2882
2883 TYPE(cp_fm_type), INTENT(IN) :: matrix
2884 REAL(kind=dp), DIMENSION(:), POINTER :: tau
2885 INTEGER, INTENT(IN) :: nrow, first_row, first_col
2886
2887 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_pdorgqr'
2888
2889 INTEGER :: handle
2890 INTEGER :: info, lwork
2891 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2892 REAL(kind=dp), DIMENSION(:), POINTER :: work
2893#if defined(__parallel)
2894 INTEGER, DIMENSION(9) :: descc
2895#else
2896 INTEGER :: lda
2897#endif
2898
2899 CALL timeset(routinen, handle)
2900
2901 a => matrix%local_data
2902 lwork = -1
2903 ALLOCATE (work(2*nrow))
2904 info = 0
2905
2906#if defined(__parallel)
2907 descc(:) = matrix%matrix_struct%descriptor(:)
2908
2909 CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2910 cpassert(info == 0)
2911 lwork = int(work(1))
2912 DEALLOCATE (work)
2913 ALLOCATE (work(lwork))
2914
2915 ! Call SCALAPACK routine to get Q
2916 CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2917#else
2918 cpassert(first_row == 1 .AND. first_col == 1)
2919 lda = SIZE(a, 1)
2920 CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2921 lwork = int(work(1))
2922 DEALLOCATE (work)
2923 ALLOCATE (work(lwork))
2924 CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2925#endif
2926 cpassert(info == 0)
2927
2928 DEALLOCATE (work)
2929 CALL timestop(handle)
2930
2931 END SUBROUTINE cp_fm_pdorgqr
2932
2933! **************************************************************************************************
2934!> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
2935!> \param matrix ...
2936!> \param irow ...
2937!> \param jrow ...
2938!> \param cs cosine of the rotation angle
2939!> \param sn sinus of the rotation angle
2940!> \author Ole Schuett
2941! **************************************************************************************************
2942 SUBROUTINE cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
2943 TYPE(cp_fm_type), INTENT(IN) :: matrix
2944 INTEGER, INTENT(IN) :: irow, jrow
2945 REAL(dp), INTENT(IN) :: cs, sn
2946
2947 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_rot_rows'
2948 INTEGER :: handle, ncol
2949
2950#if defined(__parallel)
2951 INTEGER :: info, lwork
2952 INTEGER, DIMENSION(9) :: desc
2953 REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2954#endif
2955 CALL timeset(routinen, handle)
2956 CALL cp_fm_get_info(matrix, ncol_global=ncol)
2957#if defined(__parallel)
2958 IF (1 /= matrix%matrix_struct%context%n_pid) THEN
2959 lwork = 2*ncol + 1
2960 ALLOCATE (work(lwork))
2961 desc(:) = matrix%matrix_struct%descriptor(:)
2962 CALL pdrot(ncol, &
2963 matrix%local_data(1, 1), irow, 1, desc, ncol, &
2964 matrix%local_data(1, 1), jrow, 1, desc, ncol, &
2965 cs, sn, work, lwork, info)
2966 cpassert(info == 0)
2967 DEALLOCATE (work)
2968 ELSE
2969#endif
2970 CALL drot(ncol, matrix%local_data(irow, 1), ncol, matrix%local_data(jrow, 1), ncol, cs, sn)
2971#if defined(__parallel)
2972 END IF
2973#endif
2974 CALL timestop(handle)
2975 END SUBROUTINE cp_fm_rot_rows
2976
2977! **************************************************************************************************
2978!> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
2979!> \param matrix ...
2980!> \param icol ...
2981!> \param jcol ...
2982!> \param cs cosine of the rotation angle
2983!> \param sn sinus of the rotation angle
2984!> \author Ole Schuett
2985! **************************************************************************************************
2986 SUBROUTINE cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
2987 TYPE(cp_fm_type), INTENT(IN) :: matrix
2988 INTEGER, INTENT(IN) :: icol, jcol
2989 REAL(dp), INTENT(IN) :: cs, sn
2990
2991 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_rot_cols'
2992 INTEGER :: handle, nrow
2993
2994#if defined(__parallel)
2995 INTEGER :: info, lwork
2996 INTEGER, DIMENSION(9) :: desc
2997 REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2998#endif
2999 CALL timeset(routinen, handle)
3000 CALL cp_fm_get_info(matrix, nrow_global=nrow)
3001#if defined(__parallel)
3002 IF (1 /= matrix%matrix_struct%context%n_pid) THEN
3003 lwork = 2*nrow + 1
3004 ALLOCATE (work(lwork))
3005 desc(:) = matrix%matrix_struct%descriptor(:)
3006 CALL pdrot(nrow, &
3007 matrix%local_data(1, 1), 1, icol, desc, 1, &
3008 matrix%local_data(1, 1), 1, jcol, desc, 1, &
3009 cs, sn, work, lwork, info)
3010 cpassert(info == 0)
3011 DEALLOCATE (work)
3012 ELSE
3013#endif
3014 CALL drot(nrow, matrix%local_data(1, icol), 1, matrix%local_data(1, jcol), 1, cs, sn)
3015#if defined(__parallel)
3016 END IF
3017#endif
3018 CALL timestop(handle)
3019 END SUBROUTINE cp_fm_rot_cols
3020
3021! **************************************************************************************************
3022!> \brief Orthonormalizes selected rows and columns of a full matrix, matrix_a
3023!> \param matrix_a ...
3024!> \param B ...
3025!> \param nrows number of rows of matrix_a, optional, defaults to size(matrix_a,1)
3026!> \param ncols number of columns of matrix_a, optional, defaults to size(matrix_a, 2)
3027!> \param start_row starting index of rows, optional, defaults to 1
3028!> \param start_col starting index of columns, optional, defaults to 1
3029!> \param do_norm ...
3030!> \param do_print ...
3031! **************************************************************************************************
3032 SUBROUTINE cp_fm_gram_schmidt_orthonorm(matrix_a, B, nrows, ncols, start_row, start_col, &
3033 do_norm, do_print)
3034
3035 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
3036 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: b
3037 INTEGER, INTENT(IN), OPTIONAL :: nrows, ncols, start_row, start_col
3038 LOGICAL, INTENT(IN), OPTIONAL :: do_norm, do_print
3039
3040 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_Gram_Schmidt_orthonorm'
3041
3042 INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
3043 j_col, ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, &
3044 start_col_local, start_row_global, start_row_local, this_col, unit_nr
3045 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
3046 LOGICAL :: my_do_norm, my_do_print
3047 REAL(kind=dp) :: norm
3048 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
3049
3050 CALL timeset(routinen, handle)
3051
3052 my_do_norm = .true.
3053 IF (PRESENT(do_norm)) my_do_norm = do_norm
3054
3055 my_do_print = .false.
3056 IF (PRESENT(do_print) .AND. (my_do_norm)) my_do_print = do_print
3057
3058 unit_nr = -1
3059 IF (my_do_print) THEN
3061 IF (unit_nr < 1) my_do_print = .false.
3062 END IF
3063
3064 IF (SIZE(b) /= 0) THEN
3065 IF (PRESENT(nrows)) THEN
3066 nrow_global = nrows
3067 ELSE
3068 nrow_global = SIZE(b, 1)
3069 END IF
3070
3071 IF (PRESENT(ncols)) THEN
3072 ncol_global = ncols
3073 ELSE
3074 ncol_global = SIZE(b, 2)
3075 END IF
3076
3077 IF (PRESENT(start_row)) THEN
3078 start_row_global = start_row
3079 ELSE
3080 start_row_global = 1
3081 END IF
3082
3083 IF (PRESENT(start_col)) THEN
3084 start_col_global = start_col
3085 ELSE
3086 start_col_global = 1
3087 END IF
3088
3089 end_row_global = start_row_global + nrow_global - 1
3090 end_col_global = start_col_global + ncol_global - 1
3091
3092 CALL cp_fm_get_info(matrix=matrix_a, &
3093 nrow_global=nrow_global, ncol_global=ncol_global, &
3094 nrow_local=nrow_local, ncol_local=ncol_local, &
3095 row_indices=row_indices, col_indices=col_indices)
3096 IF (end_row_global > nrow_global) THEN
3097 end_row_global = nrow_global
3098 END IF
3099 IF (end_col_global > ncol_global) THEN
3100 end_col_global = ncol_global
3101 END IF
3102
3103 ! find out row/column indices of locally stored matrix elements that
3104 ! needs to be copied.
3105 ! Arrays row_indices and col_indices are assumed to be sorted in
3106 ! ascending order
3107 DO start_row_local = 1, nrow_local
3108 IF (row_indices(start_row_local) >= start_row_global) EXIT
3109 END DO
3110
3111 DO end_row_local = start_row_local, nrow_local
3112 IF (row_indices(end_row_local) > end_row_global) EXIT
3113 END DO
3114 end_row_local = end_row_local - 1
3115
3116 DO start_col_local = 1, ncol_local
3117 IF (col_indices(start_col_local) >= start_col_global) EXIT
3118 END DO
3119
3120 DO end_col_local = start_col_local, ncol_local
3121 IF (col_indices(end_col_local) > end_col_global) EXIT
3122 END DO
3123 end_col_local = end_col_local - 1
3124
3125 a => matrix_a%local_data
3126
3127 this_col = col_indices(start_col_local) - start_col_global + 1
3128
3129 b(:, this_col) = a(:, start_col_local)
3130
3131 IF (my_do_norm) THEN
3132 norm = sqrt(accurate_dot_product(b(:, this_col), b(:, this_col)))
3133 b(:, this_col) = b(:, this_col)/norm
3134 IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
3135 END IF
3136
3137 DO i = start_col_local + 1, end_col_local
3138 this_col = col_indices(i) - start_col_global + 1
3139 b(:, this_col) = a(:, i)
3140 DO j = start_col_local, i - 1
3141 j_col = col_indices(j) - start_col_global + 1
3142 b(:, this_col) = b(:, this_col) - &
3143 accurate_dot_product(b(:, j_col), b(:, this_col))* &
3144 b(:, j_col)/accurate_dot_product(b(:, j_col), b(:, j_col))
3145 END DO
3146
3147 IF (my_do_norm) THEN
3148 norm = sqrt(accurate_dot_product(b(:, this_col), b(:, this_col)))
3149 b(:, this_col) = b(:, this_col)/norm
3150 IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
3151 END IF
3152
3153 END DO
3154 CALL matrix_a%matrix_struct%para_env%sum(b)
3155 END IF
3156
3157 CALL timestop(handle)
3158
3159 END SUBROUTINE cp_fm_gram_schmidt_orthonorm
3160
3161! **************************************************************************************************
3162!> \brief Cholesky decomposition
3163!> \param fm_matrix ...
3164!> \param n ...
3165!> \param uplo triangular format; defaults to 'U'
3166! **************************************************************************************************
3167 SUBROUTINE cp_fm_potrf(fm_matrix, n, uplo)
3168 TYPE(cp_fm_type) :: fm_matrix
3169 INTEGER, INTENT(IN) :: n
3170 CHARACTER, INTENT(IN), OPTIONAL :: uplo
3171
3172 CHARACTER :: myuplo
3173 INTEGER :: info
3174 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
3175 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
3176#if defined(__parallel)
3177 INTEGER, DIMENSION(9) :: desca
3178#endif
3179
3180 myuplo = 'U'
3181 IF (PRESENT(uplo)) myuplo = uplo
3182
3183 a => fm_matrix%local_data
3184 a_sp => fm_matrix%local_data_sp
3185#if defined(__parallel)
3186 desca(:) = fm_matrix%matrix_struct%descriptor(:)
3187 IF (fm_matrix%use_sp) THEN
3188 CALL pspotrf(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
3189 ELSE
3190 CALL pdpotrf(myuplo, n, a(1, 1), 1, 1, desca, info)
3191 END IF
3192#else
3193 IF (fm_matrix%use_sp) THEN
3194 CALL spotrf(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
3195 ELSE
3196 CALL dpotrf(myuplo, n, a(1, 1), SIZE(a, 1), info)
3197 END IF
3198#endif
3199 IF (info /= 0) &
3200 cpabort("Cholesky decomposition failed. Matrix ill-conditioned?")
3201
3202 END SUBROUTINE cp_fm_potrf
3203
3204! **************************************************************************************************
3205!> \brief Invert trianguar matrix
3206!> \param fm_matrix the matrix to invert (triangular matrix according to uplo)
3207!> \param n size of the matrix to invert
3208!> \param uplo triangular format; defaults to 'U'
3209! **************************************************************************************************
3210 SUBROUTINE cp_fm_potri(fm_matrix, n, uplo)
3211 TYPE(cp_fm_type) :: fm_matrix
3212 INTEGER, INTENT(IN) :: n
3213 CHARACTER, INTENT(IN), OPTIONAL :: uplo
3214
3215 CHARACTER :: myuplo
3216 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
3217 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
3218 INTEGER :: info
3219#if defined(__parallel)
3220 INTEGER, DIMENSION(9) :: desca
3221#endif
3222
3223 myuplo = 'U'
3224 IF (PRESENT(uplo)) myuplo = uplo
3225
3226 a => fm_matrix%local_data
3227 a_sp => fm_matrix%local_data_sp
3228#if defined(__parallel)
3229 desca(:) = fm_matrix%matrix_struct%descriptor(:)
3230 IF (fm_matrix%use_sp) THEN
3231 CALL pspotri(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
3232 ELSE
3233 CALL pdpotri(myuplo, n, a(1, 1), 1, 1, desca, info)
3234 END IF
3235#else
3236 IF (fm_matrix%use_sp) THEN
3237 CALL spotri(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
3238 ELSE
3239 CALL dpotri(myuplo, n, a(1, 1), SIZE(a, 1), info)
3240 END IF
3241#endif
3242 cpassert(info == 0)
3243 END SUBROUTINE cp_fm_potri
3244
3245! **************************************************************************************************
3246!> \brief Calculates
3247!> yv = alpha*amat*xv + beta*yv
3248!> where amat: fm matrix
3249!> xv : vector replicated
3250!> yv : vector replicated
3251!> Defaults: alpha = 1, beta = 0
3252! **************************************************************************************************
3253 SUBROUTINE cp_fm_matvec(amat, xv, yv, alpha, beta)
3254 TYPE(cp_fm_type), INTENT(IN) :: amat
3255 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: xv
3256 REAL(kind=dp), DIMENSION(:), INTENT(INOUT) :: yv
3257 REAL(kind=dp), OPTIONAL, INTENT(IN) :: alpha, beta
3258
3259 INTEGER :: na, nc, nx, ny
3260 REAL(kind=dp) :: aval, bval
3261#if defined(__parallel)
3262 INTEGER :: nrl, ncl, ic, ir
3263 INTEGER, DIMENSION(:), POINTER :: rind, cind
3264 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: xvl, yvl, yvm
3265#endif
3266
3267 IF (amat%use_sp) THEN
3268 cpabort("cp_fm_matvec: SP option not available")
3269 END IF
3270 aval = 1.0_dp
3271 IF (PRESENT(alpha)) aval = alpha
3272 bval = 0.0_dp
3273 IF (PRESENT(beta)) bval = beta
3274
3275 CALL cp_fm_get_info(amat, nrow_global=na, ncol_global=nc)
3276 nx = SIZE(xv)
3277 ny = SIZE(yv)
3278 IF ((nx /= ny) .OR. (nc /= nx)) THEN
3279 cpabort("cp_fm_matvec: incompatible dimensions")
3280 END IF
3281#if defined(__parallel)
3282 CALL cp_fm_get_info(amat, nrow_local=nrl, ncol_local=ncl, &
3283 row_indices=rind, col_indices=cind)
3284 ALLOCATE (xvl(ncl), yvl(nrl), yvm(ny))
3285 DO ic = 1, ncl
3286 xvl(ic) = xv(cind(ic))
3287 END DO
3288 yvl(1:nrl) = matmul(amat%local_data, xvl(1:ncl))
3289 yvm = 0.0_dp
3290 DO ir = 1, nrl
3291 yvm(rind(ir)) = yvl(ir)
3292 END DO
3293 CALL amat%matrix_struct%para_env%sum(yvm)
3294 IF (bval == 0.0_dp) THEN
3295 yv = aval*yvm
3296 ELSE
3297 yv = bval*yv + aval*yvm
3298 END IF
3299#else
3300 IF (bval == 0.0_dp) THEN
3301 yv = aval*matmul(amat%local_data, xv)
3302 ELSE
3303 yv = bval*yv + aval*matmul(amat%local_data, xv)
3304 END IF
3305#endif
3306
3307 END SUBROUTINE cp_fm_matvec
3308
3309END MODULE cp_fm_basic_linalg
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
methods related to the blacs parallel environment
Basic linear algebra operations for full matrices.
subroutine, public cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
subroutine, public cp_fm_row_scale(matrixa, scaling)
scales row i of matrix a with scaling(i)
subroutine, public cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
subroutine, public cp_fm_column_scale(matrixa, scaling)
scales column i of matrix a with scaling(i)
subroutine, public cp_fm_add_columns(msource, mtarget, ncol, alpha, source_start, target_start)
Add (and scale) a subset of columns of a fm to a fm b = alpha*a + b.
subroutine, public cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
subroutine, public cp_fm_solve(matrix_a, general_a)
computes the the solution to A*b=A_general using lu decomposition
subroutine, public cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
compute a QR factorization with column pivoting of a M-by-N distributed matrix sub( A ) = A(IA:IA+M-1...
real(kind=dp) function, public cp_fm_frobenius_norm(matrix_a)
computes the Frobenius norm of matrix_a
subroutine, public cp_fm_det(matrix_a, det_a)
Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix.
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col, uplo)
performs a QR factorization of the input rectangular matrix A or of a submatrix of A the computed tri...
subroutine, public cp_fm_gram_schmidt_orthonorm(matrix_a, b, nrows, ncols, start_row, start_col, do_norm, do_print)
Orthonormalizes selected rows and columns of a full matrix, matrix_a.
subroutine, public cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
performs a rank-k update of a symmetric matrix_c matrix_c = beta * matrix_c + alpha * matrix_a * tran...
subroutine, public cp_fm_potrf(fm_matrix, n, uplo)
Cholesky decomposition.
subroutine, public cp_fm_potri(fm_matrix, n, uplo)
Invert trianguar matrix.
subroutine, public cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
interface to BLACS geadd: matrix_b = beta*matrix_b + alpha*opt(matrix_a) where opt(matrix_a) can be e...
subroutine, public cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
computes the schur product of two matrices c_ij = a_ij * b_ij
real(kind=dp) function, public cp_fm_norm(matrix, mode)
norm of matrix using (p)dlange
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_uplo_to_full(matrix, work, uplo)
given a triangular matrix according to uplo, computes the corresponding full matrix
subroutine, public cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix.
subroutine, public cp_complex_fm_gemm(transa, transb, m, n, k, alpha, a_re, a_im, b_re, b_im, beta, c_re, c_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
Convenience function. Computes the matrix multiplications needed for the multiplication of complex ma...
subroutine, public cp_fm_scale(alpha, matrix_a)
scales a matrix matrix_a = alpha * matrix_b
subroutine, public cp_fm_triangular_invert(matrix_a, uplo_tr)
inverts a triangular matrix
subroutine, public cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
computes matrix_c = beta * matrix_c + alpha * matrix_a * matrix_b computes matrix_c = beta * matrix_c...
subroutine, public cp_fm_matvec(amat, xv, yv, alpha, beta)
Calculates yv = alpha*amat*xv + beta*yv where amat: fm matrix xv : vector replicated yv : vector repl...
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
subroutine, public cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1) with orthonormal column...
represent the structure of a full matrix
logical function, public cp_fm_struct_equivalent(fmstruct1, fmstruct2)
returns true if the two matrix structures are equivalent, false otherwise.
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_get_diag(matrix, diag)
returns the diagonal elements of a fm
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp, nrow, ncol, set_zero)
creates a new full matrix with the given structure
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_set_element(matrix, irow_global, icol_global, alpha)
sets an element of a matrix
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Definition kahan_sum.F:29
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public int_8
Definition kinds.F:54
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public sp
Definition kinds.F:33
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
subroutine, public m_memory(mem)
Returns the total amount of memory [bytes] in use, if known, zero otherwise.
Definition machine.F:452
Collection of simple mathematical functions and subroutines.
Definition mathlib.F:15
subroutine, public get_pseudo_inverse_svd(a, a_pinverse, rskip, determinant, sval)
returns the pseudoinverse of a real, square matrix using singular value decomposition
Definition mathlib.F:946
subroutine, public diag(n, a, d, v)
Diagonalize matrix a. The eigenvalues are returned in vector d and the eigenvectors are returned in m...
Definition mathlib.F:1504
Interface to the message passing library MPI.
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
just to build arrays of pointers to matrices
represent a full matrix