(git:07c9450)
Loading...
Searching...
No Matches
cp_fm_basic_linalg.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Basic linear algebra operations for full matrices.
10!> \par History
11!> 08.2002 split out of qs_blacs [fawzi]
12!> \author Fawzi Mohamed
13! **************************************************************************************************
17 USE cp_fm_types, ONLY: &
25 USE kinds, ONLY: dp, &
26 int_8, &
27 sp
28 USE machine, ONLY: m_memory
32#include "../base/base_uses.f90"
33
34 IMPLICIT NONE
35 PRIVATE
36
37 LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .true.
38 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_basic_linalg'
39
40 PUBLIC :: cp_fm_scale, & ! scale a matrix
41 cp_fm_scale_and_add, & ! scale and add two matrices
42 cp_fm_geadd, & ! general addition
43 cp_fm_add_columns, & ! daxpy type column add
44 cp_fm_column_scale, & ! scale columns of a matrix
45 cp_fm_row_scale, & ! scale rows of a matrix
46 cp_fm_trace, & ! trace of the transpose(A)*B
47 cp_fm_contracted_trace, & ! sum_{i,...,k} Tr [A(i,...,k)^T * B(i,...,k)]
48 cp_fm_norm, & ! different norms of A
49 cp_fm_schur_product, & ! schur product
50 cp_fm_transpose, & ! transpose a matrix
51 cp_fm_uplo_to_full, & ! symmetrise a triangular matrix
52 cp_fm_syrk, & ! rank k update
53 cp_fm_triangular_multiply, & ! triangular matrix multiply / solve
54 cp_fm_symm, & ! multiply a symmetric with a non-symmetric matrix
55 cp_fm_gemm, & ! multiply two matrices
56 cp_complex_fm_gemm, & ! multiply two complex matrices, represented by non_complex fm matrices
57 cp_fm_invert, & ! computes the inverse and determinant
58 cp_fm_frobenius_norm, & ! frobenius norm
59 cp_fm_triangular_invert, & ! compute the reciprocal of a triangular matrix
60 cp_fm_qr_factorization, & ! compute the QR factorization of a rectangular matrix
61 cp_fm_solve, & ! solves the equation A*B=C A and C are input
62 cp_fm_pdgeqpf, & ! compute a QR factorization with column pivoting of a M-by-N distributed matrix
63 cp_fm_pdorgqr, & ! generates an M-by-N as first N columns of a product of K elementary reflectors
64 cp_fm_potrf, & ! Cholesky decomposition
65 cp_fm_potri, & ! Invert triangular matrix
66 cp_fm_rot_rows, & ! rotates two rows
67 cp_fm_rot_cols, & ! rotates two columns
68 cp_fm_gram_schmidt_orthonorm, & ! Gram-Schmidt orthonormalization of columns of a full matrix, &
69 cp_fm_det, & ! determinant of a real matrix with correct sign
70 cp_fm_matvec ! matrix-vector multiplication (vector replicated)
71
72 REAL(KIND=dp), EXTERNAL :: dlange, pdlange, pdlatra
73 REAL(KIND=sp), EXTERNAL :: slange, pslange, pslatra
74
75 INTERFACE cp_fm_trace
76 MODULE PROCEDURE cp_fm_trace_a0b0t0
77 MODULE PROCEDURE cp_fm_trace_a1b0t1_a
78 MODULE PROCEDURE cp_fm_trace_a1b0t1_p
79 MODULE PROCEDURE cp_fm_trace_a1b1t1_aa
80 MODULE PROCEDURE cp_fm_trace_a1b1t1_ap
81 MODULE PROCEDURE cp_fm_trace_a1b1t1_pa
82 MODULE PROCEDURE cp_fm_trace_a1b1t1_pp
83 END INTERFACE cp_fm_trace
84
86 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_aa
87 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_ap
88 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pa
89 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pp
90 END INTERFACE cp_fm_contracted_trace
91CONTAINS
92
93! **************************************************************************************************
94!> \brief Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix
95!> \author A. Sinyavskiy (andrey.sinyavskiy@chem.uzh.ch)
96! **************************************************************************************************
97 SUBROUTINE cp_fm_det(matrix_a, det_a)
98
99 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
100 REAL(kind=dp), INTENT(OUT) :: det_a
101 REAL(kind=dp) :: determinant
102 TYPE(cp_fm_type) :: matrix_lu
103 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
104 INTEGER :: n, i, info, p
105 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
106 REAL(kind=dp), DIMENSION(:), POINTER :: diag
107
108#if defined(__parallel)
109 INTEGER :: myprow, nprow, npcol, nrow_local, nrow_block, irow_local
110 INTEGER, DIMENSION(9) :: desca
111#endif
112
113 CALL cp_fm_create(matrix=matrix_lu, &
114 matrix_struct=matrix_a%matrix_struct, &
115 name="A_lu"//trim(adjustl(cp_to_string(1)))//"MATRIX")
116 CALL cp_fm_to_fm(matrix_a, matrix_lu)
117
118 a => matrix_lu%local_data
119 n = matrix_lu%matrix_struct%nrow_global
120 ALLOCATE (ipivot(n))
121 ipivot(:) = 0
122 p = 0
123 ALLOCATE (diag(n))
124 diag(:) = 0.0_dp
125#if defined(__parallel)
126 ! Use LU decomposition
127 desca(:) = matrix_lu%matrix_struct%descriptor(:)
128 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
129 CALL cp_fm_get_diag(matrix_lu, diag)
130 determinant = product(diag)
131 myprow = matrix_lu%matrix_struct%context%mepos(1)
132 nprow = matrix_lu%matrix_struct%context%num_pe(1)
133 npcol = matrix_lu%matrix_struct%context%num_pe(2)
134 nrow_local = matrix_lu%matrix_struct%nrow_locals(myprow)
135 nrow_block = matrix_lu%matrix_struct%nrow_block
136 DO irow_local = 1, nrow_local
137 i = matrix_lu%matrix_struct%row_indices(irow_local)
138 IF (ipivot(irow_local) /= i) p = p + 1
139 END DO
140 CALL matrix_lu%matrix_struct%para_env%sum(p)
141 ! very important fix
142 p = p/npcol
143#else
144 CALL dgetrf(n, n, a, n, ipivot, info)
145 CALL cp_fm_get_diag(matrix_lu, diag)
146 determinant = product(diag)
147 DO i = 1, n
148 IF (ipivot(i) /= i) p = p + 1
149 END DO
150#endif
151 DEALLOCATE (ipivot)
152 DEALLOCATE (diag)
153 CALL cp_fm_release(matrix_lu)
154 det_a = determinant*(-2*mod(p, 2) + 1.0_dp)
155 END SUBROUTINE cp_fm_det
156
157! **************************************************************************************************
158!> \brief calc A <- alpha*A + beta*B
159!> optimized for alpha == 1.0 (just add beta*B) and beta == 0.0 (just
160!> scale A)
161!> \param alpha ...
162!> \param matrix_a ...
163!> \param beta ...
164!> \param matrix_b ...
165! **************************************************************************************************
166 SUBROUTINE cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
167
168 REAL(kind=dp), INTENT(IN) :: alpha
169 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
170 REAL(kind=dp), INTENT(IN), OPTIONAL :: beta
171 TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: matrix_b
172
173 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_scale_and_add'
174
175 INTEGER :: handle, size_a, size_b
176 REAL(kind=dp) :: my_beta
177 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
178 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
179
180 CALL timeset(routinen, handle)
181
182 my_beta = 0.0_dp
183 IF (PRESENT(matrix_b)) my_beta = 1.0_dp
184 IF (PRESENT(beta)) my_beta = beta
185 NULLIFY (a, b)
186
187 IF (PRESENT(beta)) THEN
188 cpassert(PRESENT(matrix_b))
189 IF (ASSOCIATED(matrix_a%local_data, matrix_b%local_data)) THEN
190 cpwarn("Bad use of routine. Call cp_fm_scale instead")
191 CALL cp_fm_scale(alpha + beta, matrix_a)
192 CALL timestop(handle)
193 RETURN
194 END IF
195 END IF
196
197 a => matrix_a%local_data
198 a_sp => matrix_a%local_data_sp
199
200 IF (matrix_a%use_sp) THEN
201 size_a = SIZE(a_sp, 1)*SIZE(a_sp, 2)
202 ELSE
203 size_a = SIZE(a, 1)*SIZE(a, 2)
204 END IF
205
206 IF (alpha /= 1.0_dp) THEN
207 IF (matrix_a%use_sp) THEN
208 CALL sscal(size_a, real(alpha, sp), a_sp, 1)
209 ELSE
210 CALL dscal(size_a, alpha, a, 1)
211 END IF
212 END IF
213 IF (my_beta /= 0.0_dp) THEN
214 IF (matrix_a%matrix_struct%context /= matrix_b%matrix_struct%context) &
215 cpabort("Matrices must be in the same blacs context")
216
217 IF (cp_fm_struct_equivalent(matrix_a%matrix_struct, &
218 matrix_b%matrix_struct)) THEN
219
220 b => matrix_b%local_data
221 b_sp => matrix_b%local_data_sp
222 IF (matrix_b%use_sp) THEN
223 size_b = SIZE(b_sp, 1)*SIZE(b_sp, 2)
224 ELSE
225 size_b = SIZE(b, 1)*SIZE(b, 2)
226 END IF
227 IF (size_a /= size_b) &
228 cpabort("Matrices must have same local sizes")
229
230 IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
231 CALL saxpy(size_a, real(my_beta, sp), b_sp, 1, a_sp, 1)
232 ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
233 CALL saxpy(size_a, real(my_beta, sp), real(b, sp), 1, a_sp, 1)
234 ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
235 CALL daxpy(size_a, my_beta, real(b_sp, dp), 1, a, 1)
236 ELSE
237 CALL daxpy(size_a, my_beta, b, 1, a, 1)
238 END IF
239
240 ELSE
241#ifdef __parallel
242 cpabort("to do (pdscal,pdcopy,pdaxpy)")
243#else
244 cpabort("")
245#endif
246 END IF
247
248 END IF
249
250 CALL timestop(handle)
251
252 END SUBROUTINE cp_fm_scale_and_add
253
254! **************************************************************************************************
255!> \brief interface to BLACS geadd:
256!> matrix_b = beta*matrix_b + alpha*opt(matrix_a)
257!> where opt(matrix_a) can be either:
258!> 'N': matrix_a
259!> 'T': matrix_a^T
260!> 'C': matrix_a^H (Hermitian conjugate)
261!> note that this is a level three routine, use cp_fm_scale_and_add if that
262!> is sufficient for your needs
263!> \param alpha : complex scalar
264!> \param trans : 'N' normal, 'T' transposed
265!> \param matrix_a : input matrix_a
266!> \param beta : complex scalar
267!> \param matrix_b : input matrix_b, upon out put the updated matrix_b
268!> \author Lianheng Tong
269! **************************************************************************************************
270 SUBROUTINE cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
271 REAL(kind=dp), INTENT(IN) :: alpha, beta
272 CHARACTER, INTENT(IN) :: trans
273 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
274
275 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_geadd'
276
277 INTEGER :: nrow_global, ncol_global, handle
278 REAL(kind=dp), DIMENSION(:, :), POINTER :: aa, bb
279#if defined(__parallel)
280 INTEGER, DIMENSION(9) :: desca, descb
281#elif !defined(__MKL)
282 INTEGER :: ii, jj
283#endif
284
285 CALL timeset(routinen, handle)
286
287 nrow_global = matrix_a%matrix_struct%nrow_global
288 ncol_global = matrix_a%matrix_struct%ncol_global
289 cpassert(nrow_global == matrix_b%matrix_struct%nrow_global)
290 cpassert(ncol_global == matrix_b%matrix_struct%ncol_global)
291
292 aa => matrix_a%local_data
293 bb => matrix_b%local_data
294
295#if defined(__parallel)
296 desca = matrix_a%matrix_struct%descriptor
297 descb = matrix_b%matrix_struct%descriptor
298 CALL pdgeadd(trans, &
299 nrow_global, &
300 ncol_global, &
301 alpha, &
302 aa, &
303 1, 1, &
304 desca, &
305 beta, &
306 bb, &
307 1, 1, &
308 descb)
309#elif defined(__MKL)
310 CALL mkl_domatadd('C', trans, 'N', nrow_global, ncol_global, &
311 alpha, aa, nrow_global, beta, bb, nrow_global, bb, nrow_global)
312#else
313 ! dgeadd is not a standard BLAS function, although it is implemented
314 ! in some libraries like OpenBLAS, so not going to use it here
315 SELECT CASE (trans)
316 CASE ('T')
317 DO jj = 1, ncol_global
318 DO ii = 1, nrow_global
319 bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(jj, ii)
320 END DO
321 END DO
322 CASE DEFAULT
323 DO jj = 1, ncol_global
324 DO ii = 1, nrow_global
325 bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(ii, jj)
326 END DO
327 END DO
328 END SELECT
329#endif
330
331 CALL timestop(handle)
332
333 END SUBROUTINE cp_fm_geadd
334
335! **************************************************************************************************
336!> \brief Add (and scale) a subset of columns of a fm to a fm
337!> b = alpha*a + b
338!> \param msource ...
339!> \param mtarget ...
340!> \param ncol ...
341!> \param alpha ...
342!> \param source_start ...
343!> \param target_start ...
344! **************************************************************************************************
345 SUBROUTINE cp_fm_add_columns(msource, mtarget, ncol, alpha, source_start, target_start)
346
347 TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
348 INTEGER, INTENT(IN) :: ncol
349 REAL(kind=dp), INTENT(IN), OPTIONAL :: alpha
350 INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start
351
352 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_add_columns'
353
354 INTEGER :: handle, n, ss, ts, i
355 REAL(kind=dp) :: fscale
356 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
357#if defined(__parallel)
358 INTEGER, DIMENSION(9) :: desca, descb
359#endif
360
361 CALL timeset(routinen, handle)
362
363 ss = 1
364 ts = 1
365
366 IF (PRESENT(source_start)) ss = source_start
367 IF (PRESENT(target_start)) ts = target_start
368
369 fscale = 1.0_dp
370 IF (PRESENT(alpha)) fscale = alpha
371
372 n = msource%matrix_struct%nrow_global
373
374 a => msource%local_data
375 b => mtarget%local_data
376
377#if defined(__parallel)
378 mark_used(i)
379 desca(:) = msource%matrix_struct%descriptor(:)
380 descb(:) = mtarget%matrix_struct%descriptor(:)
381 CALL pdgeadd("N", n, ncol, fscale, a, 1, ss, desca, 1.0_dp, b, 1, ts, descb)
382#else
383 DO i = 0, ncol - 1
384 b(1:n, ts + i) = b(1:n, ts + i) + fscale*a(1:n, ss + i)
385 END DO
386#endif
387
388 CALL timestop(handle)
389
390 END SUBROUTINE cp_fm_add_columns
391
392! **************************************************************************************************
393!> \brief Computes the LU-decomposition of the matrix, and the determinant of the matrix
394!> IMPORTANT : the sign of the determinant is not defined correctly yet ....
395!> \param matrix_a ...
396!> \param almost_determinant ...
397!> \param correct_sign ...
398!> \par History
399!> added correct_sign 02.07 (fschiff)
400!> \author Joost VandeVondele
401!> \note
402!> - matrix_a is overwritten
403!> - the sign of the determinant might be wrong
404!> - SERIOUS WARNING (KNOWN BUG) : the sign of the determinant depends on ipivot
405!> - one should be able to find out if ipivot is an even or an odd permutation...
406!> if you need the correct sign, just add correct_sign==.TRUE. (fschiff)
407!> - Use cp_fm_get_diag instead of n times cp_fm_get_element (A. Bussy)
408! **************************************************************************************************
409 SUBROUTINE cp_fm_lu_decompose(matrix_a, almost_determinant, correct_sign)
410 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
411 REAL(kind=dp), INTENT(OUT) :: almost_determinant
412 LOGICAL, INTENT(IN), OPTIONAL :: correct_sign
413
414 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_lu_decompose'
415
416 INTEGER :: handle, i, info, n
417 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
418 REAL(kind=dp) :: determinant
419 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
420#if defined(__parallel)
421 INTEGER, DIMENSION(9) :: desca
422 REAL(kind=dp), DIMENSION(:), POINTER :: diag
423#else
424 INTEGER :: lda
425#endif
426
427 CALL timeset(routinen, handle)
428
429 a => matrix_a%local_data
430 n = matrix_a%matrix_struct%nrow_global
431 ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
432
433#if defined(__parallel)
434 mark_used(correct_sign)
435 desca(:) = matrix_a%matrix_struct%descriptor(:)
436 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
437
438 ALLOCATE (diag(n))
439 diag(:) = 0.0_dp
440 CALL cp_fm_get_diag(matrix_a, diag)
441 determinant = 1.0_dp
442 DO i = 1, n
443 determinant = determinant*diag(i)
444 END DO
445 DEALLOCATE (diag)
446#else
447 lda = SIZE(a, 1)
448 CALL dgetrf(n, n, a, lda, ipivot, info)
449 determinant = 1.0_dp
450 IF (correct_sign) THEN
451 DO i = 1, n
452 IF (ipivot(i) /= i) THEN
453 determinant = -determinant*a(i, i)
454 ELSE
455 determinant = determinant*a(i, i)
456 END IF
457 END DO
458 ELSE
459 DO i = 1, n
460 determinant = determinant*a(i, i)
461 END DO
462 END IF
463#endif
464 ! info is allowed to be zero
465 ! this does just signal a zero diagonal element
466 DEALLOCATE (ipivot)
467 almost_determinant = determinant ! notice that the sign is random
468 CALL timestop(handle)
469 END SUBROUTINE cp_fm_lu_decompose
470
471! **************************************************************************************************
472!> \brief computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
473!> \param transa : 'N' -> normal 'T' -> transpose
474!> alpha,beta :: can be 0.0_dp and 1.0_dp
475!> \param transb ...
476!> \param m ...
477!> \param n ...
478!> \param k ...
479!> \param alpha ...
480!> \param matrix_a : m x k matrix ( ! for transa = 'N')
481!> \param matrix_b : k x n matrix ( ! for transb = 'N')
482!> \param beta ...
483!> \param matrix_c : m x n matrix
484!> \param a_first_col ...
485!> \param a_first_row ...
486!> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
487!> \param b_first_row ...
488!> \param c_first_col ...
489!> \param c_first_row ...
490!> \author Matthias Krack
491!> \note
492!> matrix_c should have no overlap with matrix_a, matrix_b
493! **************************************************************************************************
494 SUBROUTINE cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, &
495 matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, &
496 c_first_col, c_first_row)
497
498 CHARACTER(LEN=1), INTENT(IN) :: transa, transb
499 INTEGER, INTENT(IN) :: m, n, k
500 REAL(kind=dp), INTENT(IN) :: alpha
501 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
502 REAL(kind=dp), INTENT(IN) :: beta
503 TYPE(cp_fm_type), INTENT(IN) :: matrix_c
504 INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, &
505 b_first_col, b_first_row, &
506 c_first_col, c_first_row
507
508 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_gemm'
509
510 INTEGER :: handle, i_a, i_b, i_c, j_a, &
511 j_b, j_c
512 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, c
513 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp, c_sp
514#if defined(__parallel)
515 INTEGER, DIMENSION(9) :: desca, descb, descc
516#else
517 INTEGER :: lda, ldb, ldc
518#endif
519
520 CALL timeset(routinen, handle)
521
522 !sample peak memory
523 CALL m_memory()
524
525 a => matrix_a%local_data
526 b => matrix_b%local_data
527 c => matrix_c%local_data
528
529 a_sp => matrix_a%local_data_sp
530 b_sp => matrix_b%local_data_sp
531 c_sp => matrix_c%local_data_sp
532
533 i_a = 1
534 IF (PRESENT(a_first_row)) i_a = a_first_row
535
536 j_a = 1
537 IF (PRESENT(a_first_col)) j_a = a_first_col
538
539 i_b = 1
540 IF (PRESENT(b_first_row)) i_b = b_first_row
541
542 j_b = 1
543 IF (PRESENT(b_first_col)) j_b = b_first_col
544
545 i_c = 1
546 IF (PRESENT(c_first_row)) i_c = c_first_row
547
548 j_c = 1
549 IF (PRESENT(c_first_col)) j_c = c_first_col
550
551#if defined(__parallel)
552
553 desca(:) = matrix_a%matrix_struct%descriptor(:)
554 descb(:) = matrix_b%matrix_struct%descriptor(:)
555 descc(:) = matrix_c%matrix_struct%descriptor(:)
556
557 IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
558
559 CALL psgemm(transa, transb, m, n, k, real(alpha, sp), a_sp(1, 1), i_a, j_a, desca, b_sp(1, 1), i_b, j_b, &
560 descb, real(beta, sp), c_sp(1, 1), i_c, j_c, descc)
561
562 ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
563
564 CALL pdgemm(transa, transb, m, n, k, alpha, a, i_a, j_a, desca, b, i_b, j_b, &
565 descb, beta, c, i_c, j_c, descc)
566
567 ELSE
568 cpabort("Mixed precision gemm NYI")
569 END IF
570#else
571
572 IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
573
574 lda = SIZE(a_sp, 1)
575 ldb = SIZE(b_sp, 1)
576 ldc = SIZE(c_sp, 1)
577
578 CALL sgemm(transa, transb, m, n, k, real(alpha, sp), a_sp(i_a, j_a), lda, b_sp(i_b, j_b), ldb, &
579 REAL(beta, sp), c_sp(i_c, j_c), ldc)
580
581 ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
582
583 lda = SIZE(a, 1)
584 ldb = SIZE(b, 1)
585 ldc = SIZE(c, 1)
586
587 CALL dgemm(transa, transb, m, n, k, alpha, a(i_a, j_a), lda, b(i_b, j_b), ldb, beta, c(i_c, j_c), ldc)
588
589 ELSE
590 cpabort("Mixed precision gemm NYI")
591 END IF
592
593#endif
594 CALL timestop(handle)
595
596 END SUBROUTINE cp_fm_gemm
597
598! **************************************************************************************************
599!> \brief computes matrix_c = beta * matrix_c + alpha * matrix_a * matrix_b
600!> computes matrix_c = beta * matrix_c + alpha * matrix_b * matrix_a
601!> where matrix_a is symmetric
602!> \param side : 'L' -> matrix_a is on the left 'R' -> matrix_a is on the right
603!> alpha,beta :: can be 0.0_dp and 1.0_dp
604!> \param uplo triangular format
605!> \param m ...
606!> \param n ...
607!> \param alpha ...
608!> \param matrix_a : m x m matrix
609!> \param matrix_b : m x n matrix
610!> \param beta ...
611!> \param matrix_c : m x n matrix
612!> \author Matthias Krack
613!> \note
614!> matrix_c should have no overlap with matrix_a, matrix_b
615!> all matrices in QS are triangular according to uplo
616!> matrix_a is always an m x m matrix
617!> typically slower than cp_fm_gemm (especially in parallel easily 50 percent)
618! **************************************************************************************************
619 SUBROUTINE cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
620
621 CHARACTER(LEN=1), INTENT(IN) :: side, uplo
622 INTEGER, INTENT(IN) :: m, n
623 REAL(kind=dp), INTENT(IN) :: alpha
624 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
625 REAL(kind=dp), INTENT(IN) :: beta
626 TYPE(cp_fm_type), INTENT(IN) :: matrix_c
627
628 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_symm'
629
630 INTEGER :: handle
631 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, c
632#if defined(__parallel)
633 INTEGER, DIMENSION(9) :: desca, descb, descc
634#else
635 INTEGER :: lda, ldb, ldc
636#endif
637
638 CALL timeset(routinen, handle)
639
640 a => matrix_a%local_data
641 b => matrix_b%local_data
642 c => matrix_c%local_data
643
644#if defined(__parallel)
645
646 desca(:) = matrix_a%matrix_struct%descriptor(:)
647 descb(:) = matrix_b%matrix_struct%descriptor(:)
648 descc(:) = matrix_c%matrix_struct%descriptor(:)
649
650 CALL pdsymm(side, uplo, m, n, alpha, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, beta, c(1, 1), 1, 1, descc)
651
652#else
653
654 lda = matrix_a%matrix_struct%local_leading_dimension
655 ldb = matrix_b%matrix_struct%local_leading_dimension
656 ldc = matrix_c%matrix_struct%local_leading_dimension
657
658 CALL dsymm(side, uplo, m, n, alpha, a(1, 1), lda, b(1, 1), ldb, beta, c(1, 1), ldc)
659
660#endif
661 CALL timestop(handle)
662
663 END SUBROUTINE cp_fm_symm
664
665! **************************************************************************************************
666!> \brief computes the Frobenius norm of matrix_a
667!> \brief computes the Frobenius norm of matrix_a
668!> \param matrix_a : m x n matrix
669!> \return ...
670!> \author VW
671! **************************************************************************************************
672 FUNCTION cp_fm_frobenius_norm(matrix_a) RESULT(norm)
673 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
674 REAL(kind=dp) :: norm
675
676 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_frobenius_norm'
677
678 INTEGER :: handle, size_a
679 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
680 REAL(kind=dp), EXTERNAL :: ddot
681#if defined(__parallel)
682 TYPE(mp_comm_type) :: group
683#endif
684
685 CALL timeset(routinen, handle)
686
687 norm = 0.0_dp
688 a => matrix_a%local_data
689 size_a = SIZE(a, 1)*SIZE(a, 2)
690 norm = ddot(size_a, a(1, 1), 1, a(1, 1), 1)
691#if defined(__parallel)
692 group = matrix_a%matrix_struct%para_env
693 CALL group%sum(norm)
694#endif
695 norm = sqrt(norm)
696
697 CALL timestop(handle)
698
699 END FUNCTION cp_fm_frobenius_norm
700
701! **************************************************************************************************
702!> \brief performs a rank-k update of a symmetric matrix_c
703!> matrix_c = beta * matrix_c + alpha * matrix_a * transpose ( matrix_a )
704!> \param uplo : 'U' ('L')
705!> \param trans : 'N' ('T')
706!> \param k : number of cols to use in matrix_a
707!> ia,ja :: 1,1 (could be used for selecting subblock of a)
708!> \param alpha ...
709!> \param matrix_a ...
710!> \param ia ...
711!> \param ja ...
712!> \param beta ...
713!> \param matrix_c ...
714!> \author Matthias Krack
715! **************************************************************************************************
716 SUBROUTINE cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
717 CHARACTER(LEN=1), INTENT(IN) :: uplo, trans
718 INTEGER, INTENT(IN) :: k
719 REAL(kind=dp), INTENT(IN) :: alpha
720 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
721 INTEGER, INTENT(IN) :: ia, ja
722 REAL(kind=dp), INTENT(IN) :: beta
723 TYPE(cp_fm_type), INTENT(IN) :: matrix_c
724
725 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_syrk'
726
727 INTEGER :: handle, n
728 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, c
729#if defined(__parallel)
730 INTEGER, DIMENSION(9) :: desca, descc
731#else
732 INTEGER :: lda, ldc
733#endif
734
735 CALL timeset(routinen, handle)
736
737 n = matrix_c%matrix_struct%nrow_global
738
739 a => matrix_a%local_data
740 c => matrix_c%local_data
741
742#if defined(__parallel)
743
744 desca(:) = matrix_a%matrix_struct%descriptor(:)
745 descc(:) = matrix_c%matrix_struct%descriptor(:)
746
747 CALL pdsyrk(uplo, trans, n, k, alpha, a(1, 1), ia, ja, desca, beta, c(1, 1), 1, 1, descc)
748
749#else
750
751 lda = SIZE(a, 1)
752 ldc = SIZE(c, 1)
753
754 CALL dsyrk(uplo, trans, n, k, alpha, a(ia, ja), lda, beta, c(1, 1), ldc)
755
756#endif
757 CALL timestop(handle)
758
759 END SUBROUTINE cp_fm_syrk
760
761! **************************************************************************************************
762!> \brief computes the schur product of two matrices
763!> c_ij = a_ij * b_ij
764!> \param matrix_a ...
765!> \param matrix_b ...
766!> \param matrix_c ...
767!> \author Joost VandeVondele
768! **************************************************************************************************
769 SUBROUTINE cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
770
771 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b, matrix_c
772
773 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_schur_product'
774
775 INTEGER :: handle, icol_local, irow_local, mypcol, &
776 myprow, ncol_local, &
777 nrow_local
778 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b, c
779 TYPE(cp_blacs_env_type), POINTER :: context
780
781 CALL timeset(routinen, handle)
782
783 context => matrix_a%matrix_struct%context
784 myprow = context%mepos(1)
785 mypcol = context%mepos(2)
786
787 a => matrix_a%local_data
788 b => matrix_b%local_data
789 c => matrix_c%local_data
790
791 nrow_local = matrix_a%matrix_struct%nrow_locals(myprow)
792 ncol_local = matrix_a%matrix_struct%ncol_locals(mypcol)
793
794 DO icol_local = 1, ncol_local
795 DO irow_local = 1, nrow_local
796 c(irow_local, icol_local) = a(irow_local, icol_local)*b(irow_local, icol_local)
797 END DO
798 END DO
799
800 CALL timestop(handle)
801
802 END SUBROUTINE cp_fm_schur_product
803
804! **************************************************************************************************
805!> \brief returns the trace of matrix_a^T matrix_b, i.e
806!> sum_{i,j}(matrix_a(i,j)*matrix_b(i,j))
807!> \param matrix_a a matrix
808!> \param matrix_b another matrix
809!> \param trace ...
810!> \par History
811!> 11.06.2001 Creation (Matthias Krack)
812!> 12.2002 added doc [fawzi]
813!> \author Matthias Krack
814!> \note
815!> note the transposition of matrix_a!
816! **************************************************************************************************
817 SUBROUTINE cp_fm_trace_a0b0t0(matrix_a, matrix_b, trace)
818
819 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
820 REAL(KIND=dp), INTENT(OUT) :: trace
821
822 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a0b0t0'
823
824 INTEGER :: handle, mypcol, myprow, ncol_local, &
825 nrow_local
826 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
827 REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
828 TYPE(cp_blacs_env_type), POINTER :: context
829 TYPE(mp_comm_type) :: group
830
831 CALL timeset(routinen, handle)
832
833 context => matrix_a%matrix_struct%context
834 myprow = context%mepos(1)
835 mypcol = context%mepos(2)
836
837 group = matrix_a%matrix_struct%para_env
838
839 a => matrix_a%local_data
840 b => matrix_b%local_data
841
842 a_sp => matrix_a%local_data_sp
843 b_sp => matrix_b%local_data_sp
844
845 nrow_local = min(matrix_a%matrix_struct%nrow_locals(myprow), matrix_b%matrix_struct%nrow_locals(myprow))
846 ncol_local = min(matrix_a%matrix_struct%ncol_locals(mypcol), matrix_b%matrix_struct%ncol_locals(mypcol))
847
848 ! cries for an accurate_dot_product
849 IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
850 trace = accurate_sum(real(a_sp(1:nrow_local, 1:ncol_local)* &
851 b_sp(1:nrow_local, 1:ncol_local), dp))
852 ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
853 trace = accurate_sum(real(a_sp(1:nrow_local, 1:ncol_local), dp)* &
854 b(1:nrow_local, 1:ncol_local))
855 ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
856 trace = accurate_sum(a(1:nrow_local, 1:ncol_local)* &
857 REAL(b_sp(1:nrow_local, 1:ncol_local), dp))
858 ELSE
859 trace = accurate_dot_product(a(1:nrow_local, 1:ncol_local), &
860 b(1:nrow_local, 1:ncol_local))
861 END IF
862
863 CALL group%sum(trace)
864
865 CALL timestop(handle)
866
867 END SUBROUTINE cp_fm_trace_a0b0t0
868
869
870! **************************************************************************************************
871!> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b) for each pair of matrices A_k and B.
872!> \param matrix_a list of A matrices
873!> \param matrix_b B matrix
874!> \param trace computed traces
875!> \par History
876!> * 08.2018 forked from cp_fm_trace() [Sergey Chulkov]
877!> \note \parblock
878!> Computing the trace requires collective communication between involved MPI processes
879!> that implies a synchronisation point between them. The aim of this subroutine is to reduce
880!> the amount of time wasted in such synchronisation by performing one large collective
881!> operation which involves all the matrices in question.
882!>
883!> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b0t1' means that
884!> the dummy variables 'matrix_a' and 'trace' are 1-dimensional arrays, while the variable
885!> 'matrix_b' is a single matrix.
886!> \endparblock
887! **************************************************************************************************
888 SUBROUTINE cp_fm_trace_a1b0t1_a (matrix_a, matrix_b, trace)
889 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_a
890 TYPE(cp_fm_type), INTENT(IN) :: matrix_b
891 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
892
893 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_a'
894
895 INTEGER :: handle, imatrix, n_matrices, &
896 ncols_local, nrows_local
897 LOGICAL :: use_sp_a, use_sp_b
898 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
899 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
900 TYPE(mp_comm_type) :: group
901
902 CALL timeset(routinen, handle)
903
904 n_matrices = SIZE(trace)
905 cpassert(SIZE(matrix_a) == n_matrices)
906
907 CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
908 use_sp_b = matrix_b%use_sp
909
910 IF (use_sp_b) THEN
911 ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
912 ELSE
913 ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
914 END IF
915
916!$OMP PARALLEL DO DEFAULT(NONE), &
917!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
918!$OMP SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
919!$OMP SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
920
921 DO imatrix = 1, n_matrices
922
923 use_sp_a = matrix_a(imatrix) %use_sp
924
925 ! assume that the matrices A(i) and B have identical shapes and distribution schemes
926 IF (use_sp_a .AND. use_sp_b) THEN
927 ldata_a_sp => matrix_a(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
928 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
929 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
930 ldata_a => matrix_a(imatrix) %local_data(1:nrows_local, 1:ncols_local)
931 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
932 ELSE
933 cpabort("Matrices A and B are of different types")
934 END IF
935 END DO
936!$OMP END PARALLEL DO
937
938 group = matrix_b%matrix_struct%para_env
939 CALL group%sum(trace)
940
941 CALL timestop(handle)
942 END SUBROUTINE cp_fm_trace_a1b0t1_a
943 SUBROUTINE cp_fm_trace_a1b0t1_p (matrix_a, matrix_b, trace)
944 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_a
945 TYPE(cp_fm_type), INTENT(IN) :: matrix_b
946 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
947
948 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_p'
949
950 INTEGER :: handle, imatrix, n_matrices, &
951 ncols_local, nrows_local
952 LOGICAL :: use_sp_a, use_sp_b
953 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
954 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
955 TYPE(mp_comm_type) :: group
956
957 CALL timeset(routinen, handle)
958
959 n_matrices = SIZE(trace)
960 cpassert(SIZE(matrix_a) == n_matrices)
961
962 CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
963 use_sp_b = matrix_b%use_sp
964
965 IF (use_sp_b) THEN
966 ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
967 ELSE
968 ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
969 END IF
970
971!$OMP PARALLEL DO DEFAULT(NONE), &
972!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
973!$OMP SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
974!$OMP SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
975
976 DO imatrix = 1, n_matrices
977
978 use_sp_a = matrix_a(imatrix) %matrix%use_sp
979
980 ! assume that the matrices A(i) and B have identical shapes and distribution schemes
981 IF (use_sp_a .AND. use_sp_b) THEN
982 ldata_a_sp => matrix_a(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
983 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
984 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
985 ldata_a => matrix_a(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
986 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
987 ELSE
988 cpabort("Matrices A and B are of different types")
989 END IF
990 END DO
991!$OMP END PARALLEL DO
992
993 group = matrix_b%matrix_struct%para_env
994 CALL group%sum(trace)
995
996 CALL timestop(handle)
997 END SUBROUTINE cp_fm_trace_a1b0t1_p
998
999! **************************************************************************************************
1000!> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b(k)) for each pair of matrices A_k and B_k.
1001!> \param matrix_a list of A matrices
1002!> \param matrix_b list of B matrices
1003!> \param trace computed traces
1004!> \param accurate ...
1005!> \par History
1006!> * 11.2016 forked from cp_fm_trace() [Sergey Chulkov]
1007!> \note \parblock
1008!> Computing the trace requires collective communication between involved MPI processes
1009!> that implies a synchronisation point between them. The aim of this subroutine is to reduce
1010!> the amount of time wasted in such synchronisation by performing one large collective
1011!> operation which involves all the matrices in question.
1012!>
1013!> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b1t1' means that
1014!> all dummy variables (matrix_a, matrix_b, and trace) are 1-dimensional arrays.
1015!> \endparblock
1016! **************************************************************************************************
1017 SUBROUTINE cp_fm_trace_a1b1t1_aa (matrix_a, matrix_b, trace, accurate)
1018 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_a
1019 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_b
1020 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
1021 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1022
1023 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_aa'
1024
1025 INTEGER :: handle, imatrix, n_matrices, &
1026 ncols_local, nrows_local
1027 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1028 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1029 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1030 TYPE(mp_comm_type) :: group
1031
1032 CALL timeset(routinen, handle)
1033
1034 n_matrices = SIZE(trace)
1035 cpassert(SIZE(matrix_a) == n_matrices)
1036 cpassert(SIZE(matrix_b) == n_matrices)
1037
1038 use_accurate_sum = .true.
1039 IF (PRESENT(accurate)) use_accurate_sum = accurate
1040
1041!$OMP PARALLEL DO DEFAULT(NONE), &
1042!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1043!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1044!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1045 DO imatrix = 1, n_matrices
1046 CALL cp_fm_get_info(matrix_a(imatrix) , nrow_local=nrows_local, ncol_local=ncols_local)
1047
1048 use_sp_a = matrix_a(imatrix) %use_sp
1049 use_sp_b = matrix_b(imatrix) %use_sp
1050
1051 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1052 IF (use_sp_a .AND. use_sp_b) THEN
1053 ldata_a_sp => matrix_a(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1054 ldata_b_sp => matrix_b(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1055 IF (use_accurate_sum) THEN
1056 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1057 ELSE
1058 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1059 END IF
1060 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1061 ldata_a => matrix_a(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1062 ldata_b => matrix_b(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1063 IF (use_accurate_sum) THEN
1064 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1065 ELSE
1066 trace(imatrix) = sum(ldata_a*ldata_b)
1067 END IF
1068 ELSE
1069 cpabort("Matrices A and B are of different types")
1070 END IF
1071 END DO
1072!$OMP END PARALLEL DO
1073
1074 group = matrix_a(1) %matrix_struct%para_env
1075 CALL group%sum(trace)
1076
1077 CALL timestop(handle)
1078 END SUBROUTINE cp_fm_trace_a1b1t1_aa
1079 SUBROUTINE cp_fm_trace_a1b1t1_ap (matrix_a, matrix_b, trace, accurate)
1080 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_a
1081 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_b
1082 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
1083 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1084
1085 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_ap'
1086
1087 INTEGER :: handle, imatrix, n_matrices, &
1088 ncols_local, nrows_local
1089 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1090 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1091 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1092 TYPE(mp_comm_type) :: group
1093
1094 CALL timeset(routinen, handle)
1095
1096 n_matrices = SIZE(trace)
1097 cpassert(SIZE(matrix_a) == n_matrices)
1098 cpassert(SIZE(matrix_b) == n_matrices)
1099
1100 use_accurate_sum = .true.
1101 IF (PRESENT(accurate)) use_accurate_sum = accurate
1102
1103!$OMP PARALLEL DO DEFAULT(NONE), &
1104!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1105!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1106!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1107 DO imatrix = 1, n_matrices
1108 CALL cp_fm_get_info(matrix_a(imatrix) , nrow_local=nrows_local, ncol_local=ncols_local)
1109
1110 use_sp_a = matrix_a(imatrix) %use_sp
1111 use_sp_b = matrix_b(imatrix) %matrix%use_sp
1112
1113 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1114 IF (use_sp_a .AND. use_sp_b) THEN
1115 ldata_a_sp => matrix_a(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1116 ldata_b_sp => matrix_b(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1117 IF (use_accurate_sum) THEN
1118 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1119 ELSE
1120 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1121 END IF
1122 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1123 ldata_a => matrix_a(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1124 ldata_b => matrix_b(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1125 IF (use_accurate_sum) THEN
1126 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1127 ELSE
1128 trace(imatrix) = sum(ldata_a*ldata_b)
1129 END IF
1130 ELSE
1131 cpabort("Matrices A and B are of different types")
1132 END IF
1133 END DO
1134!$OMP END PARALLEL DO
1135
1136 group = matrix_a(1) %matrix_struct%para_env
1137 CALL group%sum(trace)
1138
1139 CALL timestop(handle)
1140 END SUBROUTINE cp_fm_trace_a1b1t1_ap
1141 SUBROUTINE cp_fm_trace_a1b1t1_pa (matrix_a, matrix_b, trace, accurate)
1142 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_a
1143 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: matrix_b
1144 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
1145 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1146
1147 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_pa'
1148
1149 INTEGER :: handle, imatrix, n_matrices, &
1150 ncols_local, nrows_local
1151 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1152 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1153 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1154 TYPE(mp_comm_type) :: group
1155
1156 CALL timeset(routinen, handle)
1157
1158 n_matrices = SIZE(trace)
1159 cpassert(SIZE(matrix_a) == n_matrices)
1160 cpassert(SIZE(matrix_b) == n_matrices)
1161
1162 use_accurate_sum = .true.
1163 IF (PRESENT(accurate)) use_accurate_sum = accurate
1164
1165!$OMP PARALLEL DO DEFAULT(NONE), &
1166!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1167!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1168!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1169 DO imatrix = 1, n_matrices
1170 CALL cp_fm_get_info(matrix_a(imatrix) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1171
1172 use_sp_a = matrix_a(imatrix) %matrix%use_sp
1173 use_sp_b = matrix_b(imatrix) %use_sp
1174
1175 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1176 IF (use_sp_a .AND. use_sp_b) THEN
1177 ldata_a_sp => matrix_a(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1178 ldata_b_sp => matrix_b(imatrix) %local_data_sp(1:nrows_local, 1:ncols_local)
1179 IF (use_accurate_sum) THEN
1180 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1181 ELSE
1182 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1183 END IF
1184 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1185 ldata_a => matrix_a(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1186 ldata_b => matrix_b(imatrix) %local_data(1:nrows_local, 1:ncols_local)
1187 IF (use_accurate_sum) THEN
1188 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1189 ELSE
1190 trace(imatrix) = sum(ldata_a*ldata_b)
1191 END IF
1192 ELSE
1193 cpabort("Matrices A and B are of different types")
1194 END IF
1195 END DO
1196!$OMP END PARALLEL DO
1197
1198 group = matrix_a(1) %matrix%matrix_struct%para_env
1199 CALL group%sum(trace)
1200
1201 CALL timestop(handle)
1202 END SUBROUTINE cp_fm_trace_a1b1t1_pa
1203 SUBROUTINE cp_fm_trace_a1b1t1_pp (matrix_a, matrix_b, trace, accurate)
1204 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_a
1205 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(IN) :: matrix_b
1206 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
1207 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1208
1209 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_pp'
1210
1211 INTEGER :: handle, imatrix, n_matrices, &
1212 ncols_local, nrows_local
1213 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1214 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1215 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1216 TYPE(mp_comm_type) :: group
1217
1218 CALL timeset(routinen, handle)
1219
1220 n_matrices = SIZE(trace)
1221 cpassert(SIZE(matrix_a) == n_matrices)
1222 cpassert(SIZE(matrix_b) == n_matrices)
1223
1224 use_accurate_sum = .true.
1225 IF (PRESENT(accurate)) use_accurate_sum = accurate
1226
1227!$OMP PARALLEL DO DEFAULT(NONE), &
1228!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1229!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1230!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1231 DO imatrix = 1, n_matrices
1232 CALL cp_fm_get_info(matrix_a(imatrix) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1233
1234 use_sp_a = matrix_a(imatrix) %matrix%use_sp
1235 use_sp_b = matrix_b(imatrix) %matrix%use_sp
1236
1237 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1238 IF (use_sp_a .AND. use_sp_b) THEN
1239 ldata_a_sp => matrix_a(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1240 ldata_b_sp => matrix_b(imatrix) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1241 IF (use_accurate_sum) THEN
1242 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1243 ELSE
1244 trace(imatrix) = sum(ldata_a_sp*ldata_b_sp)
1245 END IF
1246 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1247 ldata_a => matrix_a(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1248 ldata_b => matrix_b(imatrix) %matrix%local_data(1:nrows_local, 1:ncols_local)
1249 IF (use_accurate_sum) THEN
1250 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1251 ELSE
1252 trace(imatrix) = sum(ldata_a*ldata_b)
1253 END IF
1254 ELSE
1255 cpabort("Matrices A and B are of different types")
1256 END IF
1257 END DO
1258!$OMP END PARALLEL DO
1259
1260 group = matrix_a(1) %matrix%matrix_struct%para_env
1261 CALL group%sum(trace)
1262
1263 CALL timestop(handle)
1264 END SUBROUTINE cp_fm_trace_a1b1t1_pp
1265
1266! **************************************************************************************************
1267!> \brief Compute trace(i,j) = \sum_k Tr (matrix_a(k,i)^T matrix_b(k,j)).
1268!> \param matrix_a list of A matrices
1269!> \param matrix_b list of B matrices
1270!> \param trace computed traces
1271!> \param accurate ...
1272! **************************************************************************************************
1273 SUBROUTINE cp_fm_contracted_trace_a2b2t2_aa (matrix_a, matrix_b, trace, accurate)
1274 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: matrix_a
1275 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: matrix_b
1276 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
1277 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1278
1279 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_aa'
1280
1281 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1282 nrows_local, nz
1283 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1284 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1285 REAL(kind=dp) :: t
1286 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1287 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1288 TYPE(mp_comm_type) :: group
1289
1290 CALL timeset(routinen, handle)
1291
1292 nz = SIZE(matrix_a, 1)
1293 cpassert(SIZE(matrix_b, 1) == nz)
1294
1295 na = SIZE(matrix_a, 2)
1296 nb = SIZE(matrix_b, 2)
1297 cpassert(SIZE(trace, 1) == na)
1298 cpassert(SIZE(trace, 2) == nb)
1299
1300 use_accurate_sum = .true.
1301 IF (PRESENT(accurate)) use_accurate_sum = accurate
1302
1303 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1304 ! improve load balance between shared-memory threads
1305 ntraces = na*nb
1306 na8 = int(na, kind=int_8)
1307
1308!$OMP PARALLEL DO DEFAULT(NONE), &
1309!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1310!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1311!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1312 DO itrace = 1, ntraces
1313 ib8 = (itrace - 1)/na8
1314 ia = int(itrace - ib8*na8)
1315 ib = int(ib8) + 1
1316
1317 t = 0.0_dp
1318 DO iz = 1, nz
1319 CALL cp_fm_get_info(matrix_a(iz, ia) , nrow_local=nrows_local, ncol_local=ncols_local)
1320 use_sp_a = matrix_a(iz, ia) %use_sp
1321 use_sp_b = matrix_b(iz, ib) %use_sp
1322
1323 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1324 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1325 ldata_a => matrix_a(iz, ia) %local_data(1:nrows_local, 1:ncols_local)
1326 ldata_b => matrix_b(iz, ib) %local_data(1:nrows_local, 1:ncols_local)
1327 IF (use_accurate_sum) THEN
1328 t = t + accurate_dot_product(ldata_a, ldata_b)
1329 ELSE
1330 t = t + sum(ldata_a*ldata_b)
1331 END IF
1332 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1333 ldata_a_sp => matrix_a(iz, ia) %local_data_sp(1:nrows_local, 1:ncols_local)
1334 ldata_b_sp => matrix_b(iz, ib) %local_data_sp(1:nrows_local, 1:ncols_local)
1335 IF (use_accurate_sum) THEN
1336 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1337 ELSE
1338 t = t + sum(ldata_a_sp*ldata_b_sp)
1339 END IF
1340 ELSE
1341 cpabort("Matrices A and B are of different types")
1342 END IF
1343 END DO
1344 trace(ia, ib) = t
1345 END DO
1346!$OMP END PARALLEL DO
1347
1348 group = matrix_a(1, 1) %matrix_struct%para_env
1349 CALL group%sum(trace)
1350
1351 CALL timestop(handle)
1352 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_aa
1353 SUBROUTINE cp_fm_contracted_trace_a2b2t2_ap (matrix_a, matrix_b, trace, accurate)
1354 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: matrix_a
1355 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(IN) :: matrix_b
1356 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
1357 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1358
1359 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_ap'
1360
1361 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1362 nrows_local, nz
1363 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1364 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1365 REAL(kind=dp) :: t
1366 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1367 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1368 TYPE(mp_comm_type) :: group
1369
1370 CALL timeset(routinen, handle)
1371
1372 nz = SIZE(matrix_a, 1)
1373 cpassert(SIZE(matrix_b, 1) == nz)
1374
1375 na = SIZE(matrix_a, 2)
1376 nb = SIZE(matrix_b, 2)
1377 cpassert(SIZE(trace, 1) == na)
1378 cpassert(SIZE(trace, 2) == nb)
1379
1380 use_accurate_sum = .true.
1381 IF (PRESENT(accurate)) use_accurate_sum = accurate
1382
1383 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1384 ! improve load balance between shared-memory threads
1385 ntraces = na*nb
1386 na8 = int(na, kind=int_8)
1387
1388!$OMP PARALLEL DO DEFAULT(NONE), &
1389!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1390!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1391!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1392 DO itrace = 1, ntraces
1393 ib8 = (itrace - 1)/na8
1394 ia = int(itrace - ib8*na8)
1395 ib = int(ib8) + 1
1396
1397 t = 0.0_dp
1398 DO iz = 1, nz
1399 CALL cp_fm_get_info(matrix_a(iz, ia) , nrow_local=nrows_local, ncol_local=ncols_local)
1400 use_sp_a = matrix_a(iz, ia) %use_sp
1401 use_sp_b = matrix_b(iz, ib) %matrix%use_sp
1402
1403 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1404 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1405 ldata_a => matrix_a(iz, ia) %local_data(1:nrows_local, 1:ncols_local)
1406 ldata_b => matrix_b(iz, ib) %matrix%local_data(1:nrows_local, 1:ncols_local)
1407 IF (use_accurate_sum) THEN
1408 t = t + accurate_dot_product(ldata_a, ldata_b)
1409 ELSE
1410 t = t + sum(ldata_a*ldata_b)
1411 END IF
1412 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1413 ldata_a_sp => matrix_a(iz, ia) %local_data_sp(1:nrows_local, 1:ncols_local)
1414 ldata_b_sp => matrix_b(iz, ib) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1415 IF (use_accurate_sum) THEN
1416 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1417 ELSE
1418 t = t + sum(ldata_a_sp*ldata_b_sp)
1419 END IF
1420 ELSE
1421 cpabort("Matrices A and B are of different types")
1422 END IF
1423 END DO
1424 trace(ia, ib) = t
1425 END DO
1426!$OMP END PARALLEL DO
1427
1428 group = matrix_a(1, 1) %matrix_struct%para_env
1429 CALL group%sum(trace)
1430
1431 CALL timestop(handle)
1432 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_ap
1433 SUBROUTINE cp_fm_contracted_trace_a2b2t2_pa (matrix_a, matrix_b, trace, accurate)
1434 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(IN) :: matrix_a
1435 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: matrix_b
1436 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
1437 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1438
1439 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_pa'
1440
1441 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1442 nrows_local, nz
1443 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1444 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1445 REAL(kind=dp) :: t
1446 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1447 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1448 TYPE(mp_comm_type) :: group
1449
1450 CALL timeset(routinen, handle)
1451
1452 nz = SIZE(matrix_a, 1)
1453 cpassert(SIZE(matrix_b, 1) == nz)
1454
1455 na = SIZE(matrix_a, 2)
1456 nb = SIZE(matrix_b, 2)
1457 cpassert(SIZE(trace, 1) == na)
1458 cpassert(SIZE(trace, 2) == nb)
1459
1460 use_accurate_sum = .true.
1461 IF (PRESENT(accurate)) use_accurate_sum = accurate
1462
1463 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1464 ! improve load balance between shared-memory threads
1465 ntraces = na*nb
1466 na8 = int(na, kind=int_8)
1467
1468!$OMP PARALLEL DO DEFAULT(NONE), &
1469!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1470!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1471!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1472 DO itrace = 1, ntraces
1473 ib8 = (itrace - 1)/na8
1474 ia = int(itrace - ib8*na8)
1475 ib = int(ib8) + 1
1476
1477 t = 0.0_dp
1478 DO iz = 1, nz
1479 CALL cp_fm_get_info(matrix_a(iz, ia) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1480 use_sp_a = matrix_a(iz, ia) %matrix%use_sp
1481 use_sp_b = matrix_b(iz, ib) %use_sp
1482
1483 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1484 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1485 ldata_a => matrix_a(iz, ia) %matrix%local_data(1:nrows_local, 1:ncols_local)
1486 ldata_b => matrix_b(iz, ib) %local_data(1:nrows_local, 1:ncols_local)
1487 IF (use_accurate_sum) THEN
1488 t = t + accurate_dot_product(ldata_a, ldata_b)
1489 ELSE
1490 t = t + sum(ldata_a*ldata_b)
1491 END IF
1492 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1493 ldata_a_sp => matrix_a(iz, ia) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1494 ldata_b_sp => matrix_b(iz, ib) %local_data_sp(1:nrows_local, 1:ncols_local)
1495 IF (use_accurate_sum) THEN
1496 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1497 ELSE
1498 t = t + sum(ldata_a_sp*ldata_b_sp)
1499 END IF
1500 ELSE
1501 cpabort("Matrices A and B are of different types")
1502 END IF
1503 END DO
1504 trace(ia, ib) = t
1505 END DO
1506!$OMP END PARALLEL DO
1507
1508 group = matrix_a(1, 1) %matrix%matrix_struct%para_env
1509 CALL group%sum(trace)
1510
1511 CALL timestop(handle)
1512 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_pa
1513 SUBROUTINE cp_fm_contracted_trace_a2b2t2_pp (matrix_a, matrix_b, trace, accurate)
1514 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(IN) :: matrix_a
1515 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(IN) :: matrix_b
1516 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
1517 LOGICAL, INTENT(IN), OPTIONAL :: accurate
1518
1519 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_pp'
1520
1521 INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1522 nrows_local, nz
1523 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1524 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1525 REAL(kind=dp) :: t
1526 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1527 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1528 TYPE(mp_comm_type) :: group
1529
1530 CALL timeset(routinen, handle)
1531
1532 nz = SIZE(matrix_a, 1)
1533 cpassert(SIZE(matrix_b, 1) == nz)
1534
1535 na = SIZE(matrix_a, 2)
1536 nb = SIZE(matrix_b, 2)
1537 cpassert(SIZE(trace, 1) == na)
1538 cpassert(SIZE(trace, 2) == nb)
1539
1540 use_accurate_sum = .true.
1541 IF (PRESENT(accurate)) use_accurate_sum = accurate
1542
1543 ! here we use one running index (itrace) instead of two (ia, ib) in order to
1544 ! improve load balance between shared-memory threads
1545 ntraces = na*nb
1546 na8 = int(na, kind=int_8)
1547
1548!$OMP PARALLEL DO DEFAULT(NONE), &
1549!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1550!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1551!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1552 DO itrace = 1, ntraces
1553 ib8 = (itrace - 1)/na8
1554 ia = int(itrace - ib8*na8)
1555 ib = int(ib8) + 1
1556
1557 t = 0.0_dp
1558 DO iz = 1, nz
1559 CALL cp_fm_get_info(matrix_a(iz, ia) %matrix, nrow_local=nrows_local, ncol_local=ncols_local)
1560 use_sp_a = matrix_a(iz, ia) %matrix%use_sp
1561 use_sp_b = matrix_b(iz, ib) %matrix%use_sp
1562
1563 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1564 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1565 ldata_a => matrix_a(iz, ia) %matrix%local_data(1:nrows_local, 1:ncols_local)
1566 ldata_b => matrix_b(iz, ib) %matrix%local_data(1:nrows_local, 1:ncols_local)
1567 IF (use_accurate_sum) THEN
1568 t = t + accurate_dot_product(ldata_a, ldata_b)
1569 ELSE
1570 t = t + sum(ldata_a*ldata_b)
1571 END IF
1572 ELSE IF (use_sp_a .AND. use_sp_b) THEN
1573 ldata_a_sp => matrix_a(iz, ia) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1574 ldata_b_sp => matrix_b(iz, ib) %matrix%local_data_sp(1:nrows_local, 1:ncols_local)
1575 IF (use_accurate_sum) THEN
1576 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1577 ELSE
1578 t = t + sum(ldata_a_sp*ldata_b_sp)
1579 END IF
1580 ELSE
1581 cpabort("Matrices A and B are of different types")
1582 END IF
1583 END DO
1584 trace(ia, ib) = t
1585 END DO
1586!$OMP END PARALLEL DO
1587
1588 group = matrix_a(1, 1) %matrix%matrix_struct%para_env
1589 CALL group%sum(trace)
1590
1591 CALL timestop(handle)
1592 END SUBROUTINE cp_fm_contracted_trace_a2b2t2_pp
1593
1594! **************************************************************************************************
1595!> \brief multiplies in place by a triangular matrix:
1596!> matrix_b = alpha op(triangular_matrix) matrix_b
1597!> or (if side='R')
1598!> matrix_b = alpha matrix_b op(triangular_matrix)
1599!> op(triangular_matrix) is:
1600!> triangular_matrix (if transpose_tr=.false. and invert_tr=.false.)
1601!> triangular_matrix^T (if transpose_tr=.true. and invert_tr=.false.)
1602!> triangular_matrix^(-1) (if transpose_tr=.false. and invert_tr=.true.)
1603!> triangular_matrix^(-T) (if transpose_tr=.true. and invert_tr=.true.)
1604!> \param triangular_matrix the triangular matrix that multiplies the other
1605!> \param matrix_b the matrix that gets multiplied and stores the result
1606!> \param side on which side of matrix_b stays op(triangular_matrix)
1607!> (defaults to 'L')
1608!> \param transpose_tr if the triangular matrix should be transposed
1609!> (defaults to false)
1610!> \param invert_tr if the triangular matrix should be inverted
1611!> (defaults to false)
1612!> \param uplo_tr if triangular_matrix is stored in the upper ('U') or
1613!> lower ('L') triangle (defaults to 'U')
1614!> \param unit_diag_tr if the diagonal elements of triangular_matrix should
1615!> be assumed to be 1 (defaults to false)
1616!> \param n_rows the number of rows of the result (defaults to
1617!> size(matrix_b,1))
1618!> \param n_cols the number of columns of the result (defaults to
1619!> size(matrix_b,2))
1620!> \param alpha ...
1621!> \par History
1622!> 08.2002 created [fawzi]
1623!> \author Fawzi Mohamed
1624!> \note
1625!> needs an mpi env
1626! **************************************************************************************************
1627 SUBROUTINE cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, &
1628 transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, &
1629 alpha)
1630 TYPE(cp_fm_type), INTENT(IN) :: triangular_matrix, matrix_b
1631 CHARACTER, INTENT(IN), OPTIONAL :: side
1632 LOGICAL, INTENT(IN), OPTIONAL :: transpose_tr, invert_tr
1633 CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
1634 LOGICAL, INTENT(IN), OPTIONAL :: unit_diag_tr
1635 INTEGER, INTENT(IN), OPTIONAL :: n_rows, n_cols
1636 REAL(kind=dp), INTENT(IN), OPTIONAL :: alpha
1637
1638 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_triangular_multiply'
1639
1640 CHARACTER :: side_char, transa, unit_diag, uplo
1641 INTEGER :: handle, mdim, m, n
1642 LOGICAL :: invert
1643 REAL(kind=dp) :: al
1644
1645 CALL timeset(routinen, handle)
1646 side_char = 'L'
1647 unit_diag = 'N'
1648 uplo = 'U'
1649 transa = 'N'
1650 invert = .false.
1651 al = 1.0_dp
1652 CALL cp_fm_get_info(matrix_b, nrow_global=m, ncol_global=n)
1653 IF (PRESENT(side)) side_char = side
1654 mdim = merge(1, 2, 'L' == side_char)
1655 IF (PRESENT(invert_tr)) invert = invert_tr
1656 IF (PRESENT(uplo_tr)) uplo = uplo_tr
1657 IF (PRESENT(unit_diag_tr)) THEN
1658 IF (unit_diag_tr) THEN
1659 unit_diag = 'U'
1660 ELSE
1661 unit_diag = 'N'
1662 END IF
1663 END IF
1664 IF (PRESENT(transpose_tr)) THEN
1665 IF (transpose_tr) THEN
1666 transa = 'T'
1667 ELSE
1668 transa = 'N'
1669 END IF
1670 END IF
1671 IF (PRESENT(alpha)) al = alpha
1672 IF (PRESENT(n_rows)) m = n_rows
1673 IF (PRESENT(n_cols)) n = n_cols
1674
1675 IF (invert) THEN
1676
1677#if defined(__parallel)
1678 CALL pdtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1679 triangular_matrix%local_data(1, 1), 1, 1, &
1680 triangular_matrix%matrix_struct%descriptor, &
1681 matrix_b%local_data(1, 1), 1, 1, &
1682 matrix_b%matrix_struct%descriptor(1))
1683#else
1684 CALL dtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1685 triangular_matrix%local_data(1, 1), &
1686 SIZE(triangular_matrix%local_data, mdim), &
1687 matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1688#endif
1689
1690 ELSE
1691
1692#if defined(__parallel)
1693 CALL pdtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1694 triangular_matrix%local_data(1, 1), 1, 1, &
1695 triangular_matrix%matrix_struct%descriptor, &
1696 matrix_b%local_data(1, 1), 1, 1, &
1697 matrix_b%matrix_struct%descriptor(1))
1698#else
1699 CALL dtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1700 triangular_matrix%local_data(1, 1), &
1701 SIZE(triangular_matrix%local_data, mdim), &
1702 matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1703#endif
1704
1705 END IF
1706
1707 CALL timestop(handle)
1708 END SUBROUTINE cp_fm_triangular_multiply
1709
1710! **************************************************************************************************
1711!> \brief scales a matrix
1712!> matrix_a = alpha * matrix_b
1713!> \param alpha ...
1714!> \param matrix_a ...
1715!> \note
1716!> use cp_fm_set_all to zero (avoids problems with nan)
1717! **************************************************************************************************
1718 SUBROUTINE cp_fm_scale(alpha, matrix_a)
1719 REAL(kind=dp), INTENT(IN) :: alpha
1720 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
1721
1722 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_scale'
1723
1724 INTEGER :: handle, size_a
1725 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1726
1727 CALL timeset(routinen, handle)
1728
1729 NULLIFY (a)
1730
1731 a => matrix_a%local_data
1732 size_a = SIZE(a, 1)*SIZE(a, 2)
1733
1734 CALL dscal(size_a, alpha, a, 1)
1735
1736 CALL timestop(handle)
1737
1738 END SUBROUTINE cp_fm_scale
1739
1740! **************************************************************************************************
1741!> \brief transposes a matrix
1742!> matrixt = matrix ^ T
1743!> \param matrix ...
1744!> \param matrixt ...
1745!> \note
1746!> all matrix elements are transposed (see cp_fm_uplo_to_full to symmetrise a matrix)
1747! **************************************************************************************************
1748 SUBROUTINE cp_fm_transpose(matrix, matrixt)
1749 TYPE(cp_fm_type), INTENT(IN) :: matrix, matrixt
1750
1751 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_transpose'
1752
1753 INTEGER :: handle, ncol_global, &
1754 nrow_global, ncol_globalt, nrow_globalt
1755 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, c
1756#if defined(__parallel)
1757 INTEGER, DIMENSION(9) :: desca, descc
1758#elif !defined(__MKL)
1759 INTEGER :: i, j
1760#endif
1761
1762 nrow_global = matrix%matrix_struct%nrow_global
1763 ncol_global = matrix%matrix_struct%ncol_global
1764 nrow_globalt = matrixt%matrix_struct%nrow_global
1765 ncol_globalt = matrixt%matrix_struct%ncol_global
1766 cpassert(nrow_global == ncol_globalt)
1767 cpassert(nrow_globalt == ncol_global)
1768
1769 CALL timeset(routinen, handle)
1770
1771 a => matrix%local_data
1772 c => matrixt%local_data
1773
1774#if defined(__parallel)
1775 desca(:) = matrix%matrix_struct%descriptor(:)
1776 descc(:) = matrixt%matrix_struct%descriptor(:)
1777 CALL pdtran(ncol_global, nrow_global, 1.0_dp, a(1, 1), 1, 1, desca, 0.0_dp, c(1, 1), 1, 1, descc)
1778#elif defined(__MKL)
1779 CALL mkl_domatcopy('C', 'T', nrow_global, ncol_global, 1.0_dp, a(1, 1), nrow_global, c(1, 1), ncol_global)
1780#else
1781 DO j = 1, ncol_global
1782 DO i = 1, nrow_global
1783 c(j, i) = a(i, j)
1784 END DO
1785 END DO
1786#endif
1787 CALL timestop(handle)
1788
1789 END SUBROUTINE cp_fm_transpose
1790
1791! **************************************************************************************************
1792!> \brief given a triangular matrix according to uplo, computes the corresponding full matrix
1793!> \param matrix the triangular matrix as input, the full matrix as output
1794!> \param work a matrix of the same size as matrix
1795!> \param uplo triangular format; defaults to 'U'
1796!> \author Matthias Krack
1797!> \note
1798!> the opposite triangular part is irrelevant
1799! **************************************************************************************************
1800 SUBROUTINE cp_fm_uplo_to_full(matrix, work, uplo)
1801
1802 TYPE(cp_fm_type), INTENT(IN) :: matrix, work
1803 CHARACTER, INTENT(IN), OPTIONAL :: uplo
1804
1805 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_uplo_to_full'
1806
1807 CHARACTER :: myuplo
1808 INTEGER :: handle, icol_global, irow_global, &
1809 mypcol, myprow, ncol_global, &
1810 nrow_global
1811 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1812 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
1813 TYPE(cp_blacs_env_type), POINTER :: context
1814
1815#if defined(__parallel)
1816 INTEGER :: icol_local, irow_local, &
1817 ncol_block, ncol_local, &
1818 nrow_block, nrow_local
1819 INTEGER, DIMENSION(9) :: desca, descc
1820 REAL(kind=dp), DIMENSION(:, :), POINTER :: c
1821 REAL(kind=sp), DIMENSION(:, :), POINTER :: c_sp
1822#endif
1823
1824 myuplo = 'U'
1825 IF (PRESENT(uplo)) myuplo = uplo
1826
1827 nrow_global = matrix%matrix_struct%nrow_global
1828 ncol_global = matrix%matrix_struct%ncol_global
1829 cpassert(nrow_global == ncol_global)
1830 nrow_global = work%matrix_struct%nrow_global
1831 ncol_global = work%matrix_struct%ncol_global
1832 cpassert(nrow_global == ncol_global)
1833 cpassert(matrix%use_sp .EQV. work%use_sp)
1834
1835 CALL timeset(routinen, handle)
1836
1837 context => matrix%matrix_struct%context
1838 myprow = context%mepos(1)
1839 mypcol = context%mepos(2)
1840
1841#if defined(__parallel)
1842
1843 nrow_block = matrix%matrix_struct%nrow_block
1844 ncol_block = matrix%matrix_struct%ncol_block
1845
1846 nrow_local = matrix%matrix_struct%nrow_locals(myprow)
1847 ncol_local = matrix%matrix_struct%ncol_locals(mypcol)
1848
1849 a => work%local_data
1850 a_sp => work%local_data_sp
1851 desca(:) = work%matrix_struct%descriptor(:)
1852 c => matrix%local_data
1853 c_sp => matrix%local_data_sp
1854 descc(:) = matrix%matrix_struct%descriptor(:)
1855
1856 DO icol_local = 1, ncol_local
1857 icol_global = matrix%matrix_struct%col_indices(icol_local)
1858 DO irow_local = 1, nrow_local
1859 irow_global = matrix%matrix_struct%row_indices(irow_local)
1860 IF (merge(irow_global > icol_global, irow_global < icol_global, (myuplo == "U") .OR. (myuplo == "u"))) THEN
1861 IF (matrix%use_sp) THEN
1862 c_sp(irow_local, icol_local) = 0.0_sp
1863 ELSE
1864 c(irow_local, icol_local) = 0.0_dp
1865 END IF
1866 ELSE IF (irow_global == icol_global) THEN
1867 IF (matrix%use_sp) THEN
1868 c_sp(irow_local, icol_local) = 0.5_sp*c_sp(irow_local, icol_local)
1869 ELSE
1870 c(irow_local, icol_local) = 0.5_dp*c(irow_local, icol_local)
1871 END IF
1872 END IF
1873 END DO
1874 END DO
1875
1876 DO icol_local = 1, ncol_local
1877 DO irow_local = 1, nrow_local
1878 IF (matrix%use_sp) THEN
1879 a_sp(irow_local, icol_local) = c_sp(irow_local, icol_local)
1880 ELSE
1881 a(irow_local, icol_local) = c(irow_local, icol_local)
1882 END IF
1883 END DO
1884 END DO
1885
1886 IF (matrix%use_sp) THEN
1887 CALL pstran(nrow_global, ncol_global, 1.0_sp, a_sp(1, 1), 1, 1, desca, 1.0_sp, c_sp(1, 1), 1, 1, descc)
1888 ELSE
1889 CALL pdtran(nrow_global, ncol_global, 1.0_dp, a(1, 1), 1, 1, desca, 1.0_dp, c(1, 1), 1, 1, descc)
1890 END IF
1891
1892#else
1893
1894 a => matrix%local_data
1895 a_sp => matrix%local_data_sp
1896
1897 IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
1898 DO irow_global = 1, nrow_global
1899 DO icol_global = irow_global + 1, ncol_global
1900 IF (matrix%use_sp) THEN
1901 a_sp(icol_global, irow_global) = a_sp(irow_global, icol_global)
1902 ELSE
1903 a(icol_global, irow_global) = a(irow_global, icol_global)
1904 END IF
1905 END DO
1906 END DO
1907 ELSE
1908 DO icol_global = 1, ncol_global
1909 DO irow_global = icol_global + 1, nrow_global
1910 IF (matrix%use_sp) THEN
1911 a_sp(irow_global, icol_global) = a_sp(icol_global, irow_global)
1912 ELSE
1913 a(irow_global, icol_global) = a(icol_global, irow_global)
1914 END IF
1915 END DO
1916 END DO
1917 END IF
1918
1919#endif
1920 CALL timestop(handle)
1921
1922 END SUBROUTINE cp_fm_uplo_to_full
1923
1924! **************************************************************************************************
1925!> \brief scales column i of matrix a with scaling(i)
1926!> \param matrixa ...
1927!> \param scaling : an array used for scaling the columns,
1928!> SIZE(scaling) determines the number of columns to be scaled
1929!> \author Joost VandeVondele
1930!> \note
1931!> this is very useful as a first step in the computation of C = sum_i alpha_i A_i transpose (A_i)
1932!> that is a rank-k update (cp_fm_syrk , cp_sm_plus_fm_fm_t)
1933!> this procedure can be up to 20 times faster than calling cp_fm_syrk n times
1934!> where every vector has a different prefactor
1935! **************************************************************************************************
1936 SUBROUTINE cp_fm_column_scale(matrixa, scaling)
1937 TYPE(cp_fm_type), INTENT(IN) :: matrixa
1938 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: scaling
1939
1940 INTEGER :: k, mypcol, myprow, n, ncol_global, &
1941 npcol, nprow
1942 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
1943 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
1944#if defined(__parallel)
1945 INTEGER :: icol_global, icol_local, &
1946 ipcol, iprow, irow_local
1947#else
1948 INTEGER :: i
1949#endif
1950
1951 myprow = matrixa%matrix_struct%context%mepos(1)
1952 mypcol = matrixa%matrix_struct%context%mepos(2)
1953 nprow = matrixa%matrix_struct%context%num_pe(1)
1954 npcol = matrixa%matrix_struct%context%num_pe(2)
1955
1956 ncol_global = matrixa%matrix_struct%ncol_global
1957
1958 a => matrixa%local_data
1959 a_sp => matrixa%local_data_sp
1960 IF (matrixa%use_sp) THEN
1961 n = SIZE(a_sp, 1)
1962 ELSE
1963 n = SIZE(a, 1)
1964 END IF
1965 k = min(SIZE(scaling), ncol_global)
1966
1967#if defined(__parallel)
1968
1969 DO icol_global = 1, k
1970 CALL infog2l(1, icol_global, matrixa%matrix_struct%descriptor, &
1971 nprow, npcol, myprow, mypcol, &
1972 irow_local, icol_local, iprow, ipcol)
1973 IF ((ipcol == mypcol)) THEN
1974 IF (matrixa%use_sp) THEN
1975 CALL sscal(n, real(scaling(icol_global), sp), a_sp(:, icol_local), 1)
1976 ELSE
1977 CALL dscal(n, scaling(icol_global), a(:, icol_local), 1)
1978 END IF
1979 END IF
1980 END DO
1981#else
1982 DO i = 1, k
1983 IF (matrixa%use_sp) THEN
1984 CALL sscal(n, real(scaling(i), sp), a_sp(:, i), 1)
1985 ELSE
1986 CALL dscal(n, scaling(i), a(:, i), 1)
1987 END IF
1988 END DO
1989#endif
1990 END SUBROUTINE cp_fm_column_scale
1991
1992! **************************************************************************************************
1993!> \brief scales row i of matrix a with scaling(i)
1994!> \param matrixa ...
1995!> \param scaling : an array used for scaling the columns,
1996!> \author JGH
1997!> \note
1998! **************************************************************************************************
1999 SUBROUTINE cp_fm_row_scale(matrixa, scaling)
2000 TYPE(cp_fm_type), INTENT(IN) :: matrixa
2001 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: scaling
2002
2003 INTEGER :: n, m, nrow_global, nrow_local, ncol_local
2004 INTEGER, DIMENSION(:), POINTER :: row_indices
2005 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2006 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
2007#if defined(__parallel)
2008 INTEGER :: irow_global, icol, irow
2009#else
2010 INTEGER :: j
2011#endif
2012
2013 CALL cp_fm_get_info(matrixa, row_indices=row_indices, nrow_global=nrow_global, &
2014 nrow_local=nrow_local, ncol_local=ncol_local)
2015 cpassert(SIZE(scaling) == nrow_global)
2016
2017 a => matrixa%local_data
2018 a_sp => matrixa%local_data_sp
2019 IF (matrixa%use_sp) THEN
2020 n = SIZE(a_sp, 1)
2021 m = SIZE(a_sp, 2)
2022 ELSE
2023 n = SIZE(a, 1)
2024 m = SIZE(a, 2)
2025 END IF
2026
2027#if defined(__parallel)
2028 DO icol = 1, ncol_local
2029 IF (matrixa%use_sp) THEN
2030 DO irow = 1, nrow_local
2031 irow_global = row_indices(irow)
2032 a(irow, icol) = real(scaling(irow_global), dp)*a(irow, icol)
2033 END DO
2034 ELSE
2035 DO irow = 1, nrow_local
2036 irow_global = row_indices(irow)
2037 a(irow, icol) = scaling(irow_global)*a(irow, icol)
2038 END DO
2039 END IF
2040 END DO
2041#else
2042 IF (matrixa%use_sp) THEN
2043 DO j = 1, m
2044 a_sp(1:n, j) = real(scaling(1:n), sp)*a_sp(1:n, j)
2045 END DO
2046 ELSE
2047 DO j = 1, m
2048 a(1:n, j) = scaling(1:n)*a(1:n, j)
2049 END DO
2050 END IF
2051#endif
2052 END SUBROUTINE cp_fm_row_scale
2053
2054! **************************************************************************************************
2055!> \brief Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix
2056!> \param matrix_a the matrix to invert
2057!> \param matrix_inverse the inverse of matrix_a
2058!> \param det_a the determinant of matrix_a
2059!> \param eps_svd optional parameter to active SVD based inversion, singular values below eps_svd
2060!> are screened
2061!> \param eigval optionally return matrix eigenvalues/singular values
2062!> \par History
2063!> note of Jan Wilhelm (12.2015)
2064!> - computation of determinant corrected
2065!> - determinant only computed if det_a is present
2066!> 12.2016 added option to use SVD instead of LU [Nico Holmberg]
2067!> - Use cp_fm_get diag instead of n times cp_fm_get_element (A. Bussy)
2068!> \author Florian Schiffmann(02.2007)
2069! **************************************************************************************************
2070 SUBROUTINE cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
2071
2072 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_inverse
2073 REAL(kind=dp), INTENT(OUT), OPTIONAL :: det_a
2074 REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_svd
2075 REAL(kind=dp), DIMENSION(:), POINTER, &
2076 INTENT(INOUT), OPTIONAL :: eigval
2077
2078 INTEGER :: n
2079 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
2080 REAL(kind=dp) :: determinant, my_eps_svd
2081 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2082 TYPE(cp_fm_type) :: matrix_lu
2083
2084#if defined(__parallel)
2085 TYPE(cp_fm_type) :: u, vt, sigma, inv_sigma_ut
2086 TYPE(mp_comm_type) :: group
2087 INTEGER :: i, info, liwork, lwork, exponent_of_minus_one
2088 INTEGER, DIMENSION(9) :: desca
2089 LOGICAL :: quenched
2090 REAL(kind=dp) :: alpha, beta
2091 REAL(kind=dp), DIMENSION(:), POINTER :: diag
2092 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: work
2093#else
2094 LOGICAL :: sign
2095 REAL(kind=dp) :: eps1
2096#endif
2097
2098 my_eps_svd = 0.0_dp
2099 IF (PRESENT(eps_svd)) my_eps_svd = eps_svd
2100
2101 CALL cp_fm_create(matrix=matrix_lu, &
2102 matrix_struct=matrix_a%matrix_struct, &
2103 name="A_lu"//trim(adjustl(cp_to_string(1)))//"MATRIX")
2104 CALL cp_fm_to_fm(matrix_a, matrix_lu)
2105
2106 a => matrix_lu%local_data
2107 n = matrix_lu%matrix_struct%nrow_global
2108 ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
2109 ipivot(:) = 0
2110#if defined(__parallel)
2111 IF (my_eps_svd == 0.0_dp) THEN
2112 ! Use LU decomposition
2113 lwork = 3*n
2114 liwork = 3*n
2115 desca(:) = matrix_lu%matrix_struct%descriptor(:)
2116 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
2117
2118 IF (PRESENT(det_a) .OR. PRESENT(eigval)) THEN
2119
2120 ALLOCATE (diag(n))
2121 diag(:) = 0.0_dp
2122 CALL cp_fm_get_diag(matrix_lu, diag)
2123
2124 exponent_of_minus_one = 0
2125 determinant = 1.0_dp
2126 DO i = 1, n
2127 determinant = determinant*diag(i)
2128 IF (ipivot(i) /= i) THEN
2129 exponent_of_minus_one = exponent_of_minus_one + 1
2130 END IF
2131 END DO
2132 IF (PRESENT(eigval)) THEN
2133 cpassert(.NOT. ASSOCIATED(eigval))
2134 ALLOCATE (eigval(n))
2135 eigval(:) = diag
2136 END IF
2137 DEALLOCATE (diag)
2138
2139 group = matrix_lu%matrix_struct%para_env
2140 CALL group%sum(exponent_of_minus_one)
2141
2142 determinant = determinant*(-1.0_dp)**exponent_of_minus_one
2143
2144 END IF
2145
2146 alpha = 0.0_dp
2147 beta = 1.0_dp
2148 CALL cp_fm_set_all(matrix_inverse, alpha, beta)
2149 CALL pdgetrs('N', n, n, matrix_lu%local_data, 1, 1, desca, ipivot, matrix_inverse%local_data, 1, 1, desca, info)
2150 ELSE
2151 ! Use singular value decomposition
2152 CALL cp_fm_create(matrix=u, &
2153 matrix_struct=matrix_a%matrix_struct, &
2154 name="LEFT_SINGULAR_MATRIX")
2155 CALL cp_fm_set_all(u, alpha=0.0_dp)
2156 CALL cp_fm_create(matrix=vt, &
2157 matrix_struct=matrix_a%matrix_struct, &
2158 name="RIGHT_SINGULAR_MATRIX")
2159 CALL cp_fm_set_all(vt, alpha=0.0_dp)
2160 ALLOCATE (diag(n))
2161 diag(:) = 0.0_dp
2162 desca(:) = matrix_lu%matrix_struct%descriptor(:)
2163 ALLOCATE (work(1))
2164 ! Workspace query
2165 lwork = -1
2166 CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
2167 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
2168 lwork = int(work(1))
2169 DEALLOCATE (work)
2170 ALLOCATE (work(lwork))
2171 ! SVD
2172 CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
2173 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
2174 ! info == n+1 implies homogeneity error when the number of procs is large
2175 ! this likely isnt a problem, but maybe we should handle it separately
2176 IF (info /= 0 .AND. info /= n + 1) &
2177 cpabort("Singular value decomposition of matrix failed.")
2178 ! (Pseudo)inverse and (pseudo)determinant
2179 CALL cp_fm_create(matrix=sigma, &
2180 matrix_struct=matrix_a%matrix_struct, &
2181 name="SINGULAR_VALUE_MATRIX")
2182 CALL cp_fm_set_all(sigma, alpha=0.0_dp)
2183 determinant = 1.0_dp
2184 quenched = .false.
2185 IF (PRESENT(eigval)) THEN
2186 cpassert(.NOT. ASSOCIATED(eigval))
2187 ALLOCATE (eigval(n))
2188 eigval(:) = diag
2189 END IF
2190 DO i = 1, n
2191 IF (diag(i) < my_eps_svd) THEN
2192 diag(i) = 0.0_dp
2193 quenched = .true.
2194 ELSE
2195 determinant = determinant*diag(i)
2196 diag(i) = 1.0_dp/diag(i)
2197 END IF
2198 CALL cp_fm_set_element(sigma, i, i, diag(i))
2199 END DO
2200 DEALLOCATE (diag)
2201 IF (quenched) &
2202 CALL cp_warn(__location__, &
2203 "Linear dependencies were detected in the SVD inversion of matrix "//trim(adjustl(matrix_a%name))// &
2204 ". At least one singular value has been quenched.")
2205 ! Sigma^-1 * U^T
2206 CALL cp_fm_create(matrix=inv_sigma_ut, &
2207 matrix_struct=matrix_a%matrix_struct, &
2208 name="SINGULAR_VALUE_MATRIX")
2209 CALL cp_fm_set_all(inv_sigma_ut, alpha=0.0_dp)
2210 CALL pdgemm('N', 'T', n, n, n, 1.0_dp, sigma%local_data, 1, 1, desca, &
2211 u%local_data, 1, 1, desca, 0.0_dp, inv_sigma_ut%local_data, 1, 1, desca)
2212 ! A^-1 = V * (Sigma^-1 * U^T)
2213 CALL cp_fm_set_all(matrix_inverse, alpha=0.0_dp)
2214 CALL pdgemm('T', 'N', n, n, n, 1.0_dp, vt%local_data, 1, 1, desca, &
2215 inv_sigma_ut%local_data, 1, 1, desca, 0.0_dp, matrix_inverse%local_data, 1, 1, desca)
2216 ! Clean up
2217 DEALLOCATE (work)
2218 CALL cp_fm_release(u)
2219 CALL cp_fm_release(vt)
2220 CALL cp_fm_release(sigma)
2221 CALL cp_fm_release(inv_sigma_ut)
2222 END IF
2223#else
2224 IF (my_eps_svd == 0.0_dp) THEN
2225 sign = .true.
2226 CALL invert_matrix(matrix_a%local_data, matrix_inverse%local_data, &
2227 eval_error=eps1)
2228 CALL cp_fm_lu_decompose(matrix_lu, determinant, correct_sign=sign)
2229 IF (PRESENT(eigval)) &
2230 CALL cp_abort(__location__, &
2231 "NYI. Eigenvalues not available for return without SCALAPACK.")
2232 ELSE
2233 CALL get_pseudo_inverse_svd(matrix_a%local_data, matrix_inverse%local_data, eps_svd, &
2234 determinant, eigval)
2235 END IF
2236#endif
2237 CALL cp_fm_release(matrix_lu)
2238 DEALLOCATE (ipivot)
2239 IF (PRESENT(det_a)) det_a = determinant
2240 END SUBROUTINE cp_fm_invert
2241
2242! **************************************************************************************************
2243!> \brief inverts a triangular matrix
2244!> \param matrix_a ...
2245!> \param uplo_tr triangular format; defaults to 'U'
2246!> \author MI
2247! **************************************************************************************************
2248 SUBROUTINE cp_fm_triangular_invert(matrix_a, uplo_tr)
2249
2250 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
2251 CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
2252
2253 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_triangular_invert'
2254
2255 CHARACTER :: unit_diag, uplo
2256 INTEGER :: handle, info, ncol_global
2257 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2258#if defined(__parallel)
2259 INTEGER, DIMENSION(9) :: desca
2260#endif
2261
2262 CALL timeset(routinen, handle)
2263
2264 unit_diag = 'N'
2265 uplo = 'U'
2266 IF (PRESENT(uplo_tr)) uplo = uplo_tr
2267
2268 ncol_global = matrix_a%matrix_struct%ncol_global
2269
2270 a => matrix_a%local_data
2271
2272#if defined(__parallel)
2273 desca(:) = matrix_a%matrix_struct%descriptor(:)
2274
2275 CALL pdtrtri(uplo, unit_diag, ncol_global, a(1, 1), 1, 1, desca, info)
2276
2277#else
2278 CALL dtrtri(uplo, unit_diag, ncol_global, a(1, 1), ncol_global, info)
2279#endif
2280
2281 CALL timestop(handle)
2282 END SUBROUTINE cp_fm_triangular_invert
2283
2284! **************************************************************************************************
2285!> \brief performs a QR factorization of the input rectangular matrix A or of a submatrix of A
2286!> the computed triangular matrix R is in output of the submatrix sub(A) of size NxN
2287!> M and M give the dimension of the submatrix that has to be factorized (MxN) with M>N
2288!> \param matrix_a ...
2289!> \param matrix_r ...
2290!> \param nrow_fact ...
2291!> \param ncol_fact ...
2292!> \param first_row ...
2293!> \param first_col ...
2294!> \author MI
2295! **************************************************************************************************
2296 SUBROUTINE cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col, uplo)
2297 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_r
2298 INTEGER, INTENT(IN), OPTIONAL :: nrow_fact, ncol_fact, &
2299 first_row, first_col
2300 CHARACTER, INTENT(IN), OPTIONAL :: uplo
2301
2302 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_qr_factorization'
2303
2304 CHARACTER :: myuplo
2305 INTEGER :: handle, i, icol, info, irow, &
2306 j, lda, lwork, ncol, &
2307 ndim, nrow
2308 REAL(dp), ALLOCATABLE, DIMENSION(:) :: tau, work
2309 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: r_mat
2310 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2311#if defined(__parallel)
2312 INTEGER, DIMENSION(9) :: desca
2313#endif
2314
2315 CALL timeset(routinen, handle)
2316
2317 myuplo = 'U'
2318 IF (PRESENT(uplo)) myuplo = uplo
2319
2320 ncol = matrix_a%matrix_struct%ncol_global
2321 nrow = matrix_a%matrix_struct%nrow_global
2322 lda = nrow
2323
2324 a => matrix_a%local_data
2325
2326 IF (PRESENT(nrow_fact)) nrow = nrow_fact
2327 IF (PRESENT(ncol_fact)) ncol = ncol_fact
2328 irow = 1
2329 IF (PRESENT(first_row)) irow = first_row
2330 icol = 1
2331 IF (PRESENT(first_col)) icol = first_col
2332
2333 cpassert(nrow >= ncol)
2334 ndim = SIZE(a, 2)
2335 ALLOCATE (tau(ndim))
2336
2337#if defined(__parallel)
2338
2339 desca(:) = matrix_a%matrix_struct%descriptor(:)
2340
2341 lwork = -1
2342 ALLOCATE (work(2*ndim))
2343 CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
2344 lwork = int(work(1))
2345 DEALLOCATE (work)
2346 ALLOCATE (work(lwork))
2347 CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
2348
2349#else
2350 lwork = -1
2351 ALLOCATE (work(2*ndim))
2352 CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
2353 lwork = int(work(1))
2354 DEALLOCATE (work)
2355 ALLOCATE (work(lwork))
2356 CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
2357
2358#endif
2359
2360 ALLOCATE (r_mat(ncol, ncol))
2361 CALL cp_fm_get_submatrix(matrix_a, r_mat, 1, 1, ncol, ncol)
2362 IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
2363 DO i = 1, ncol
2364 DO j = i + 1, ncol
2365 r_mat(j, i) = 0.0_dp
2366 END DO
2367 END DO
2368 ELSE
2369 DO j = 1, ncol
2370 DO i = j + 1, ncol
2371 r_mat(i, j) = 0.0_dp
2372 END DO
2373 END DO
2374 END IF
2375 CALL cp_fm_set_submatrix(matrix_r, r_mat, 1, 1, ncol, ncol)
2376
2377 DEALLOCATE (tau, work, r_mat)
2378
2379 CALL timestop(handle)
2380
2381 END SUBROUTINE cp_fm_qr_factorization
2382
2383! **************************************************************************************************
2384!> \brief computes the the solution to A*b=A_general using lu decomposition
2385!> \param matrix_a input matrix; will be overwritten
2386!> \param general_a contains the result
2387!> \author Florian Schiffmann
2388! **************************************************************************************************
2389 SUBROUTINE cp_fm_solve(matrix_a, general_a)
2390 TYPE(cp_fm_type), INTENT(IN) :: matrix_a, general_a
2391
2392 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_solve'
2393
2394 INTEGER :: handle, info, n, nrhs
2395 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
2396 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, a_general
2397#if defined(__parallel)
2398 INTEGER, DIMENSION(9) :: desca, descb
2399#else
2400 INTEGER :: lda, ldb
2401#endif
2402
2403 CALL timeset(routinen, handle)
2404
2405 a => matrix_a%local_data
2406 a_general => general_a%local_data
2407 n = matrix_a%matrix_struct%nrow_global
2408 nrhs = general_a%matrix_struct%ncol_global
2409 ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
2410
2411#if defined(__parallel)
2412 desca(:) = matrix_a%matrix_struct%descriptor(:)
2413 descb(:) = general_a%matrix_struct%descriptor(:)
2414 CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
2415 CALL pdgetrs("N", n, nrhs, a, 1, 1, desca, ipivot, a_general, &
2416 1, 1, descb, info)
2417
2418#else
2419 lda = SIZE(a, 1)
2420 ldb = SIZE(a_general, 1)
2421 CALL dgetrf(n, n, a, lda, ipivot, info)
2422 CALL dgetrs("N", n, nrhs, a, lda, ipivot, a_general, ldb, info)
2423
2424#endif
2425 ! info is allowed to be zero
2426 ! this does just signal a zero diagonal element
2427 DEALLOCATE (ipivot)
2428 CALL timestop(handle)
2429 END SUBROUTINE
2430
2431! **************************************************************************************************
2432!> \brief Convenience function. Computes the matrix multiplications needed
2433!> for the multiplication of complex matrices.
2434!> C = beta * C + alpha * ( A ** transa ) * ( B ** transb )
2435!> \param transa : 'N' -> normal 'T' -> transpose
2436!> alpha,beta :: can be 0.0_dp and 1.0_dp
2437!> \param transb ...
2438!> \param m ...
2439!> \param n ...
2440!> \param k ...
2441!> \param alpha ...
2442!> \param A_re m x k matrix ( ! for transa = 'N'), real part
2443!> \param A_im m x k matrix ( ! for transa = 'N'), imaginary part
2444!> \param B_re k x n matrix ( ! for transa = 'N'), real part
2445!> \param B_im k x n matrix ( ! for transa = 'N'), imaginary part
2446!> \param beta ...
2447!> \param C_re m x n matrix, real part
2448!> \param C_im m x n matrix, imaginary part
2449!> \param a_first_col ...
2450!> \param a_first_row ...
2451!> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
2452!> \param b_first_row ...
2453!> \param c_first_col ...
2454!> \param c_first_row ...
2455!> \author Samuel Andermatt
2456!> \note
2457!> C should have no overlap with A, B
2458! **************************************************************************************************
2459 SUBROUTINE cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, &
2460 C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, &
2461 c_first_row)
2462 CHARACTER(LEN=1), INTENT(IN) :: transa, transb
2463 INTEGER, INTENT(IN) :: m, n, k
2464 REAL(kind=dp), INTENT(IN) :: alpha
2465 TYPE(cp_fm_type), INTENT(IN) :: a_re, a_im, b_re, b_im
2466 REAL(kind=dp), INTENT(IN) :: beta
2467 TYPE(cp_fm_type), INTENT(IN) :: c_re, c_im
2468 INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, b_first_col, &
2469 b_first_row, c_first_col, c_first_row
2470
2471 CHARACTER(len=*), PARAMETER :: routinen = 'cp_complex_fm_gemm'
2472
2473 INTEGER :: handle
2474
2475 CALL timeset(routinen, handle)
2476
2477 CALL cp_fm_gemm(transa, transb, m, n, k, alpha, a_re, b_re, beta, c_re, &
2478 a_first_col=a_first_col, &
2479 a_first_row=a_first_row, &
2480 b_first_col=b_first_col, &
2481 b_first_row=b_first_row, &
2482 c_first_col=c_first_col, &
2483 c_first_row=c_first_row)
2484 CALL cp_fm_gemm(transa, transb, m, n, k, -alpha, a_im, b_im, 1.0_dp, c_re, &
2485 a_first_col=a_first_col, &
2486 a_first_row=a_first_row, &
2487 b_first_col=b_first_col, &
2488 b_first_row=b_first_row, &
2489 c_first_col=c_first_col, &
2490 c_first_row=c_first_row)
2491 CALL cp_fm_gemm(transa, transb, m, n, k, alpha, a_re, b_im, beta, c_im, &
2492 a_first_col=a_first_col, &
2493 a_first_row=a_first_row, &
2494 b_first_col=b_first_col, &
2495 b_first_row=b_first_row, &
2496 c_first_col=c_first_col, &
2497 c_first_row=c_first_row)
2498 CALL cp_fm_gemm(transa, transb, m, n, k, alpha, a_im, b_re, 1.0_dp, c_im, &
2499 a_first_col=a_first_col, &
2500 a_first_row=a_first_row, &
2501 b_first_col=b_first_col, &
2502 b_first_row=b_first_row, &
2503 c_first_col=c_first_col, &
2504 c_first_row=c_first_row)
2505
2506 CALL timestop(handle)
2507
2508 END SUBROUTINE cp_complex_fm_gemm
2509
2510! **************************************************************************************************
2511!> \brief inverts a matrix using LU decomposition
2512!> the input matrix will be overwritten
2513!> \param matrix : input a general square non-singular matrix, outputs its inverse
2514!> \param info_out : optional, if present outputs the info from (p)zgetri
2515!> \author Lianheng Tong
2516! **************************************************************************************************
2517 SUBROUTINE cp_fm_lu_invert(matrix, info_out)
2518 TYPE(cp_fm_type), INTENT(IN) :: matrix
2519 INTEGER, INTENT(OUT), OPTIONAL :: info_out
2520
2521 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_lu_invert'
2522
2523 INTEGER :: nrows_global, handle, info, lwork
2524 INTEGER, DIMENSION(:), ALLOCATABLE :: ipivot
2525 REAL(kind=dp), DIMENSION(:, :), POINTER :: mat
2526 REAL(kind=sp), DIMENSION(:, :), POINTER :: mat_sp
2527 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: work
2528 REAL(kind=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2529#if defined(__parallel)
2530 INTEGER :: liwork
2531 INTEGER, DIMENSION(9) :: desca
2532 INTEGER, DIMENSION(:), ALLOCATABLE :: iwork
2533#else
2534 INTEGER :: lda
2535#endif
2536
2537 CALL timeset(routinen, handle)
2538
2539 mat => matrix%local_data
2540 mat_sp => matrix%local_data_sp
2541 nrows_global = matrix%matrix_struct%nrow_global
2542 cpassert(nrows_global == matrix%matrix_struct%ncol_global)
2543 ALLOCATE (ipivot(nrows_global))
2544 ! do LU decomposition
2545#if defined(__parallel)
2546 desca = matrix%matrix_struct%descriptor
2547 IF (matrix%use_sp) THEN
2548 CALL psgetrf(nrows_global, nrows_global, &
2549 mat_sp, 1, 1, desca, ipivot, info)
2550 ELSE
2551 CALL pdgetrf(nrows_global, nrows_global, &
2552 mat, 1, 1, desca, ipivot, info)
2553 END IF
2554#else
2555 lda = SIZE(mat, 1)
2556 IF (matrix%use_sp) THEN
2557 CALL sgetrf(nrows_global, nrows_global, &
2558 mat_sp, lda, ipivot, info)
2559 ELSE
2560 CALL dgetrf(nrows_global, nrows_global, &
2561 mat, lda, ipivot, info)
2562 END IF
2563#endif
2564 IF (info /= 0) THEN
2565 CALL cp_abort(__location__, "LU decomposition has failed")
2566 END IF
2567 ! do inversion
2568 IF (matrix%use_sp) THEN
2569 ALLOCATE (work(1))
2570 ELSE
2571 ALLOCATE (work_sp(1))
2572 END IF
2573#if defined(__parallel)
2574 ALLOCATE (iwork(1))
2575 IF (matrix%use_sp) THEN
2576 CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2577 ipivot, work_sp, -1, iwork, -1, info)
2578 lwork = int(work_sp(1))
2579 DEALLOCATE (work_sp)
2580 ALLOCATE (work_sp(lwork))
2581 ELSE
2582 CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2583 ipivot, work, -1, iwork, -1, info)
2584 lwork = int(work(1))
2585 DEALLOCATE (work)
2586 ALLOCATE (work(lwork))
2587 END IF
2588 liwork = int(iwork(1))
2589 DEALLOCATE (iwork)
2590 ALLOCATE (iwork(liwork))
2591 IF (matrix%use_sp) THEN
2592 CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2593 ipivot, work_sp, lwork, iwork, liwork, info)
2594 ELSE
2595 CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2596 ipivot, work, lwork, iwork, liwork, info)
2597 END IF
2598 DEALLOCATE (iwork)
2599#else
2600 IF (matrix%use_sp) THEN
2601 CALL sgetri(nrows_global, mat_sp, lda, &
2602 ipivot, work_sp, -1, info)
2603 lwork = int(work_sp(1))
2604 DEALLOCATE (work_sp)
2605 ALLOCATE (work_sp(lwork))
2606 CALL sgetri(nrows_global, mat_sp, lda, &
2607 ipivot, work_sp, lwork, info)
2608 ELSE
2609 CALL dgetri(nrows_global, mat, lda, &
2610 ipivot, work, -1, info)
2611 lwork = int(work(1))
2612 DEALLOCATE (work)
2613 ALLOCATE (work(lwork))
2614 CALL dgetri(nrows_global, mat, lda, &
2615 ipivot, work, lwork, info)
2616 END IF
2617#endif
2618 IF (matrix%use_sp) THEN
2619 DEALLOCATE (work_sp)
2620 ELSE
2621 DEALLOCATE (work)
2622 END IF
2623 DEALLOCATE (ipivot)
2624
2625 IF (PRESENT(info_out)) THEN
2626 info_out = info
2627 ELSE
2628 IF (info /= 0) &
2629 CALL cp_abort(__location__, "LU inversion has failed")
2630 END IF
2631
2632 CALL timestop(handle)
2633
2634 END SUBROUTINE cp_fm_lu_invert
2635
2636! **************************************************************************************************
2637!> \brief norm of matrix using (p)dlange
2638!> \param matrix : input a general matrix
2639!> \param mode : 'M' max abs element value,
2640!> '1' or 'O' one norm, i.e. maximum column sum
2641!> 'I' infinity norm, i.e. maximum row sum
2642!> 'F' or 'E' Frobenius norm, i.e. sqrt of sum of all squares of elements
2643!> \return : the norm according to mode
2644!> \author Lianheng Tong
2645! **************************************************************************************************
2646 FUNCTION cp_fm_norm(matrix, mode) RESULT(res)
2647 TYPE(cp_fm_type), INTENT(IN) :: matrix
2648 CHARACTER, INTENT(IN) :: mode
2649 REAL(kind=dp) :: res
2650
2651 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_norm'
2652
2653 INTEGER :: nrows, ncols, handle, lwork, nrows_local, ncols_local
2654 REAL(kind=sp) :: res_sp
2655 REAL(kind=dp), DIMENSION(:, :), POINTER :: aa
2656 REAL(kind=sp), DIMENSION(:, :), POINTER :: aa_sp
2657 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: work
2658 REAL(kind=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2659#if defined(__parallel)
2660 INTEGER, DIMENSION(9) :: desca
2661#else
2662 INTEGER :: lda
2663#endif
2664
2665 CALL timeset(routinen, handle)
2666
2667 CALL cp_fm_get_info(matrix=matrix, &
2668 nrow_global=nrows, &
2669 ncol_global=ncols, &
2670 nrow_local=nrows_local, &
2671 ncol_local=ncols_local)
2672 aa => matrix%local_data
2673 aa_sp => matrix%local_data_sp
2674
2675#if defined(__parallel)
2676 desca = matrix%matrix_struct%descriptor
2677 SELECT CASE (mode)
2678 CASE ('M', 'm')
2679 lwork = 1
2680 CASE ('1', 'O', 'o')
2681 lwork = ncols_local
2682 CASE ('I', 'i')
2683 lwork = nrows_local
2684 CASE ('F', 'f', 'E', 'e')
2685 lwork = 1
2686 CASE DEFAULT
2687 cpabort("mode input is not valid")
2688 END SELECT
2689 IF (matrix%use_sp) THEN
2690 ALLOCATE (work_sp(lwork))
2691 res_sp = pslange(mode, nrows, ncols, aa_sp, 1, 1, desca, work_sp)
2692 DEALLOCATE (work_sp)
2693 res = real(res_sp, kind=dp)
2694 ELSE
2695 ALLOCATE (work(lwork))
2696 res = pdlange(mode, nrows, ncols, aa, 1, 1, desca, work)
2697 DEALLOCATE (work)
2698 END IF
2699#else
2700 SELECT CASE (mode)
2701 CASE ('M', 'm')
2702 lwork = 1
2703 CASE ('1', 'O', 'o')
2704 lwork = 1
2705 CASE ('I', 'i')
2706 lwork = nrows
2707 CASE ('F', 'f', 'E', 'e')
2708 lwork = 1
2709 CASE DEFAULT
2710 cpabort("mode input is not valid")
2711 END SELECT
2712 IF (matrix%use_sp) THEN
2713 ALLOCATE (work_sp(lwork))
2714 lda = SIZE(aa_sp, 1)
2715 res_sp = slange(mode, nrows, ncols, aa_sp, lda, work_sp)
2716 DEALLOCATE (work_sp)
2717 res = real(res_sp, kind=dp)
2718 ELSE
2719 ALLOCATE (work(lwork))
2720 lda = SIZE(aa, 1)
2721 res = dlange(mode, nrows, ncols, aa, lda, work)
2722 DEALLOCATE (work)
2723 END IF
2724#endif
2725
2726 CALL timestop(handle)
2727
2728 END FUNCTION cp_fm_norm
2729
2730! **************************************************************************************************
2731!> \brief trace of a matrix using pdlatra
2732!> \param matrix : input a square matrix
2733!> \return : the trace
2734!> \author Lianheng Tong
2735! **************************************************************************************************
2736 FUNCTION cp_fm_latra(matrix) RESULT(res)
2737 TYPE(cp_fm_type), INTENT(IN) :: matrix
2738 REAL(kind=dp) :: res
2739
2740 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_latra'
2741
2742 INTEGER :: nrows, ncols, handle
2743 REAL(kind=sp) :: res_sp
2744 REAL(kind=dp), DIMENSION(:, :), POINTER :: aa
2745 REAL(kind=sp), DIMENSION(:, :), POINTER :: aa_sp
2746#if defined(__parallel)
2747 INTEGER, DIMENSION(9) :: desca
2748#else
2749 INTEGER :: ii
2750#endif
2751
2752 CALL timeset(routinen, handle)
2753
2754 nrows = matrix%matrix_struct%nrow_global
2755 ncols = matrix%matrix_struct%ncol_global
2756 cpassert(nrows == ncols)
2757 aa => matrix%local_data
2758 aa_sp => matrix%local_data_sp
2759
2760#if defined(__parallel)
2761 desca = matrix%matrix_struct%descriptor
2762 IF (matrix%use_sp) THEN
2763 res_sp = pslatra(nrows, aa_sp, 1, 1, desca)
2764 res = real(res_sp, kind=dp)
2765 ELSE
2766 res = pdlatra(nrows, aa, 1, 1, desca)
2767 END IF
2768#else
2769 IF (matrix%use_sp) THEN
2770 res_sp = 0.0_sp
2771 DO ii = 1, nrows
2772 res_sp = res_sp + aa_sp(ii, ii)
2773 END DO
2774 res = real(res_sp, kind=dp)
2775 ELSE
2776 res = 0.0_dp
2777 DO ii = 1, nrows
2778 res = res + aa(ii, ii)
2779 END DO
2780 END IF
2781#endif
2782
2783 CALL timestop(handle)
2784
2785 END FUNCTION cp_fm_latra
2786
2787! **************************************************************************************************
2788!> \brief compute a QR factorization with column pivoting of a M-by-N distributed matrix
2789!> sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
2790!> \param matrix : input M-by-N distributed matrix sub( A ) which is to be factored
2791!> \param tau : scalar factors TAU of the elementary reflectors. TAU is tied to the distributed matrix A
2792!> \param nrow ...
2793!> \param ncol ...
2794!> \param first_row ...
2795!> \param first_col ...
2796!> \author MI
2797! **************************************************************************************************
2798 SUBROUTINE cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
2799
2800 TYPE(cp_fm_type), INTENT(IN) :: matrix
2801 REAL(kind=dp), DIMENSION(:), POINTER :: tau
2802 INTEGER, INTENT(IN) :: nrow, ncol, first_row, first_col
2803
2804 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_pdgeqpf'
2805
2806 INTEGER :: handle
2807 INTEGER :: info, lwork
2808 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipiv
2809 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2810 REAL(kind=dp), DIMENSION(:), POINTER :: work
2811#if defined(__parallel)
2812 INTEGER, DIMENSION(9) :: descc
2813#else
2814 INTEGER :: lda
2815#endif
2816
2817 CALL timeset(routinen, handle)
2818
2819 a => matrix%local_data
2820 lwork = -1
2821 ALLOCATE (work(2*nrow))
2822 ALLOCATE (ipiv(ncol))
2823 info = 0
2824
2825#if defined(__parallel)
2826 descc(:) = matrix%matrix_struct%descriptor(:)
2827 ! Call SCALAPACK routine to get optimal work dimension
2828 CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2829 lwork = int(work(1))
2830 DEALLOCATE (work)
2831 ALLOCATE (work(lwork))
2832 tau = 0.0_dp
2833 ipiv = 0
2834
2835 ! Call SCALAPACK routine to get QR decomposition of CTs
2836 CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2837#else
2838 cpassert(first_row == 1 .AND. first_col == 1)
2839 lda = SIZE(a, 1)
2840 CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2841 lwork = int(work(1))
2842 DEALLOCATE (work)
2843 ALLOCATE (work(lwork))
2844 tau = 0.0_dp
2845 ipiv = 0
2846 CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2847#endif
2848 cpassert(info == 0)
2849
2850 DEALLOCATE (work)
2851 DEALLOCATE (ipiv)
2852
2853 CALL timestop(handle)
2854
2855 END SUBROUTINE cp_fm_pdgeqpf
2856
2857! **************************************************************************************************
2858!> \brief generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1)
2859!> with orthonormal columns, which is defined as the first N columns of a product of K
2860!> elementary reflectors of order M
2861!> \param matrix : On entry, the j-th column must contain the vector which defines the elementary reflector
2862!> as returned from PDGEQRF
2863!> On exit it contains the M-by-N distributed matrix Q
2864!> \param tau : contains the scalar factors TAU of elementary reflectors as returned by PDGEQRF
2865!> \param nrow ...
2866!> \param first_row ...
2867!> \param first_col ...
2868!> \author MI
2869! **************************************************************************************************
2870 SUBROUTINE cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
2871
2872 TYPE(cp_fm_type), INTENT(IN) :: matrix
2873 REAL(kind=dp), DIMENSION(:), POINTER :: tau
2874 INTEGER, INTENT(IN) :: nrow, first_row, first_col
2875
2876 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_pdorgqr'
2877
2878 INTEGER :: handle
2879 INTEGER :: info, lwork
2880 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2881 REAL(kind=dp), DIMENSION(:), POINTER :: work
2882#if defined(__parallel)
2883 INTEGER, DIMENSION(9) :: descc
2884#else
2885 INTEGER :: lda
2886#endif
2887
2888 CALL timeset(routinen, handle)
2889
2890 a => matrix%local_data
2891 lwork = -1
2892 ALLOCATE (work(2*nrow))
2893 info = 0
2894
2895#if defined(__parallel)
2896 descc(:) = matrix%matrix_struct%descriptor(:)
2897
2898 CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2899 cpassert(info == 0)
2900 lwork = int(work(1))
2901 DEALLOCATE (work)
2902 ALLOCATE (work(lwork))
2903
2904 ! Call SCALAPACK routine to get Q
2905 CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2906#else
2907 cpassert(first_row == 1 .AND. first_col == 1)
2908 lda = SIZE(a, 1)
2909 CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2910 lwork = int(work(1))
2911 DEALLOCATE (work)
2912 ALLOCATE (work(lwork))
2913 CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2914#endif
2915 cpassert(info == 0)
2916
2917 DEALLOCATE (work)
2918 CALL timestop(handle)
2919
2920 END SUBROUTINE cp_fm_pdorgqr
2921
2922! **************************************************************************************************
2923!> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
2924!> \param matrix ...
2925!> \param irow ...
2926!> \param jrow ...
2927!> \param cs cosine of the rotation angle
2928!> \param sn sinus of the rotation angle
2929!> \author Ole Schuett
2930! **************************************************************************************************
2931 SUBROUTINE cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
2932 TYPE(cp_fm_type), INTENT(IN) :: matrix
2933 INTEGER, INTENT(IN) :: irow, jrow
2934 REAL(dp), INTENT(IN) :: cs, sn
2935
2936 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_rot_rows'
2937 INTEGER :: handle, ncol
2938
2939#if defined(__parallel)
2940 INTEGER :: info, lwork
2941 INTEGER, DIMENSION(9) :: desc
2942 REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2943#endif
2944 CALL timeset(routinen, handle)
2945 CALL cp_fm_get_info(matrix, ncol_global=ncol)
2946#if defined(__parallel)
2947 IF (1 /= matrix%matrix_struct%context%n_pid) THEN
2948 lwork = 2*ncol + 1
2949 ALLOCATE (work(lwork))
2950 desc(:) = matrix%matrix_struct%descriptor(:)
2951 CALL pdrot(ncol, &
2952 matrix%local_data(1, 1), irow, 1, desc, ncol, &
2953 matrix%local_data(1, 1), jrow, 1, desc, ncol, &
2954 cs, sn, work, lwork, info)
2955 cpassert(info == 0)
2956 DEALLOCATE (work)
2957 ELSE
2958#endif
2959 CALL drot(ncol, matrix%local_data(irow, 1), ncol, matrix%local_data(jrow, 1), ncol, cs, sn)
2960#if defined(__parallel)
2961 END IF
2962#endif
2963 CALL timestop(handle)
2964 END SUBROUTINE cp_fm_rot_rows
2965
2966! **************************************************************************************************
2967!> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
2968!> \param matrix ...
2969!> \param icol ...
2970!> \param jcol ...
2971!> \param cs cosine of the rotation angle
2972!> \param sn sinus of the rotation angle
2973!> \author Ole Schuett
2974! **************************************************************************************************
2975 SUBROUTINE cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
2976 TYPE(cp_fm_type), INTENT(IN) :: matrix
2977 INTEGER, INTENT(IN) :: icol, jcol
2978 REAL(dp), INTENT(IN) :: cs, sn
2979
2980 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_rot_cols'
2981 INTEGER :: handle, nrow
2982
2983#if defined(__parallel)
2984 INTEGER :: info, lwork
2985 INTEGER, DIMENSION(9) :: desc
2986 REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2987#endif
2988 CALL timeset(routinen, handle)
2989 CALL cp_fm_get_info(matrix, nrow_global=nrow)
2990#if defined(__parallel)
2991 IF (1 /= matrix%matrix_struct%context%n_pid) THEN
2992 lwork = 2*nrow + 1
2993 ALLOCATE (work(lwork))
2994 desc(:) = matrix%matrix_struct%descriptor(:)
2995 CALL pdrot(nrow, &
2996 matrix%local_data(1, 1), 1, icol, desc, 1, &
2997 matrix%local_data(1, 1), 1, jcol, desc, 1, &
2998 cs, sn, work, lwork, info)
2999 cpassert(info == 0)
3000 DEALLOCATE (work)
3001 ELSE
3002#endif
3003 CALL drot(nrow, matrix%local_data(1, icol), 1, matrix%local_data(1, jcol), 1, cs, sn)
3004#if defined(__parallel)
3005 END IF
3006#endif
3007 CALL timestop(handle)
3008 END SUBROUTINE cp_fm_rot_cols
3009
3010! **************************************************************************************************
3011!> \brief Orthonormalizes selected rows and columns of a full matrix, matrix_a
3012!> \param matrix_a ...
3013!> \param B ...
3014!> \param nrows number of rows of matrix_a, optional, defaults to size(matrix_a,1)
3015!> \param ncols number of columns of matrix_a, optional, defaults to size(matrix_a, 2)
3016!> \param start_row starting index of rows, optional, defaults to 1
3017!> \param start_col starting index of columns, optional, defaults to 1
3018!> \param do_norm ...
3019!> \param do_print ...
3020! **************************************************************************************************
3021 SUBROUTINE cp_fm_gram_schmidt_orthonorm(matrix_a, B, nrows, ncols, start_row, start_col, &
3022 do_norm, do_print)
3023
3024 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
3025 REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: b
3026 INTEGER, INTENT(IN), OPTIONAL :: nrows, ncols, start_row, start_col
3027 LOGICAL, INTENT(IN), OPTIONAL :: do_norm, do_print
3028
3029 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_Gram_Schmidt_orthonorm'
3030
3031 INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
3032 j_col, ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, &
3033 start_col_local, start_row_global, start_row_local, this_col, unit_nr
3034 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
3035 LOGICAL :: my_do_norm, my_do_print
3036 REAL(kind=dp) :: norm
3037 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
3038
3039 CALL timeset(routinen, handle)
3040
3041 my_do_norm = .true.
3042 IF (PRESENT(do_norm)) my_do_norm = do_norm
3043
3044 my_do_print = .false.
3045 IF (PRESENT(do_print) .AND. (my_do_norm)) my_do_print = do_print
3046
3047 unit_nr = -1
3048 IF (my_do_print) THEN
3050 IF (unit_nr < 1) my_do_print = .false.
3051 END IF
3052
3053 IF (SIZE(b) /= 0) THEN
3054 IF (PRESENT(nrows)) THEN
3055 nrow_global = nrows
3056 ELSE
3057 nrow_global = SIZE(b, 1)
3058 END IF
3059
3060 IF (PRESENT(ncols)) THEN
3061 ncol_global = ncols
3062 ELSE
3063 ncol_global = SIZE(b, 2)
3064 END IF
3065
3066 IF (PRESENT(start_row)) THEN
3067 start_row_global = start_row
3068 ELSE
3069 start_row_global = 1
3070 END IF
3071
3072 IF (PRESENT(start_col)) THEN
3073 start_col_global = start_col
3074 ELSE
3075 start_col_global = 1
3076 END IF
3077
3078 end_row_global = start_row_global + nrow_global - 1
3079 end_col_global = start_col_global + ncol_global - 1
3080
3081 CALL cp_fm_get_info(matrix=matrix_a, &
3082 nrow_global=nrow_global, ncol_global=ncol_global, &
3083 nrow_local=nrow_local, ncol_local=ncol_local, &
3084 row_indices=row_indices, col_indices=col_indices)
3085 IF (end_row_global > nrow_global) THEN
3086 end_row_global = nrow_global
3087 END IF
3088 IF (end_col_global > ncol_global) THEN
3089 end_col_global = ncol_global
3090 END IF
3091
3092 ! find out row/column indices of locally stored matrix elements that
3093 ! needs to be copied.
3094 ! Arrays row_indices and col_indices are assumed to be sorted in
3095 ! ascending order
3096 DO start_row_local = 1, nrow_local
3097 IF (row_indices(start_row_local) >= start_row_global) EXIT
3098 END DO
3099
3100 DO end_row_local = start_row_local, nrow_local
3101 IF (row_indices(end_row_local) > end_row_global) EXIT
3102 END DO
3103 end_row_local = end_row_local - 1
3104
3105 DO start_col_local = 1, ncol_local
3106 IF (col_indices(start_col_local) >= start_col_global) EXIT
3107 END DO
3108
3109 DO end_col_local = start_col_local, ncol_local
3110 IF (col_indices(end_col_local) > end_col_global) EXIT
3111 END DO
3112 end_col_local = end_col_local - 1
3113
3114 a => matrix_a%local_data
3115
3116 this_col = col_indices(start_col_local) - start_col_global + 1
3117
3118 b(:, this_col) = a(:, start_col_local)
3119
3120 IF (my_do_norm) THEN
3121 norm = sqrt(accurate_dot_product(b(:, this_col), b(:, this_col)))
3122 b(:, this_col) = b(:, this_col)/norm
3123 IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
3124 END IF
3125
3126 DO i = start_col_local + 1, end_col_local
3127 this_col = col_indices(i) - start_col_global + 1
3128 b(:, this_col) = a(:, i)
3129 DO j = start_col_local, i - 1
3130 j_col = col_indices(j) - start_col_global + 1
3131 b(:, this_col) = b(:, this_col) - &
3132 accurate_dot_product(b(:, j_col), b(:, this_col))* &
3133 b(:, j_col)/accurate_dot_product(b(:, j_col), b(:, j_col))
3134 END DO
3135
3136 IF (my_do_norm) THEN
3137 norm = sqrt(accurate_dot_product(b(:, this_col), b(:, this_col)))
3138 b(:, this_col) = b(:, this_col)/norm
3139 IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
3140 END IF
3141
3142 END DO
3143 CALL matrix_a%matrix_struct%para_env%sum(b)
3144 END IF
3145
3146 CALL timestop(handle)
3147
3148 END SUBROUTINE cp_fm_gram_schmidt_orthonorm
3149
3150! **************************************************************************************************
3151!> \brief Cholesky decomposition
3152!> \param fm_matrix ...
3153!> \param n ...
3154!> \param uplo triangular format; defaults to 'U'
3155! **************************************************************************************************
3156 SUBROUTINE cp_fm_potrf(fm_matrix, n, uplo)
3157 TYPE(cp_fm_type) :: fm_matrix
3158 INTEGER, INTENT(IN) :: n
3159 CHARACTER, INTENT(IN), OPTIONAL :: uplo
3160
3161 CHARACTER :: myuplo
3162 INTEGER :: info
3163 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
3164 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
3165#if defined(__parallel)
3166 INTEGER, DIMENSION(9) :: desca
3167#endif
3168
3169 myuplo = 'U'
3170 IF (PRESENT(uplo)) myuplo = uplo
3171
3172 a => fm_matrix%local_data
3173 a_sp => fm_matrix%local_data_sp
3174#if defined(__parallel)
3175 desca(:) = fm_matrix%matrix_struct%descriptor(:)
3176 IF (fm_matrix%use_sp) THEN
3177 CALL pspotrf(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
3178 ELSE
3179 CALL pdpotrf(myuplo, n, a(1, 1), 1, 1, desca, info)
3180 END IF
3181#else
3182 IF (fm_matrix%use_sp) THEN
3183 CALL spotrf(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
3184 ELSE
3185 CALL dpotrf(myuplo, n, a(1, 1), SIZE(a, 1), info)
3186 END IF
3187#endif
3188 IF (info /= 0) &
3189 cpabort("Cholesky decomposition failed. Matrix ill-conditioned?")
3190
3191 END SUBROUTINE cp_fm_potrf
3192
3193! **************************************************************************************************
3194!> \brief Invert trianguar matrix
3195!> \param fm_matrix the matrix to invert (triangular matrix according to uplo)
3196!> \param n size of the matrix to invert
3197!> \param uplo triangular format; defaults to 'U'
3198! **************************************************************************************************
3199 SUBROUTINE cp_fm_potri(fm_matrix, n, uplo)
3200 TYPE(cp_fm_type) :: fm_matrix
3201 INTEGER, INTENT(IN) :: n
3202 CHARACTER, INTENT(IN), OPTIONAL :: uplo
3203
3204 CHARACTER :: myuplo
3205 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
3206 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
3207 INTEGER :: info
3208#if defined(__parallel)
3209 INTEGER, DIMENSION(9) :: desca
3210#endif
3211
3212 myuplo = 'U'
3213 IF (PRESENT(uplo)) myuplo = uplo
3214
3215 a => fm_matrix%local_data
3216 a_sp => fm_matrix%local_data_sp
3217#if defined(__parallel)
3218 desca(:) = fm_matrix%matrix_struct%descriptor(:)
3219 IF (fm_matrix%use_sp) THEN
3220 CALL pspotri(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
3221 ELSE
3222 CALL pdpotri(myuplo, n, a(1, 1), 1, 1, desca, info)
3223 END IF
3224#else
3225 IF (fm_matrix%use_sp) THEN
3226 CALL spotri(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
3227 ELSE
3228 CALL dpotri(myuplo, n, a(1, 1), SIZE(a, 1), info)
3229 END IF
3230#endif
3231 cpassert(info == 0)
3232 END SUBROUTINE cp_fm_potri
3233
3234! **************************************************************************************************
3235!> \brief Calculates
3236!> yv = alpha*amat*xv + beta*yv
3237!> where amat: fm matrix
3238!> xv : vector replicated
3239!> yv : vector replicated
3240!> Defaults: alpha = 1, beta = 0
3241! **************************************************************************************************
3242 SUBROUTINE cp_fm_matvec(amat, xv, yv, alpha, beta)
3243 TYPE(cp_fm_type), INTENT(IN) :: amat
3244 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: xv
3245 REAL(kind=dp), DIMENSION(:), INTENT(INOUT) :: yv
3246 REAL(kind=dp), OPTIONAL, INTENT(IN) :: alpha, beta
3247
3248 INTEGER :: na, nc, nx, ny
3249 REAL(kind=dp) :: aval, bval
3250#if defined(__parallel)
3251 INTEGER :: nrl, ncl, ic, ir
3252 INTEGER, DIMENSION(:), POINTER :: rind, cind
3253 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: xvl, yvl, yvm
3254#endif
3255
3256 IF (amat%use_sp) THEN
3257 cpabort("cp_fm_matvec: SP option not available")
3258 END IF
3259 aval = 1.0_dp
3260 IF (PRESENT(alpha)) aval = alpha
3261 bval = 0.0_dp
3262 IF (PRESENT(beta)) bval = beta
3263
3264 CALL cp_fm_get_info(amat, nrow_global=na, ncol_global=nc)
3265 nx = SIZE(xv)
3266 ny = SIZE(yv)
3267 IF ((nx /= ny) .OR. (nc /= nx)) THEN
3268 cpabort("cp_fm_matvec: incompatible dimensions")
3269 END IF
3270#if defined(__parallel)
3271 CALL cp_fm_get_info(amat, nrow_local=nrl, ncol_local=ncl, &
3272 row_indices=rind, col_indices=cind)
3273 ALLOCATE (xvl(ncl), yvl(nrl), yvm(ny))
3274 DO ic = 1, ncl
3275 xvl(ic) = xv(cind(ic))
3276 END DO
3277 yvl(1:nrl) = matmul(amat%local_data, xvl(1:ncl))
3278 yvm = 0.0_dp
3279 DO ir = 1, nrl
3280 yvm(rind(ir)) = yvl(ir)
3281 END DO
3282 CALL amat%matrix_struct%para_env%sum(yvm)
3283 IF (bval == 0.0_dp) THEN
3284 yv = aval*yvm
3285 ELSE
3286 yv = bval*yv + aval*yvm
3287 END IF
3288#else
3289 IF (bval == 0.0_dp) THEN
3290 yv = aval*matmul(amat%local_data, xv)
3291 ELSE
3292 yv = bval*yv + aval*matmul(amat%local_data, xv)
3293 END IF
3294#endif
3295
3296 END SUBROUTINE cp_fm_matvec
3297
3298END MODULE cp_fm_basic_linalg
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
methods related to the blacs parallel environment
Basic linear algebra operations for full matrices.
subroutine, public cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
subroutine, public cp_fm_row_scale(matrixa, scaling)
scales row i of matrix a with scaling(i)
subroutine, public cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
subroutine, public cp_fm_column_scale(matrixa, scaling)
scales column i of matrix a with scaling(i)
subroutine, public cp_fm_add_columns(msource, mtarget, ncol, alpha, source_start, target_start)
Add (and scale) a subset of columns of a fm to a fm b = alpha*a + b.
subroutine, public cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
subroutine, public cp_fm_solve(matrix_a, general_a)
computes the the solution to A*b=A_general using lu decomposition
subroutine, public cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
compute a QR factorization with column pivoting of a M-by-N distributed matrix sub( A ) = A(IA:IA+M-1...
real(kind=dp) function, public cp_fm_frobenius_norm(matrix_a)
computes the Frobenius norm of matrix_a
subroutine, public cp_fm_det(matrix_a, det_a)
Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix.
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col, uplo)
performs a QR factorization of the input rectangular matrix A or of a submatrix of A the computed tri...
subroutine, public cp_fm_gram_schmidt_orthonorm(matrix_a, b, nrows, ncols, start_row, start_col, do_norm, do_print)
Orthonormalizes selected rows and columns of a full matrix, matrix_a.
subroutine, public cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
performs a rank-k update of a symmetric matrix_c matrix_c = beta * matrix_c + alpha * matrix_a * tran...
subroutine, public cp_fm_potrf(fm_matrix, n, uplo)
Cholesky decomposition.
subroutine, public cp_fm_potri(fm_matrix, n, uplo)
Invert trianguar matrix.
subroutine, public cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
interface to BLACS geadd: matrix_b = beta*matrix_b + alpha*opt(matrix_a) where opt(matrix_a) can be e...
subroutine, public cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
computes the schur product of two matrices c_ij = a_ij * b_ij
real(kind=dp) function, public cp_fm_norm(matrix, mode)
norm of matrix using (p)dlange
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_uplo_to_full(matrix, work, uplo)
given a triangular matrix according to uplo, computes the corresponding full matrix
subroutine, public cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix.
subroutine, public cp_complex_fm_gemm(transa, transb, m, n, k, alpha, a_re, a_im, b_re, b_im, beta, c_re, c_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
Convenience function. Computes the matrix multiplications needed for the multiplication of complex ma...
subroutine, public cp_fm_scale(alpha, matrix_a)
scales a matrix matrix_a = alpha * matrix_b
subroutine, public cp_fm_triangular_invert(matrix_a, uplo_tr)
inverts a triangular matrix
subroutine, public cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
computes matrix_c = beta * matrix_c + alpha * matrix_a * matrix_b computes matrix_c = beta * matrix_c...
subroutine, public cp_fm_matvec(amat, xv, yv, alpha, beta)
Calculates yv = alpha*amat*xv + beta*yv where amat: fm matrix xv : vector replicated yv : vector repl...
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
subroutine, public cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1) with orthonormal column...
represent the structure of a full matrix
logical function, public cp_fm_struct_equivalent(fmstruct1, fmstruct2)
returns true if the two matrix structures are equivalent, false otherwise.
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_get_diag(matrix, diag)
returns the diagonal elements of a fm
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp, nrow, ncol, set_zero)
creates a new full matrix with the given structure
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_set_element(matrix, irow_global, icol_global, alpha)
sets an element of a matrix
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Definition kahan_sum.F:29
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public int_8
Definition kinds.F:54
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public sp
Definition kinds.F:33
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
subroutine, public m_memory(mem)
Returns the total amount of memory [bytes] in use, if known, zero otherwise.
Definition machine.F:456
Collection of simple mathematical functions and subroutines.
Definition mathlib.F:15
subroutine, public get_pseudo_inverse_svd(a, a_pinverse, rskip, determinant, sval)
returns the pseudoinverse of a real, square matrix using singular value decomposition
Definition mathlib.F:945
subroutine, public diag(n, a, d, v)
Diagonalize matrix a. The eigenvalues are returned in vector d and the eigenvectors are returned in m...
Definition mathlib.F:1503
Interface to the message passing library MPI.
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
just to build arrays of pointers to matrices
represent a full matrix