(git:d04fa31)
Loading...
Searching...
No Matches
cp_fm_diag.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief used for collecting some of the diagonalization schemes available for
10!> cp_fm_type. cp_fm_power also moved here as it is very related
11!> \note
12!> first version : most routines imported
13!> \par History
14!> - unused Jacobi routines removed, cosmetics (05.04.06,MK)
15!> \author Joost VandeVondele (2003-08)
16! **************************************************************************************************
21 cp_fm_gemm, &
23 cp_fm_syrk, &
30 USE cp_fm_elpa, ONLY: cp_fm_diag_elpa, &
36#if defined(__DLAF)
39#endif
40 USE cp_fm_types, ONLY: cp_fm_get_info, &
43 cp_fm_type, &
64 USE kinds, ONLY: default_string_length, &
65 dp
66 USE machine, ONLY: default_output_unit, &
68#if defined (__parallel)
70#endif
71#if defined (__HAS_IEEE_EXCEPTIONS)
72 USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
73 ieee_set_halting_mode, &
74 ieee_all
75#endif
76#include "../base/base_uses.f90"
77
78 IMPLICIT NONE
79 PRIVATE
80
81 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_diag'
82
83 REAL(kind=dp), PARAMETER, PUBLIC :: eps_check_diag_default = 5.0e-14_dp
84
85 ! The following saved variables are diagonalization global
86 ! Stores the default library for diagonalization
87 INTEGER, SAVE, PUBLIC :: diag_type = 0
88 ! Minimum number of eigenvectors for the use of the ELPA eigensolver.
89 ! The ScaLAPACK eigensolver is used as fallback for all smaller cases.
90 INTEGER, SAVE :: elpa_neigvec_min = 0
91#if defined(__DLAF)
92 ! Minimum number of eigenvectors for the use of the DLAF eigensolver.
93 ! The ScaLAPACK eigensolver is used as fallback for all smaller cases.
94 INTEGER, SAVE, PUBLIC :: dlaf_neigvec_min = 0
95#endif
96 LOGICAL, SAVE, PUBLIC :: cusolver_generalized = .true.
97 ! Threshold value for the orthonormality check of the eigenvectors obtained
98 ! after a diagonalization. A negative value disables the check.
99 REAL(kind=dp), SAVE :: eps_check_diag = -1.0_dp
100
101 ! Constants for the diag_type above
102 INTEGER, PARAMETER, PUBLIC :: fm_diag_type_scalapack = 101, &
103 fm_diag_type_elpa = 102, &
104 fm_diag_type_cusolver = 103, &
106#if defined(__CUSOLVERMP)
107 INTEGER, PARAMETER, PUBLIC :: fm_diag_type_default = fm_diag_type_cusolver
108#elif defined(__ELPA)
109 INTEGER, PARAMETER, PUBLIC :: fm_diag_type_default = fm_diag_type_elpa
110#else
111 INTEGER, PARAMETER, PUBLIC :: fm_diag_type_default = fm_diag_type_scalapack
112#endif
113
114 ! Public subroutines
115 PUBLIC :: choose_eigv_solver, &
117 cp_fm_power, &
118 cp_fm_syevd, &
119 cp_fm_syevx, &
120 cp_fm_svd, &
121 cp_fm_geeig, &
123 diag_init, &
125
126CONTAINS
127
128! **************************************************************************************************
129!> \brief Setup the diagonalization library to be used
130!> \param diag_lib diag_library flag from GLOBAL section in input
131!> \param fallback_applied .TRUE. if support for the requested library was not compiled-in and fallback
132!> to ScaLAPACK was applied, .FALSE. otherwise.
133!> \param elpa_kernel integer that determines which ELPA kernel to use for diagonalization
134!> \param elpa_neigvec_min_input ...
135!> \param elpa_qr logical that determines if ELPA should try to use QR to accelerate the
136!> diagonalization procedure of suitably sized matrices
137!> \param elpa_print logical that determines if information about the ELPA diagonalization should
138!> be printed
139!> \param elpa_one_stage logical that enables the one-stage solver
140!> \param dlaf_neigvec_min_input ...
141!> \param eps_check_diag_input ...
142!> \param cusolver_generalized_input ...
143!> \par History
144!> - Add support for DLA-Future (05.09.2023, RMeli)
145!> \author MI 11.2013
146! **************************************************************************************************
147 SUBROUTINE diag_init(diag_lib, fallback_applied, elpa_kernel, elpa_neigvec_min_input, elpa_qr, &
148 elpa_print, elpa_one_stage, dlaf_neigvec_min_input, eps_check_diag_input, &
149 cusolver_generalized_input)
150 CHARACTER(LEN=*), INTENT(IN) :: diag_lib
151 LOGICAL, INTENT(OUT) :: fallback_applied
152 INTEGER, INTENT(IN) :: elpa_kernel, elpa_neigvec_min_input
153 LOGICAL, INTENT(IN) :: elpa_qr, elpa_print, elpa_one_stage
154 INTEGER, INTENT(IN) :: dlaf_neigvec_min_input
155 REAL(kind=dp), INTENT(IN) :: eps_check_diag_input
156 LOGICAL, INTENT(IN), OPTIONAL :: cusolver_generalized_input
157
158 LOGICAL, SAVE :: initialized = .false.
159
160 fallback_applied = .false.
161
162 IF (diag_lib == "ScaLAPACK") THEN
164 ELSE IF (diag_lib == "ELPA") THEN
165#if defined (__ELPA)
166 ! ELPA is requested and available
168#else
169 ! ELPA library requested but not linked, switch back to SL
171 fallback_applied = .true.
172#endif
173 ELSE IF (diag_lib == "cuSOLVER") THEN
175 ELSE IF (diag_lib == "DLAF") THEN
176#if defined (__DLAF)
178#else
179 cpabort("ERROR in diag_init: CP2K was not compiled with DLA-Future support")
180#endif
181 ELSE
182 cpabort("ERROR in diag_init: Initialization of unknown diagonalization library requested")
183 END IF
184
185 ! Initialization of requested diagonalization library
186 IF (.NOT. initialized .AND. diag_type == fm_diag_type_elpa) THEN
187 CALL initialize_elpa_library(one_stage=elpa_one_stage, qr=elpa_qr, should_print=elpa_print)
188 CALL set_elpa_kernel(elpa_kernel)
189 initialized = .true.
190 END IF
191#if defined(__DLAF)
192 IF (.NOT. initialized .AND. diag_type == fm_diag_type_dlaf) THEN
193 CALL cp_dlaf_initialize()
194 initialized = .true.
195 END IF
196 dlaf_neigvec_min = dlaf_neigvec_min_input
197#else
198 mark_used(dlaf_neigvec_min_input)
199#endif
200
201 elpa_neigvec_min = elpa_neigvec_min_input
202 eps_check_diag = eps_check_diag_input
203 IF (PRESENT(cusolver_generalized_input)) THEN
204 cusolver_generalized = cusolver_generalized_input
205 ELSE
206 cusolver_generalized = .true.
207 END IF
208
209 END SUBROUTINE diag_init
210
211! **************************************************************************************************
212!> \brief Finalize the diagonalization library
213! **************************************************************************************************
214 SUBROUTINE diag_finalize()
215#if defined (__ELPA)
218#endif
219#if defined (__DLAF)
221 CALL cp_dlaf_finalize()
222#endif
223 END SUBROUTINE diag_finalize
224
225! **************************************************************************************************
226!> \brief Choose the Eigensolver depending on which library is available
227!> ELPA seems to be unstable for small systems
228!> \param matrix ...
229!> \param eigenvectors ...
230!> \param eigenvalues ...
231!> \param info ...
232!> \par info If present returns error code and prevents program stops.
233!> Works currently only for cp_fm_syevd with ScaLAPACK.
234!> Other solvers will end the program regardless of PRESENT(info).
235!> \par History
236!> - Do not use ELPA for small matrices and use instead ScaLAPACK as fallback (10.05.2021, MK)
237! **************************************************************************************************
238 SUBROUTINE choose_eigv_solver(matrix, eigenvectors, eigenvalues, info)
239
240 TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
241 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
242 INTEGER, INTENT(OUT), OPTIONAL :: info
243
244 CHARACTER(LEN=*), PARAMETER :: routinen = 'choose_eigv_solver'
245
246 ! Sample peak memory
247 CALL m_memory()
248
249 IF (PRESENT(info)) info = 0 ! Default for solvers that do not return an info.
250
252 CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
253
254 ELSE IF (diag_type == fm_diag_type_elpa) THEN
255 IF (matrix%matrix_struct%nrow_global < elpa_neigvec_min) THEN
256 ! We don't trust ELPA with very small matrices.
257 CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
258 ELSE
259 CALL cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
260 END IF
261
262 ELSE IF (diag_type == fm_diag_type_cusolver) THEN
263 IF (matrix%matrix_struct%nrow_global < 64) THEN
264 ! We don't trust cuSolver with very small matrices.
265 CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
266 ELSE
267 CALL cp_fm_diag_cusolver(matrix, eigenvectors, eigenvalues)
268 END IF
269
270#if defined(__DLAF)
271 ELSE IF (diag_type == fm_diag_type_dlaf) THEN
272 IF (matrix%matrix_struct%nrow_global < dlaf_neigvec_min) THEN
273 ! Use ScaLAPACK for small matrices
274 CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
275 ELSE
276 CALL cp_fm_diag_dlaf(matrix, eigenvectors, eigenvalues)
277 END IF
278#endif
279
280 ELSE
281 cpabort("ERROR in "//routinen//": Invalid diagonalization type requested")
282 END IF
283
284 CALL check_diag(matrix, eigenvectors, nvec=SIZE(eigenvalues))
285
286 END SUBROUTINE choose_eigv_solver
287
288! **************************************************************************************************
289!> \brief Check result of diagonalization, i.e. the orthonormality of the eigenvectors
290!> \param matrix Work matrix
291!> \param eigenvectors Eigenvectors to be checked
292!> \param nvec ...
293! **************************************************************************************************
294 SUBROUTINE check_diag(matrix, eigenvectors, nvec)
295
296 TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
297 INTEGER, INTENT(IN) :: nvec
298
299 CHARACTER(LEN=*), PARAMETER :: routinen = 'check_diag'
300
301 CHARACTER(LEN=default_string_length) :: diag_type_name
302 REAL(kind=dp) :: eps, eps_abort, eps_warning, gold, test
303 INTEGER :: handle, i, j, ncol, nrow, output_unit
304 LOGICAL :: check_eigenvectors
305#if defined(__parallel)
306 TYPE(cp_blacs_env_type), POINTER :: context
307 INTEGER :: il, jl, ipcol, iprow, &
308 mypcol, myprow, npcol, nprow
309 INTEGER, DIMENSION(9) :: desca
310#endif
311
312 CALL timeset(routinen, handle)
313
314 output_unit = default_output_unit
315 eps_warning = eps_check_diag_default
316#if defined(__CHECK_DIAG)
317 check_eigenvectors = .true.
318 IF (eps_check_diag >= 0.0_dp) THEN
319 eps_warning = eps_check_diag
320 END IF
321#else
322 IF (eps_check_diag >= 0.0_dp) THEN
323 check_eigenvectors = .true.
324 eps_warning = eps_check_diag
325 ELSE
326 check_eigenvectors = .false.
327 END IF
328#endif
329 eps_abort = 10.0_dp*eps_warning
330
331 gold = 0.0_dp
332 test = 0.0_dp
333 eps = 0.0_dp
334
335 IF (check_eigenvectors) THEN
336#if defined(__parallel)
337 nrow = eigenvectors%matrix_struct%nrow_global
338 ncol = min(eigenvectors%matrix_struct%ncol_global, nvec)
339 CALL cp_fm_gemm("T", "N", ncol, ncol, nrow, 1.0_dp, eigenvectors, eigenvectors, 0.0_dp, matrix)
340 context => matrix%matrix_struct%context
341 myprow = context%mepos(1)
342 mypcol = context%mepos(2)
343 nprow = context%num_pe(1)
344 npcol = context%num_pe(2)
345 desca(:) = matrix%matrix_struct%descriptor(:)
346 outer: DO j = 1, ncol
347 DO i = 1, ncol
348 CALL infog2l(i, j, desca, nprow, npcol, myprow, mypcol, il, jl, iprow, ipcol)
349 IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
350 gold = merge(0.0_dp, 1.0_dp, i /= j)
351 test = matrix%local_data(il, jl)
352 eps = abs(test - gold)
353 IF (eps > eps_warning) EXIT outer
354 END IF
355 END DO
356 END DO outer
357#else
358 nrow = SIZE(eigenvectors%local_data, 1)
359 ncol = min(SIZE(eigenvectors%local_data, 2), nvec)
360 CALL dgemm("T", "N", ncol, ncol, nrow, 1.0_dp, &
361 eigenvectors%local_data(1, 1), nrow, &
362 eigenvectors%local_data(1, 1), nrow, &
363 0.0_dp, matrix%local_data(1, 1), nrow)
364 outer: DO j = 1, ncol
365 DO i = 1, ncol
366 gold = merge(0.0_dp, 1.0_dp, i /= j)
367 test = matrix%local_data(i, j)
368 eps = abs(test - gold)
369 IF (eps > eps_warning) EXIT outer
370 END DO
371 END DO outer
372#endif
373 IF (eps > eps_warning) THEN
375 diag_type_name = "SYEVD"
376 ELSE IF (diag_type == fm_diag_type_elpa) THEN
377 diag_type_name = "ELPA"
378 ELSE IF (diag_type == fm_diag_type_cusolver) THEN
379 diag_type_name = "CUSOLVER"
380 ELSE IF (diag_type == fm_diag_type_dlaf) THEN
381 diag_type_name = "DLAF"
382 ELSE
383 cpabort("Unknown diag_type")
384 END IF
385 WRITE (unit=output_unit, fmt="(/,T2,A,/,T2,A,I0,A,I0,A,F0.15,/,T2,A,F0.0,A,ES10.3)") &
386 "The eigenvectors returned by "//trim(diag_type_name)//" are not orthonormal", &
387 "Matrix element (", i, ", ", j, ") = ", test, &
388 "The deviation from the expected value ", gold, " is", eps
389 IF (eps > eps_abort) THEN
390 cpabort("ERROR in "//routinen//": Check of matrix diagonalization failed")
391 ELSE
392 cpwarn("Check of matrix diagonalization failed in routine "//routinen)
393 END IF
394 END IF
395 END IF
396
397 CALL timestop(handle)
398
399 END SUBROUTINE check_diag
400
401! **************************************************************************************************
402!> \brief Issues an error messages and exits (optionally only warns).
403!> \param mesg message to be issued
404!> \param info error code (optional)
405!> \param warn only warn (optional)
406! **************************************************************************************************
407 SUBROUTINE cp_fm_error(mesg, info, warn)
408 CHARACTER(LEN=*), INTENT(IN) :: mesg
409 INTEGER, INTENT(IN), OPTIONAL :: info
410 LOGICAL, INTENT(IN), OPTIONAL :: warn
411
412 CHARACTER(LEN=2*default_string_length) :: message
413 LOGICAL :: warning
414
415 IF (PRESENT(info)) THEN
416 WRITE (message, "(A,A,I0,A)") mesg, " (INFO = ", info, ")"
417 ELSE
418 WRITE (message, "(A)") mesg
419 END IF
420
421 IF (PRESENT(warn)) THEN
422 warning = warn
423 ELSE ! abort
424 warning = .false.
425 END IF
426
427 IF (warning) THEN
428 cpwarn(trim(message))
429 ELSE
430 cpabort(trim(message))
431 END IF
432 END SUBROUTINE cp_fm_error
433
434! **************************************************************************************************
435!> \brief Computes all eigenvalues and vectors of a real symmetric matrix
436!> significantly faster than syevx, scales also much better.
437!> Needs workspace to allocate all the eigenvectors
438!> \param matrix ...
439!> \param eigenvectors ...
440!> \param eigenvalues ...
441!> \param info ...
442!> \par matrix is supposed to be in upper triangular form, and overwritten by this routine
443!> \par info If present returns error code and prevents program stops.
444!> Works currently only for scalapack.
445!> Other solvers will end the program regardless of PRESENT(info).
446! **************************************************************************************************
447 SUBROUTINE cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
448
449 TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
450 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
451 INTEGER, INTENT(OUT), OPTIONAL :: info
452
453 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_syevd'
454
455 INTEGER :: handle, myinfo, n, nmo
456 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: eig
457#if defined(__parallel)
458 TYPE(cp_fm_type) :: eigenvectors_new, matrix_new
459#else
460 INTEGER :: liwork, lwork
461 INTEGER, DIMENSION(:), POINTER :: iwork
462 REAL(kind=dp), DIMENSION(:, :), POINTER :: m
463 REAL(kind=dp), DIMENSION(:), POINTER :: work
464 INTEGER, TARGET :: v(1)
465 REAL(kind=dp), TARGET :: w(1)
466#endif
467
468 CALL timeset(routinen, handle)
469
470 myinfo = 0
471
472 n = matrix%matrix_struct%nrow_global
473 ALLOCATE (eig(n))
474
475#if defined(__parallel)
476 ! Determine if the input matrix needs to be redistributed before diagonalization.
477 ! Heuristics are used to determine the optimal number of CPUs for diagonalization.
478 ! The redistributed matrix is stored in matrix_new, which is just a pointer
479 ! to the original matrix if no redistribution is required
480 CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new)
481
482 ! Call scalapack on CPUs that hold the new matrix
483 IF (ASSOCIATED(matrix_new%matrix_struct)) THEN
484 IF (PRESENT(info)) THEN
485 CALL cp_fm_syevd_base(matrix_new, eigenvectors_new, eig, myinfo)
486 ELSE
487 CALL cp_fm_syevd_base(matrix_new, eigenvectors_new, eig)
488 END IF
489 END IF
490 ! Redistribute results and clean up
491 CALL cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)
492#else
493 ! Retrieve the optimal work array sizes first
494 lwork = -1
495 liwork = -1
496 m => matrix%local_data
497 iwork => v
498 work => w
499
500 CALL dsyevd('V', 'U', n, m(1, 1), SIZE(m, 1), eig(1), work(1), lwork, iwork(1), liwork, myinfo)
501
502 IF (myinfo /= 0) THEN
503 CALL cp_fm_error("ERROR in DSYEVD: Work space query failed", myinfo, PRESENT(info))
504 END IF
505
506 ! Reallocate work arrays and perform diagonalisation
507 lwork = nint(work(1))
508 ALLOCATE (work(lwork))
509
510 liwork = iwork(1)
511 ALLOCATE (iwork(liwork))
512
513 CALL dsyevd('V', 'U', n, m(1, 1), SIZE(m, 1), eig(1), work(1), lwork, iwork(1), liwork, myinfo)
514
515 IF (myinfo /= 0) THEN
516 CALL cp_fm_error("ERROR in DSYEVD: Matrix diagonalization failed", myinfo, PRESENT(info))
517 END IF
518
519 CALL cp_fm_to_fm(matrix, eigenvectors)
520
521 DEALLOCATE (iwork)
522 DEALLOCATE (work)
523#endif
524
525 IF (PRESENT(info)) info = myinfo
526
527 nmo = SIZE(eigenvalues, 1)
528 IF (nmo > n) THEN
529 eigenvalues(1:n) = eig(1:n)
530 ELSE
531 eigenvalues(1:nmo) = eig(1:nmo)
532 END IF
533
534 DEALLOCATE (eig)
535
536 CALL check_diag(matrix, eigenvectors, n)
537
538 CALL timestop(handle)
539
540 END SUBROUTINE cp_fm_syevd
541
542! **************************************************************************************************
543!> \brief ...
544!> \param matrix ...
545!> \param eigenvectors ...
546!> \param eigenvalues ...
547!> \param info ...
548! **************************************************************************************************
549 SUBROUTINE cp_fm_syevd_base(matrix, eigenvectors, eigenvalues, info)
550
551 TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
552 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
553 INTEGER, INTENT(OUT), OPTIONAL :: info
554
555 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_syevd_base'
556
557 INTEGER :: handle, myinfo
558#if defined(__parallel)
559 TYPE(cp_blacs_env_type), POINTER :: context
560 INTEGER :: liwork, lwork, n
561 INTEGER, DIMENSION(9) :: descm, descv
562 INTEGER, DIMENSION(:), POINTER :: iwork
563 REAL(kind=dp), DIMENSION(:), POINTER :: work
564 REAL(kind=dp), DIMENSION(:, :), POINTER :: m, v
565 REAL(kind=dp), TARGET :: w(1)
566#if defined (__HAS_IEEE_EXCEPTIONS)
567 LOGICAL, DIMENSION(5) :: halt
568#endif
569#endif
570
571 CALL timeset(routinen, handle)
572
573 myinfo = 0
574
575#if defined(__parallel)
576
577 n = matrix%matrix_struct%nrow_global
578 m => matrix%local_data
579 context => matrix%matrix_struct%context
580 descm(:) = matrix%matrix_struct%descriptor(:)
581
582 v => eigenvectors%local_data
583 descv(:) = eigenvectors%matrix_struct%descriptor(:)
584
585 liwork = 7*n + 8*context%num_pe(2) + 2
586 ALLOCATE (iwork(liwork))
587
588 ! Work space query
589 lwork = -1
590 work => w
591
592 CALL pdsyevd('V', 'U', n, m(1, 1), 1, 1, descm, eigenvalues(1), v(1, 1), 1, 1, descv, &
593 work(1), lwork, iwork(1), liwork, myinfo)
594
595 IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
596 CALL cp_fm_error("ERROR in PDSYEVD: Work space query failed", myinfo, PRESENT(info))
597 END IF
598
599 lwork = nint(work(1)) ! can be insufficient due to bug in reference ScaLAPACK
600#if !defined(__SCALAPACK_NO_WA)
601 ! Query workspace for QDORMTR as called by reference ScaLAPACK (PDSYEVD).
602 CALL pdormtr('L', 'U', 'N', n, n, m(1, 1), 1, 1, descm, m(1, 1), &
603 v(1, 1), 1, 1, descv, work(1), -1, myinfo)
604
605 IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
606 CALL cp_fm_error("ERROR in PDORMTR: Work space query failed", myinfo, PRESENT(info))
607 END IF
608
609 IF (lwork < (work(1) + 2*n)) THEN
610 lwork = nint(work(1)) + 2*n ! still wrong by 2*N
611 END IF
612#endif
613 ALLOCATE (work(lwork))
614
615 ! Initial/documented amount of liwork is exceeded (slightly worrisome too).
616 IF (liwork < iwork(1)) THEN
617 liwork = iwork(1)
618 DEALLOCATE (iwork)
619 ALLOCATE (iwork(liwork))
620 END IF
621
622 ! ScaLAPACK takes advantage of IEEE754 exceptions for speedup.
623 ! Therefore, we disable floating point traps temporarily.
624#if defined (__HAS_IEEE_EXCEPTIONS)
625 CALL ieee_get_halting_mode(ieee_all, halt)
626 CALL ieee_set_halting_mode(ieee_all, .false.)
627#endif
628
629 CALL pdsyevd('V', 'U', n, m(1, 1), 1, 1, descm, eigenvalues(1), v(1, 1), 1, 1, descv, &
630 work(1), lwork, iwork(1), liwork, myinfo)
631
632#if defined (__HAS_IEEE_EXCEPTIONS)
633 CALL ieee_set_halting_mode(ieee_all, halt)
634#endif
635 IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
636 CALL cp_fm_error("ERROR in PDSYEVD: Matrix diagonalization failed", myinfo, PRESENT(info))
637 END IF
638
639 IF (PRESENT(info)) info = myinfo
640
641 DEALLOCATE (work)
642 DEALLOCATE (iwork)
643#else
644 mark_used(matrix)
645 mark_used(eigenvectors)
646 mark_used(eigenvalues)
647 myinfo = -1
648 IF (PRESENT(info)) info = myinfo
649 CALL cp_fm_error("ERROR in "//trim(routinen)// &
650 ": Matrix diagonalization using PDSYEVD requested without ScaLAPACK support")
651#endif
652
653 CALL timestop(handle)
654
655 END SUBROUTINE cp_fm_syevd_base
656
657! **************************************************************************************************
658!> \brief compute eigenvalues and optionally eigenvectors of a real symmetric matrix using scalapack.
659!> If eigenvectors are required this routine will replicate a full matrix on each CPU...
660!> if more than a handful of vectors are needed, use cp_fm_syevd instead
661!> \param matrix ...
662!> \param eigenvectors ...
663!> \param eigenvalues ...
664!> \param neig ...
665!> \param work_syevx ...
666!> \par matrix is supposed to be in upper triangular form, and overwritten by this routine
667!> neig is the number of vectors needed (default all)
668!> work_syevx evec calculation only, is the fraction of the working buffer allowed (1.0 use full buffer)
669!> reducing this saves time, but might cause the routine to fail
670! **************************************************************************************************
671 SUBROUTINE cp_fm_syevx(matrix, eigenvectors, eigenvalues, neig, work_syevx)
672
673 ! Diagonalise the symmetric n by n matrix using the LAPACK library.
674
675 TYPE(cp_fm_type), INTENT(IN) :: matrix
676 TYPE(cp_fm_type), OPTIONAL, INTENT(IN) :: eigenvectors
677 REAL(kind=dp), OPTIONAL, INTENT(IN) :: work_syevx
678 INTEGER, INTENT(IN), OPTIONAL :: neig
679 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
680
681 CHARACTER(LEN=*), PARAMETER :: routinen = "cp_fm_syevx"
682
683#if defined(__parallel)
684 REAL(kind=dp), PARAMETER :: orfac = -1.0_dp
685#endif
686 REAL(kind=dp), PARAMETER :: vl = 0.0_dp, &
687 vu = 0.0_dp
688
689 TYPE(cp_blacs_env_type), POINTER :: context
690 TYPE(cp_logger_type), POINTER :: logger
691 CHARACTER(LEN=1) :: job_type
692 REAL(kind=dp) :: abstol, work_syevx_local
693 INTEGER :: handle, info, liwork, lwork, &
694 m, n, nb, npcol, nprow, &
695 output_unit, neig_local
696 LOGICAL :: ionode, needs_evecs
697 INTEGER, DIMENSION(:), ALLOCATABLE :: ifail, iwork
698 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: w, work
699 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, z
700
701 REAL(kind=dp), EXTERNAL :: dlamch
702
703#if defined(__parallel)
704 INTEGER :: nn, np0, npe, nq0, nz
705 INTEGER, DIMENSION(9) :: desca, descz
706 INTEGER, DIMENSION(:), ALLOCATABLE :: iclustr
707 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: gap
708 INTEGER, EXTERNAL :: iceil, numroc
709#else
710 INTEGER :: nla, nlz
711 INTEGER, EXTERNAL :: ilaenv
712#endif
713#if defined (__HAS_IEEE_EXCEPTIONS)
714 LOGICAL, DIMENSION(5) :: halt
715#endif
716
717 ! by default all
718 n = matrix%matrix_struct%nrow_global
719 neig_local = n
720 IF (PRESENT(neig)) neig_local = neig
721 IF (neig_local == 0) RETURN
722
723 CALL timeset(routinen, handle)
724
725 needs_evecs = PRESENT(eigenvectors)
726
727 logger => cp_get_default_logger()
728 ionode = logger%para_env%is_source()
729 n = matrix%matrix_struct%nrow_global
730
731 ! by default allocate all needed space
732 work_syevx_local = 1.0_dp
733 IF (PRESENT(work_syevx)) work_syevx_local = work_syevx
734
735 ! set scalapack job type
736 IF (needs_evecs) THEN
737 job_type = "V"
738 ELSE
739 job_type = "N"
740 END IF
741
742 ! target the most accurate calculation of the eigenvalues
743 abstol = 2.0_dp*dlamch("S")
744
745 context => matrix%matrix_struct%context
746 nprow = context%num_pe(1)
747 npcol = context%num_pe(2)
748
749 ALLOCATE (w(n))
750 eigenvalues(:) = 0.0_dp
751#if defined(__parallel)
752
753 IF (matrix%matrix_struct%nrow_block /= matrix%matrix_struct%ncol_block) THEN
754 cpabort("ERROR in "//routinen//": Invalid blocksize (no square blocks) found")
755 END IF
756
757 a => matrix%local_data
758 desca(:) = matrix%matrix_struct%descriptor(:)
759
760 IF (needs_evecs) THEN
761 z => eigenvectors%local_data
762 descz(:) = eigenvectors%matrix_struct%descriptor(:)
763 ELSE
764 ! z will not be referenced
765 z => matrix%local_data
766 descz = desca
767 END IF
768
769 ! Get the optimal work storage size
770
771 npe = nprow*npcol
772 nb = matrix%matrix_struct%nrow_block
773 nn = max(n, nb, 2)
774 np0 = numroc(nn, nb, 0, 0, nprow)
775 nq0 = max(numroc(nn, nb, 0, 0, npcol), nb)
776
777 IF (needs_evecs) THEN
778 lwork = 5*n + max(5*nn, np0*nq0) + iceil(neig_local, npe)*nn + 2*nb*nb + &
779 int(work_syevx_local*real((neig_local - 1)*n, dp)) !!!! allocates a full matrix on every CPU !!!!!
780 ELSE
781 lwork = 5*n + max(5*nn, nb*(np0 + 1))
782 END IF
783 liwork = 6*max(n, npe + 1, 4)
784
785 ALLOCATE (gap(npe))
786 gap = 0.0_dp
787 ALLOCATE (iclustr(2*npe))
788 iclustr = 0
789 ALLOCATE (ifail(n))
790 ifail = 0
791 ALLOCATE (iwork(liwork))
792 ALLOCATE (work(lwork))
793
794 ! ScaLAPACK takes advantage of IEEE754 exceptions for speedup.
795 ! Therefore, we disable floating point traps temporarily.
796#if defined (__HAS_IEEE_EXCEPTIONS)
797 CALL ieee_get_halting_mode(ieee_all, halt)
798 CALL ieee_set_halting_mode(ieee_all, .false.)
799#endif
800 CALL pdsyevx(job_type, "I", "U", n, a(1, 1), 1, 1, desca, vl, vu, 1, neig_local, abstol, m, nz, w(1), orfac, &
801 z(1, 1), 1, 1, descz, work(1), lwork, iwork(1), liwork, ifail(1), iclustr(1), gap, info)
802#if defined (__HAS_IEEE_EXCEPTIONS)
803 CALL ieee_set_halting_mode(ieee_all, halt)
804#endif
805
806 ! Error handling
807 IF (info /= 0) THEN
808 IF (ionode) THEN
809 output_unit = cp_logger_get_unit_nr(logger, local=.false.)
810 WRITE (unit=output_unit, fmt="(/,(T3,A,T12,1X,I10))") &
811 "info = ", info, &
812 "lwork = ", lwork, &
813 "liwork = ", liwork, &
814 "nz = ", nz
815 IF (info > 0) THEN
816 WRITE (unit=output_unit, fmt="(/,T3,A,(T12,6(1X,I10)))") &
817 "ifail = ", ifail
818 WRITE (unit=output_unit, fmt="(/,T3,A,(T12,6(1X,I10)))") &
819 "iclustr = ", iclustr
820 WRITE (unit=output_unit, fmt="(/,T3,A,(T12,6(1X,E10.3)))") &
821 "gap = ", gap
822 END IF
823 END IF
824 cpabort("ERROR in PDSYEVX (ScaLAPACK)")
825 END IF
826
827 ! Release work storage
828 DEALLOCATE (gap)
829 DEALLOCATE (iclustr)
830
831#else
832
833 a => matrix%local_data
834 IF (needs_evecs) THEN
835 z => eigenvectors%local_data
836 ELSE
837 ! z will not be referenced
838 z => matrix%local_data
839 END IF
840
841 ! Get the optimal work storage size
842
843 nb = max(ilaenv(1, "DSYTRD", "U", n, -1, -1, -1), &
844 ilaenv(1, "DORMTR", "U", n, -1, -1, -1))
845
846 lwork = max((nb + 3)*n, 8*n) + n ! sun bug fix
847 liwork = 5*n
848
849 ALLOCATE (ifail(n))
850 ifail = 0
851 ALLOCATE (iwork(liwork))
852 ALLOCATE (work(lwork))
853 info = 0
854 nla = SIZE(a, 1)
855 nlz = SIZE(z, 1)
856
857 ! LAPACK takes advantage of IEEE754 exceptions for speedup.
858 ! Therefore, we disable floating point traps temporarily.
859#if defined (__HAS_IEEE_EXCEPTIONS)
860 CALL ieee_get_halting_mode(ieee_all, halt)
861 CALL ieee_set_halting_mode(ieee_all, .false.)
862#endif
863 CALL dsyevx(job_type, "I", "U", n, a(1, 1), nla, vl, vu, 1, neig_local, &
864 abstol, m, w, z(1, 1), nlz, work(1), lwork, iwork(1), ifail(1), info)
865#if defined (__HAS_IEEE_EXCEPTIONS)
866 CALL ieee_set_halting_mode(ieee_all, halt)
867#endif
868
869 ! Error handling
870 IF (info /= 0) THEN
871 output_unit = cp_logger_get_unit_nr(logger, local=.false.)
872 WRITE (unit=output_unit, fmt="(/,(T3,A,T12,1X,I10))") &
873 "info = ", info
874 IF (info > 0) THEN
875 WRITE (unit=output_unit, fmt="(/,T3,A,(T12,6(1X,I10)))") &
876 "ifail = ", ifail
877 END IF
878 cpabort("Error in DSYEVX (ScaLAPACK)")
879 END IF
880
881#endif
882 ! Release work storage
883 DEALLOCATE (ifail)
884 DEALLOCATE (iwork)
885 DEALLOCATE (work)
886 eigenvalues(1:neig_local) = w(1:neig_local)
887 DEALLOCATE (w)
888
889 IF (needs_evecs) CALL check_diag(matrix, eigenvectors, neig_local)
890
891 CALL timestop(handle)
892
893 END SUBROUTINE cp_fm_syevx
894
895! **************************************************************************************************
896!> \brief decomposes a quadratic matrix into its singular value decomposition
897!> \param matrix_a ...
898!> \param matrix_eigvl ...
899!> \param matrix_eigvr_t ...
900!> \param eigval ...
901!> \param info ...
902!> \author Maximilian Graml
903! **************************************************************************************************
904 SUBROUTINE cp_fm_svd(matrix_a, matrix_eigvl, matrix_eigvr_t, eigval, info)
905
906 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
907 TYPE(cp_fm_type), INTENT(INOUT) :: matrix_eigvl, matrix_eigvr_t
908 REAL(kind=dp), DIMENSION(:), POINTER, &
909 INTENT(INOUT) :: eigval
910 INTEGER, INTENT(OUT), OPTIONAL :: info
911
912 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_svd'
913
914 INTEGER :: handle, n, m, myinfo, lwork
915 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
916 TYPE(cp_fm_type) :: matrix_lu
917 REAL(kind=dp), DIMENSION(:), POINTER :: work
918 REAL(kind=dp), TARGET :: w(1)
919#if defined(__parallel)
920 INTEGER, DIMENSION(9) :: desca, descu, descvt
921#endif
922
923 CALL timeset(routinen, handle)
924
925 CALL cp_fm_create(matrix=matrix_lu, &
926 matrix_struct=matrix_a%matrix_struct, &
927 name="A_lu"//trim(adjustl(cp_to_string(1)))//"MATRIX")
928 CALL cp_fm_to_fm(matrix_a, matrix_lu)
929 a => matrix_lu%local_data
930 m = matrix_lu%matrix_struct%nrow_global
931 n = matrix_lu%matrix_struct%ncol_global
932 ! Assert that incoming matrix is quadratic
933 cpassert(m == n)
934
935 ! Prepare for workspace queries
936 myinfo = 0
937 lwork = -1
938 work => w
939#if defined(__parallel)
940 ! To do: That might need a redistribution check as in cp_fm_syevd
941 desca(:) = matrix_lu%matrix_struct%descriptor(:)
942 descu(:) = matrix_eigvl%matrix_struct%descriptor(:)
943 descvt(:) = matrix_eigvr_t%matrix_struct%descriptor(:)
944
945 ! Workspace query
946 CALL pdgesvd('V', 'V', m, m, matrix_lu%local_data, 1, 1, desca, eigval, matrix_eigvl%local_data, &
947 1, 1, descu, matrix_eigvr_t%local_data, 1, 1, descvt, work, lwork, myinfo)
948
949 IF (matrix_lu%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
950 CALL cp_fm_error("ERROR in PDGESVD: Work space query failed", myinfo, PRESENT(info))
951 END IF
952
953 lwork = nint(work(1))
954 ALLOCATE (work(lwork))
955
956 CALL pdgesvd('V', 'V', m, m, matrix_lu%local_data, 1, 1, desca, eigval, matrix_eigvl%local_data, &
957 1, 1, descu, matrix_eigvr_t%local_data, 1, 1, descvt, work, lwork, myinfo)
958
959 IF (matrix_lu%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
960 CALL cp_fm_error("ERROR in PDGESVD: Matrix diagonalization failed", myinfo, PRESENT(info))
961 END IF
962#else
963 ! Workspace query
964 CALL dgesvd('S', 'S', m, m, matrix_lu%local_data, m, eigval, matrix_eigvl%local_data, &
965 m, matrix_eigvr_t%local_data, m, work, lwork, myinfo)
966
967 IF (myinfo /= 0) THEN
968 CALL cp_fm_error("ERROR in DGESVD: Work space query failed", myinfo, PRESENT(info))
969 END IF
970
971 lwork = nint(work(1))
972 ALLOCATE (work(lwork))
973
974 CALL dgesvd('S', 'S', m, m, matrix_lu%local_data, m, eigval, matrix_eigvl%local_data, &
975 m, matrix_eigvr_t%local_data, m, work, lwork, myinfo)
976
977 IF (myinfo /= 0) THEN
978 CALL cp_fm_error("ERROR in DGESVD: Matrix diagonalization failed", myinfo, PRESENT(info))
979 END IF
980
981#endif
982 ! Release intermediary matrices
983 DEALLOCATE (work)
984 CALL cp_fm_release(matrix_lu)
985
986 IF (PRESENT(info)) info = myinfo
987
988 CALL timestop(handle)
989 END SUBROUTINE cp_fm_svd
990
991! **************************************************************************************************
992!> \brief ...
993!> \param matrix ...
994!> \param work ...
995!> \param exponent ...
996!> \param threshold ...
997!> \param n_dependent ...
998!> \param verbose ...
999!> \param eigvals ...
1000! **************************************************************************************************
1001 SUBROUTINE cp_fm_power(matrix, work, exponent, threshold, n_dependent, verbose, eigvals)
1002
1003 ! Raise the real symmetric n by n matrix to the power given by
1004 ! the exponent. All eigenvectors with a corresponding eigenvalue lower
1005 ! than threshold are quenched. result in matrix
1006
1007 ! - Creation (29.03.1999, Matthias Krack)
1008 ! - Parallelised using BLACS and ScaLAPACK (06.06.2001,MK)
1009
1010 TYPE(cp_fm_type), INTENT(IN) :: matrix, work
1011 REAL(kind=dp), INTENT(IN) :: exponent, threshold
1012 INTEGER, INTENT(OUT) :: n_dependent
1013 LOGICAL, INTENT(IN), OPTIONAL :: verbose
1014 REAL(kind=dp), DIMENSION(2), INTENT(OUT), &
1015 OPTIONAL :: eigvals
1016
1017 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_power'
1018
1019 INTEGER :: handle, icol_global, &
1020 mypcol, myprow, &
1021 ncol_global, nrow_global
1022 LOGICAL :: my_verbose
1023 REAL(kind=dp) :: condition_number, f, p
1024 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: eigenvalues
1025 REAL(kind=dp), DIMENSION(:, :), POINTER :: eigenvectors
1026 TYPE(cp_blacs_env_type), POINTER :: context
1027
1028#if defined(__parallel)
1029 INTEGER :: icol_local, ipcol, iprow, irow_global, irow_local
1030#endif
1031
1032 CALL timeset(routinen, handle)
1033
1034 my_verbose = .false.
1035 IF (PRESENT(verbose)) my_verbose = verbose
1036
1037 context => matrix%matrix_struct%context
1038 myprow = context%mepos(1)
1039 mypcol = context%mepos(2)
1040 n_dependent = 0
1041 p = 0.5_dp*exponent
1042
1043 nrow_global = matrix%matrix_struct%nrow_global
1044 ncol_global = matrix%matrix_struct%ncol_global
1045
1046 ALLOCATE (eigenvalues(ncol_global))
1047 eigenvalues(:) = 0.0_dp
1048
1049 ! Compute the eigenvectors and eigenvalues
1050
1051 CALL choose_eigv_solver(matrix, work, eigenvalues)
1052
1053 IF (PRESENT(eigvals)) THEN
1054 eigvals(1) = eigenvalues(1)
1055 eigvals(2) = eigenvalues(ncol_global)
1056 END IF
1057
1058#if defined(__parallel)
1059 eigenvectors => work%local_data
1060
1061 ! Build matrix**exponent with eigenvector quenching
1062
1063 DO icol_global = 1, ncol_global
1064
1065 IF (eigenvalues(icol_global) < threshold) THEN
1066
1067 n_dependent = n_dependent + 1
1068
1069 ipcol = work%matrix_struct%g2p_col(icol_global)
1070
1071 IF (mypcol == ipcol) THEN
1072 icol_local = work%matrix_struct%g2l_col(icol_global)
1073 DO irow_global = 1, nrow_global
1074 iprow = work%matrix_struct%g2p_row(irow_global)
1075 IF (myprow == iprow) THEN
1076 irow_local = work%matrix_struct%g2l_row(irow_global)
1077 eigenvectors(irow_local, icol_local) = 0.0_dp
1078 END IF
1079 END DO
1080 END IF
1081
1082 ELSE
1083
1084 f = eigenvalues(icol_global)**p
1085
1086 ipcol = work%matrix_struct%g2p_col(icol_global)
1087
1088 IF (mypcol == ipcol) THEN
1089 icol_local = work%matrix_struct%g2l_col(icol_global)
1090 DO irow_global = 1, nrow_global
1091 iprow = work%matrix_struct%g2p_row(irow_global)
1092 IF (myprow == iprow) THEN
1093 irow_local = work%matrix_struct%g2l_row(irow_global)
1094 eigenvectors(irow_local, icol_local) = &
1095 f*eigenvectors(irow_local, icol_local)
1096 END IF
1097 END DO
1098 END IF
1099
1100 END IF
1101
1102 END DO
1103
1104#else
1105
1106 eigenvectors => work%local_data
1107
1108 ! Build matrix**exponent with eigenvector quenching
1109
1110 DO icol_global = 1, ncol_global
1111
1112 IF (eigenvalues(icol_global) < threshold) THEN
1113
1114 n_dependent = n_dependent + 1
1115 eigenvectors(1:nrow_global, icol_global) = 0.0_dp
1116
1117 ELSE
1118
1119 f = eigenvalues(icol_global)**p
1120 eigenvectors(1:nrow_global, icol_global) = &
1121 f*eigenvectors(1:nrow_global, icol_global)
1122
1123 END IF
1124
1125 END DO
1126
1127#endif
1128 CALL cp_fm_syrk("U", "N", ncol_global, 1.0_dp, work, 1, 1, 0.0_dp, matrix)
1129 CALL cp_fm_uplo_to_full(matrix, work)
1130
1131 ! Print some warnings/notes
1132 IF (matrix%matrix_struct%para_env%is_source() .AND. my_verbose) THEN
1133 condition_number = abs(eigenvalues(ncol_global)/eigenvalues(1))
1134 WRITE (unit=cp_logger_get_default_unit_nr(), fmt="(/,(T2,A,ES15.6))") &
1135 "CP_FM_POWER: smallest eigenvalue:", eigenvalues(1), &
1136 "CP_FM_POWER: largest eigenvalue: ", eigenvalues(ncol_global), &
1137 "CP_FM_POWER: condition number: ", condition_number
1138 IF (eigenvalues(1) <= 0.0_dp) THEN
1139 WRITE (unit=cp_logger_get_default_unit_nr(), fmt="(/,T2,A)") &
1140 "WARNING: matrix has a negative eigenvalue, tighten EPS_DEFAULT"
1141 END IF
1142 IF (condition_number > 1.0e12_dp) THEN
1143 WRITE (unit=cp_logger_get_default_unit_nr(), fmt="(/,T2,A)") &
1144 "WARNING: high condition number => possibly ill-conditioned matrix"
1145 END IF
1146 END IF
1147
1148 DEALLOCATE (eigenvalues)
1149
1150 CALL timestop(handle)
1151
1152 END SUBROUTINE cp_fm_power
1153
1154! **************************************************************************************************
1155!> \brief ...
1156!> \param matrix ...
1157!> \param eigenvectors ...
1158!> \param eigval ...
1159!> \param thresh ...
1160!> \param start_sec_block ...
1161! **************************************************************************************************
1162 SUBROUTINE cp_fm_block_jacobi(matrix, eigenvectors, eigval, thresh, start_sec_block)
1163
1164 ! Calculates block diagonalization of a full symmetric matrix
1165 ! It has its origin in cp_fm_syevx. This routine rotates only elements
1166 ! which are larger than a threshold values "thresh".
1167 ! start_sec_block is the start of the second block.
1168 ! IT DOES ONLY ONE SWEEP!
1169
1170 ! - Creation (07.10.2002, Martin Fengler)
1171 ! - Cosmetics (05.04.06, MK)
1172
1173 TYPE(cp_fm_type), INTENT(IN) :: eigenvectors, matrix
1174 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: eigval
1175 INTEGER, INTENT(IN) :: start_sec_block
1176 REAL(kind=dp), INTENT(IN) :: thresh
1177
1178 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_block_jacobi'
1179
1180 INTEGER :: handle
1181 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, ev
1182
1183 REAL(kind=dp) :: tan_theta, tau, c, s
1184 INTEGER :: q, p, n
1185 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: c_ip
1186
1187#if defined(__parallel)
1188 TYPE(cp_blacs_env_type), POINTER :: context
1189
1190 INTEGER :: nprow, npcol, block_dim_row, block_dim_col, info, &
1191 ev_row_block_size, iam, mynumrows, mype, npe, q_loc
1192 REAL(kind=dp), DIMENSION(:, :), ALLOCATABLE :: a_loc, ev_loc
1193 INTEGER, DIMENSION(9) :: desca, descz, &
1194 desc_a_block, &
1195 desc_ev_loc
1196 TYPE(mp_comm_type):: allgrp
1197 TYPE(cp_blacs_type) :: ictxt_loc
1198 INTEGER, EXTERNAL :: numroc
1199#endif
1200
1201 ! -------------------------------------------------------------------------
1202
1203 CALL timeset(routinen, handle)
1204
1205#if defined(__parallel)
1206 context => matrix%matrix_struct%context
1207 allgrp = matrix%matrix_struct%para_env
1208
1209 nprow = context%num_pe(1)
1210 npcol = context%num_pe(2)
1211
1212 n = matrix%matrix_struct%nrow_global
1213
1214 a => matrix%local_data
1215 desca(:) = matrix%matrix_struct%descriptor(:)
1216 ev => eigenvectors%local_data
1217 descz(:) = eigenvectors%matrix_struct%descriptor(:)
1218
1219 ! Copy the block to be rotated to the master process firstly and broadcast to all processes
1220 ! start_sec_block defines where the second block starts!
1221 ! Block will be processed together with the OO block
1222
1223 block_dim_row = start_sec_block - 1
1224 block_dim_col = n - block_dim_row
1225 ALLOCATE (a_loc(block_dim_row, block_dim_col))
1226
1227 mype = matrix%matrix_struct%para_env%mepos
1228 npe = matrix%matrix_struct%para_env%num_pe
1229 ! Get a new context
1230 CALL ictxt_loc%gridinit(matrix%matrix_struct%para_env, 'Row-major', nprow*npcol, 1)
1231
1232 CALL descinit(desc_a_block, block_dim_row, block_dim_col, block_dim_row, &
1233 block_dim_col, 0, 0, ictxt_loc%get_handle(), block_dim_row, info)
1234
1235 CALL pdgemr2d(block_dim_row, block_dim_col, a, 1, start_sec_block, desca, &
1236 a_loc, 1, 1, desc_a_block, context%get_handle())
1237 ! Only the master (root) process received data yet
1238 CALL allgrp%bcast(a_loc, 0)
1239
1240 ! Since each process owns now the upper block, the eigenvectors can be re-sorted in such a way that
1241 ! each process has a NN*1 grid, i.e. the process owns a bunch of rows which can be modified locally
1242
1243 ! Initialize distribution of the eigenvectors
1244 iam = mype
1245 ev_row_block_size = n/(nprow*npcol)
1246 mynumrows = numroc(n, ev_row_block_size, iam, 0, nprow*npcol)
1247
1248 ALLOCATE (ev_loc(mynumrows, n), c_ip(mynumrows))
1249
1250 CALL descinit(desc_ev_loc, n, n, ev_row_block_size, n, 0, 0, ictxt_loc%get_handle(), &
1251 mynumrows, info)
1252
1253 CALL pdgemr2d(n, n, ev, 1, 1, descz, ev_loc, 1, 1, desc_ev_loc, context%get_handle())
1254
1255 ! Start block diagonalization of matrix
1256
1257 q_loc = 0
1258
1259 DO q = start_sec_block, n
1260 q_loc = q_loc + 1
1261 DO p = 1, (start_sec_block - 1)
1262
1263 IF (abs(a_loc(p, q_loc)) > thresh) THEN
1264
1265 tau = (eigval(q) - eigval(p))/(2.0_dp*a_loc(p, q_loc))
1266
1267 tan_theta = sign(1.0_dp, tau)/(abs(tau) + sqrt(1.0_dp + tau*tau))
1268
1269 ! Cos(theta)
1270 c = 1.0_dp/sqrt(1.0_dp + tan_theta*tan_theta)
1271 s = tan_theta*c
1272
1273 ! Calculate eigenvectors: Q*J
1274 ! c_ip = c*EV_loc(:,p) - s*EV_loc(:,q)
1275 ! c_iq = s*EV_loc(:,p) + c*EV_loc(:,q)
1276 ! EV(:,p) = c_ip
1277 ! EV(:,q) = c_iq
1278 CALL dcopy(mynumrows, ev_loc(1, p), 1, c_ip(1), 1)
1279 CALL dscal(mynumrows, c, ev_loc(1, p), 1)
1280 CALL daxpy(mynumrows, -s, ev_loc(1, q), 1, ev_loc(1, p), 1)
1281 CALL dscal(mynumrows, c, ev_loc(1, q), 1)
1282 CALL daxpy(mynumrows, s, c_ip(1), 1, ev_loc(1, q), 1)
1283
1284 END IF
1285
1286 END DO
1287 END DO
1288
1289 ! Copy eigenvectors back to the original distribution
1290 CALL pdgemr2d(n, n, ev_loc, 1, 1, desc_ev_loc, ev, 1, 1, descz, context%get_handle())
1291
1292 ! Release work storage
1293 DEALLOCATE (a_loc, ev_loc, c_ip)
1294
1295 CALL ictxt_loc%gridexit()
1296
1297#else
1298
1299 n = matrix%matrix_struct%nrow_global
1300
1301 ALLOCATE (c_ip(n)) ! Local eigenvalue vector
1302
1303 a => matrix%local_data ! Contains the Matrix to be worked on
1304 ev => eigenvectors%local_data ! Contains the eigenvectors up to blocksize, rest is garbage
1305
1306 ! Start matrix diagonalization
1307
1308 tan_theta = 0.0_dp
1309 tau = 0.0_dp
1310
1311 DO q = start_sec_block, n
1312 DO p = 1, (start_sec_block - 1)
1313
1314 IF (abs(a(p, q)) > thresh) THEN
1315
1316 tau = (eigval(q) - eigval(p))/(2.0_dp*a(p, q))
1317
1318 tan_theta = sign(1.0_dp, tau)/(abs(tau) + sqrt(1.0_dp + tau*tau))
1319
1320 ! Cos(theta)
1321 c = 1.0_dp/sqrt(1.0_dp + tan_theta*tan_theta)
1322 s = tan_theta*c
1323
1324 ! Calculate eigenvectors: Q*J
1325 ! c_ip = c*EV(:,p) - s*EV(:,q)
1326 ! c_iq = s*EV(:,p) + c*EV(:,q)
1327 ! EV(:,p) = c_ip
1328 ! EV(:,q) = c_iq
1329 CALL dcopy(n, ev(1, p), 1, c_ip(1), 1)
1330 CALL dscal(n, c, ev(1, p), 1)
1331 CALL daxpy(n, -s, ev(1, q), 1, ev(1, p), 1)
1332 CALL dscal(n, c, ev(1, q), 1)
1333 CALL daxpy(n, s, c_ip(1), 1, ev(1, q), 1)
1334
1335 END IF
1336
1337 END DO
1338 END DO
1339
1340 ! Release work storage
1341
1342 DEALLOCATE (c_ip)
1343
1344#endif
1345
1346 CALL timestop(handle)
1347
1348 END SUBROUTINE cp_fm_block_jacobi
1349
1350! **************************************************************************************************
1351!> \brief General Eigenvalue Problem AX = BXE
1352!> Single option version: Cholesky decomposition of B
1353!> \param amatrix ...
1354!> \param bmatrix ...
1355!> \param eigenvectors ...
1356!> \param eigenvalues ...
1357!> \param work ...
1358! **************************************************************************************************
1359 SUBROUTINE cp_fm_geeig(amatrix, bmatrix, eigenvectors, eigenvalues, work)
1360
1361 TYPE(cp_fm_type), INTENT(IN) :: amatrix, bmatrix, eigenvectors
1362 REAL(kind=dp), DIMENSION(:) :: eigenvalues
1363 TYPE(cp_fm_type), INTENT(IN) :: work
1364
1365 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_geeig'
1366
1367 INTEGER :: handle, nao, nmo
1368
1369 CALL timeset(routinen, handle)
1370
1371 CALL cp_fm_get_info(amatrix, nrow_global=nao)
1372 nmo = SIZE(eigenvalues)
1373
1374 IF (diag_type == fm_diag_type_cusolver .AND. cusolver_generalized .AND. nao >= 64) THEN
1375 ! Use cuSolverMP generalized eigenvalue solver for large matrices
1376 ! Use work as intermediate buffer since eigenvectors may be smaller (nao x nmo)
1377 CALL cp_fm_general_cusolver(amatrix, bmatrix, work, eigenvalues)
1378 CALL cp_fm_to_fm(work, eigenvectors, nmo)
1379#if defined(__DLAF)
1380 ELSE IF (diag_type == fm_diag_type_dlaf .AND. amatrix%matrix_struct%nrow_global >= dlaf_neigvec_min) THEN
1381 ! Use DLA-Future generalized eigenvalue solver for large matrices
1382 CALL cp_fm_diag_gen_dlaf(amatrix, bmatrix, eigenvectors, eigenvalues)
1383#endif
1384 ELSE
1385 ! Cholesky decompose S=U(T)U
1386 CALL cp_fm_cholesky_decompose(bmatrix)
1387 ! Invert to get U^(-1)
1388 CALL cp_fm_triangular_invert(bmatrix)
1389 ! Reduce to get U^(-T) * H * U^(-1)
1390 CALL cp_fm_triangular_multiply(bmatrix, amatrix, side="R")
1391 CALL cp_fm_triangular_multiply(bmatrix, amatrix, transpose_tr=.true.)
1392 ! Diagonalize
1393 CALL choose_eigv_solver(matrix=amatrix, eigenvectors=work, &
1394 eigenvalues=eigenvalues)
1395 ! Restore vectors C = U^(-1) * C*
1396 CALL cp_fm_triangular_multiply(bmatrix, work)
1397 CALL cp_fm_to_fm(work, eigenvectors, nmo)
1398 END IF
1399
1400 CALL timestop(handle)
1401
1402 END SUBROUTINE cp_fm_geeig
1403
1404! **************************************************************************************************
1405!> \brief General Eigenvalue Problem AX = BXE
1406!> Use canonical diagonalization : U*s**(-1/2)
1407!> \param amatrix ...
1408!> \param bmatrix ...
1409!> \param eigenvectors ...
1410!> \param eigenvalues ...
1411!> \param work ...
1412!> \param epseig ...
1413! **************************************************************************************************
1414 SUBROUTINE cp_fm_geeig_canon(amatrix, bmatrix, eigenvectors, eigenvalues, work, epseig)
1415
1416 TYPE(cp_fm_type), INTENT(IN) :: amatrix, bmatrix, eigenvectors
1417 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
1418 TYPE(cp_fm_type), INTENT(IN) :: work
1419 REAL(kind=dp), INTENT(IN) :: epseig
1420
1421 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_geeig_canon'
1422
1423 INTEGER :: handle, i, icol, irow, nao, nc, ncol, &
1424 nmo, nx
1425 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: evals
1426
1427 CALL timeset(routinen, handle)
1428
1429 ! Test sizees
1430 CALL cp_fm_get_info(amatrix, nrow_global=nao)
1431 nmo = SIZE(eigenvalues)
1432 ALLOCATE (evals(nao))
1433
1434 ! Diagonalize -S matrix, this way the NULL space is at the end of the spectrum
1435 CALL cp_fm_scale(-1.0_dp, bmatrix)
1436 CALL choose_eigv_solver(matrix=bmatrix, eigenvectors=work, eigenvalues=evals)
1437 evals(:) = -evals(:)
1438 nc = nao
1439 DO i = 1, nao
1440 IF (evals(i) < epseig) THEN
1441 nc = i - 1
1442 EXIT
1443 END IF
1444 END DO
1445 cpassert(nc /= 0)
1446
1447 IF (nc /= nao) THEN
1448 IF (nc < nmo) THEN
1449 ! Copy NULL space definition to last vectors of eigenvectors (if needed)
1450 ncol = nmo - nc
1451 CALL cp_fm_to_fm(work, eigenvectors, ncol, nc + 1, nc + 1)
1452 END IF
1453 ! Set NULL space in eigenvector matrix of S to zero
1454 DO icol = nc + 1, nao
1455 DO irow = 1, nao
1456 CALL cp_fm_set_element(work, irow, icol, 0.0_dp)
1457 END DO
1458 END DO
1459 ! Set small eigenvalues to a dummy save value
1460 evals(nc + 1:nao) = 1.0_dp
1461 END IF
1462 ! Calculate U*s**(-1/2)
1463 evals(:) = 1.0_dp/sqrt(evals(:))
1464 CALL cp_fm_column_scale(work, evals)
1465 ! Reduce to get U^(-T) * H * U^(-1)
1466 CALL cp_fm_gemm("T", "N", nao, nao, nao, 1.0_dp, work, amatrix, 0.0_dp, bmatrix)
1467 CALL cp_fm_gemm("N", "N", nao, nao, nao, 1.0_dp, bmatrix, work, 0.0_dp, amatrix)
1468 IF (nc /= nao) THEN
1469 ! set diagonal values to save large value
1470 DO icol = nc + 1, nao
1471 CALL cp_fm_set_element(amatrix, icol, icol, 10000.0_dp)
1472 END DO
1473 END IF
1474 ! Diagonalize
1475 CALL choose_eigv_solver(matrix=amatrix, eigenvectors=bmatrix, eigenvalues=eigenvalues)
1476 nx = min(nc, nmo)
1477 ! Restore vectors C = U^(-1) * C*
1478 CALL cp_fm_gemm("N", "N", nao, nx, nc, 1.0_dp, work, bmatrix, 0.0_dp, eigenvectors)
1479
1480 DEALLOCATE (evals)
1481
1482 CALL timestop(handle)
1483
1484 END SUBROUTINE cp_fm_geeig_canon
1485
1486END MODULE cp_fm_diag
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
methods related to the blacs parallel environment
wrappers for the actual blacs calls. all functionality needed in the code should actually be provide ...
subroutine, public cp_dlaf_finalize()
Finalize DLA-Future and pika runtime.
subroutine, public cp_dlaf_initialize()
Initialize DLA-Future and pika runtime.
Basic linear algebra operations for full matrices.
subroutine, public cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
subroutine, public cp_fm_column_scale(matrixa, scaling)
scales column i of matrix a with scaling(i)
subroutine, public cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
performs a rank-k update of a symmetric matrix_c matrix_c = beta * matrix_c + alpha * matrix_a * tran...
subroutine, public cp_fm_uplo_to_full(matrix, work, uplo)
given a triangular matrix according to uplo, computes the corresponding full matrix
subroutine, public cp_fm_scale(alpha, matrix_a)
scales a matrix matrix_a = alpha * matrix_b
subroutine, public cp_fm_triangular_invert(matrix_a, uplo_tr)
inverts a triangular matrix
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_decompose(matrix, n, info_out)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
Wrapper for cuSOLVERMp.
subroutine, public cp_fm_general_cusolver(amatrix, bmatrix, eigenvectors, eigenvalues)
Driver routine to solve generalized eigenvalue problem A*x = lambda*B*x with cuSOLVERMp.
subroutine, public cp_fm_diag_cusolver(matrix, eigenvectors, eigenvalues)
Driver routine to diagonalize a FM matrix with the cuSOLVERMp library.
Auxiliary tools to redistribute cp_fm_type matrices before and after diagonalization....
subroutine, public cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)
Redistributes eigenvectors and eigenvalues back to the original communicator group.
subroutine, public cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, caller_is_elpa, redist_info)
Determines the optimal number of CPUs for matrix diagonalization and redistributes the input matrices...
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
Definition cp_fm_diag.F:17
subroutine, public cp_fm_block_jacobi(matrix, eigenvectors, eigval, thresh, start_sec_block)
...
real(kind=dp), parameter, public eps_check_diag_default
Definition cp_fm_diag.F:83
subroutine, public cp_fm_power(matrix, work, exponent, threshold, n_dependent, verbose, eigvals)
...
integer, parameter, public fm_diag_type_cusolver
Definition cp_fm_diag.F:102
subroutine, public cp_fm_svd(matrix_a, matrix_eigvl, matrix_eigvr_t, eigval, info)
decomposes a quadratic matrix into its singular value decomposition
Definition cp_fm_diag.F:905
integer, parameter, public fm_diag_type_dlaf
Definition cp_fm_diag.F:102
integer, parameter, public fm_diag_type_scalapack
Definition cp_fm_diag.F:102
subroutine, public cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
Computes all eigenvalues and vectors of a real symmetric matrix significantly faster than syevx,...
Definition cp_fm_diag.F:448
subroutine, public diag_finalize()
Finalize the diagonalization library.
Definition cp_fm_diag.F:215
integer, parameter, public fm_diag_type_default
Definition cp_fm_diag.F:111
subroutine, public cp_fm_geeig(amatrix, bmatrix, eigenvectors, eigenvalues, work)
General Eigenvalue Problem AX = BXE Single option version: Cholesky decomposition of B.
subroutine, public choose_eigv_solver(matrix, eigenvectors, eigenvalues, info)
Choose the Eigensolver depending on which library is available ELPA seems to be unstable for small sy...
Definition cp_fm_diag.F:239
integer, save, public diag_type
Definition cp_fm_diag.F:87
subroutine, public diag_init(diag_lib, fallback_applied, elpa_kernel, elpa_neigvec_min_input, elpa_qr, elpa_print, elpa_one_stage, dlaf_neigvec_min_input, eps_check_diag_input, cusolver_generalized_input)
Setup the diagonalization library to be used.
Definition cp_fm_diag.F:150
integer, parameter, public fm_diag_type_elpa
Definition cp_fm_diag.F:102
subroutine, public cp_fm_geeig_canon(amatrix, bmatrix, eigenvectors, eigenvalues, work, epseig)
General Eigenvalue Problem AX = BXE Use canonical diagonalization : U*s**(-1/2)
subroutine, public cp_fm_syevx(matrix, eigenvectors, eigenvalues, neig, work_syevx)
compute eigenvalues and optionally eigenvectors of a real symmetric matrix using scalapack....
Definition cp_fm_diag.F:672
logical, save, public cusolver_generalized
Definition cp_fm_diag.F:96
subroutine, public cp_fm_diag_dlaf(matrix, eigenvectors, eigenvalues)
...
subroutine, public cp_fm_diag_gen_dlaf(a_matrix, b_matrix, eigenvectors, eigenvalues)
...
Wrapper for ELPA.
Definition cp_fm_elpa.F:12
subroutine, public set_elpa_kernel(requested_kernel)
Sets the active ELPA kernel.
Definition cp_fm_elpa.F:224
logical, save, public elpa_qr
Definition cp_fm_elpa.F:161
subroutine, public cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
Driver routine to diagonalize a FM matrix with the ELPA library.
Definition cp_fm_elpa.F:276
logical, save, public elpa_print
Definition cp_fm_elpa.F:161
subroutine, public finalize_elpa_library()
Finalize the ELPA library.
Definition cp_fm_elpa.F:212
logical, save, public elpa_one_stage
Definition cp_fm_elpa.F:168
subroutine, public initialize_elpa_library(one_stage, qr, should_print)
Initialize the ELPA library.
Definition cp_fm_elpa.F:192
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
logical function, public cp_fm_struct_equivalent(fmstruct1, fmstruct2)
returns true if the two matrix structures are equivalent, false otherwise.
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp, nrow, ncol, set_zero)
creates a new full matrix with the given structure
subroutine, public cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
copy just a part ot the matrix
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_set_element(matrix, irow_global, icol_global, alpha)
sets an element of a matrix
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
integer function, public cp_logger_get_unit_nr(logger, local)
returns the unit nr for the requested kind of log.
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public default_string_length
Definition kinds.F:57
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
integer, parameter, public default_output_unit
Definition machine.F:58
subroutine, public m_memory(mem)
Returns the total amount of memory [bytes] in use, if known, zero otherwise.
Definition machine.F:452
Interface to the message passing library MPI.
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
keeps the information about the structure of a full matrix
represent a full matrix
type of a logger, at the moment it contains just a print level starting at which level it should be l...