! cp_fm_diag.F (git:b06d3a9)
! NOTE(review): this copy was extracted from a Doxygen HTML page; browser
! artifacts ("Loading...", "Searching...", "No Matches") were removed here.
! Some original source lines are missing (visible as gaps in the embedded
! line numbering) -- compare against the upstream repository before reuse.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief used for collecting some of the diagonalization schemes available for
10!> cp_fm_type. cp_fm_power also moved here as it is very related
11!> \note
12!> first version : most routines imported
13!> \par History
14!> - unused Jacobi routines removed, cosmetics (05.04.06,MK)
15!> \author Joost VandeVondele (2003-08)
16! **************************************************************************************************
21 cp_fm_gemm, &
23 cp_fm_syrk, &
30 USE cp_fm_elpa, ONLY: cp_fm_diag_elpa, &
36#if defined(__DLAF)
39#endif
40 USE cp_fm_types, ONLY: cp_fm_get_info, &
43 cp_fm_type, &
64 USE kinds, ONLY: default_string_length, &
65 dp
66 USE machine, ONLY: default_output_unit, &
68#if defined (__parallel)
70#endif
71#if defined (__HAS_IEEE_EXCEPTIONS)
72 USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
73 ieee_set_halting_mode, &
74 ieee_all
75#endif
76#include "../base/base_uses.f90"
77
78 IMPLICIT NONE
79 PRIVATE
80
81 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_diag'
82
83 REAL(kind=dp), PARAMETER, PUBLIC :: eps_check_diag_default = 5.0e-14_dp
84
85 ! The following saved variables are diagonalization global
86 ! Stores the default library for diagonalization
87 INTEGER, SAVE, PUBLIC :: diag_type = 0
88 ! Minimum number of eigenvectors for the use of the ELPA eigensolver.
89 ! The ScaLAPACK eigensolver is used as fallback for all smaller cases.
90 INTEGER, SAVE :: elpa_neigvec_min = 0
91#if defined(__DLAF)
92 ! Minimum number of eigenvectors for the use of the DLAF eigensolver.
93 ! The ScaLAPACK eigensolver is used as fallback for all smaller cases.
94 INTEGER, SAVE, PUBLIC :: dlaf_neigvec_min = 0
95#endif
96 ! Threshold value for the orthonormality check of the eigenvectors obtained
97 ! after a diagonalization. A negative value disables the check.
98 REAL(kind=dp), SAVE :: eps_check_diag = -1.0_dp
99
100 ! Constants for the diag_type above
101 INTEGER, PARAMETER, PUBLIC :: fm_diag_type_scalapack = 101, &
102 fm_diag_type_elpa = 102, &
103 fm_diag_type_cusolver = 103, &
! NOTE(review): extraction dropped orig. line 104 here; diag_init and
! check_diag below reference fm_diag_type_dlaf, which upstream presumably
! defines as 104 in this list -- confirm against the repository.
105#if defined(__CUSOLVERMP)
106 INTEGER, PARAMETER, PUBLIC :: fm_diag_type_default = fm_diag_type_cusolver
107#elif defined(__ELPA)
108 INTEGER, PARAMETER, PUBLIC :: fm_diag_type_default = fm_diag_type_elpa
109#else
110 INTEGER, PARAMETER, PUBLIC :: fm_diag_type_default = fm_diag_type_scalapack
111#endif
112
113 ! Public subroutines
114 PUBLIC :: choose_eigv_solver, &
116 cp_fm_power, &
117 cp_fm_syevd, &
118 cp_fm_syevx, &
119 cp_fm_svd, &
120 cp_fm_geeig, &
122 diag_init, &
! NOTE(review): several continuation lines of this PUBLIC list (orig. 115,
! 121, 123, 124) were lost in extraction -- likely including diag_finalize,
! which is defined below. Confirm against upstream.
124
125CONTAINS
126
127! **************************************************************************************************
128!> \brief Setup the diagonalization library to be used
129!> \param diag_lib diag_library flag from GLOBAL section in input
130!> \param fallback_applied .TRUE. if support for the requested library was not compiled-in and fallback
131!> to ScaLAPACK was applied, .FALSE. otherwise.
132!> \param elpa_kernel integer that determines which ELPA kernel to use for diagonalization
133!> \param elpa_neigvec_min_input ...
134!> \param elpa_qr logical that determines if ELPA should try to use QR to accelerate the
135!> diagonalization procedure of suitably sized matrices
136!> \param elpa_print logical that determines if information about the ELPA diagonalization should
137!> be printed
138!> \param elpa_one_stage logical that enables the one-stage solver
139!> \param dlaf_neigvec_min_input ...
140!> \param eps_check_diag_input ...
141!> \par History
142!> - Add support for DLA-Future (05.09.2023, RMeli)
143!> \author MI 11.2013
144! **************************************************************************************************
145 SUBROUTINE diag_init(diag_lib, fallback_applied, elpa_kernel, elpa_neigvec_min_input, elpa_qr, &
146 elpa_print, elpa_one_stage, dlaf_neigvec_min_input, eps_check_diag_input)
147 CHARACTER(LEN=*), INTENT(IN) :: diag_lib
148 LOGICAL, INTENT(OUT) :: fallback_applied
149 INTEGER, INTENT(IN) :: elpa_kernel, elpa_neigvec_min_input
150 LOGICAL, INTENT(IN) :: elpa_qr, elpa_print, elpa_one_stage
151 INTEGER, INTENT(IN) :: dlaf_neigvec_min_input
152 REAL(kind=dp), INTENT(IN) :: eps_check_diag_input
153
! initialized has implicit SAVE via its initializer: the one-time library
! setup below is performed only on the first call.
154 LOGICAL, SAVE :: initialized = .false.
155
156 fallback_applied = .false.
157
! Map the input-file library name onto the module-global diag_type.
! NOTE(review): the scraped source drops the branch bodies that assign
! diag_type (orig. lines 159, 163, 166, 170, 173); verify against upstream
! before relying on this copy.
158 IF (diag_lib == "ScaLAPACK") THEN
160 ELSE IF (diag_lib == "ELPA") THEN
161#if defined (__ELPA)
162 ! ELPA is requested and available
164#else
165 ! ELPA library requested but not linked, switch back to SL
167 fallback_applied = .true.
168#endif
169 ELSE IF (diag_lib == "cuSOLVER") THEN
171 ELSE IF (diag_lib == "DLAF") THEN
172#if defined (__DLAF)
174#else
175 cpabort("ERROR in diag_init: CP2K was not compiled with DLA-Future support")
176#endif
177 ELSE
178 cpabort("ERROR in diag_init: Initialization of unknown diagonalization library requested")
179 END IF
180
181 ! Initialization of requested diagonalization library
182 IF (.NOT. initialized .AND. diag_type == fm_diag_type_elpa) THEN
183 CALL initialize_elpa_library(one_stage=elpa_one_stage, qr=elpa_qr, should_print=elpa_print)
184 CALL set_elpa_kernel(elpa_kernel)
185 initialized = .true.
186 END IF
187#if defined(__DLAF)
188 IF (.NOT. initialized .AND. diag_type == fm_diag_type_dlaf) THEN
189 CALL cp_dlaf_initialize()
190 initialized = .true.
191 END IF
192 dlaf_neigvec_min = dlaf_neigvec_min_input
193#else
194 mark_used(dlaf_neigvec_min_input)
195#endif
196
! Store the module-global tuning parameters for later solver calls.
197 elpa_neigvec_min = elpa_neigvec_min_input
198 eps_check_diag = eps_check_diag_input
199
200 END SUBROUTINE diag_init
201
202! **************************************************************************************************
203!> \brief Finalize the diagonalization library
204! **************************************************************************************************
205 SUBROUTINE diag_finalize()
206#if defined (__ELPA)
! NOTE(review): the ELPA branch body (orig. lines 207-208, presumably the
! ELPA library finalize call) was lost in extraction -- confirm upstream.
209#endif
210#if defined (__DLAF)
212 CALL cp_dlaf_finalize()
213#endif
214 END SUBROUTINE diag_finalize
215
216! **************************************************************************************************
217!> \brief Choose the Eigensolver depending on which library is available
218!> ELPA seems to be unstable for small systems
219!> \param matrix ...
220!> \param eigenvectors ...
221!> \param eigenvalues ...
222!> \param info ...
223!> \par info If present returns error code and prevents program stops.
224!> Works currently only for cp_fm_syevd with ScaLAPACK.
225!> Other solvers will end the program regardless of PRESENT(info).
226!> \par History
227!> - Do not use ELPA for small matrices and use instead ScaLAPACK as fallback (10.05.2021, MK)
228! **************************************************************************************************
229 SUBROUTINE choose_eigv_solver(matrix, eigenvectors, eigenvalues, info)
230
231 TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
232 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
233 INTEGER, INTENT(OUT), OPTIONAL :: info
234
235 CHARACTER(LEN=*), PARAMETER :: routinen = 'choose_eigv_solver'
236
237 ! Sample peak memory
238 CALL m_memory()
239
240 IF (PRESENT(info)) info = 0 ! Default for solvers that do not return an info.
241
! Dispatch on the module-global diag_type set by diag_init.
! NOTE(review): orig. line 242 is missing in this scraped copy; given the
! ELSE IF chain below it was presumably
! "IF (diag_type == fm_diag_type_scalapack) THEN" -- confirm upstream.
243 CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
244
245 ELSE IF (diag_type == fm_diag_type_elpa) THEN
246 IF (matrix%matrix_struct%nrow_global < elpa_neigvec_min) THEN
247 ! We don't trust ELPA with very small matrices.
248 CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
249 ELSE
250 CALL cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
251 END IF
252
253 ELSE IF (diag_type == fm_diag_type_cusolver) THEN
254 IF (matrix%matrix_struct%nrow_global < 64) THEN
255 ! We don't trust cuSolver with very small matrices.
256 CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
257 ELSE
258 CALL cp_fm_diag_cusolver(matrix, eigenvectors, eigenvalues)
259 END IF
260
261#if defined(__DLAF)
262 ELSE IF (diag_type == fm_diag_type_dlaf) THEN
263 IF (matrix%matrix_struct%nrow_global < dlaf_neigvec_min) THEN
264 ! Use ScaLAPACK for small matrices
265 CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
266 ELSE
267 CALL cp_fm_diag_dlaf(matrix, eigenvectors, eigenvalues)
268 END IF
269#endif
270
271 ELSE
272 cpabort("ERROR in "//routinen//": Invalid diagonalization type requested")
273 END IF
274
! Orthonormality check of the returned eigenvectors; it is a no-op unless
! enabled (see check_diag for the enabling conditions).
275 CALL check_diag(matrix, eigenvectors, nvec=SIZE(eigenvalues))
276
277 END SUBROUTINE choose_eigv_solver
278
279! **************************************************************************************************
280!> \brief Check result of diagonalization, i.e. the orthonormality of the eigenvectors
281!> \param matrix Work matrix
282!> \param eigenvectors Eigenvectors to be checked
283!> \param nvec ...
284! **************************************************************************************************
285 SUBROUTINE check_diag(matrix, eigenvectors, nvec)
286
287 TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
288 INTEGER, INTENT(IN) :: nvec
289
290 CHARACTER(LEN=*), PARAMETER :: routinen = 'check_diag'
291
292 CHARACTER(LEN=default_string_length) :: diag_type_name
293 REAL(kind=dp) :: eps, eps_abort, eps_warning, gold, test
294 INTEGER :: handle, i, j, ncol, nrow, output_unit
295 LOGICAL :: check_eigenvectors
296#if defined(__parallel)
297 TYPE(cp_blacs_env_type), POINTER :: context
298 INTEGER :: il, jl, ipcol, iprow, &
299 mypcol, myprow, npcol, nprow
300 INTEGER, DIMENSION(9) :: desca
301#endif
302
303 CALL timeset(routinen, handle)
304
! The check is compiled in unconditionally with __CHECK_DIAG, otherwise it
! is only active when the user supplied a non-negative eps_check_diag.
305 output_unit = default_output_unit
306 eps_warning = eps_check_diag_default
307#if defined(__CHECK_DIAG)
308 check_eigenvectors = .true.
309 IF (eps_check_diag >= 0.0_dp) THEN
310 eps_warning = eps_check_diag
311 END IF
312#else
313 IF (eps_check_diag >= 0.0_dp) THEN
314 check_eigenvectors = .true.
315 eps_warning = eps_check_diag
316 ELSE
317 check_eigenvectors = .false.
318 END IF
319#endif
320 eps_abort = 10.0_dp*eps_warning
321
322 gold = 0.0_dp
323 test = 0.0_dp
324 eps = 0.0_dp
325
! Form V^T*V into the work "matrix" and scan it against the identity:
! "gold" is the expected entry (1 on the diagonal, 0 off it), "test" the
! actual entry, "eps" their deviation. The scan stops at the first entry
! exceeding eps_warning.
326 IF (check_eigenvectors) THEN
327#if defined(__parallel)
328 nrow = eigenvectors%matrix_struct%nrow_global
329 ncol = min(eigenvectors%matrix_struct%ncol_global, nvec)
330 CALL cp_fm_gemm("T", "N", ncol, ncol, nrow, 1.0_dp, eigenvectors, eigenvectors, 0.0_dp, matrix)
331 context => matrix%matrix_struct%context
332 myprow = context%mepos(1)
333 mypcol = context%mepos(2)
334 nprow = context%num_pe(1)
335 npcol = context%num_pe(2)
336 desca(:) = matrix%matrix_struct%descriptor(:)
! Each rank inspects only the entries it owns (infog2l maps global (i,j)
! to local indices and the owning process of the BLACS grid).
337 outer: DO j = 1, ncol
338 DO i = 1, ncol
339 CALL infog2l(i, j, desca, nprow, npcol, myprow, mypcol, il, jl, iprow, ipcol)
340 IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
341 gold = merge(0.0_dp, 1.0_dp, i /= j)
342 test = matrix%local_data(il, jl)
343 eps = abs(test - gold)
344 IF (eps > eps_warning) EXIT outer
345 END IF
346 END DO
347 END DO outer
348#else
349 nrow = SIZE(eigenvectors%local_data, 1)
350 ncol = min(SIZE(eigenvectors%local_data, 2), nvec)
351 CALL dgemm("T", "N", ncol, ncol, nrow, 1.0_dp, &
352 eigenvectors%local_data(1, 1), nrow, &
353 eigenvectors%local_data(1, 1), nrow, &
354 0.0_dp, matrix%local_data(1, 1), nrow)
355 outer: DO j = 1, ncol
356 DO i = 1, ncol
357 gold = merge(0.0_dp, 1.0_dp, i /= j)
358 test = matrix%local_data(i, j)
359 eps = abs(test - gold)
360 IF (eps > eps_warning) EXIT outer
361 END DO
362 END DO outer
363#endif
364 IF (eps > eps_warning) THEN
! NOTE(review): orig. line 365 is missing in this scraped copy; from the
! ELSE IF chain it was presumably
! "IF (diag_type == fm_diag_type_scalapack) THEN" -- confirm upstream.
366 diag_type_name = "SYEVD"
367 ELSE IF (diag_type == fm_diag_type_elpa) THEN
368 diag_type_name = "ELPA"
369 ELSE IF (diag_type == fm_diag_type_cusolver) THEN
370 diag_type_name = "CUSOLVER"
371 ELSE IF (diag_type == fm_diag_type_dlaf) THEN
372 diag_type_name = "DLAF"
373 ELSE
374 cpabort("Unknown diag_type")
375 END IF
376 WRITE (unit=output_unit, fmt="(/,T2,A,/,T2,A,I0,A,I0,A,F0.15,/,T2,A,F0.0,A,ES10.3)") &
377 "The eigenvectors returned by "//trim(diag_type_name)//" are not orthonormal", &
378 "Matrix element (", i, ", ", j, ") = ", test, &
379 "The deviation from the expected value ", gold, " is", eps
! A deviation beyond 10x the warning threshold is treated as fatal.
380 IF (eps > eps_abort) THEN
381 cpabort("ERROR in "//routinen//": Check of matrix diagonalization failed")
382 ELSE
383 cpwarn("Check of matrix diagonalization failed in routine "//routinen)
384 END IF
385 END IF
386 END IF
387
388 CALL timestop(handle)
389
390 END SUBROUTINE check_diag
391
392! **************************************************************************************************
393!> \brief Issues an error messages and exits (optionally only warns).
394!> \param mesg message to be issued
395!> \param info error code (optional)
396!> \param warn only warn (optional)
397! **************************************************************************************************
SUBROUTINE cp_fm_error(mesg, info, warn)
   CHARACTER(LEN=*), INTENT(IN) :: mesg
   INTEGER, INTENT(IN), OPTIONAL :: info
   LOGICAL, INTENT(IN), OPTIONAL :: warn

   CHARACTER(LEN=2*default_string_length) :: message
   LOGICAL :: warning

   ! Default behavior is a hard abort; an explicit warn=.TRUE. downgrades
   ! the event to a warning.
   warning = .false.
   IF (PRESENT(warn)) warning = warn

   ! Append the numeric error code to the message when one was supplied.
   IF (PRESENT(info)) THEN
      WRITE (message, "(A,A,I0,A)") mesg, " (INFO = ", info, ")"
   ELSE
      WRITE (message, "(A)") mesg
   END IF

   IF (warning) THEN
      cpwarn(trim(message))
   ELSE
      cpabort(trim(message))
   END IF
END SUBROUTINE cp_fm_error
424
425! **************************************************************************************************
426!> \brief Computes all eigenvalues and vectors of a real symmetric matrix
427!> significantly faster than syevx, scales also much better.
428!> Needs workspace to allocate all the eigenvectors
429!> \param matrix ...
430!> \param eigenvectors ...
431!> \param eigenvalues ...
432!> \param info ...
433!> \par matrix is supposed to be in upper triangular form, and overwritten by this routine
434!> \par info If present returns error code and prevents program stops.
435!> Works currently only for scalapack.
436!> Other solvers will end the program regardless of PRESENT(info).
437! **************************************************************************************************
438 SUBROUTINE cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
439
440 TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
441 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
442 INTEGER, INTENT(OUT), OPTIONAL :: info
443
444 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_syevd'
445
446 INTEGER :: handle, myinfo, n, nmo
447 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: eig
448#if defined(__parallel)
449 TYPE(cp_fm_type) :: eigenvectors_new, matrix_new
450#else
451 INTEGER :: liwork, lwork
452 INTEGER, DIMENSION(:), POINTER :: iwork
453 REAL(kind=dp), DIMENSION(:, :), POINTER :: m
454 REAL(kind=dp), DIMENSION(:), POINTER :: work
! v/w are 1-element targets so the pointers are valid during the LAPACK
! workspace query (lwork = -1) before the real work arrays are allocated.
455 INTEGER, TARGET :: v(1)
456 REAL(kind=dp), TARGET :: w(1)
457#endif
458
459 CALL timeset(routinen, handle)
460
461 myinfo = 0
462
! eig always has the full problem size n; the caller's eigenvalues buffer
! may be shorter (see the copy at the end).
463 n = matrix%matrix_struct%nrow_global
464 ALLOCATE (eig(n))
465
466#if defined(__parallel)
467 ! Determine if the input matrix needs to be redistributed before diagonalization.
468 ! Heuristics are used to determine the optimal number of CPUs for diagonalization.
469 ! The redistributed matrix is stored in matrix_new, which is just a pointer
470 ! to the original matrix if no redistribution is required
471 CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new)
472
473 ! Call scalapack on CPUs that hold the new matrix
474 IF (ASSOCIATED(matrix_new%matrix_struct)) THEN
475 IF (PRESENT(info)) THEN
476 CALL cp_fm_syevd_base(matrix_new, eigenvectors_new, eig, myinfo)
477 ELSE
478 CALL cp_fm_syevd_base(matrix_new, eigenvectors_new, eig)
479 END IF
480 END IF
481 ! Redistribute results and clean up
482 CALL cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)
483#else
484 ! Retrieve the optimal work array sizes first
485 lwork = -1
486 liwork = -1
487 m => matrix%local_data
488 iwork => v
489 work => w
490
491 CALL dsyevd('V', 'U', n, m(1, 1), SIZE(m, 1), eig(1), work(1), lwork, iwork(1), liwork, myinfo)
492
493 IF (myinfo /= 0) THEN
494 CALL cp_fm_error("ERROR in DSYEVD: Work space query failed", myinfo, PRESENT(info))
495 END IF
496
497 ! Reallocate work arrays and perform diagonalisation
498 lwork = nint(work(1))
499 ALLOCATE (work(lwork))
500
501 liwork = iwork(1)
502 ALLOCATE (iwork(liwork))
503
! DSYEVD overwrites the input matrix with the eigenvectors (jobz = 'V').
504 CALL dsyevd('V', 'U', n, m(1, 1), SIZE(m, 1), eig(1), work(1), lwork, iwork(1), liwork, myinfo)
505
506 IF (myinfo /= 0) THEN
507 CALL cp_fm_error("ERROR in DSYEVD: Matrix diagonalization failed", myinfo, PRESENT(info))
508 END IF
509
! Copy the eigenvectors (now stored in "matrix") into the output matrix.
510 CALL cp_fm_to_fm(matrix, eigenvectors)
511
512 DEALLOCATE (iwork)
513 DEALLOCATE (work)
514#endif
515
516 IF (PRESENT(info)) info = myinfo
517
! Copy as many eigenvalues as the caller's buffer can hold.
518 nmo = SIZE(eigenvalues, 1)
519 IF (nmo > n) THEN
520 eigenvalues(1:n) = eig(1:n)
521 ELSE
522 eigenvalues(1:nmo) = eig(1:nmo)
523 END IF
524
525 DEALLOCATE (eig)
526
527 CALL check_diag(matrix, eigenvectors, n)
528
529 CALL timestop(handle)
530
531 END SUBROUTINE cp_fm_syevd
532
533! **************************************************************************************************
534!> \brief ...
535!> \param matrix ...
536!> \param eigenvectors ...
537!> \param eigenvalues ...
538!> \param info ...
539! **************************************************************************************************
540 SUBROUTINE cp_fm_syevd_base(matrix, eigenvectors, eigenvalues, info)
! Low-level PDSYEVD driver: diagonalizes "matrix" into "eigenvectors" and
! "eigenvalues" on the matrix' BLACS context. Serial (non-ScaLAPACK) builds
! report failure via cp_fm_error.
541
542 TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
543 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
544 INTEGER, INTENT(OUT), OPTIONAL :: info
545
546 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_syevd_base'
547
548 INTEGER :: handle, myinfo
549#if defined(__parallel)
550 TYPE(cp_blacs_env_type), POINTER :: context
551 INTEGER :: liwork, lwork, n
552 INTEGER, DIMENSION(9) :: descm, descv
553 INTEGER, DIMENSION(:), POINTER :: iwork
554 REAL(kind=dp), DIMENSION(:), POINTER :: work
555 REAL(kind=dp), DIMENSION(:, :), POINTER :: m, v
! 1-element target so "work" is valid during the lwork = -1 query.
556 REAL(kind=dp), TARGET :: w(1)
557#if defined (__HAS_IEEE_EXCEPTIONS)
558 LOGICAL, DIMENSION(5) :: halt
559#endif
560#endif
561
562 CALL timeset(routinen, handle)
563
564 myinfo = 0
565
566#if defined(__parallel)
567
568 n = matrix%matrix_struct%nrow_global
569 m => matrix%local_data
570 context => matrix%matrix_struct%context
571 descm(:) = matrix%matrix_struct%descriptor(:)
572
573 v => eigenvectors%local_data
574 descv(:) = eigenvectors%matrix_struct%descriptor(:)
575
! liwork per the PDSYEVD documentation: 7*N + 8*NPCOL + 2.
576 liwork = 7*n + 8*context%num_pe(2) + 2
577 ALLOCATE (iwork(liwork))
578
579 ! Work space query
580 lwork = -1
581 work => w
582
583 CALL pdsyevd('V', 'U', n, m(1, 1), 1, 1, descm, eigenvalues(1), v(1, 1), 1, 1, descv, &
584 work(1), lwork, iwork(1), liwork, myinfo)
585
586 IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
587 CALL cp_fm_error("ERROR in PDSYEVD: Work space query failed", myinfo, PRESENT(info))
588 END IF
589
590 lwork = nint(work(1)) ! can be insufficient due to bug in reference ScaLAPACK
591#if !defined(__SCALAPACK_NO_WA)
592 ! Query workspace for QDORMTR as called by reference ScaLAPACK (PDSYEVD).
593 CALL pdormtr('L', 'U', 'N', n, n, m(1, 1), 1, 1, descm, m(1, 1), &
594 v(1, 1), 1, 1, descv, work(1), -1, myinfo)
595
596 IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
597 CALL cp_fm_error("ERROR in PDORMTR: Work space query failed", myinfo, PRESENT(info))
598 END IF
599
! Work-around: enlarge lwork if the PDORMTR estimate (plus 2*N slack)
! exceeds what PDSYEVD reported.
600 IF (lwork < (work(1) + 2*n)) THEN
601 lwork = nint(work(1)) + 2*n ! still wrong by 2*N
602 END IF
603#endif
604 ALLOCATE (work(lwork))
605
606 ! Initial/documented amount of liwork is exceeded (slightly worrisome too).
607 IF (liwork < iwork(1)) THEN
608 liwork = iwork(1)
609 DEALLOCATE (iwork)
610 ALLOCATE (iwork(liwork))
611 END IF
612
613 ! ScaLAPACK takes advantage of IEEE754 exceptions for speedup.
614 ! Therefore, we disable floating point traps temporarily.
615#if defined (__HAS_IEEE_EXCEPTIONS)
616 CALL ieee_get_halting_mode(ieee_all, halt)
617 CALL ieee_set_halting_mode(ieee_all, .false.)
618#endif
619
620 CALL pdsyevd('V', 'U', n, m(1, 1), 1, 1, descm, eigenvalues(1), v(1, 1), 1, 1, descv, &
621 work(1), lwork, iwork(1), liwork, myinfo)
622
623#if defined (__HAS_IEEE_EXCEPTIONS)
624 CALL ieee_set_halting_mode(ieee_all, halt)
625#endif
626 IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
627 CALL cp_fm_error("ERROR in PDSYEVD: Matrix diagonalization failed", myinfo, PRESENT(info))
628 END IF
629
630 IF (PRESENT(info)) info = myinfo
631
632 DEALLOCATE (work)
633 DEALLOCATE (iwork)
634#else
! Serial build: this entry point requires ScaLAPACK; report myinfo = -1.
635 mark_used(matrix)
636 mark_used(eigenvectors)
637 mark_used(eigenvalues)
638 myinfo = -1
639 IF (PRESENT(info)) info = myinfo
640 CALL cp_fm_error("ERROR in "//trim(routinen)// &
641 ": Matrix diagonalization using PDSYEVD requested without ScaLAPACK support")
642#endif
643
644 CALL timestop(handle)
645
646 END SUBROUTINE cp_fm_syevd_base
647
648! **************************************************************************************************
649!> \brief compute eigenvalues and optionally eigenvectors of a real symmetric matrix using scalapack.
650!> If eigenvectors are required this routine will replicate a full matrix on each CPU...
651!> if more than a handful of vectors are needed, use cp_fm_syevd instead
652!> \param matrix ...
653!> \param eigenvectors ...
654!> \param eigenvalues ...
655!> \param neig ...
656!> \param work_syevx ...
657!> \par matrix is supposed to be in upper triangular form, and overwritten by this routine
658!> neig is the number of vectors needed (default all)
659!> work_syevx evec calculation only, is the fraction of the working buffer allowed (1.0 use full buffer)
660!> reducing this saves time, but might cause the routine to fail
661! **************************************************************************************************
662 SUBROUTINE cp_fm_syevx(matrix, eigenvectors, eigenvalues, neig, work_syevx)
663
664 ! Diagonalise the symmetric n by n matrix using the LAPACK library.
665
666 TYPE(cp_fm_type), INTENT(IN) :: matrix
667 TYPE(cp_fm_type), OPTIONAL, INTENT(IN) :: eigenvectors
668 REAL(kind=dp), OPTIONAL, INTENT(IN) :: work_syevx
669 INTEGER, INTENT(IN), OPTIONAL :: neig
670 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
671
672 CHARACTER(LEN=*), PARAMETER :: routinen = "cp_fm_syevx"
673
674#if defined(__parallel)
! orfac < 0 lets PDSYEVX use its default reorthogonalization threshold.
675 REAL(kind=dp), PARAMETER :: orfac = -1.0_dp
676#endif
! vl/vu are unreferenced for range = "I" but must be supplied.
677 REAL(kind=dp), PARAMETER :: vl = 0.0_dp, &
678 vu = 0.0_dp
679
680 TYPE(cp_blacs_env_type), POINTER :: context
681 TYPE(cp_logger_type), POINTER :: logger
682 CHARACTER(LEN=1) :: job_type
683 REAL(kind=dp) :: abstol, work_syevx_local
684 INTEGER :: handle, info, liwork, lwork, &
685 m, n, nb, npcol, nprow, &
686 output_unit, neig_local
687 LOGICAL :: ionode, needs_evecs
688 INTEGER, DIMENSION(:), ALLOCATABLE :: ifail, iwork
689 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: w, work
690 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, z
691
692 REAL(kind=dp), EXTERNAL :: dlamch
693
694#if defined(__parallel)
695 INTEGER :: nn, np0, npe, nq0, nz
696 INTEGER, DIMENSION(9) :: desca, descz
697 INTEGER, DIMENSION(:), ALLOCATABLE :: iclustr
698 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: gap
699 INTEGER, EXTERNAL :: iceil, numroc
700#else
701 INTEGER :: nla, nlz
702 INTEGER, EXTERNAL :: ilaenv
703#endif
704#if defined (__HAS_IEEE_EXCEPTIONS)
705 LOGICAL, DIMENSION(5) :: halt
706#endif
707
708 ! by default all
709 n = matrix%matrix_struct%nrow_global
710 neig_local = n
711 IF (PRESENT(neig)) neig_local = neig
! Nothing to do when zero eigenpairs are requested.
712 IF (neig_local == 0) RETURN
713
714 CALL timeset(routinen, handle)
715
716 needs_evecs = PRESENT(eigenvectors)
717
718 logger => cp_get_default_logger()
719 ionode = logger%para_env%is_source()
720 n = matrix%matrix_struct%nrow_global
721
722 ! by default allocate all needed space
723 work_syevx_local = 1.0_dp
724 IF (PRESENT(work_syevx)) work_syevx_local = work_syevx
725
726 ! set scalapack job type
727 IF (needs_evecs) THEN
728 job_type = "V"
729 ELSE
730 job_type = "N"
731 END IF
732
733 ! target the most accurate calculation of the eigenvalues
734 abstol = 2.0_dp*dlamch("S")
735
736 context => matrix%matrix_struct%context
737 nprow = context%num_pe(1)
738 npcol = context%num_pe(2)
739
740 ALLOCATE (w(n))
741 eigenvalues(:) = 0.0_dp
742#if defined(__parallel)
743
! PDSYEVX requires a square block distribution.
744 IF (matrix%matrix_struct%nrow_block /= matrix%matrix_struct%ncol_block) THEN
745 cpabort("ERROR in "//routinen//": Invalid blocksize (no square blocks) found")
746 END IF
747
748 a => matrix%local_data
749 desca(:) = matrix%matrix_struct%descriptor(:)
750
751 IF (needs_evecs) THEN
752 z => eigenvectors%local_data
753 descz(:) = eigenvectors%matrix_struct%descriptor(:)
754 ELSE
755 ! z will not be referenced
756 z => matrix%local_data
757 descz = desca
758 END IF
759
760 ! Get the optimal work storage size
761
762 npe = nprow*npcol
763 nb = matrix%matrix_struct%nrow_block
764 nn = max(n, nb, 2)
765 np0 = numroc(nn, nb, 0, 0, nprow)
766 nq0 = max(numroc(nn, nb, 0, 0, npcol), nb)
767
! work_syevx_local scales the eigenvector reorthogonalization buffer; a
! value < 1 saves memory at the risk of PDSYEVX failing (see header doc).
768 IF (needs_evecs) THEN
769 lwork = 5*n + max(5*nn, np0*nq0) + iceil(neig_local, npe)*nn + 2*nb*nb + &
770 int(work_syevx_local*real((neig_local - 1)*n, dp)) !!!! allocates a full matrix on every CPU !!!!!
771 ELSE
772 lwork = 5*n + max(5*nn, nb*(np0 + 1))
773 END IF
774 liwork = 6*max(n, npe + 1, 4)
775
776 ALLOCATE (gap(npe))
777 gap = 0.0_dp
778 ALLOCATE (iclustr(2*npe))
779 iclustr = 0
780 ALLOCATE (ifail(n))
781 ifail = 0
782 ALLOCATE (iwork(liwork))
783 ALLOCATE (work(lwork))
784
785 ! ScaLAPACK takes advantage of IEEE754 exceptions for speedup.
786 ! Therefore, we disable floating point traps temporarily.
787#if defined (__HAS_IEEE_EXCEPTIONS)
788 CALL ieee_get_halting_mode(ieee_all, halt)
789 CALL ieee_set_halting_mode(ieee_all, .false.)
790#endif
! m/nz return the number of eigenvalues/eigenvectors actually computed.
791 CALL pdsyevx(job_type, "I", "U", n, a(1, 1), 1, 1, desca, vl, vu, 1, neig_local, abstol, m, nz, w(1), orfac, &
792 z(1, 1), 1, 1, descz, work(1), lwork, iwork(1), liwork, ifail(1), iclustr(1), gap, info)
793#if defined (__HAS_IEEE_EXCEPTIONS)
794 CALL ieee_set_halting_mode(ieee_all, halt)
795#endif
796
797 ! Error handling
798 IF (info /= 0) THEN
799 IF (ionode) THEN
800 output_unit = cp_logger_get_unit_nr(logger, local=.false.)
801 WRITE (unit=output_unit, fmt="(/,(T3,A,T12,1X,I10))") &
802 "info = ", info, &
803 "lwork = ", lwork, &
804 "liwork = ", liwork, &
805 "nz = ", nz
806 IF (info > 0) THEN
807 WRITE (unit=output_unit, fmt="(/,T3,A,(T12,6(1X,I10)))") &
808 "ifail = ", ifail
809 WRITE (unit=output_unit, fmt="(/,T3,A,(T12,6(1X,I10)))") &
810 "iclustr = ", iclustr
811 WRITE (unit=output_unit, fmt="(/,T3,A,(T12,6(1X,E10.3)))") &
812 "gap = ", gap
813 END IF
814 END IF
815 cpabort("ERROR in PDSYEVX (ScaLAPACK)")
816 END IF
817
818 ! Release work storage
819 DEALLOCATE (gap)
820 DEALLOCATE (iclustr)
821
822#else
823
824 a => matrix%local_data
825 IF (needs_evecs) THEN
826 z => eigenvectors%local_data
827 ELSE
828 ! z will not be referenced
829 z => matrix%local_data
830 END IF
831
832 ! Get the optimal work storage size
833
834 nb = max(ilaenv(1, "DSYTRD", "U", n, -1, -1, -1), &
835 ilaenv(1, "DORMTR", "U", n, -1, -1, -1))
836
837 lwork = max((nb + 3)*n, 8*n) + n ! sun bug fix
838 liwork = 5*n
839
840 ALLOCATE (ifail(n))
841 ifail = 0
842 ALLOCATE (iwork(liwork))
843 ALLOCATE (work(lwork))
844 info = 0
845 nla = SIZE(a, 1)
846 nlz = SIZE(z, 1)
847
848 ! LAPACK takes advantage of IEEE754 exceptions for speedup.
849 ! Therefore, we disable floating point traps temporarily.
850#if defined (__HAS_IEEE_EXCEPTIONS)
851 CALL ieee_get_halting_mode(ieee_all, halt)
852 CALL ieee_set_halting_mode(ieee_all, .false.)
853#endif
854 CALL dsyevx(job_type, "I", "U", n, a(1, 1), nla, vl, vu, 1, neig_local, &
855 abstol, m, w, z(1, 1), nlz, work(1), lwork, iwork(1), ifail(1), info)
856#if defined (__HAS_IEEE_EXCEPTIONS)
857 CALL ieee_set_halting_mode(ieee_all, halt)
858#endif
859
860 ! Error handling
861 IF (info /= 0) THEN
862 output_unit = cp_logger_get_unit_nr(logger, local=.false.)
863 WRITE (unit=output_unit, fmt="(/,(T3,A,T12,1X,I10))") &
864 "info = ", info
865 IF (info > 0) THEN
866 WRITE (unit=output_unit, fmt="(/,T3,A,(T12,6(1X,I10)))") &
867 "ifail = ", ifail
868 END IF
869 cpabort("Error in DSYEVX (ScaLAPACK)")
870 END IF
871
872#endif
873 ! Release work storage
874 DEALLOCATE (ifail)
875 DEALLOCATE (iwork)
876 DEALLOCATE (work)
877 eigenvalues(1:neig_local) = w(1:neig_local)
878 DEALLOCATE (w)
879
880 IF (needs_evecs) CALL check_diag(matrix, eigenvectors, neig_local)
881
882 CALL timestop(handle)
883
884 END SUBROUTINE cp_fm_syevx
885
886! **************************************************************************************************
887!> \brief decomposes a quadratic matrix into its singular value decomposition
888!> \param matrix_a ...
889!> \param matrix_eigvl ...
890!> \param matrix_eigvr_t ...
891!> \param eigval ...
892!> \param info ...
893!> \author Maximilian Graml
894! **************************************************************************************************
895 SUBROUTINE cp_fm_svd(matrix_a, matrix_eigvl, matrix_eigvr_t, eigval, info)
896
897 TYPE(cp_fm_type), INTENT(IN) :: matrix_a
898 TYPE(cp_fm_type), INTENT(INOUT) :: matrix_eigvl, matrix_eigvr_t
899 REAL(kind=dp), DIMENSION(:), POINTER, &
900 INTENT(INOUT) :: eigval
901 INTEGER, INTENT(OUT), OPTIONAL :: info
902
903 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_svd'
904
905 INTEGER :: handle, n, m, myinfo, lwork
906 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
907 TYPE(cp_fm_type) :: matrix_lu
908 REAL(kind=dp), DIMENSION(:), POINTER :: work
! 1-element target so "work" is valid during the lwork = -1 query.
909 REAL(kind=dp), TARGET :: w(1)
910#if defined(__parallel)
911 INTEGER, DIMENSION(9) :: desca, descu, descvt
912#endif
913
914 CALL timeset(routinen, handle)
915
! Work on a copy, since (P)DGESVD destroys its input matrix.
916 CALL cp_fm_create(matrix=matrix_lu, &
917 matrix_struct=matrix_a%matrix_struct, &
918 name="A_lu"//trim(adjustl(cp_to_string(1)))//"MATRIX")
919 CALL cp_fm_to_fm(matrix_a, matrix_lu)
920 a => matrix_lu%local_data
921 m = matrix_lu%matrix_struct%nrow_global
922 n = matrix_lu%matrix_struct%ncol_global
923 ! Assert that incoming matrix is quadratic
924 cpassert(m == n)
925
926 ! Prepare for workspace queries
927 myinfo = 0
928 lwork = -1
929 work => w
930#if defined(__parallel)
931 ! To do: That might need a redistribution check as in cp_fm_syevd
932 desca(:) = matrix_lu%matrix_struct%descriptor(:)
933 descu(:) = matrix_eigvl%matrix_struct%descriptor(:)
934 descvt(:) = matrix_eigvr_t%matrix_struct%descriptor(:)
935
936 ! Workspace query
937 CALL pdgesvd('V', 'V', m, m, matrix_lu%local_data, 1, 1, desca, eigval, matrix_eigvl%local_data, &
938 1, 1, descu, matrix_eigvr_t%local_data, 1, 1, descvt, work, lwork, myinfo)
939
940 IF (matrix_lu%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
941 CALL cp_fm_error("ERROR in PDGESVD: Work space query failed", myinfo, PRESENT(info))
942 END IF
943
! NOTE(review): nint() may round the reported optimum down; consider
! ceiling as recommended by (Sca)LAPACK -- TODO confirm.
944 lwork = nint(work(1))
945 ALLOCATE (work(lwork))
946
947 CALL pdgesvd('V', 'V', m, m, matrix_lu%local_data, 1, 1, desca, eigval, matrix_eigvl%local_data, &
948 1, 1, descu, matrix_eigvr_t%local_data, 1, 1, descvt, work, lwork, myinfo)
949
950 IF (matrix_lu%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
951 CALL cp_fm_error("ERROR in PDGESVD: Matrix diagonalization failed", myinfo, PRESENT(info))
952 END IF
953#else
! Serial path uses the thin SVD ('S','S'); for the asserted square input
! this yields the same m columns as the parallel 'V','V' request.
954 ! Workspace query
955 CALL dgesvd('S', 'S', m, m, matrix_lu%local_data, m, eigval, matrix_eigvl%local_data, &
956 m, matrix_eigvr_t%local_data, m, work, lwork, myinfo)
957
958 IF (myinfo /= 0) THEN
959 CALL cp_fm_error("ERROR in DGESVD: Work space query failed", myinfo, PRESENT(info))
960 END IF
961
962 lwork = nint(work(1))
963 ALLOCATE (work(lwork))
964
965 CALL dgesvd('S', 'S', m, m, matrix_lu%local_data, m, eigval, matrix_eigvl%local_data, &
966 m, matrix_eigvr_t%local_data, m, work, lwork, myinfo)
967
968 IF (myinfo /= 0) THEN
969 CALL cp_fm_error("ERROR in DGESVD: Matrix diagonalization failed", myinfo, PRESENT(info))
970 END IF
971
972#endif
973 ! Release intermediary matrices
974 DEALLOCATE (work)
975 CALL cp_fm_release(matrix_lu)
976
977 IF (PRESENT(info)) info = myinfo
978
979 CALL timestop(handle)
980 END SUBROUTINE cp_fm_svd
981
982! **************************************************************************************************
983!> \brief ...
984!> \param matrix ...
985!> \param work ...
986!> \param exponent ...
987!> \param threshold ...
988!> \param n_dependent ...
989!> \param verbose ...
990!> \param eigvals ...
991! **************************************************************************************************
992 SUBROUTINE cp_fm_power(matrix, work, exponent, threshold, n_dependent, verbose, eigvals)
993
994 ! Raise the real symmetric n by n matrix to the power given by
995 ! the exponent. All eigenvectors with a corresponding eigenvalue lower
996 ! than threshold are quenched. result in matrix
997
998 ! - Creation (29.03.1999, Matthias Krack)
999 ! - Parallelised using BLACS and ScaLAPACK (06.06.2001,MK)
1000
1001 TYPE(cp_fm_type), INTENT(IN) :: matrix, work
1002 REAL(kind=dp), INTENT(IN) :: exponent, threshold
1003 INTEGER, INTENT(OUT) :: n_dependent
1004 LOGICAL, INTENT(IN), OPTIONAL :: verbose
1005 REAL(kind=dp), DIMENSION(2), INTENT(OUT), &
1006 OPTIONAL :: eigvals
1007
1008 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_power'
1009
1010 INTEGER :: handle, icol_global, &
1011 mypcol, myprow, &
1012 ncol_global, nrow_global
1013 LOGICAL :: my_verbose
1014 REAL(kind=dp) :: condition_number, f, p
1015 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: eigenvalues
1016 REAL(kind=dp), DIMENSION(:, :), POINTER :: eigenvectors
1017 TYPE(cp_blacs_env_type), POINTER :: context
1018
1019#if defined(__parallel)
1020 INTEGER :: icol_local, ipcol, iprow, irow_global, irow_local
1021#endif
1022
1023 CALL timeset(routinen, handle)
1024
1025 my_verbose = .false.
1026 IF (PRESENT(verbose)) my_verbose = verbose
1027
1028 context => matrix%matrix_struct%context
1029 myprow = context%mepos(1)
1030 mypcol = context%mepos(2)
1031 n_dependent = 0
1032 p = 0.5_dp*exponent
1033
1034 nrow_global = matrix%matrix_struct%nrow_global
1035 ncol_global = matrix%matrix_struct%ncol_global
1036
1037 ALLOCATE (eigenvalues(ncol_global))
1038 eigenvalues(:) = 0.0_dp
1039
1040 ! Compute the eigenvectors and eigenvalues
1041
1042 CALL choose_eigv_solver(matrix, work, eigenvalues)
1043
1044 IF (PRESENT(eigvals)) THEN
1045 eigvals(1) = eigenvalues(1)
1046 eigvals(2) = eigenvalues(ncol_global)
1047 END IF
1048
1049#if defined(__parallel)
1050 eigenvectors => work%local_data
1051
1052 ! Build matrix**exponent with eigenvector quenching
1053
1054 DO icol_global = 1, ncol_global
1055
1056 IF (eigenvalues(icol_global) < threshold) THEN
1057
1058 n_dependent = n_dependent + 1
1059
1060 ipcol = work%matrix_struct%g2p_col(icol_global)
1061
1062 IF (mypcol == ipcol) THEN
1063 icol_local = work%matrix_struct%g2l_col(icol_global)
1064 DO irow_global = 1, nrow_global
1065 iprow = work%matrix_struct%g2p_row(irow_global)
1066 IF (myprow == iprow) THEN
1067 irow_local = work%matrix_struct%g2l_row(irow_global)
1068 eigenvectors(irow_local, icol_local) = 0.0_dp
1069 END IF
1070 END DO
1071 END IF
1072
1073 ELSE
1074
1075 f = eigenvalues(icol_global)**p
1076
1077 ipcol = work%matrix_struct%g2p_col(icol_global)
1078
1079 IF (mypcol == ipcol) THEN
1080 icol_local = work%matrix_struct%g2l_col(icol_global)
1081 DO irow_global = 1, nrow_global
1082 iprow = work%matrix_struct%g2p_row(irow_global)
1083 IF (myprow == iprow) THEN
1084 irow_local = work%matrix_struct%g2l_row(irow_global)
1085 eigenvectors(irow_local, icol_local) = &
1086 f*eigenvectors(irow_local, icol_local)
1087 END IF
1088 END DO
1089 END IF
1090
1091 END IF
1092
1093 END DO
1094
1095#else
1096
1097 eigenvectors => work%local_data
1098
1099 ! Build matrix**exponent with eigenvector quenching
1100
1101 DO icol_global = 1, ncol_global
1102
1103 IF (eigenvalues(icol_global) < threshold) THEN
1104
1105 n_dependent = n_dependent + 1
1106 eigenvectors(1:nrow_global, icol_global) = 0.0_dp
1107
1108 ELSE
1109
1110 f = eigenvalues(icol_global)**p
1111 eigenvectors(1:nrow_global, icol_global) = &
1112 f*eigenvectors(1:nrow_global, icol_global)
1113
1114 END IF
1115
1116 END DO
1117
1118#endif
1119 CALL cp_fm_syrk("U", "N", ncol_global, 1.0_dp, work, 1, 1, 0.0_dp, matrix)
1120 CALL cp_fm_uplo_to_full(matrix, work)
1121
1122 ! Print some warnings/notes
1123 IF (matrix%matrix_struct%para_env%is_source() .AND. my_verbose) THEN
1124 condition_number = abs(eigenvalues(ncol_global)/eigenvalues(1))
1125 WRITE (unit=cp_logger_get_default_unit_nr(), fmt="(/,(T2,A,ES15.6))") &
1126 "CP_FM_POWER: smallest eigenvalue:", eigenvalues(1), &
1127 "CP_FM_POWER: largest eigenvalue: ", eigenvalues(ncol_global), &
1128 "CP_FM_POWER: condition number: ", condition_number
1129 IF (eigenvalues(1) <= 0.0_dp) THEN
1130 WRITE (unit=cp_logger_get_default_unit_nr(), fmt="(/,T2,A)") &
1131 "WARNING: matrix has a negative eigenvalue, tighten EPS_DEFAULT"
1132 END IF
1133 IF (condition_number > 1.0e12_dp) THEN
1134 WRITE (unit=cp_logger_get_default_unit_nr(), fmt="(/,T2,A)") &
1135 "WARNING: high condition number => possibly ill-conditioned matrix"
1136 END IF
1137 END IF
1138
1139 DEALLOCATE (eigenvalues)
1140
1141 CALL timestop(handle)
1142
1143 END SUBROUTINE cp_fm_power
1144
1145! **************************************************************************************************
1146!> \brief ...
1147!> \param matrix ...
1148!> \param eigenvectors ...
1149!> \param eigval ...
1150!> \param thresh ...
1151!> \param start_sec_block ...
1152! **************************************************************************************************
1153 SUBROUTINE cp_fm_block_jacobi(matrix, eigenvectors, eigval, thresh, start_sec_block)
1154
1155 ! Calculates block diagonalization of a full symmetric matrix
1156 ! It has its origin in cp_fm_syevx. This routine rotates only elements
1157 ! which are larger than a threshold values "thresh".
1158 ! start_sec_block is the start of the second block.
1159 ! IT DOES ONLY ONE SWEEP!
1160
1161 ! - Creation (07.10.2002, Martin Fengler)
1162 ! - Cosmetics (05.04.06, MK)
1163
1164 TYPE(cp_fm_type), INTENT(IN) :: eigenvectors, matrix
1165 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: eigval
1166 INTEGER, INTENT(IN) :: start_sec_block
1167 REAL(kind=dp), INTENT(IN) :: thresh
1168
1169 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_block_jacobi'
1170
1171 INTEGER :: handle
1172 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, ev
1173
1174 REAL(kind=dp) :: tan_theta, tau, c, s
1175 INTEGER :: q, p, n
1176 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: c_ip
1177
1178#if defined(__parallel)
1179 TYPE(cp_blacs_env_type), POINTER :: context
1180
1181 INTEGER :: nprow, npcol, block_dim_row, block_dim_col, info, &
1182 ev_row_block_size, iam, mynumrows, mype, npe, q_loc
1183 REAL(kind=dp), DIMENSION(:, :), ALLOCATABLE :: a_loc, ev_loc
1184 INTEGER, DIMENSION(9) :: desca, descz, &
1185 desc_a_block, &
1186 desc_ev_loc
1187 TYPE(mp_comm_type):: allgrp
1188 TYPE(cp_blacs_type) :: ictxt_loc
1189 INTEGER, EXTERNAL :: numroc
1190#endif
1191
1192 ! -------------------------------------------------------------------------
1193
1194 CALL timeset(routinen, handle)
1195
1196#if defined(__parallel)
1197 context => matrix%matrix_struct%context
1198 allgrp = matrix%matrix_struct%para_env
1199
1200 nprow = context%num_pe(1)
1201 npcol = context%num_pe(2)
1202
1203 n = matrix%matrix_struct%nrow_global
1204
1205 a => matrix%local_data
1206 desca(:) = matrix%matrix_struct%descriptor(:)
1207 ev => eigenvectors%local_data
1208 descz(:) = eigenvectors%matrix_struct%descriptor(:)
1209
1210 ! Copy the block to be rotated to the master process firstly and broadcast to all processes
1211 ! start_sec_block defines where the second block starts!
1212 ! Block will be processed together with the OO block
1213
1214 block_dim_row = start_sec_block - 1
1215 block_dim_col = n - block_dim_row
1216 ALLOCATE (a_loc(block_dim_row, block_dim_col))
1217
1218 mype = matrix%matrix_struct%para_env%mepos
1219 npe = matrix%matrix_struct%para_env%num_pe
1220 ! Get a new context
1221 CALL ictxt_loc%gridinit(matrix%matrix_struct%para_env, 'Row-major', nprow*npcol, 1)
1222
1223 CALL descinit(desc_a_block, block_dim_row, block_dim_col, block_dim_row, &
1224 block_dim_col, 0, 0, ictxt_loc%get_handle(), block_dim_row, info)
1225
1226 CALL pdgemr2d(block_dim_row, block_dim_col, a, 1, start_sec_block, desca, &
1227 a_loc, 1, 1, desc_a_block, context%get_handle())
1228 ! Only the master (root) process received data yet
1229 CALL allgrp%bcast(a_loc, 0)
1230
1231 ! Since each process owns now the upper block, the eigenvectors can be re-sorted in such a way that
1232 ! each process has a NN*1 grid, i.e. the process owns a bunch of rows which can be modified locally
1233
1234 ! Initialize distribution of the eigenvectors
1235 iam = mype
1236 ev_row_block_size = n/(nprow*npcol)
1237 mynumrows = numroc(n, ev_row_block_size, iam, 0, nprow*npcol)
1238
1239 ALLOCATE (ev_loc(mynumrows, n), c_ip(mynumrows))
1240
1241 CALL descinit(desc_ev_loc, n, n, ev_row_block_size, n, 0, 0, ictxt_loc%get_handle(), &
1242 mynumrows, info)
1243
1244 CALL pdgemr2d(n, n, ev, 1, 1, descz, ev_loc, 1, 1, desc_ev_loc, context%get_handle())
1245
1246 ! Start block diagonalization of matrix
1247
1248 q_loc = 0
1249
1250 DO q = start_sec_block, n
1251 q_loc = q_loc + 1
1252 DO p = 1, (start_sec_block - 1)
1253
1254 IF (abs(a_loc(p, q_loc)) > thresh) THEN
1255
1256 tau = (eigval(q) - eigval(p))/(2.0_dp*a_loc(p, q_loc))
1257
1258 tan_theta = sign(1.0_dp, tau)/(abs(tau) + sqrt(1.0_dp + tau*tau))
1259
1260 ! Cos(theta)
1261 c = 1.0_dp/sqrt(1.0_dp + tan_theta*tan_theta)
1262 s = tan_theta*c
1263
1264 ! Calculate eigenvectors: Q*J
1265 ! c_ip = c*EV_loc(:,p) - s*EV_loc(:,q)
1266 ! c_iq = s*EV_loc(:,p) + c*EV_loc(:,q)
1267 ! EV(:,p) = c_ip
1268 ! EV(:,q) = c_iq
1269 CALL dcopy(mynumrows, ev_loc(1, p), 1, c_ip(1), 1)
1270 CALL dscal(mynumrows, c, ev_loc(1, p), 1)
1271 CALL daxpy(mynumrows, -s, ev_loc(1, q), 1, ev_loc(1, p), 1)
1272 CALL dscal(mynumrows, c, ev_loc(1, q), 1)
1273 CALL daxpy(mynumrows, s, c_ip(1), 1, ev_loc(1, q), 1)
1274
1275 END IF
1276
1277 END DO
1278 END DO
1279
1280 ! Copy eigenvectors back to the original distribution
1281 CALL pdgemr2d(n, n, ev_loc, 1, 1, desc_ev_loc, ev, 1, 1, descz, context%get_handle())
1282
1283 ! Release work storage
1284 DEALLOCATE (a_loc, ev_loc, c_ip)
1285
1286 CALL ictxt_loc%gridexit()
1287
1288#else
1289
1290 n = matrix%matrix_struct%nrow_global
1291
1292 ALLOCATE (c_ip(n)) ! Local eigenvalue vector
1293
1294 a => matrix%local_data ! Contains the Matrix to be worked on
1295 ev => eigenvectors%local_data ! Contains the eigenvectors up to blocksize, rest is garbage
1296
1297 ! Start matrix diagonalization
1298
1299 tan_theta = 0.0_dp
1300 tau = 0.0_dp
1301
1302 DO q = start_sec_block, n
1303 DO p = 1, (start_sec_block - 1)
1304
1305 IF (abs(a(p, q)) > thresh) THEN
1306
1307 tau = (eigval(q) - eigval(p))/(2.0_dp*a(p, q))
1308
1309 tan_theta = sign(1.0_dp, tau)/(abs(tau) + sqrt(1.0_dp + tau*tau))
1310
1311 ! Cos(theta)
1312 c = 1.0_dp/sqrt(1.0_dp + tan_theta*tan_theta)
1313 s = tan_theta*c
1314
1315 ! Calculate eigenvectors: Q*J
1316 ! c_ip = c*EV(:,p) - s*EV(:,q)
1317 ! c_iq = s*EV(:,p) + c*EV(:,q)
1318 ! EV(:,p) = c_ip
1319 ! EV(:,q) = c_iq
1320 CALL dcopy(n, ev(1, p), 1, c_ip(1), 1)
1321 CALL dscal(n, c, ev(1, p), 1)
1322 CALL daxpy(n, -s, ev(1, q), 1, ev(1, p), 1)
1323 CALL dscal(n, c, ev(1, q), 1)
1324 CALL daxpy(n, s, c_ip(1), 1, ev(1, q), 1)
1325
1326 END IF
1327
1328 END DO
1329 END DO
1330
1331 ! Release work storage
1332
1333 DEALLOCATE (c_ip)
1334
1335#endif
1336
1337 CALL timestop(handle)
1338
1339 END SUBROUTINE cp_fm_block_jacobi
1340
1341! **************************************************************************************************
1342!> \brief General Eigenvalue Problem AX = BXE
1343!> Single option version: Cholesky decomposition of B
1344!> \param amatrix ...
1345!> \param bmatrix ...
1346!> \param eigenvectors ...
1347!> \param eigenvalues ...
1348!> \param work ...
1349! **************************************************************************************************
1350 SUBROUTINE cp_fm_geeig(amatrix, bmatrix, eigenvectors, eigenvalues, work)
1351
1352 TYPE(cp_fm_type), INTENT(IN) :: amatrix, bmatrix, eigenvectors
1353 REAL(kind=dp), DIMENSION(:) :: eigenvalues
1354 TYPE(cp_fm_type), INTENT(IN) :: work
1355
1356 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_geeig'
1357
1358 INTEGER :: handle, nao, nmo
1359
1360 CALL timeset(routinen, handle)
1361
1362 CALL cp_fm_get_info(amatrix, nrow_global=nao)
1363 nmo = SIZE(eigenvalues)
1364
1365 IF (diag_type == fm_diag_type_cusolver .AND. nao >= 64) THEN
1366 ! Use cuSolverMP generalized eigenvalue solver for large matrices
1367 CALL cp_fm_general_cusolver(amatrix, bmatrix, eigenvectors, eigenvalues)
1368#if defined(__DLAF)
1369 ELSE IF (diag_type == fm_diag_type_dlaf .AND. amatrix%matrix_struct%nrow_global >= dlaf_neigvec_min) THEN
1370 ! Use DLA-Future generalized eigenvalue solver for large matrices
1371 CALL cp_fm_diag_gen_dlaf(amatrix, bmatrix, eigenvectors, eigenvalues)
1372#endif
1373 ELSE
1374 ! Cholesky decompose S=U(T)U
1375 CALL cp_fm_cholesky_decompose(bmatrix)
1376 ! Invert to get U^(-1)
1377 CALL cp_fm_triangular_invert(bmatrix)
1378 ! Reduce to get U^(-T) * H * U^(-1)
1379 CALL cp_fm_triangular_multiply(bmatrix, amatrix, side="R")
1380 CALL cp_fm_triangular_multiply(bmatrix, amatrix, transpose_tr=.true.)
1381 ! Diagonalize
1382 CALL choose_eigv_solver(matrix=amatrix, eigenvectors=work, &
1383 eigenvalues=eigenvalues)
1384 ! Restore vectors C = U^(-1) * C*
1385 CALL cp_fm_triangular_multiply(bmatrix, work)
1386 CALL cp_fm_to_fm(work, eigenvectors, nmo)
1387 END IF
1388
1389 CALL timestop(handle)
1390
1391 END SUBROUTINE cp_fm_geeig
1392
1393! **************************************************************************************************
1394!> \brief General Eigenvalue Problem AX = BXE
1395!> Use canonical diagonalization : U*s**(-1/2)
1396!> \param amatrix ...
1397!> \param bmatrix ...
1398!> \param eigenvectors ...
1399!> \param eigenvalues ...
1400!> \param work ...
1401!> \param epseig ...
1402! **************************************************************************************************
1403 SUBROUTINE cp_fm_geeig_canon(amatrix, bmatrix, eigenvectors, eigenvalues, work, epseig)
1404
1405 TYPE(cp_fm_type), INTENT(IN) :: amatrix, bmatrix, eigenvectors
1406 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
1407 TYPE(cp_fm_type), INTENT(IN) :: work
1408 REAL(kind=dp), INTENT(IN) :: epseig
1409
1410 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_geeig_canon'
1411
1412 INTEGER :: handle, i, icol, irow, nao, nc, ncol, &
1413 nmo, nx
1414 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: evals
1415
1416 CALL timeset(routinen, handle)
1417
1418 ! Test sizees
1419 CALL cp_fm_get_info(amatrix, nrow_global=nao)
1420 nmo = SIZE(eigenvalues)
1421 ALLOCATE (evals(nao))
1422
1423 ! Diagonalize -S matrix, this way the NULL space is at the end of the spectrum
1424 CALL cp_fm_scale(-1.0_dp, bmatrix)
1425 CALL choose_eigv_solver(matrix=bmatrix, eigenvectors=work, eigenvalues=evals)
1426 evals(:) = -evals(:)
1427 nc = nao
1428 DO i = 1, nao
1429 IF (evals(i) < epseig) THEN
1430 nc = i - 1
1431 EXIT
1432 END IF
1433 END DO
1434 cpassert(nc /= 0)
1435
1436 IF (nc /= nao) THEN
1437 IF (nc < nmo) THEN
1438 ! Copy NULL space definition to last vectors of eigenvectors (if needed)
1439 ncol = nmo - nc
1440 CALL cp_fm_to_fm(work, eigenvectors, ncol, nc + 1, nc + 1)
1441 END IF
1442 ! Set NULL space in eigenvector matrix of S to zero
1443 DO icol = nc + 1, nao
1444 DO irow = 1, nao
1445 CALL cp_fm_set_element(work, irow, icol, 0.0_dp)
1446 END DO
1447 END DO
1448 ! Set small eigenvalues to a dummy save value
1449 evals(nc + 1:nao) = 1.0_dp
1450 END IF
1451 ! Calculate U*s**(-1/2)
1452 evals(:) = 1.0_dp/sqrt(evals(:))
1453 CALL cp_fm_column_scale(work, evals)
1454 ! Reduce to get U^(-T) * H * U^(-1)
1455 CALL cp_fm_gemm("T", "N", nao, nao, nao, 1.0_dp, work, amatrix, 0.0_dp, bmatrix)
1456 CALL cp_fm_gemm("N", "N", nao, nao, nao, 1.0_dp, bmatrix, work, 0.0_dp, amatrix)
1457 IF (nc /= nao) THEN
1458 ! set diagonal values to save large value
1459 DO icol = nc + 1, nao
1460 CALL cp_fm_set_element(amatrix, icol, icol, 10000.0_dp)
1461 END DO
1462 END IF
1463 ! Diagonalize
1464 CALL choose_eigv_solver(matrix=amatrix, eigenvectors=bmatrix, eigenvalues=eigenvalues)
1465 nx = min(nc, nmo)
1466 ! Restore vectors C = U^(-1) * C*
1467 CALL cp_fm_gemm("N", "N", nao, nx, nc, 1.0_dp, work, bmatrix, 0.0_dp, eigenvectors)
1468
1469 DEALLOCATE (evals)
1470
1471 CALL timestop(handle)
1472
1473 END SUBROUTINE cp_fm_geeig_canon
1474
1475END MODULE cp_fm_diag
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
methods related to the blacs parallel environment
wrappers for the actual blacs calls. all functionality needed in the code should actually be provide ...
subroutine, public cp_dlaf_finalize()
Finalize DLA-Future and pika runtime.
subroutine, public cp_dlaf_initialize()
Initialize DLA-Future and pika runtime.
Basic linear algebra operations for full matrices.
subroutine, public cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
subroutine, public cp_fm_column_scale(matrixa, scaling)
scales column i of matrix a with scaling(i)
subroutine, public cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
performs a rank-k update of a symmetric matrix_c matrix_c = beta * matrix_c + alpha * matrix_a * tran...
subroutine, public cp_fm_uplo_to_full(matrix, work, uplo)
given a triangular matrix according to uplo, computes the corresponding full matrix
subroutine, public cp_fm_scale(alpha, matrix_a)
scales a matrix matrix_a = alpha * matrix_b
subroutine, public cp_fm_triangular_invert(matrix_a, uplo_tr)
inverts a triangular matrix
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_decompose(matrix, n, info_out)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
Wrapper for cuSOLVERMp.
subroutine, public cp_fm_general_cusolver(amatrix, bmatrix, eigenvectors, eigenvalues)
Driver routine to solve generalized eigenvalue problem A*x = lambda*B*x with cuSOLVERMp.
subroutine, public cp_fm_diag_cusolver(matrix, eigenvectors, eigenvalues)
Driver routine to diagonalize a FM matrix with the cuSOLVERMp library.
Auxiliary tools to redistribute cp_fm_type matrices before and after diagonalization....
subroutine, public cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)
Redistributes eigenvectors and eigenvalues back to the original communicator group.
subroutine, public cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, caller_is_elpa, redist_info)
Determines the optimal number of CPUs for matrix diagonalization and redistributes the input matrices...
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
Definition cp_fm_diag.F:17
subroutine, public cp_fm_block_jacobi(matrix, eigenvectors, eigval, thresh, start_sec_block)
...
real(kind=dp), parameter, public eps_check_diag_default
Definition cp_fm_diag.F:83
subroutine, public diag_init(diag_lib, fallback_applied, elpa_kernel, elpa_neigvec_min_input, elpa_qr, elpa_print, elpa_one_stage, dlaf_neigvec_min_input, eps_check_diag_input)
Setup the diagonalization library to be used.
Definition cp_fm_diag.F:147
subroutine, public cp_fm_power(matrix, work, exponent, threshold, n_dependent, verbose, eigvals)
...
Definition cp_fm_diag.F:993
integer, parameter, public fm_diag_type_cusolver
Definition cp_fm_diag.F:101
subroutine, public cp_fm_svd(matrix_a, matrix_eigvl, matrix_eigvr_t, eigval, info)
decomposes a quadratic matrix into its singular value decomposition
Definition cp_fm_diag.F:896
integer, parameter, public fm_diag_type_dlaf
Definition cp_fm_diag.F:101
integer, parameter, public fm_diag_type_scalapack
Definition cp_fm_diag.F:101
subroutine, public cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
Computes all eigenvalues and vectors of a real symmetric matrix significantly faster than syevx,...
Definition cp_fm_diag.F:439
subroutine, public diag_finalize()
Finalize the diagonalization library.
Definition cp_fm_diag.F:206
integer, parameter, public fm_diag_type_default
Definition cp_fm_diag.F:110
subroutine, public cp_fm_geeig(amatrix, bmatrix, eigenvectors, eigenvalues, work)
General Eigenvalue Problem AX = BXE Single option version: Cholesky decomposition of B.
subroutine, public choose_eigv_solver(matrix, eigenvectors, eigenvalues, info)
Choose the Eigensolver depending on which library is available ELPA seems to be unstable for small sy...
Definition cp_fm_diag.F:230
integer, save, public diag_type
Definition cp_fm_diag.F:87
integer, parameter, public fm_diag_type_elpa
Definition cp_fm_diag.F:101
subroutine, public cp_fm_geeig_canon(amatrix, bmatrix, eigenvectors, eigenvalues, work, epseig)
General Eigenvalue Problem AX = BXE Use canonical diagonalization : U*s**(-1/2)
subroutine, public cp_fm_syevx(matrix, eigenvectors, eigenvalues, neig, work_syevx)
compute eigenvalues and optionally eigenvectors of a real symmetric matrix using scalapack....
Definition cp_fm_diag.F:663
subroutine, public cp_fm_diag_dlaf(matrix, eigenvectors, eigenvalues)
...
subroutine, public cp_fm_diag_gen_dlaf(a_matrix, b_matrix, eigenvectors, eigenvalues)
...
Wrapper for ELPA.
Definition cp_fm_elpa.F:12
subroutine, public set_elpa_kernel(requested_kernel)
Sets the active ELPA kernel.
Definition cp_fm_elpa.F:224
logical, save, public elpa_qr
Definition cp_fm_elpa.F:161
subroutine, public cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
Driver routine to diagonalize a FM matrix with the ELPA library.
Definition cp_fm_elpa.F:276
logical, save, public elpa_print
Definition cp_fm_elpa.F:161
subroutine, public finalize_elpa_library()
Finalize the ELPA library.
Definition cp_fm_elpa.F:212
logical, save, public elpa_one_stage
Definition cp_fm_elpa.F:168
subroutine, public initialize_elpa_library(one_stage, qr, should_print)
Initialize the ELPA library.
Definition cp_fm_elpa.F:192
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
logical function, public cp_fm_struct_equivalent(fmstruct1, fmstruct2)
returns true if the two matrix structures are equivalent, false otherwise.
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp, nrow, ncol, set_zero)
creates a new full matrix with the given structure
subroutine, public cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
copy just a part ot the matrix
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_set_element(matrix, irow_global, icol_global, alpha)
sets an element of a matrix
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
integer function, public cp_logger_get_unit_nr(logger, local)
returns the unit nr for the requested kind of log.
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public default_string_length
Definition kinds.F:57
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
integer, parameter, public default_output_unit
Definition machine.F:58
subroutine, public m_memory(mem)
Returns the total amount of memory [bytes] in use, if known, zero otherwise.
Definition machine.F:452
Interface to the message passing library MPI.
represent a blacs multidimensional parallel environment (for the mpi counterpart see cp_paratypes/m...
keeps the information about the structure of a full matrix
represent a full matrix
type of a logger, at the moment it contains just a print level starting at which level it should be l...