(git:b195825)
cp_fm_elpa.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \brief Wrapper for ELPA
10 !> \author Ole Schuett
11 ! **************************************************************************************************
12 MODULE cp_fm_elpa
13  USE cp_log_handling, ONLY: cp_to_string
14  USE machine, ONLY: m_cpuid, &
15  machine_x86, &
20  USE cp_blacs_env, ONLY: cp_blacs_env_type
26  USE cp_fm_types, ONLY: cp_fm_type, &
27  cp_fm_to_fm, &
28  cp_fm_release, &
29  cp_fm_create, &
33  cp_logger_type
34  USE kinds, ONLY: default_string_length, &
35  dp
36  USE message_passing, ONLY: mp_comm_type
37  USE omp_lib, ONLY: omp_get_max_threads
38 
39 #include "../base/base_uses.f90"
40 
41 #if defined (__ELPA)
42  USE elpa_constants, ONLY: elpa_2stage_real_invalid, &
43  elpa_2stage_real_default, &
44  elpa_2stage_real_generic, &
45  elpa_2stage_real_generic_simple, &
46  elpa_2stage_real_bgp, &
47  elpa_2stage_real_bgq, &
48  elpa_2stage_real_sse_assembly, &
49  elpa_2stage_real_sse_block2, &
50  elpa_2stage_real_sse_block4, &
51  elpa_2stage_real_sse_block6, &
52  elpa_2stage_real_avx_block2, &
53  elpa_2stage_real_avx_block4, &
54  elpa_2stage_real_avx_block6, &
55  elpa_2stage_real_avx2_block2, &
56  elpa_2stage_real_avx2_block4, &
57  elpa_2stage_real_avx2_block6, &
58  elpa_2stage_real_avx512_block2, &
59  elpa_2stage_real_avx512_block4, &
60  elpa_2stage_real_avx512_block6, &
61  elpa_2stage_real_nvidia_gpu, &
62  elpa_2stage_real_amd_gpu, &
63  elpa_2stage_real_intel_gpu_sycl
64 
65  USE elpa, ONLY: elpa_t, elpa_solver_2stage, &
66  elpa_init, elpa_uninit, &
67  elpa_allocate, elpa_deallocate, elpa_ok
68 #endif
69 
70  IMPLICIT NONE
71 
72  PRIVATE
73 
74  CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_elpa'
75 
#if defined(__ELPA)
   ! Kernel identifiers that can be handed to set_elpa_kernel. The first entry stands for
   ! automatic selection. elpa_kernel_names and elpa_kernel_descriptions are index-aligned
   ! with this list, i.e. entry i of each table refers to the same kernel.
   INTEGER, PARAMETER :: elpa_kernel_ids(21) = [ &
                         elpa_2stage_real_invalid, & ! auto
                         elpa_2stage_real_generic, &
                         elpa_2stage_real_generic_simple, &
                         elpa_2stage_real_bgp, &
                         elpa_2stage_real_bgq, &
                         elpa_2stage_real_sse_assembly, &
                         elpa_2stage_real_sse_block2, &
                         elpa_2stage_real_sse_block4, &
                         elpa_2stage_real_sse_block6, &
                         elpa_2stage_real_avx_block2, &
                         elpa_2stage_real_avx_block4, &
                         elpa_2stage_real_avx_block6, &
                         elpa_2stage_real_avx2_block2, &
                         elpa_2stage_real_avx2_block4, &
                         elpa_2stage_real_avx2_block6, &
                         elpa_2stage_real_avx512_block2, &
                         elpa_2stage_real_avx512_block4, &
                         elpa_2stage_real_avx512_block6, &
                         elpa_2stage_real_nvidia_gpu, &
                         elpa_2stage_real_amd_gpu, &
                         elpa_2stage_real_intel_gpu_sycl]

   ! Short kernel names, one per entry of elpa_kernel_ids.
   CHARACTER(LEN=14), PARAMETER :: elpa_kernel_names(SIZE(elpa_kernel_ids)) = &
                                   [CHARACTER(LEN=14) :: &
                                    "AUTO", &
                                    "GENERIC", &
                                    "GENERIC_SIMPLE", &
                                    "BGP", &
                                    "BGQ", &
                                    "SSE", &
                                    "SSE_BLOCK2", &
                                    "SSE_BLOCK4", &
                                    "SSE_BLOCK6", &
                                    "AVX_BLOCK2", &
                                    "AVX_BLOCK4", &
                                    "AVX_BLOCK6", &
                                    "AVX2_BLOCK2", &
                                    "AVX2_BLOCK4", &
                                    "AVX2_BLOCK6", &
                                    "AVX512_BLOCK2", &
                                    "AVX512_BLOCK4", &
                                    "AVX512_BLOCK6", &
                                    "NVIDIA_GPU", &
                                    "AMD_GPU", &
                                    "INTEL_GPU"]

   ! Human-readable kernel descriptions, one per entry of elpa_kernel_ids.
   CHARACTER(LEN=44), PARAMETER :: elpa_kernel_descriptions(SIZE(elpa_kernel_ids)) = &
                                   [CHARACTER(LEN=44) :: &
                                    "Automatically selected kernel", &
                                    "Generic kernel", &
                                    "Simplified generic kernel", &
                                    "Kernel optimized for IBM BGP", &
                                    "Kernel optimized for IBM BGQ", &
                                    "Kernel optimized for x86_64/SSE", &
                                    "Kernel optimized for x86_64/SSE (block=2)", &
                                    "Kernel optimized for x86_64/SSE (block=4)", &
                                    "Kernel optimized for x86_64/SSE (block=6)", &
                                    "Kernel optimized for Intel AVX (block=2)", &
                                    "Kernel optimized for Intel AVX (block=4)", &
                                    "Kernel optimized for Intel AVX (block=6)", &
                                    "Kernel optimized for Intel AVX2 (block=2)", &
                                    "Kernel optimized for Intel AVX2 (block=4)", &
                                    "Kernel optimized for Intel AVX2 (block=6)", &
                                    "Kernel optimized for Intel AVX-512 (block=2)", &
                                    "Kernel optimized for Intel AVX-512 (block=4)", &
                                    "Kernel optimized for Intel AVX-512 (block=6)", &
                                    "Kernel targeting Nvidia GPUs", &
                                    "Kernel targeting AMD GPUs", &
                                    "Kernel targeting Intel GPUs"]

   ! Kernel used by the diagonalization routine; starts out as the AUTO entry and is
   ! changed through set_elpa_kernel.
   INTEGER, SAVE :: elpa_kernel = elpa_kernel_ids(1) ! auto

#else
   ! Without ELPA only a single placeholder entry is provided in each table.
   INTEGER, PARAMETER :: elpa_kernel_ids(1) = [-1]
   CHARACTER(LEN=14), PARAMETER :: elpa_kernel_names(1) = ["AUTO"]
   CHARACTER(LEN=44), PARAMETER :: elpa_kernel_descriptions(1) = ["Automatically selected kernel"]
#endif

   ! Module state: elpa_qr and elpa_qr_unsafe are set by set_elpa_qr,
   ! elpa_should_print is set by set_elpa_print.
   LOGICAL, SAVE :: elpa_qr = .FALSE.
   LOGICAL, SAVE :: elpa_qr_unsafe = .FALSE.
   LOGICAL, SAVE :: elpa_should_print = .FALSE.
160 
161  PUBLIC :: cp_fm_diag_elpa, &
162  set_elpa_kernel, &
163  set_elpa_qr, &
164  set_elpa_print, &
165  elpa_kernel_ids, &
166  elpa_kernel_names, &
167  elpa_kernel_descriptions, &
170 
171 CONTAINS
172 
173 ! **************************************************************************************************
174 !> \brief Initialize the ELPA library
175 ! **************************************************************************************************
177 #if defined(__ELPA)
178  IF (elpa_init(20180525) /= elpa_ok) &
179  cpabort("The linked ELPA library does not support the required API version")
180 #else
181  cpabort("Initialization of ELPA library requested but not enabled during build")
182 #endif
183  END SUBROUTINE
184 
185 ! **************************************************************************************************
186 !> \brief Finalize the ELPA library
187 ! **************************************************************************************************
189 #if defined(__ELPA)
190  CALL elpa_uninit()
191 #else
192  cpabort("Finalization of ELPA library requested but not enabled during build")
193 #endif
194  END SUBROUTINE
195 
! **************************************************************************************************
!> \brief Selects the ELPA kernel that subsequent diagonalizations will use.
!> \param requested_kernel one of the elpa_kernel_ids; the AUTO entry triggers automatic selection
!> \note AUTO is resolved in this order: a CPU kernel matching the result of m_cpuid(), then a
!>       GPU kernel if one was enabled at build time (this overrides the CPU choice), and
!>       finally ELPA's default kernel if nothing was selected. Any other value is stored as is.
!>       Without ELPA support the argument is only marked as used.
! **************************************************************************************************
   SUBROUTINE set_elpa_kernel(requested_kernel)
      INTEGER, INTENT(IN)                                :: requested_kernel

#if defined (__ELPA)
      INTEGER                                            :: detected_cpu

      elpa_kernel = requested_kernel

      ! An explicitly requested kernel is taken verbatim; only AUTO needs to be resolved.
      IF (elpa_kernel /= elpa_2stage_real_invalid) RETURN

      ! Pick a CPU kernel when the detected CPU lies in the x86 range.
      detected_cpu = m_cpuid()
      IF ((detected_cpu .GT. machine_cpu_generic) .AND. (detected_cpu .LE. machine_x86)) THEN
         IF (detected_cpu == machine_x86_sse4) THEN
            elpa_kernel = elpa_2stage_real_sse_block4
         ELSE IF (detected_cpu == machine_x86_avx) THEN
            elpa_kernel = elpa_2stage_real_avx_block4
         ELSE IF (detected_cpu == machine_x86_avx2) THEN
            elpa_kernel = elpa_2stage_real_avx2_block4
         ELSE
            ! Every remaining value in the x86 range is mapped to the AVX-512 kernel.
            elpa_kernel = elpa_2stage_real_avx512_block4
         END IF
      END IF

      ! Prefer GPU kernel if available. If several GPU flags are defined the last one wins.
#if defined (__ELPA_NVIDIA_GPU)
      elpa_kernel = elpa_2stage_real_nvidia_gpu
#endif
#if defined (__ELPA_AMD_GPU)
      elpa_kernel = elpa_2stage_real_amd_gpu
#endif
#if defined (__ELPA_INTEL_GPU)
      elpa_kernel = elpa_2stage_real_intel_gpu_sycl
#endif

      ! Neither a CPU nor a GPU kernel was found: fall back to ELPA_2STAGE_REAL_DEFAULT.
      IF (elpa_kernel == elpa_2stage_real_invalid) elpa_kernel = elpa_2stage_real_default
#else
      mark_used(requested_kernel)
#endif
   END SUBROUTINE set_elpa_kernel
244 
! **************************************************************************************************
!> \brief Stores the flags that control whether ELPA should try to use QR during diagonalization.
!>        Even with use_qr = .TRUE. the QR step is only performed if the input matrix is
!>        suitable; see cp_fm_diag_elpa for further details.
!> \param use_qr request the QR step
!> \param use_qr_unsafe bypass the block size checks for some ELPA versions, potentially leading
!>                      to incorrect eigenvalues
! **************************************************************************************************
   SUBROUTINE set_elpa_qr(use_qr, use_qr_unsafe)
      LOGICAL, INTENT(IN)                                :: use_qr
      LOGICAL, INTENT(IN)                                :: use_qr_unsafe

      ! Both values are only recorded here; they are evaluated when a matrix is diagonalized.
      elpa_qr_unsafe = use_qr_unsafe
      elpa_qr = use_qr
   END SUBROUTINE set_elpa_qr
259 
! **************************************************************************************************
!> \brief Stores the flag that decides whether additional information about the ELPA
!>        diagonalization is printed each time the diagonalization routine is called.
!> \param flag .TRUE. enables the additional output, .FALSE. disables it
! **************************************************************************************************
   SUBROUTINE set_elpa_print(flag)
      LOGICAL, INTENT(IN)                                :: flag

      ! Only recorded here; the flag is read by the diagonalization routine.
      elpa_should_print = flag
   END SUBROUTINE set_elpa_print
270 
! **************************************************************************************************
!> \brief Driver routine to diagonalize a FM matrix with the ELPA library: redistributes the
!>        input, runs the ELPA solver on the processes that hold the redistributed matrix and
!>        sends the results back.
!> \param matrix the matrix that is diagonalized
!> \param eigenvectors eigenvectors of the input matrix
!> \param eigenvalues eigenvalues of the input matrix
!> \note Aborts if CP2K was compiled without the ELPA library.
! **************************************************************************************************
   SUBROUTINE cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
      TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
      REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues

#if defined(__ELPA)
      CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_diag_elpa'

      INTEGER                                            :: timer_handle
      TYPE(cp_fm_redistribute_info)                      :: redistribution
      TYPE(cp_fm_type)                                   :: eigenvectors_new, matrix_new

      CALL timeset(routineN, timer_handle)

      ! Step 1: decide whether the input has to be redistributed before diagonalization.
      ! Heuristics determine the optimal number of CPUs. The result ends up in matrix_new,
      ! which is just a pointer to the original matrix if no redistribution is required.
      ! With ELPA, all processor columns must have nonzero width.
      CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, &
                                    caller_is_elpa=.TRUE., redist_info=redistribution)

      ! Step 2: only the CPUs that hold a part of the new matrix call ELPA.
      IF (ASSOCIATED(matrix_new%matrix_struct)) THEN
         CALL cp_fm_diag_elpa_base(matrix_new, eigenvectors_new, eigenvalues, redistribution)
      END IF

      ! Step 3: redistribute the results and clean up.
      CALL cp_fm_redistribute_end(matrix, eigenvectors, eigenvalues, matrix_new, eigenvectors_new)

      CALL timestop(timer_handle)
#else
      mark_used(matrix)
      mark_used(eigenvectors)
      mark_used(eigenvalues)

      cpabort("CP2K compiled without the ELPA library.")
#endif
   END SUBROUTINE cp_fm_diag_elpa
314 
#if defined(__ELPA)
! **************************************************************************************************
!> \brief Actual routine that calls ELPA to diagonalize a FM matrix.
!>        The routine completes the matrix to a full matrix, decides whether the optional QR
!>        step may be used, optionally prints a report, configures an ELPA object for the
!>        2-stage solver and runs it. If the unsafe QR mode is active and printing is enabled,
!>        the eigenvalues are recomputed without QR and compared against the QR result.
!> \param matrix the matrix that is diagonalized; cp_fm_upper_to_full is applied to it and its
!>               local data is handed to ELPA
!> \param eigenvectors eigenvectors of the input matrix; also serves as the work matrix for
!>                     cp_fm_upper_to_full
!> \param eigenvalues eigenvalues of the input matrix; its size is the number of eigenvectors
!>                    requested from ELPA ("nev")
!> \param rdinfo redistribution information, only written to the output before aborting on a
!>               processor column of zero width
!> \note Aborts if a processor column has zero width, if ELPA reports a failure, or if the
!>       eigenvalues obtained with and without QR differ by more than 1.0E-14.
! **************************************************************************************************
   SUBROUTINE cp_fm_diag_elpa_base(matrix, eigenvectors, eigenvalues, rdinfo)

      TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
      REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
      TYPE(cp_fm_redistribute_info), INTENT(IN)          :: rdinfo

      CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_diag_elpa_base'
      ! Smallest matrix order and block size for which the QR step is used without
      ! elpa_qr_unsafe
      INTEGER, PARAMETER                                 :: min_qr_size = 64
      ! Largest tolerated absolute difference between eigenvalues with and without QR
      REAL(KIND=dp), PARAMETER                           :: th = 1.0E-14_dp

      CHARACTER(LEN=default_string_length)               :: kernel_name
      CLASS(elpa_t), POINTER                             :: elpa_obj
      INTEGER                                            :: handle, i, io_unit, mypcol, myprow, n, &
                                                            n_cols, n_rows, nblk, neig, success
      INTEGER, DIMENSION(:), POINTER                     :: ncol_locals
      LOGICAL                                            :: check_eigenvalues, use_qr
      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: eval, eval_noqr
      TYPE(cp_blacs_env_type), POINTER                   :: context
      TYPE(cp_fm_type)                                   :: eigenvectors_noqr, matrix_noqr
      TYPE(cp_logger_type), POINTER                      :: logger
      TYPE(mp_comm_type)                                 :: group

      CALL timeset(routineN, handle)
      NULLIFY (logger)
      NULLIFY (ncol_locals)

      logger => cp_get_default_logger()
      io_unit = cp_logger_get_default_io_unit(logger)

      n = matrix%matrix_struct%nrow_global
      context => matrix%matrix_struct%context
      group = matrix%matrix_struct%para_env

      myprow = context%mepos(1)
      mypcol = context%mepos(2)

      ! elpa needs the full matrix
      CALL cp_fm_upper_to_full(matrix, eigenvectors)

      CALL cp_fm_struct_get(matrix%matrix_struct, &
                            local_leading_dimension=n_rows, &
                            ncol_local=n_cols, &
                            nrow_block=nblk, &
                            ncol_locals=ncol_locals)

      ! ELPA will fail in 'solve_tridi', with no useful error message, fail earlier
      IF (io_unit > 0 .AND. ANY(ncol_locals == 0)) THEN
         CALL rdinfo%write(io_unit)
         CALL cp_fm_write_info(matrix, io_unit)
         cpabort("ELPA [pre-fail]: Problem contains processor column with zero width.")
      END IF

      neig = SIZE(eigenvalues, 1)

      ! Decide if matrix is suitable for ELPA to use QR
      ! The definition of what is considered a suitable matrix depends on the ELPA version
      ! The relevant ELPA files to check are
      !     - Proper matrix order:  src/elpa2/elpa2_template.F90
      !     - Proper block size:    test/Fortran/test.F90
      ! Note that the names of these files might change in different ELPA versions
      ! Matrix order must be even
      use_qr = elpa_qr .AND. (MODULO(n, 2) .EQ. 0)
      ! Matrix order and block size must be greater than or equal to 64
      IF (.NOT. elpa_qr_unsafe) &
         use_qr = use_qr .AND. (n .GE. min_qr_size) .AND. (nblk .GE. min_qr_size)

      ! Eigenvalues computed with ELPA_QR_UNSAFE are verified only when printing is enabled
      check_eigenvalues = use_qr .AND. elpa_qr_unsafe .AND. elpa_should_print

      CALL matrix%matrix_struct%para_env%bcast(check_eigenvalues)

      IF (check_eigenvalues) THEN
         ! Allocate and initialize needed temporaries to compute eigenvalues without ELPA QR
         ALLOCATE (eval_noqr(n))
         CALL cp_fm_create(matrix=matrix_noqr, matrix_struct=matrix%matrix_struct)
         CALL cp_fm_to_fm(matrix, matrix_noqr)
         CALL cp_fm_create(matrix=eigenvectors_noqr, matrix_struct=eigenvectors%matrix_struct)
         CALL cp_fm_upper_to_full(matrix_noqr, eigenvectors_noqr)
      END IF

      IF (io_unit > 0 .AND. elpa_should_print) THEN
         WRITE (UNIT=io_unit, FMT="(/,T2,A)") &
            "ELPA| Matrix diagonalization information"

         ! Find name for given kernel id.
         ! In case ELPA_2STAGE_REAL_DEFAULT was used it might not be in our elpa_kernel_ids list,
         ! so the numerical id serves as the fallback label.
         kernel_name = "id: "//TRIM(ADJUSTL(cp_to_string(elpa_kernel)))
         DO i = 1, SIZE(elpa_kernel_ids)
            IF (elpa_kernel_ids(i) == elpa_kernel) THEN
               kernel_name = elpa_kernel_names(i)
            END IF
         END DO

         WRITE (UNIT=io_unit, FMT="(T2,A,T71,I10)") &
            "ELPA| Matrix order (NA) ", n, &
            "ELPA| Matrix block size (NBLK) ", nblk, &
            "ELPA| Number of eigenvectors (NEV) ", neig, &
            "ELPA| Local rows (LOCAL_NROWS) ", n_rows, &
            "ELPA| Local columns (LOCAL_NCOLS) ", n_cols
         WRITE (UNIT=io_unit, FMT="(T2,A,T61,A20)") &
            "ELPA| Kernel ", ADJUSTR(TRIM(kernel_name))

         IF (elpa_qr) THEN
            WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
               "ELPA| QR step requested ", "YES"

            IF (elpa_qr_unsafe) THEN
               WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
                  "ELPA| Use potentially unsafe QR ", "YES"
            ELSE
               WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
                  "ELPA| Use potentially unsafe QR ", "NO"
            END IF

            IF (use_qr) THEN
               WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
                  "ELPA| Matrix is suitable for QR ", "YES"
               IF ((nblk < min_qr_size) .AND. elpa_qr_unsafe) THEN
                  WRITE (UNIT=io_unit, FMT="(T2,A)") &
                     "ELPA| Matrix block size check was bypassed"
               END IF
            ELSE
               WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
                  "ELPA| Matrix is suitable for QR ", "NO"
               ! Report every criterion that caused the rejection
               IF (MODULO(n, 2) /= 0) THEN
                  WRITE (UNIT=io_unit, FMT="(T2,A)") &
                     "ELPA| Matrix order is NOT even"
               END IF
               ! The matrix order is subject to the same lower bound as the block size;
               ! without this message a rejection caused only by a small matrix order
               ! would be reported without any reason.
               IF ((n < min_qr_size) .AND. (.NOT. elpa_qr_unsafe)) THEN
                  WRITE (UNIT=io_unit, FMT="(T2,A)") &
                     "ELPA| Matrix order is NOT 64 or greater"
               END IF
               IF ((nblk < min_qr_size) .AND. (.NOT. elpa_qr_unsafe)) THEN
                  WRITE (UNIT=io_unit, FMT="(T2,A)") &
                     "ELPA| Matrix block size is NOT 64 or greater"
               END IF
            END IF
         ELSE
            WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
               "ELPA| QR step requested ", "NO"
         END IF
      END IF

      ! the full eigenvalues vector is needed
      ALLOCATE (eval(n))

      elpa_obj => elpa_allocate()

      ! Describe the problem size and the data distribution
      CALL elpa_obj%set("na", n, success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("nev", neig, success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("local_nrows", n_rows, success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("local_ncols", n_cols, success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("nblk", nblk, success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("mpi_comm_parent", group%get_handle(), success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("process_row", myprow, success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("process_col", mypcol, success)
      cpassert(success == elpa_ok)

      success = elpa_obj%setup()
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("solver", elpa_solver_2stage, success)
      cpassert(success == elpa_ok)

      ! enabling the GPU must happen before setting the kernel
      IF (elpa_kernel == elpa_2stage_real_nvidia_gpu) THEN
         CALL elpa_obj%set("nvidia-gpu", 1, success)
         cpassert(success == elpa_ok)
      END IF
      IF (elpa_kernel == elpa_2stage_real_amd_gpu) THEN
         CALL elpa_obj%set("amd-gpu", 1, success)
         cpassert(success == elpa_ok)
      END IF
      IF (elpa_kernel == elpa_2stage_real_intel_gpu_sycl) THEN
         CALL elpa_obj%set("intel-gpu", 1, success)
         cpassert(success == elpa_ok)
      END IF

      ! A kernel that cannot be set only triggers a warning; the run continues
      CALL elpa_obj%set("real_kernel", elpa_kernel, success)
      IF (success /= elpa_ok) THEN
         cpwarn("Setting real_kernel for ELPA failed")
      END IF

      IF (use_qr) THEN
         CALL elpa_obj%set("qr", 1, success)
         cpassert(success == elpa_ok)
      END IF

      ! Set number of threads only when ELPA was built with OpenMP support.
      IF (elpa_obj%can_set("omp_threads", omp_get_max_threads()) == elpa_ok) THEN
         CALL elpa_obj%set("omp_threads", omp_get_max_threads(), success)
         cpassert(success == elpa_ok)
      END IF

      CALL elpa_obj%eigenvectors(matrix%local_data, eval, eigenvectors%local_data, success)
      IF (success /= elpa_ok) THEN
         cpabort("ELPA failed to diagonalize a matrix")
      END IF

      IF (check_eigenvalues) THEN
         ! run again without QR
         CALL elpa_obj%set("qr", 0, success)
         cpassert(success == elpa_ok)

         CALL elpa_obj%eigenvectors(matrix_noqr%local_data, eval_noqr, eigenvectors_noqr%local_data, success)
         IF (success /= elpa_ok) THEN
            cpabort("ELPA failed to diagonalize a matrix even without QR decomposition")
         END IF

         IF (ANY(ABS(eval(1:neig) - eval_noqr(1:neig)) .GT. th)) THEN
            cpabort("Eigenvalues calculated with QR decomp. in ELPA are wrong. Disable ELPA_QR_UNSAFE.")
         END IF

         DEALLOCATE (eval_noqr)
         CALL cp_fm_release(matrix_noqr)
         CALL cp_fm_release(eigenvectors_noqr)
      END IF

      CALL elpa_deallocate(elpa_obj, success)
      cpassert(success == elpa_ok)

      ! Only the requested eigenvalues are returned to the caller
      eigenvalues(1:neig) = eval(1:neig)
      DEALLOCATE (eval)

      CALL timestop(handle)

   END SUBROUTINE cp_fm_diag_elpa_base
#endif
567 
568 END MODULE cp_fm_elpa
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always returns a positive number. https://gcc....
Definition: grid_common.h:117
methods related to the blacs parallel environment
Definition: cp_blacs_env.F:15
basic linear algebra operations for full matrices
subroutine, public cp_fm_upper_to_full(matrix, work)
given an upper triangular matrix computes the corresponding full matrix
Auxiliary tools to redistribute cp_fm_type matrices before and after diagonalization....
subroutine, public cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)
Redistributes eigenvectors and eigenvalues back to the original communicator group.
subroutine, public cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, caller_is_elpa, redist_info)
Determines the optimal number of CPUs for matrix diagonalization and redistributes the input matrices...
Wrapper for ELPA.
Definition: cp_fm_elpa.F:12
subroutine, public set_elpa_print(flag)
Sets a flag that determines if additional information about the ELPA diagonalization should be printe...
Definition: cp_fm_elpa.F:266
subroutine, public set_elpa_kernel(requested_kernel)
Sets the active ELPA kernel.
Definition: cp_fm_elpa.F:201
subroutine, public cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
Driver routine to diagonalize a FM matrix with the ELPA library.
Definition: cp_fm_elpa.F:278
subroutine, public set_elpa_qr(use_qr, use_qr_unsafe)
Sets flags that determine if ELPA should try to use QR during diagonalization. If use_qr = ....
Definition: cp_fm_elpa.F:254
subroutine, public initialize_elpa_library()
Initialize the ELPA library.
Definition: cp_fm_elpa.F:177
subroutine, public finalize_elpa_library()
Finalize the ELPA library.
Definition: cp_fm_elpa.F:189
represent the structure of a full matrix
Definition: cp_fm_struct.F:14
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
Definition: cp_fm_struct.F:409
represent a full matrix distributed on many processors
Definition: cp_fm_types.F:15
subroutine, public cp_fm_write_info(matrix, io_unit)
Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
Definition: cp_fm_types.F:1048
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
Definition: cp_fm_types.F:167
various routines to log and control the output. The idea is that decisions about where to log should ...
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public dp
Definition: kinds.F:34
integer, parameter, public default_string_length
Definition: kinds.F:57
Machine interface based on Fortran 2003 and POSIX.
Definition: machine.F:17
integer, parameter, public machine_x86_avx
Definition: machine.F:55
integer, parameter, public machine_x86_sse4
Definition: machine.F:55
integer, parameter, public machine_cpu_generic
Definition: machine.F:55
integer, parameter, public machine_x86_avx2
Definition: machine.F:55
pure integer function, public m_cpuid()
Target architecture or instruction set extension according to CPU-check at runtime.
Definition: machine.F:169
integer, parameter, public machine_x86
Definition: machine.F:55
Interface to the message passing library MPI.