(git:374b731)
Loading...
Searching...
No Matches
cp_fm_elpa.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Wrapper for ELPA
10!> \author Ole Schuett
11! **************************************************************************************************
14 USE machine, ONLY: m_cpuid, &
26 USE cp_fm_types, ONLY: cp_fm_type, &
34 USE kinds, ONLY: default_string_length, &
35 dp
37 USE omp_lib, ONLY: omp_get_max_threads
38
39#include "../base/base_uses.f90"
40
41#if defined (__ELPA)
42 USE elpa_constants, ONLY: elpa_2stage_real_invalid, &
43 elpa_2stage_real_default, &
44 elpa_2stage_real_generic, &
45 elpa_2stage_real_generic_simple, &
46 elpa_2stage_real_bgp, &
47 elpa_2stage_real_bgq, &
48 elpa_2stage_real_sse_assembly, &
49 elpa_2stage_real_sse_block2, &
50 elpa_2stage_real_sse_block4, &
51 elpa_2stage_real_sse_block6, &
52 elpa_2stage_real_avx_block2, &
53 elpa_2stage_real_avx_block4, &
54 elpa_2stage_real_avx_block6, &
55 elpa_2stage_real_avx2_block2, &
56 elpa_2stage_real_avx2_block4, &
57 elpa_2stage_real_avx2_block6, &
58 elpa_2stage_real_avx512_block2, &
59 elpa_2stage_real_avx512_block4, &
60 elpa_2stage_real_avx512_block6, &
61 elpa_2stage_real_nvidia_gpu, &
62 elpa_2stage_real_amd_gpu, &
63 elpa_2stage_real_intel_gpu_sycl
64
65 USE elpa, ONLY: elpa_t, elpa_solver_2stage, &
66 elpa_init, elpa_uninit, &
67 elpa_allocate, elpa_deallocate, elpa_ok
68#endif
69
70 IMPLICIT NONE
71
72 PRIVATE
73
74 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_elpa'
75
76#if defined(__ELPA)
77 INTEGER, DIMENSION(21), PARAMETER :: elpa_kernel_ids = [ &
78 elpa_2stage_real_invalid, & ! auto
79 elpa_2stage_real_generic, &
80 elpa_2stage_real_generic_simple, &
81 elpa_2stage_real_bgp, &
82 elpa_2stage_real_bgq, &
83 elpa_2stage_real_sse_assembly, &
84 elpa_2stage_real_sse_block2, &
85 elpa_2stage_real_sse_block4, &
86 elpa_2stage_real_sse_block6, &
87 elpa_2stage_real_avx_block2, &
88 elpa_2stage_real_avx_block4, &
89 elpa_2stage_real_avx_block6, &
90 elpa_2stage_real_avx2_block2, &
91 elpa_2stage_real_avx2_block4, &
92 elpa_2stage_real_avx2_block6, &
93 elpa_2stage_real_avx512_block2, &
94 elpa_2stage_real_avx512_block4, &
95 elpa_2stage_real_avx512_block6, &
96 elpa_2stage_real_nvidia_gpu, &
97 elpa_2stage_real_amd_gpu, &
98 elpa_2stage_real_intel_gpu_sycl]
99
100 CHARACTER(len=14), DIMENSION(SIZE(elpa_kernel_ids)), PARAMETER :: &
101 elpa_kernel_names = [character(len=14) :: &
102 "AUTO", &
103 "GENERIC", &
104 "GENERIC_SIMPLE", &
105 "BGP", &
106 "BGQ", &
107 "SSE", &
108 "SSE_BLOCK2", &
109 "SSE_BLOCK4", &
110 "SSE_BLOCK6", &
111 "AVX_BLOCK2", &
112 "AVX_BLOCK4", &
113 "AVX_BLOCK6", &
114 "AVX2_BLOCK2", &
115 "AVX2_BLOCK4", &
116 "AVX2_BLOCK6", &
117 "AVX512_BLOCK2", &
118 "AVX512_BLOCK4", &
119 "AVX512_BLOCK6", &
120 "NVIDIA_GPU", &
121 "AMD_GPU", &
122 "INTEL_GPU"]
123
124 CHARACTER(len=44), DIMENSION(SIZE(elpa_kernel_ids)), PARAMETER :: &
125 elpa_kernel_descriptions = [character(len=44) :: &
126 "Automatically selected kernel", &
127 "Generic kernel", &
128 "Simplified generic kernel", &
129 "Kernel optimized for IBM BGP", &
130 "Kernel optimized for IBM BGQ", &
131 "Kernel optimized for x86_64/SSE", &
132 "Kernel optimized for x86_64/SSE (block=2)", &
133 "Kernel optimized for x86_64/SSE (block=4)", &
134 "Kernel optimized for x86_64/SSE (block=6)", &
135 "Kernel optimized for Intel AVX (block=2)", &
136 "Kernel optimized for Intel AVX (block=4)", &
137 "Kernel optimized for Intel AVX (block=6)", &
138 "Kernel optimized for Intel AVX2 (block=2)", &
139 "Kernel optimized for Intel AVX2 (block=4)", &
140 "Kernel optimized for Intel AVX2 (block=6)", &
141 "Kernel optimized for Intel AVX-512 (block=2)", &
142 "Kernel optimized for Intel AVX-512 (block=4)", &
143 "Kernel optimized for Intel AVX-512 (block=6)", &
144 "Kernel targeting Nvidia GPUs", &
145 "Kernel targeting AMD GPUs", &
146 "Kernel targeting Intel GPUs"]
147
148#else
149 INTEGER, DIMENSION(1), PARAMETER :: elpa_kernel_ids = [-1]
150 CHARACTER(len=14), DIMENSION(1), PARAMETER :: elpa_kernel_names = ["AUTO"]
151 CHARACTER(len=44), DIMENSION(1), PARAMETER :: elpa_kernel_descriptions = ["Automatically selected kernel"]
152#endif
153
154#if defined(__ELPA)
155 INTEGER, SAVE :: elpa_kernel = elpa_kernel_ids(1) ! auto
156#endif
157 LOGICAL, SAVE :: elpa_qr = .false., &
158 elpa_qr_unsafe = .false., &
159 elpa_should_print = .false.
160
161 PUBLIC :: cp_fm_diag_elpa, &
163 set_elpa_qr, &
165 elpa_kernel_ids, &
166 elpa_kernel_names, &
167 elpa_kernel_descriptions, &
170
171CONTAINS
172
173! **************************************************************************************************
174!> \brief Initialize the ELPA library
175! **************************************************************************************************
177#if defined(__ELPA)
178 IF (elpa_init(20180525) /= elpa_ok) &
179 cpabort("The linked ELPA library does not support the required API version")
180#else
181 cpabort("Initialization of ELPA library requested but not enabled during build")
182#endif
183 END SUBROUTINE
184
185! **************************************************************************************************
186!> \brief Finalize the ELPA library
187! **************************************************************************************************
189#if defined(__ELPA)
190 CALL elpa_uninit()
191#else
192 cpabort("Finalization of ELPA library requested but not enabled during build")
193#endif
194 END SUBROUTINE
195
! **************************************************************************************************
!> \brief Selects the ELPA kernel that is used by subsequent diagonalizations.
!> \param requested_kernel one of the elpa_kernel_ids. The AUTO entry (elpa_2stage_real_invalid)
!>        is resolved here: first from the CPU reported by m_cpuid, then overridden by a GPU
!>        kernel enabled at build time, and finally replaced by ELPA_2STAGE_REAL_DEFAULT if
!>        nothing else matched.
!> \note  Without __ELPA the argument is only passed to mark_used.
! **************************************************************************************************
   SUBROUTINE set_elpa_kernel(requested_kernel)
      INTEGER, INTENT(IN)                                :: requested_kernel

#if defined (__ELPA)
      INTEGER                                            :: cpu_id

      elpa_kernel = requested_kernel

      ! An explicitly requested kernel is stored as is; only AUTO has to be resolved.
      IF (elpa_kernel /= elpa_2stage_real_invalid) RETURN

      ! Pick an x86 kernel matching the instruction set reported for this CPU.
      cpu_id = m_cpuid()
      IF ((cpu_id .GT. machine_cpu_generic) .AND. (cpu_id .LE. machine_x86)) THEN
         IF (cpu_id == machine_x86_sse4) THEN
            elpa_kernel = elpa_2stage_real_sse_block4
         ELSE IF (cpu_id == machine_x86_avx) THEN
            elpa_kernel = elpa_2stage_real_avx_block4
         ELSE IF (cpu_id == machine_x86_avx2) THEN
            elpa_kernel = elpa_2stage_real_avx2_block4
         ELSE
            elpa_kernel = elpa_2stage_real_avx512_block4
         END IF
      END IF

      ! Prefer GPU kernel if available.
      ! If several GPU backends are enabled, Intel wins over AMD, which wins over Nvidia.
#if defined (__ELPA_INTEL_GPU)
      elpa_kernel = elpa_2stage_real_intel_gpu_sycl
#elif defined (__ELPA_AMD_GPU)
      elpa_kernel = elpa_2stage_real_amd_gpu
#elif defined (__ELPA_NVIDIA_GPU)
      elpa_kernel = elpa_2stage_real_nvidia_gpu
#endif

      ! Nothing suitable was found: fall back to ELPA_2STAGE_REAL_DEFAULT.
      IF (elpa_kernel == elpa_2stage_real_invalid) elpa_kernel = elpa_2stage_real_default
#else
      mark_used(requested_kernel)
#endif
   END SUBROUTINE set_elpa_kernel
244
! **************************************************************************************************
!> \brief Stores the flags that control whether ELPA may use the QR step during diagonalization.
!>        Even with use_qr = .TRUE., the QR step is performed only if the input matrix is
!>        suitable; the suitability test is done in cp_fm_diag_elpa_base.
!> \param use_qr request the QR step
!> \param use_qr_unsafe bypass the matrix order / block size checks for some ELPA versions,
!>        potentially leading to incorrect eigenvalues
! **************************************************************************************************
   SUBROUTINE set_elpa_qr(use_qr, use_qr_unsafe)
      LOGICAL, INTENT(IN)                                :: use_qr, use_qr_unsafe

      ! Both flags are module state that is read on every later diagonalization.
      elpa_qr_unsafe = use_qr_unsafe
      elpa_qr = use_qr
   END SUBROUTINE set_elpa_qr
259
! **************************************************************************************************
!> \brief Switches the additional ELPA diagnostics on or off. The flag is read each time the
!>        diagonalization routine is called.
!> \param flag .TRUE. to print information about the ELPA diagonalization
!> \note  In cp_fm_diag_elpa_base this flag, together with an active unsafe QR step, also
!>        enables the verification of the eigenvalues against a run without QR.
! **************************************************************************************************
   SUBROUTINE set_elpa_print(flag)
      LOGICAL, INTENT(IN)                                :: flag

      elpa_should_print = flag

   END SUBROUTINE set_elpa_print
270
! **************************************************************************************************
!> \brief Public entry point for diagonalizing a FM matrix with ELPA: redistributes the input if
!>        needed, runs ELPA on the processes holding the redistributed matrix and sends the
!>        results back.
!> \param matrix the matrix that is diagonalized
!> \param eigenvectors eigenvectors of the input matrix
!> \param eigenvalues eigenvalues of the input matrix
!> \note  Aborts if CP2K was compiled without the ELPA library.
! **************************************************************************************************
   SUBROUTINE cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
      TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
      REAL(kind=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues

#if defined(__ELPA)
      CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_diag_elpa'

      INTEGER                                            :: timer_handle
      TYPE(cp_fm_redistribute_info)                      :: redistribution
      TYPE(cp_fm_type)                                   :: evecs_redist, matrix_redist

      CALL timeset(routineN, timer_handle)

      ! Heuristics decide on the optimal number of CPUs for the diagonalization and whether the
      ! input has to be redistributed first. matrix_redist holds the redistributed matrix; it is
      ! just a pointer to the original matrix if no redistribution is required.
      ! For ELPA all processor columns must have nonzero width.
      CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_redist, evecs_redist, &
                                    caller_is_elpa=.true., redist_info=redistribution)

      ! Only the CPUs that hold a part of the new matrix call ELPA.
      IF (ASSOCIATED(matrix_redist%matrix_struct)) THEN
         CALL cp_fm_diag_elpa_base(matrix_redist, evecs_redist, eigenvalues, redistribution)
      END IF

      ! Send the results back to the original distribution and clean up.
      CALL cp_fm_redistribute_end(matrix, eigenvectors, eigenvalues, matrix_redist, evecs_redist)

      CALL timestop(timer_handle)
#else
      mark_used(eigenvalues)
      mark_used(eigenvectors)
      mark_used(matrix)

      cpabort("CP2K compiled without the ELPA library.")
#endif
   END SUBROUTINE cp_fm_diag_elpa
314
315#if defined(__ELPA)
! **************************************************************************************************
!> \brief Actual routine that calls ELPA to diagonalize a FM matrix.
!> \param matrix the matrix that is diagonalized; it is expanded from its upper triangle to the
!>        full matrix and its local data is handed to ELPA
!> \param eigenvectors eigenvectors of the input matrix (also passed as work matrix while the
!>        input is expanded to the full matrix)
!> \param eigenvalues eigenvalues of the input matrix; its size sets the number of requested
!>        eigenvectors (NEV)
!> \param rdinfo redistribution info; only written to the output unit right before aborting
!>        because of a processor column with zero width
!> \note  The QR step is used only if it was requested (set_elpa_qr) and the matrix order is even.
!>        Unless ELPA_QR_UNSAFE is active, matrix order and block size must also be >= 64.
!>        With unsafe QR in use and printing enabled, the diagonalization is repeated without QR
!>        and the run aborts if any of the requested eigenvalues differs by more than 1.0e-14.
! **************************************************************************************************
   SUBROUTINE cp_fm_diag_elpa_base(matrix, eigenvectors, eigenvalues, rdinfo)

      TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
      REAL(kind=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
      TYPE(cp_fm_redistribute_info), INTENT(IN)          :: rdinfo

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_diag_elpa_base'

      ! Smallest matrix order and block size accepted for the QR step by the safe checks
      INTEGER, PARAMETER                       :: qr_min_size = 64
      ! Largest accepted deviation between eigenvalues computed with and without QR
      REAL(kind=dp), PARAMETER                 :: th = 1.0e-14_dp

      INTEGER                                  :: handle

      CLASS(elpa_t), POINTER                   :: elpa_obj
      CHARACTER(len=default_string_length)     :: kernel_name
      TYPE(mp_comm_type)                       :: group
      INTEGER                                  :: i, &
                                                  mypcol, myprow, n, &
                                                  n_rows, n_cols, &
                                                  nblk, neig, io_unit, &
                                                  success
      LOGICAL                                  :: use_qr, check_eigenvalues
      REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: eval, eval_noqr
      TYPE(cp_blacs_env_type), POINTER         :: context
      TYPE(cp_fm_type)                         :: matrix_noqr, eigenvectors_noqr
      TYPE(cp_logger_type), POINTER            :: logger
      INTEGER, DIMENSION(:), POINTER           :: ncol_locals

      CALL timeset(routineN, handle)
      NULLIFY (logger)
      NULLIFY (ncol_locals)

      check_eigenvalues = .false.

      logger => cp_get_default_logger()
      io_unit = cp_logger_get_default_io_unit(logger)

      n = matrix%matrix_struct%nrow_global
      context => matrix%matrix_struct%context
      group = matrix%matrix_struct%para_env

      myprow = context%mepos(1)
      mypcol = context%mepos(2)

      ! elpa needs the full matrix
      CALL cp_fm_upper_to_full(matrix, eigenvectors)

      CALL cp_fm_struct_get(matrix%matrix_struct, &
                            local_leading_dimension=n_rows, &
                            ncol_local=n_cols, &
                            nrow_block=nblk, &
                            ncol_locals=ncol_locals)

      ! ELPA will fail in 'solve_tridi', with no useful error message, fail earlier
      IF (io_unit > 0 .AND. any(ncol_locals == 0)) THEN
         CALL rdinfo%write(io_unit)
         CALL cp_fm_write_info(matrix, io_unit)
         cpabort("ELPA [pre-fail]: Problem contains processor column with zero width.")
      END IF

      neig = SIZE(eigenvalues, 1)
      ! Decide if matrix is suitable for ELPA to use QR
      ! The definition of what is considered a suitable matrix depends on the ELPA version
      ! The relevant ELPA files to check are
      !     - Proper matrix order:  src/elpa2/elpa2_template.F90
      !     - Proper block size:    test/Fortran/test.F90
      ! Note that the names of these files might change in different ELPA versions
      ! Matrix order must be even
      use_qr = elpa_qr .AND. (modulo(n, 2) .EQ. 0)
      ! Matrix order and block size must be greater than or equal to 64
      IF (.NOT. elpa_qr_unsafe) &
         use_qr = use_qr .AND. (n .GE. qr_min_size) .AND. (nblk .GE. qr_min_size)

      ! Check if eigenvalues computed with ELPA_QR_UNSAFE should be verified
      IF (use_qr .AND. elpa_qr_unsafe .AND. elpa_should_print) &
         check_eigenvalues = .true.

      CALL matrix%matrix_struct%para_env%bcast(check_eigenvalues)

      IF (check_eigenvalues) THEN
         ! Allocate and initialize needed temporaries to compute eigenvalues without ELPA QR
         ALLOCATE (eval_noqr(n))
         CALL cp_fm_create(matrix=matrix_noqr, matrix_struct=matrix%matrix_struct)
         CALL cp_fm_to_fm(matrix, matrix_noqr)
         CALL cp_fm_create(matrix=eigenvectors_noqr, matrix_struct=eigenvectors%matrix_struct)
         CALL cp_fm_upper_to_full(matrix_noqr, eigenvectors_noqr)
      END IF

      IF (io_unit > 0 .AND. elpa_should_print) THEN
         WRITE (unit=io_unit, fmt="(/,T2,A)") &
            "ELPA| Matrix diagonalization information"

         ! Find name for given kernel id.
         ! In case ELPA_2STAGE_REAL_DEFAULT was used it might not be in our elpa_kernel_ids list.
         kernel_name = "id: "//trim(adjustl(cp_to_string(elpa_kernel)))
         DO i = 1, SIZE(elpa_kernel_ids)
            IF (elpa_kernel_ids(i) == elpa_kernel) THEN
               kernel_name = elpa_kernel_names(i)
            END IF
         END DO

         WRITE (unit=io_unit, fmt="(T2,A,T71,I10)") &
            "ELPA| Matrix order (NA) ", n, &
            "ELPA| Matrix block size (NBLK) ", nblk, &
            "ELPA| Number of eigenvectors (NEV) ", neig, &
            "ELPA| Local rows (LOCAL_NROWS) ", n_rows, &
            "ELPA| Local columns (LOCAL_NCOLS) ", n_cols
         WRITE (unit=io_unit, fmt="(T2,A,T61,A20)") &
            "ELPA| Kernel ", adjustr(trim(kernel_name))
         IF (elpa_qr) THEN
            WRITE (unit=io_unit, fmt="(T2,A,T78,A3)") &
               "ELPA| QR step requested ", "YES"
         ELSE
            WRITE (unit=io_unit, fmt="(T2,A,T79,A2)") &
               "ELPA| QR step requested ", "NO"
         END IF

         IF (elpa_qr) THEN
            IF (elpa_qr_unsafe) THEN
               WRITE (unit=io_unit, fmt="(T2,A,T78,A3)") &
                  "ELPA| Use potentially unsafe QR ", "YES"
            ELSE
               WRITE (unit=io_unit, fmt="(T2,A,T79,A2)") &
                  "ELPA| Use potentially unsafe QR ", "NO"
            END IF
            IF (use_qr) THEN
               WRITE (unit=io_unit, fmt="(T2,A,T78,A3)") &
                  "ELPA| Matrix is suitable for QR ", "YES"
            ELSE
               WRITE (unit=io_unit, fmt="(T2,A,T79,A2)") &
                  "ELPA| Matrix is suitable for QR ", "NO"
            END IF
            IF (.NOT. use_qr) THEN
               ! Report every condition of the suitability test above that was violated
               IF (modulo(n, 2) /= 0) THEN
                  WRITE (unit=io_unit, fmt="(T2,A)") &
                     "ELPA| Matrix order is NOT even"
               END IF
               ! The order check was previously not reported, leaving the rejection unexplained
               IF ((n < qr_min_size) .AND. (.NOT. elpa_qr_unsafe)) THEN
                  WRITE (unit=io_unit, fmt="(T2,A)") &
                     "ELPA| Matrix order is NOT 64 or greater"
               END IF
               IF ((nblk < qr_min_size) .AND. (.NOT. elpa_qr_unsafe)) THEN
                  WRITE (unit=io_unit, fmt="(T2,A)") &
                     "ELPA| Matrix block size is NOT 64 or greater"
               END IF
            ELSE
               IF ((nblk < qr_min_size) .AND. elpa_qr_unsafe) THEN
                  WRITE (unit=io_unit, fmt="(T2,A)") &
                     "ELPA| Matrix block size check was bypassed"
               END IF
            END IF
         END IF
      END IF

      ! the full eigenvalues vector is needed
      ALLOCATE (eval(n))

      elpa_obj => elpa_allocate()

      ! Describe the matrix, its block-cyclic distribution and the process grid to ELPA
      CALL elpa_obj%set("na", n, success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("nev", neig, success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("local_nrows", n_rows, success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("local_ncols", n_cols, success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("nblk", nblk, success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("mpi_comm_parent", group%get_handle(), success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("process_row", myprow, success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("process_col", mypcol, success)
      cpassert(success == elpa_ok)

      success = elpa_obj%setup()
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("solver", elpa_solver_2stage, success)
      cpassert(success == elpa_ok)

      ! enabling the GPU must happen before setting the kernel
      IF (elpa_kernel == elpa_2stage_real_nvidia_gpu) THEN
         CALL elpa_obj%set("nvidia-gpu", 1, success)
         cpassert(success == elpa_ok)
      END IF
      IF (elpa_kernel == elpa_2stage_real_amd_gpu) THEN
         CALL elpa_obj%set("amd-gpu", 1, success)
         cpassert(success == elpa_ok)
      END IF
      IF (elpa_kernel == elpa_2stage_real_intel_gpu_sycl) THEN
         CALL elpa_obj%set("intel-gpu", 1, success)
         cpassert(success == elpa_ok)
      END IF

      ! A rejected kernel is not fatal: only a warning is issued and the run continues
      CALL elpa_obj%set("real_kernel", elpa_kernel, success)
      IF (success /= elpa_ok) THEN
         cpwarn("Setting real_kernel for ELPA failed")
      END IF

      IF (use_qr) THEN
         CALL elpa_obj%set("qr", 1, success)
         cpassert(success == elpa_ok)
      END IF

      ! Set number of threads only when ELPA was built with OpenMP support.
      IF (elpa_obj%can_set("omp_threads", omp_get_max_threads()) == elpa_ok) THEN
         CALL elpa_obj%set("omp_threads", omp_get_max_threads(), success)
         cpassert(success == elpa_ok)
      END IF

      CALL elpa_obj%eigenvectors(matrix%local_data, eval, eigenvectors%local_data, success)
      IF (success /= elpa_ok) &
         cpabort("ELPA failed to diagonalize a matrix")

      IF (check_eigenvalues) THEN
         ! run again without QR
         CALL elpa_obj%set("qr", 0, success)
         cpassert(success == elpa_ok)

         CALL elpa_obj%eigenvectors(matrix_noqr%local_data, eval_noqr, eigenvectors_noqr%local_data, success)
         IF (success /= elpa_ok) &
            cpabort("ELPA failed to diagonalize a matrix even without QR decomposition")

         ! Only the requested eigenvalues are compared
         IF (any(abs(eval(1:neig) - eval_noqr(1:neig)) .GT. th)) &
            cpabort("Eigenvalues calculated with QR decomp. in ELPA are wrong. Disable ELPA_QR_UNSAFE.")

         DEALLOCATE (eval_noqr)
         CALL cp_fm_release(matrix_noqr)
         CALL cp_fm_release(eigenvectors_noqr)
      END IF

      CALL elpa_deallocate(elpa_obj, success)
      cpassert(success == elpa_ok)

      ! Hand back only the requested part of the full eigenvalue vector
      eigenvalues(1:neig) = eval(1:neig)
      DEALLOCATE (eval)

      CALL timestop(handle)

   END SUBROUTINE cp_fm_diag_elpa_base
566#endif
567
568END MODULE cp_fm_elpa
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always returns a positive number. https://gcc....
methods related to the blacs parallel environment
basic linear algebra operations for full matrices
subroutine, public cp_fm_upper_to_full(matrix, work)
given an upper triangular matrix computes the corresponding full matrix
Auxiliary tools to redistribute cp_fm_type matrices before and after diagonalization....
subroutine, public cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)
Redistributes eigenvectors and eigenvalues back to the original communicator group.
subroutine, public cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, caller_is_elpa, redist_info)
Determines the optimal number of CPUs for matrix diagonalization and redistributes the input matrices...
Wrapper for ELPA.
Definition cp_fm_elpa.F:12
subroutine, public set_elpa_print(flag)
Sets a flag that determines if additional information about the ELPA diagonalization should be printe...
Definition cp_fm_elpa.F:266
subroutine, public set_elpa_kernel(requested_kernel)
Sets the active ELPA kernel.
Definition cp_fm_elpa.F:201
subroutine, public cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
Driver routine to diagonalize a FM matrix with the ELPA library.
Definition cp_fm_elpa.F:278
subroutine, public set_elpa_qr(use_qr, use_qr_unsafe)
Sets flags that determine if ELPA should try to use QR during diagonalization. If use_qr = ....
Definition cp_fm_elpa.F:254
subroutine, public initialize_elpa_library()
Initialize the ELPA library.
Definition cp_fm_elpa.F:177
subroutine, public finalize_elpa_library()
Finalize the ELPA library.
Definition cp_fm_elpa.F:189
represent the structure of a full matrix
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_write_info(matrix, io_unit)
Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
various routines to log and control the output. The idea is that decisions about where to log should ...
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public default_string_length
Definition kinds.F:57
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
integer, parameter, public machine_x86_avx
Definition machine.F:55
integer, parameter, public machine_x86_sse4
Definition machine.F:55
integer, parameter, public machine_cpu_generic
Definition machine.F:55
integer, parameter, public machine_x86_avx2
Definition machine.F:55
pure integer function, public m_cpuid()
Target architecture or instruction set extension according to CPU-check at runtime.
Definition machine.F:169
integer, parameter, public machine_x86
Definition machine.F:55
Interface to the message passing library MPI.
represent a blacs multidimensional parallel environment (for the MPI counterpart see cp_paratypes/m...
represent a full matrix
type of a logger, at the moment it contains just a print level starting at which level it should be l...