(git:23d6dbe)
Loading...
Searching...
No Matches
cp_fm_elpa.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Wrapper for ELPA
10!> \author Ole Schuett
11! **************************************************************************************************
14 USE machine, ONLY: m_cpuid, &
26 USE cp_fm_types, ONLY: cp_fm_type, &
36 USE omp_lib, ONLY: omp_get_max_threads
37#if defined(__HAS_IEEE_EXCEPTIONS)
38 USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
39 ieee_set_halting_mode, &
40 ieee_all
41#endif
42#include "../base/base_uses.f90"
43
44#if defined(__ELPA)
45 USE elpa_constants, ONLY: elpa_solver_1stage, elpa_solver_2stage, elpa_ok, &
46 elpa_2stage_real_invalid, &
47 elpa_2stage_real_default, &
48 elpa_2stage_real_generic, &
49 elpa_2stage_real_generic_simple, &
50 elpa_2stage_real_bgp, &
51 elpa_2stage_real_bgq, &
52 elpa_2stage_real_sse_assembly, &
53 elpa_2stage_real_sse_block2, &
54 elpa_2stage_real_sse_block4, &
55 elpa_2stage_real_sse_block6, &
56 elpa_2stage_real_avx_block2, &
57 elpa_2stage_real_avx_block4, &
58 elpa_2stage_real_avx_block6, &
59 elpa_2stage_real_avx2_block2, &
60 elpa_2stage_real_avx2_block4, &
61 elpa_2stage_real_avx2_block6, &
62 elpa_2stage_real_avx512_block2, &
63 elpa_2stage_real_avx512_block4, &
64 elpa_2stage_real_avx512_block6, &
65 elpa_2stage_real_nvidia_gpu, &
66 elpa_2stage_real_amd_gpu, &
67 elpa_2stage_real_intel_gpu_sycl
68
69 USE elpa, ONLY: elpa_t, elpa_init, elpa_uninit, &
70 elpa_allocate, elpa_deallocate
71#endif
72
73 IMPLICIT NONE
74
75 PRIVATE
76
77 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_elpa'
78
#if !defined(__ELPA)
   ! Build without ELPA: each table holds a single placeholder entry (id -1, "AUTO").
   INTEGER, PARAMETER :: elpa_kernel_ids(1) = [-1]
   CHARACTER(len=14), PARAMETER :: elpa_kernel_names(1) = ["AUTO"]
   CHARACTER(len=44), PARAMETER :: elpa_kernel_descriptions(1) = ["Automatically selected kernel"]
#else
   ! The three tables below are index-aligned: entry i of elpa_kernel_names and
   ! elpa_kernel_descriptions belongs to the kernel id stored in elpa_kernel_ids(i).

   ! Kernel ids of the two-stage solver as defined by the elpa_constants module.
   INTEGER, PARAMETER :: elpa_kernel_ids(21) = [ &
                         elpa_2stage_real_invalid, & ! stands for "AUTO"
                         elpa_2stage_real_generic, &
                         elpa_2stage_real_generic_simple, &
                         elpa_2stage_real_bgp, &
                         elpa_2stage_real_bgq, &
                         elpa_2stage_real_sse_assembly, &
                         elpa_2stage_real_sse_block2, &
                         elpa_2stage_real_sse_block4, &
                         elpa_2stage_real_sse_block6, &
                         elpa_2stage_real_avx_block2, &
                         elpa_2stage_real_avx_block4, &
                         elpa_2stage_real_avx_block6, &
                         elpa_2stage_real_avx2_block2, &
                         elpa_2stage_real_avx2_block4, &
                         elpa_2stage_real_avx2_block6, &
                         elpa_2stage_real_avx512_block2, &
                         elpa_2stage_real_avx512_block4, &
                         elpa_2stage_real_avx512_block6, &
                         elpa_2stage_real_nvidia_gpu, &
                         elpa_2stage_real_amd_gpu, &
                         elpa_2stage_real_intel_gpu_sycl]

   ! Short kernel names (blank-padded to 14 characters).
   CHARACTER(len=14), PARAMETER :: elpa_kernel_names(SIZE(elpa_kernel_ids)) = &
                                   [CHARACTER(len=14) :: &
                                    "AUTO", &
                                    "GENERIC", &
                                    "GENERIC_SIMPLE", &
                                    "BGP", &
                                    "BGQ", &
                                    "SSE", &
                                    "SSE_BLOCK2", &
                                    "SSE_BLOCK4", &
                                    "SSE_BLOCK6", &
                                    "AVX_BLOCK2", &
                                    "AVX_BLOCK4", &
                                    "AVX_BLOCK6", &
                                    "AVX2_BLOCK2", &
                                    "AVX2_BLOCK4", &
                                    "AVX2_BLOCK6", &
                                    "AVX512_BLOCK2", &
                                    "AVX512_BLOCK4", &
                                    "AVX512_BLOCK6", &
                                    "NVIDIA_GPU", &
                                    "AMD_GPU", &
                                    "INTEL_GPU"]

   ! One-line description of every kernel (blank-padded to 44 characters).
   CHARACTER(len=44), PARAMETER :: elpa_kernel_descriptions(SIZE(elpa_kernel_ids)) = &
                                   [CHARACTER(len=44) :: &
                                    "Automatically selected kernel", &
                                    "Generic kernel", &
                                    "Simplified generic kernel", &
                                    "Kernel optimized for IBM BGP", &
                                    "Kernel optimized for IBM BGQ", &
                                    "Kernel optimized for x86_64/SSE", &
                                    "Kernel optimized for x86_64/SSE (block=2)", &
                                    "Kernel optimized for x86_64/SSE (block=4)", &
                                    "Kernel optimized for x86_64/SSE (block=6)", &
                                    "Kernel optimized for Intel AVX (block=2)", &
                                    "Kernel optimized for Intel AVX (block=4)", &
                                    "Kernel optimized for Intel AVX (block=6)", &
                                    "Kernel optimized for Intel AVX2 (block=2)", &
                                    "Kernel optimized for Intel AVX2 (block=4)", &
                                    "Kernel optimized for Intel AVX2 (block=6)", &
                                    "Kernel optimized for Intel AVX-512 (block=2)", &
                                    "Kernel optimized for Intel AVX-512 (block=4)", &
                                    "Kernel optimized for Intel AVX-512 (block=6)", &
                                    "Kernel targeting Nvidia GPUs", &
                                    "Kernel targeting AMD GPUs", &
                                    "Kernel targeting Intel GPUs"]
#endif
155
#if defined(__ELPA)
   ! Kernel id used for the two-stage solver; starts as the first table entry (AUTO).
   INTEGER, SAVE :: elpa_kernel = elpa_kernel_ids(1)
#endif

   ! elpa_qr_unsafe: disable block size limitations
   LOGICAL, SAVE :: elpa_qr_unsafe = .true.
   ! elpa_print: print additional information when the diagonalization routine is called
   LOGICAL, SAVE :: elpa_print = .false.
   ! elpa_qr: request the QR step
   LOGICAL, SAVE :: elpa_qr = .false.

   ! elpa_one_stage: use the one-stage solver; the default is .true. only for OpenCL offload builds
#if !defined(__OFFLOAD_OPENCL)
   LOGICAL, SAVE :: elpa_one_stage = .false.
#else
   LOGICAL, SAVE :: elpa_one_stage = .true.
#endif
170
! Entities exported by this module; everything else is private by default.
   PUBLIC :: cp_fm_diag_elpa, &
             set_elpa_kernel, &
             elpa_one_stage, &
             elpa_print, &
             elpa_qr, &
             elpa_kernel_ids, &
             elpa_kernel_names, &
             elpa_kernel_descriptions, &
             initialize_elpa_library, &
             finalize_elpa_library
181
182CONTAINS
183
! **************************************************************************************************
!> \brief Initialize the ELPA library and store the optional solver settings.
!>        Aborts if the build has no ELPA support or if ELPA rejects the requested API version.
!> \param one_stage if present, stored in elpa_one_stage (one-stage instead of two-stage solver)
!> \param qr if present, stored in elpa_qr (request the QR step)
!> \param should_print flag that determines if additional information
!>                     is printed when the diagonalization routine is called.
! **************************************************************************************************
   SUBROUTINE initialize_elpa_library(one_stage, qr, should_print)
      LOGICAL, INTENT(IN), OPTIONAL                      :: one_stage, qr, should_print

#if !defined(__ELPA)
      mark_used(one_stage)
      mark_used(qr)
      mark_used(should_print)
      cpabort("Initialization of ELPA library requested but not enabled during build")
#else
      ! API version passed to elpa_init
      INTEGER, PARAMETER                                 :: required_api_version = 20180525

      IF (elpa_init(required_api_version) /= elpa_ok) THEN
         cpabort("The linked ELPA library does not support the required API version")
      END IF

      ! Arguments that are absent leave the corresponding module setting untouched.
      IF (PRESENT(one_stage)) THEN
         elpa_one_stage = one_stage
      END IF
      IF (PRESENT(should_print)) THEN
         elpa_print = should_print
      END IF
      IF (PRESENT(qr)) THEN
         elpa_qr = qr
      END IF
#endif
   END SUBROUTINE initialize_elpa_library
207
! **************************************************************************************************
!> \brief Finalize the ELPA library
!> \note Aborts if ELPA support was not enabled during the build.
! **************************************************************************************************
   SUBROUTINE finalize_elpa_library()
#if defined(__ELPA)
      CALL elpa_uninit()
#else
      cpabort("Finalization of ELPA library requested but not enabled during build")
#endif
   END SUBROUTINE finalize_elpa_library
218
! **************************************************************************************************
!> \brief Sets the active ELPA kernel.
!>        The AUTO entry (elpa_2stage_real_invalid) is resolved to a concrete kernel: first from
!>        the CPU id reported by m_cpuid, then overridden by a GPU kernel if the build enables
!>        one, and finally replaced by ELPA's default kernel if nothing else matched.
!> \param requested_kernel one of the elpa_kernel_ids
! **************************************************************************************************
   SUBROUTINE set_elpa_kernel(requested_kernel)
      INTEGER, INTENT(IN)                                :: requested_kernel

#if defined(__ELPA)
      INTEGER                                            :: isa

      elpa_kernel = requested_kernel

      ! Anything but AUTO is taken over verbatim.
      IF (requested_kernel /= elpa_2stage_real_invalid) RETURN

      ! Resolve AUTO kernel: pick a CPU kernel for the x86 range of CPU ids.
      isa = m_cpuid()
      IF ((isa > machine_cpu_generic) .AND. (isa <= machine_x86)) THEN
         IF (isa == machine_x86_sse4) THEN
            elpa_kernel = elpa_2stage_real_sse_block4
         ELSE IF (isa == machine_x86_avx) THEN
            elpa_kernel = elpa_2stage_real_avx_block4
         ELSE IF (isa == machine_x86_avx2) THEN
            elpa_kernel = elpa_2stage_real_avx2_block4
         ELSE
            elpa_kernel = elpa_2stage_real_avx512_block4
         END IF
      END IF

      ! Prefer GPU kernel if available.
#if !defined(__NO_OFFLOAD_ELPA)
#if defined(__OFFLOAD_CUDA)
      elpa_kernel = elpa_2stage_real_nvidia_gpu
#endif
#if defined(__OFFLOAD_HIP)
      elpa_kernel = elpa_2stage_real_amd_gpu
#endif
#if defined(__OFFLOAD_OPENCL)
      elpa_kernel = elpa_2stage_real_intel_gpu_sycl
#endif
#endif

      ! If we could not find a suitable kernel then use ELPA_2STAGE_REAL_DEFAULT.
      IF (elpa_kernel == elpa_2stage_real_invalid) elpa_kernel = elpa_2stage_real_default
#else
      mark_used(requested_kernel)
#endif
   END SUBROUTINE set_elpa_kernel
268
! **************************************************************************************************
!> \brief Driver routine to diagonalize a FM matrix with the ELPA library.
!>        The matrix is handed to cp_fm_redistribute_start first, ELPA is run on the processes
!>        that hold the redistributed matrix, and cp_fm_redistribute_end returns the results.
!>        Without ELPA support the eigenvalues are zeroed and the routine aborts.
!> \param matrix the matrix that is diagonalized
!> \param eigenvectors eigenvectors of the input matrix
!> \param eigenvalues eigenvalues of the input matrix
! **************************************************************************************************
   SUBROUTINE cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
      TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
      REAL(kind=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues

#if !defined(__ELPA)
      eigenvalues(:) = 0.0_dp
      mark_used(matrix)
      mark_used(eigenvectors)

      cpabort("CP2K compiled without the ELPA library.")
#else
      CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_diag_elpa'

      INTEGER                                            :: timer_handle
      TYPE(cp_fm_redistribute_info)                      :: redist_details
      TYPE(cp_fm_type)                                   :: work_eigenvectors, work_matrix

      CALL timeset(routinen, timer_handle)

      ! Heuristics decide on how many CPUs the diagonalization should run and whether the input
      ! has to be redistributed for that. work_matrix receives the redistributed matrix; it is
      ! just a pointer to the original matrix if no redistribution is required.
      ! With ELPA, we have to make sure that all processor columns have nonzero width.
      CALL cp_fm_redistribute_start(matrix, eigenvectors, work_matrix, work_eigenvectors, &
                                    caller_is_elpa=.true., redist_info=redist_details)

      ! Only the CPUs that hold a part of the new matrix call ELPA.
      IF (ASSOCIATED(work_matrix%matrix_struct)) THEN
         CALL cp_fm_diag_elpa_base(work_matrix, work_eigenvectors, eigenvalues, redist_details)
      END IF

      ! Send the results back to the original distribution and clean up.
      CALL cp_fm_redistribute_end(matrix, eigenvectors, eigenvalues, work_matrix, work_eigenvectors)

      CALL timestop(timer_handle)
#endif
   END SUBROUTINE cp_fm_diag_elpa
312
313#if defined(__ELPA)
! **************************************************************************************************
!> \brief Actual routine that calls ELPA to diagonalize a FM matrix.
!> \param matrix the matrix that is diagonalized
!> \param eigenvectors eigenvectors of the input matrix
!> \param eigenvalues eigenvalues of the input matrix
!> \param rdinfo redistribution info; only written to the output unit when the check for
!>        processor columns of zero width fails
!> \note If elpa_print is set and the QR step is used while elpa_qr_unsafe is active, the
!>       matrix is diagonalized a second time without QR and the routine aborts if the two
!>       sets of eigenvalues differ by more than th.
! **************************************************************************************************
   SUBROUTINE cp_fm_diag_elpa_base(matrix, eigenvectors, eigenvalues, rdinfo)

      TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
      REAL(kind=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
      TYPE(cp_fm_redistribute_info), INTENT(IN)          :: rdinfo

      CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_diag_elpa_base'

      INTEGER                                            :: handle

      CLASS(elpa_t), POINTER                             :: elpa_obj
      CHARACTER(len=default_string_length)               :: kernel_name
      TYPE(mp_comm_type)                                 :: group
      INTEGER                                            :: i, &
                                                            mypcol, myprow, n, &
                                                            n_rows, n_cols, &
                                                            nblk, neig, io_unit, &
                                                            success
      LOGICAL                                            :: use_qr, check_eigenvalues
      REAL(kind=dp), DIMENSION(:), ALLOCATABLE           :: eval, eval_noqr
      TYPE(cp_blacs_env_type), POINTER                   :: context
      TYPE(cp_fm_type)                                   :: matrix_noqr, eigenvectors_noqr
      TYPE(cp_logger_type), POINTER                      :: logger
      ! Largest accepted deviation between eigenvalues computed with and without QR
      REAL(kind=dp), PARAMETER                           :: th = 1.0e-14_dp
      INTEGER, DIMENSION(:), POINTER                     :: ncol_locals
#if defined(__HAS_IEEE_EXCEPTIONS)
      ! Halting modes saved while ELPA runs with halting disabled
      LOGICAL, DIMENSION(5)                              :: halt
#endif

      CALL timeset(routinen, handle)
      NULLIFY (logger)
      NULLIFY (ncol_locals)

      check_eigenvalues = .false.

      logger => cp_get_default_logger()
      io_unit = cp_logger_get_default_io_unit(logger)

      n = matrix%matrix_struct%nrow_global
      context => matrix%matrix_struct%context
      group = matrix%matrix_struct%para_env

      myprow = context%mepos(1)
      mypcol = context%mepos(2)

      ! elpa needs the full matrix
      CALL cp_fm_uplo_to_full(matrix, eigenvectors)

      CALL cp_fm_struct_get(matrix%matrix_struct, &
                            local_leading_dimension=n_rows, &
                            ncol_local=n_cols, &
                            nrow_block=nblk, &
                            ncol_locals=ncol_locals)

      ! ELPA would fail in 'solve_tridi' without a useful error message if a processor column
      ! has zero width, so abort here with diagnostics instead.
      IF (io_unit > 0 .AND. any(ncol_locals == 0)) THEN
         CALL rdinfo%write(io_unit)
         CALL cp_fm_write_info(matrix, io_unit)
         cpabort("ELPA [pre-fail]: Problem contains processor column with zero width.")
      END IF

      neig = SIZE(eigenvalues, 1)
      ! Decide if matrix is suitable for ELPA to use QR
      ! The definition of what is considered a suitable matrix depends on the ELPA version
      ! The relevant ELPA files to check are
      !  - Proper matrix order:  src/elpa2/elpa2_template.F90
      !  - Proper block size:    test/Fortran/test.F90
      ! Note that the names of these files might change in different ELPA versions
      ! Matrix order must be even
      use_qr = elpa_qr .AND. (modulo(n, 2) == 0)
      ! Matrix order and block size must be greater than or equal to 64
      IF (.NOT. elpa_qr_unsafe) &
         use_qr = use_qr .AND. (n >= 64) .AND. (nblk >= 64)

      ! Check if eigenvalues computed with elpa_qr_unsafe should be verified
      IF (use_qr .AND. elpa_qr_unsafe .AND. elpa_print) &
         check_eigenvalues = .true.

      CALL matrix%matrix_struct%para_env%bcast(check_eigenvalues)

      IF (check_eigenvalues) THEN
         ! Allocate and initialize needed temporaries to compute eigenvalues without ELPA QR
         ALLOCATE (eval_noqr(n))
         CALL cp_fm_create(matrix=matrix_noqr, matrix_struct=matrix%matrix_struct)
         CALL cp_fm_to_fm(matrix, matrix_noqr)
         CALL cp_fm_create(matrix=eigenvectors_noqr, matrix_struct=eigenvectors%matrix_struct)
         CALL cp_fm_uplo_to_full(matrix_noqr, eigenvectors_noqr)
      END IF

      IF (io_unit > 0 .AND. elpa_print) THEN
         WRITE (unit=io_unit, fmt="(/,T2,A)") &
            "ELPA| Matrix diagonalization information"

         ! Find name for given kernel id.
         ! In case of ELPA_2STAGE_REAL_DEFAULT was used it might not be in our elpa_kernel_ids list,
         ! therefore the numeric id serves as the fallback name.
         kernel_name = "id: "//trim(adjustl(cp_to_string(elpa_kernel)))
         DO i = 1, SIZE(elpa_kernel_ids)
            IF (elpa_kernel_ids(i) == elpa_kernel) THEN
               kernel_name = elpa_kernel_names(i)
            END IF
         END DO

         WRITE (unit=io_unit, fmt="(T2,A,T71,I10)") &
            "ELPA| Matrix order (NA) ", n, &
            "ELPA| Matrix block size (NBLK) ", nblk, &
            "ELPA| Number of eigenvectors (NEV) ", neig, &
            "ELPA| Local rows (LOCAL_NROWS) ", n_rows, &
            "ELPA| Local columns (LOCAL_NCOLS) ", n_cols
         WRITE (unit=io_unit, fmt="(T2,A,T61,A20)") &
            "ELPA| Kernel ", adjustr(trim(kernel_name))
         IF (elpa_qr) THEN
            WRITE (unit=io_unit, fmt="(T2,A,T78,A3)") &
               "ELPA| QR step requested ", "YES"
         ELSE
            WRITE (unit=io_unit, fmt="(T2,A,T79,A2)") &
               "ELPA| QR step requested ", "NO"
         END IF

         IF (elpa_qr) THEN
            IF (use_qr) THEN
               WRITE (unit=io_unit, fmt="(T2,A,T78,A3)") &
                  "ELPA| Matrix is suitable for QR ", "YES"
            ELSE
               WRITE (unit=io_unit, fmt="(T2,A,T79,A2)") &
                  "ELPA| Matrix is suitable for QR ", "NO"
            END IF
            IF (.NOT. use_qr) THEN
               ! Report why the QR step was rejected
               IF (modulo(n, 2) /= 0) THEN
                  WRITE (unit=io_unit, fmt="(T2,A)") &
                     "ELPA| Matrix order is NOT even"
               END IF
               IF ((nblk < 64) .AND. (.NOT. elpa_qr_unsafe)) THEN
                  WRITE (unit=io_unit, fmt="(T2,A)") &
                     "ELPA| Matrix block size is NOT 64 or greater"
               END IF
            ELSE
               IF ((nblk < 64) .AND. elpa_qr_unsafe) THEN
                  WRITE (unit=io_unit, fmt="(T2,A)") &
                     "ELPA| Matrix block size check was bypassed"
               END IF
            END IF
         END IF
      END IF

      ! the full eigenvalues vector is needed
      ALLOCATE (eval(n))

      elpa_obj => elpa_allocate()

      ! Describe the problem size, the local storage and the process grid to ELPA
      CALL elpa_obj%set("na", n, success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("nev", neig, success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("local_nrows", n_rows, success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("local_ncols", n_cols, success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("nblk", nblk, success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("mpi_comm_parent", group%get_handle(), success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("process_row", myprow, success)
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("process_col", mypcol, success)
      cpassert(success == elpa_ok)

      success = elpa_obj%setup()
      cpassert(success == elpa_ok)

      CALL elpa_obj%set("solver", &
                        merge(elpa_solver_1stage, elpa_solver_2stage, elpa_one_stage), &
                        success)
      IF (success /= elpa_ok) &
         cpabort("Setting solver for ELPA failed")

      ! enabling the GPU must happen before setting the kernel
      SELECT CASE (elpa_kernel)
      CASE (elpa_2stage_real_nvidia_gpu)
         CALL elpa_obj%set("nvidia-gpu", 1, success)
         cpassert(success == elpa_ok)
      CASE (elpa_2stage_real_amd_gpu)
         CALL elpa_obj%set("amd-gpu", 1, success)
         cpassert(success == elpa_ok)
      CASE (elpa_2stage_real_intel_gpu_sycl)
         CALL elpa_obj%set("intel-gpu", 1, success)
         cpassert(success == elpa_ok)
      END SELECT

      ! Kernel and QR step are only configured for the two-stage solver
      IF (.NOT. elpa_one_stage) THEN
         CALL elpa_obj%set("real_kernel", elpa_kernel, success)
         cpwarn_if(success /= elpa_ok, "Setting real_kernel for ELPA failed")

         IF (use_qr) THEN
            CALL elpa_obj%set("qr", 1, success)
            cpassert(success == elpa_ok)
         END IF
      END IF

      ! Set number of threads only when ELPA was built with OpenMP support.
      IF (elpa_obj%can_set("omp_threads", omp_get_max_threads()) == elpa_ok) THEN
         CALL elpa_obj%set("omp_threads", omp_get_max_threads(), success)
         cpassert(success == elpa_ok)
      END IF

      ! ELPA solver: calculate the Eigenvalues/vectors
      ! Halting on IEEE exceptions is switched off for the duration of the ELPA call and
      ! restored afterwards.
#if defined(__HAS_IEEE_EXCEPTIONS)
      CALL ieee_get_halting_mode(ieee_all, halt)
      CALL ieee_set_halting_mode(ieee_all, .false.)
#endif
      CALL elpa_obj%eigenvectors(matrix%local_data, eval, eigenvectors%local_data, success)
#if defined(__HAS_IEEE_EXCEPTIONS)
      CALL ieee_set_halting_mode(ieee_all, halt)
#endif

      IF (success /= elpa_ok) &
         cpabort("ELPA failed to diagonalize a matrix")

      IF (check_eigenvalues) THEN
         ! run again without QR
         CALL elpa_obj%set("qr", 0, success)
         cpassert(success == elpa_ok)

         ! The verification run gets the same protection against halting on IEEE
         ! exceptions as the primary run above.
#if defined(__HAS_IEEE_EXCEPTIONS)
         CALL ieee_get_halting_mode(ieee_all, halt)
         CALL ieee_set_halting_mode(ieee_all, .false.)
#endif
         CALL elpa_obj%eigenvectors(matrix_noqr%local_data, eval_noqr, eigenvectors_noqr%local_data, success)
#if defined(__HAS_IEEE_EXCEPTIONS)
         CALL ieee_set_halting_mode(ieee_all, halt)
#endif
         IF (success /= elpa_ok) &
            cpabort("ELPA failed to diagonalize a matrix even without QR decomposition")

         IF (any(abs(eval(1:neig) - eval_noqr(1:neig)) > th)) &
            cpabort("ELPA failed to calculate Eigenvalues with ELPA's QR decomposition")

         DEALLOCATE (eval_noqr)
         CALL cp_fm_release(matrix_noqr)
         CALL cp_fm_release(eigenvectors_noqr)
      END IF

      CALL elpa_deallocate(elpa_obj, success)
      cpassert(success == elpa_ok)

      ! Only the requested number of eigenvalues is handed back to the caller
      eigenvalues(1:neig) = eval(1:neig)
      DEALLOCATE (eval)

      CALL timestop(handle)

   END SUBROUTINE cp_fm_diag_elpa_base
571#endif
572
573END MODULE cp_fm_elpa
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always returns a non-negative number. https://gcc....
methods related to the blacs parallel environment
Basic linear algebra operations for full matrices.
subroutine, public cp_fm_uplo_to_full(matrix, work, uplo)
given a triangular matrix according to uplo, computes the corresponding full matrix
Auxiliary tools to redistribute cp_fm_type matrices before and after diagonalization....
subroutine, public cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)
Redistributes eigenvectors and eigenvalues back to the original communicator group.
subroutine, public cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, caller_is_elpa, redist_info)
Determines the optimal number of CPUs for matrix diagonalization and redistributes the input matrices...
Wrapper for ELPA.
Definition cp_fm_elpa.F:12
subroutine, public set_elpa_kernel(requested_kernel)
Sets the active ELPA kernel.
Definition cp_fm_elpa.F:224
logical, save, public elpa_qr
Definition cp_fm_elpa.F:161
subroutine, public cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
Driver routine to diagonalize a FM matrix with the ELPA library.
Definition cp_fm_elpa.F:276
logical, save, public elpa_print
Definition cp_fm_elpa.F:161
subroutine, public finalize_elpa_library()
Finalize the ELPA library.
Definition cp_fm_elpa.F:212
logical, save, public elpa_one_stage
Definition cp_fm_elpa.F:168
subroutine, public initialize_elpa_library(one_stage, qr, should_print)
Initialize the ELPA library.
Definition cp_fm_elpa.F:192
represent the structure of a full matrix
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp, nrow, ncol, set_zero)
creates a new full matrix with the given structure
subroutine, public cp_fm_write_info(matrix, io_unit)
Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
various routines to log and control the output. The idea is that decisions about where to log should ...
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public default_string_length
Definition kinds.F:57
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
integer, parameter, public machine_x86_avx
Definition machine.F:69
integer, parameter, public machine_x86_sse4
Definition machine.F:69
integer, parameter, public machine_cpu_generic
Definition machine.F:69
integer, parameter, public machine_x86_avx2
Definition machine.F:69
pure integer function, public m_cpuid()
Target architecture or instruction set extension according to CPU-check at runtime.
Definition machine.F:200
integer, parameter, public machine_x86
Definition machine.F:69
Interface to the message passing library MPI.
represent a blacs multidimensional parallel environment (for the mpi counterpart see cp_paratypes/m...
represent a full matrix
type of a logger, at the moment it contains just a print level starting at which level it should be l...