(git:8ebf9ad)
Loading...
Searching...
No Matches
cp_fm_elpa.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Wrapper for ELPA
10!> \author Ole Schuett
11! **************************************************************************************************
14 USE machine, ONLY: m_cpuid, &
26 USE cp_fm_types, ONLY: cp_fm_type, &
36 USE omp_lib, ONLY: omp_get_max_threads
37#if defined(__HAS_IEEE_EXCEPTIONS)
38 USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
39 ieee_set_halting_mode, &
40 ieee_all
41#endif
42#include "../base/base_uses.f90"
43
44#if defined(__ELPA)
45 USE elpa_constants, ONLY: elpa_solver_1stage, elpa_solver_2stage, elpa_ok, &
46 elpa_2stage_real_invalid, &
47 elpa_2stage_real_default, &
48 elpa_2stage_real_generic, &
49 elpa_2stage_real_generic_simple, &
50 elpa_2stage_real_bgp, &
51 elpa_2stage_real_bgq, &
52 elpa_2stage_real_sse_assembly, &
53 elpa_2stage_real_sse_block2, &
54 elpa_2stage_real_sse_block4, &
55 elpa_2stage_real_sse_block6, &
56 elpa_2stage_real_avx_block2, &
57 elpa_2stage_real_avx_block4, &
58 elpa_2stage_real_avx_block6, &
59 elpa_2stage_real_avx2_block2, &
60 elpa_2stage_real_avx2_block4, &
61 elpa_2stage_real_avx2_block6, &
62 elpa_2stage_real_avx512_block2, &
63 elpa_2stage_real_avx512_block4, &
64 elpa_2stage_real_avx512_block6, &
65 elpa_2stage_real_nvidia_gpu, &
66 elpa_2stage_real_amd_gpu, &
67 elpa_2stage_real_intel_gpu_sycl
68
69 USE elpa, ONLY: elpa_t, elpa_init, elpa_uninit, &
70 elpa_allocate, elpa_deallocate
71#endif
72
73 IMPLICIT NONE
74
75 PRIVATE
76
77 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_elpa'
78
79#if defined(__ELPA)
80 INTEGER, DIMENSION(21), PARAMETER :: elpa_kernel_ids = [ &
81 elpa_2stage_real_invalid, & ! auto
82 elpa_2stage_real_generic, &
83 elpa_2stage_real_generic_simple, &
84 elpa_2stage_real_bgp, &
85 elpa_2stage_real_bgq, &
86 elpa_2stage_real_sse_assembly, &
87 elpa_2stage_real_sse_block2, &
88 elpa_2stage_real_sse_block4, &
89 elpa_2stage_real_sse_block6, &
90 elpa_2stage_real_avx_block2, &
91 elpa_2stage_real_avx_block4, &
92 elpa_2stage_real_avx_block6, &
93 elpa_2stage_real_avx2_block2, &
94 elpa_2stage_real_avx2_block4, &
95 elpa_2stage_real_avx2_block6, &
96 elpa_2stage_real_avx512_block2, &
97 elpa_2stage_real_avx512_block4, &
98 elpa_2stage_real_avx512_block6, &
99 elpa_2stage_real_nvidia_gpu, &
100 elpa_2stage_real_amd_gpu, &
101 elpa_2stage_real_intel_gpu_sycl]
102
103 CHARACTER(len=14), DIMENSION(SIZE(elpa_kernel_ids)), PARAMETER :: &
104 elpa_kernel_names = [character(len=14) :: &
105 "AUTO", &
106 "GENERIC", &
107 "GENERIC_SIMPLE", &
108 "BGP", &
109 "BGQ", &
110 "SSE", &
111 "SSE_BLOCK2", &
112 "SSE_BLOCK4", &
113 "SSE_BLOCK6", &
114 "AVX_BLOCK2", &
115 "AVX_BLOCK4", &
116 "AVX_BLOCK6", &
117 "AVX2_BLOCK2", &
118 "AVX2_BLOCK4", &
119 "AVX2_BLOCK6", &
120 "AVX512_BLOCK2", &
121 "AVX512_BLOCK4", &
122 "AVX512_BLOCK6", &
123 "NVIDIA_GPU", &
124 "AMD_GPU", &
125 "INTEL_GPU"]
126
127 CHARACTER(len=44), DIMENSION(SIZE(elpa_kernel_ids)), PARAMETER :: &
128 elpa_kernel_descriptions = [character(len=44) :: &
129 "Automatically selected kernel", &
130 "Generic kernel", &
131 "Simplified generic kernel", &
132 "Kernel optimized for IBM BGP", &
133 "Kernel optimized for IBM BGQ", &
134 "Kernel optimized for x86_64/SSE", &
135 "Kernel optimized for x86_64/SSE (block=2)", &
136 "Kernel optimized for x86_64/SSE (block=4)", &
137 "Kernel optimized for x86_64/SSE (block=6)", &
138 "Kernel optimized for Intel AVX (block=2)", &
139 "Kernel optimized for Intel AVX (block=4)", &
140 "Kernel optimized for Intel AVX (block=6)", &
141 "Kernel optimized for Intel AVX2 (block=2)", &
142 "Kernel optimized for Intel AVX2 (block=4)", &
143 "Kernel optimized for Intel AVX2 (block=6)", &
144 "Kernel optimized for Intel AVX-512 (block=2)", &
145 "Kernel optimized for Intel AVX-512 (block=4)", &
146 "Kernel optimized for Intel AVX-512 (block=6)", &
147 "Kernel targeting Nvidia GPUs", &
148 "Kernel targeting AMD GPUs", &
149 "Kernel targeting Intel GPUs"]
150#else
151 INTEGER, DIMENSION(1), PARAMETER :: elpa_kernel_ids = [-1]
152 CHARACTER(len=14), DIMENSION(1), PARAMETER :: elpa_kernel_names = ["AUTO"]
153 CHARACTER(len=44), DIMENSION(1), PARAMETER :: elpa_kernel_descriptions = ["Automatically selected kernel"]
154#endif
155
156#if defined(__ELPA)
157 INTEGER, SAVE :: elpa_kernel = elpa_kernel_ids(1) ! auto
158#endif
159
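   ! elpa_qr_unsafe: if .TRUE., the QR step is not restricted to matrices whose order and
   !                 block size are both >= 64 (only the even-order requirement remains)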
161 LOGICAL, SAVE :: elpa_should_print = .false., &
162 elpa_qr_unsafe = .true., &
163 elpa_qr = .false.
164
165#if defined(__OFFLOAD_OPENCL)
166 LOGICAL, SAVE :: elpa_one_stage = .true.
167#else
168 LOGICAL, SAVE :: elpa_one_stage = .false.
169#endif
170
171 PUBLIC :: cp_fm_diag_elpa, &
174 elpa_qr, &
176 elpa_kernel_ids, &
177 elpa_kernel_names, &
178 elpa_kernel_descriptions, &
181
182CONTAINS
183
184! **************************************************************************************************
185!> \brief Initialize the ELPA library
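!> \param one_stage if present, overrides the module flag elpa_one_stage
!>                  (.TRUE.: 1-stage solver, .FALSE.: 2-stage solver)
!> \param qr if present, overrides the module flag elpa_qr (request the QR step)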
188! **************************************************************************************************
SUBROUTINE initialize_elpa_library(one_stage, qr)
   ! Initializes the ELPA library and optionally overrides the module-level solver settings.
   !   one_stage  when present, copied into elpa_one_stage; cp_fm_diag_elpa_base then selects
   !              the 1-stage solver for .TRUE. and the 2-stage solver for .FALSE.
   !   qr         when present, copied into elpa_qr, i.e. whether the QR step is requested
   ! Aborts when elpa_init rejects API version 20180525, and always aborts in builds
   ! without ELPA support.
   LOGICAL, INTENT(IN), OPTIONAL                      :: one_stage, qr

#if defined(__ELPA)
   IF (elpa_init(20180525) /= elpa_ok) &
      cpabort("The linked ELPA library does not support the required API version")
   ! Absent arguments leave the current module settings untouched.
   IF (PRESENT(one_stage)) elpa_one_stage = one_stage
   IF (PRESENT(qr)) elpa_qr = qr
#else
   ! The dummies are not referenced in this branch; mark them as used like the
   ! other wrappers of this module do for their arguments.
   mark_used(one_stage)
   mark_used(qr)
   cpabort("Initialization of ELPA library requested but not enabled during build")
#endif
END SUBROUTINE initialize_elpa_library
201
202! **************************************************************************************************
203!> \brief Finalize the ELPA library
204! **************************************************************************************************
206#if defined(__ELPA)
207 CALL elpa_uninit()
208#else
209 cpabort("Finalization of ELPA library requested but not enabled during build")
210#endif
211 END SUBROUTINE finalize_elpa_library
212
213! **************************************************************************************************
214!> \brief Sets the active ELPA kernel.
215!> \param requested_kernel one of the elpa_kernel_ids
216! **************************************************************************************************
SUBROUTINE set_elpa_kernel(requested_kernel)
   ! Stores the requested kernel id in the module variable elpa_kernel. The id
   ! elpa_2stage_real_invalid stands for "AUTO" and is resolved here to a concrete kernel:
   ! first by the CPU reported by m_cpuid(), then overridden by a GPU kernel when an
   ! offload backend is compiled in, and finally falling back to elpa_2stage_real_default.
   INTEGER, INTENT(IN)                                :: requested_kernel

#if defined(__ELPA)
   INTEGER                                            :: detected_cpu
   LOGICAL                                            :: is_x86_simd

   elpa_kernel = requested_kernel

   ! An explicitly chosen kernel needs no further resolution.
   IF (elpa_kernel /= elpa_2stage_real_invalid) RETURN

   ! AUTO: pick a CPU kernel matching the detected x86 instruction set, if any.
   detected_cpu = m_cpuid()
   is_x86_simd = (detected_cpu > machine_cpu_generic) .AND. (detected_cpu <= machine_x86)
   IF (is_x86_simd) THEN
      IF (detected_cpu == machine_x86_sse4) THEN
         elpa_kernel = elpa_2stage_real_sse_block4
      ELSE IF (detected_cpu == machine_x86_avx) THEN
         elpa_kernel = elpa_2stage_real_avx_block4
      ELSE IF (detected_cpu == machine_x86_avx2) THEN
         elpa_kernel = elpa_2stage_real_avx2_block4
      ELSE
         ! Every other id inside the x86 range is served by the AVX-512 kernel.
         elpa_kernel = elpa_2stage_real_avx512_block4
      END IF
   END IF

   ! A compiled-in GPU backend takes precedence over the CPU kernel chosen above.
#if !defined(__NO_OFFLOAD_ELPA)
#if defined(__OFFLOAD_CUDA)
   elpa_kernel = elpa_2stage_real_nvidia_gpu
#endif
#if defined(__OFFLOAD_HIP)
   elpa_kernel = elpa_2stage_real_amd_gpu
#endif
#if defined(__OFFLOAD_OPENCL)
   elpa_kernel = elpa_2stage_real_intel_gpu_sycl
#endif
#endif

   ! Nothing suitable was found: let ELPA use ELPA_2STAGE_REAL_DEFAULT.
   IF (elpa_kernel == elpa_2stage_real_invalid) elpa_kernel = elpa_2stage_real_default
#else
   mark_used(requested_kernel)
#endif
END SUBROUTINE set_elpa_kernel
262
263! **************************************************************************************************
264!> \brief Sets a flag that determines if additional information about the ELPA diagonalization
265!> should be printed when the diagonalization routine is called.
266!> \param flag the logical flag
267! **************************************************************************************************
SUBROUTINE set_elpa_print(flag)
   ! Records in the module switch elpa_should_print whether cp_fm_diag_elpa_base
   ! should write its "ELPA|" report for each diagonalization.
   LOGICAL, INTENT(IN)                                :: flag

   elpa_should_print = flag

END SUBROUTINE set_elpa_print
273
274! **************************************************************************************************
275!> \brief Driver routine to diagonalize a FM matrix with the ELPA library.
276!> \param matrix the matrix that is diagonalized
277!> \param eigenvectors eigenvectors of the input matrix
278!> \param eigenvalues eigenvalues of the input matrix
279! **************************************************************************************************
SUBROUTINE cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
   ! Public entry point for an ELPA diagonalization of a full matrix: redistributes the
   ! input if required, runs cp_fm_diag_elpa_base on the processes that hold the
   ! redistributed matrix, and sends the results back to the original distribution.
   ! Without ELPA support the eigenvalues are zeroed and the routine aborts.
   TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
   REAL(kind=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues

#if defined(__ELPA)
   CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_diag_elpa'

   INTEGER                                            :: timer_handle
   TYPE(cp_fm_redistribute_info)                      :: redist_report
   TYPE(cp_fm_type)                                   :: evecs_redist, matrix_redist

   CALL timeset(routinen, timer_handle)

   ! Heuristics decide on how many CPUs the diagonalization should run. When no
   ! redistribution is needed, matrix_redist merely points to the original matrix.
   ! For ELPA every processor column must end up with nonzero width.
   CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_redist, evecs_redist, &
                                 caller_is_elpa=.true., redist_info=redist_report)

   ! Only the processes that own a part of the redistributed matrix call ELPA.
   IF (ASSOCIATED(matrix_redist%matrix_struct)) THEN
      CALL cp_fm_diag_elpa_base(matrix_redist, evecs_redist, eigenvalues, redist_report)
   END IF

   ! Bring eigenvectors and eigenvalues back to the original layout and clean up.
   CALL cp_fm_redistribute_end(matrix, eigenvectors, eigenvalues, matrix_redist, evecs_redist)

   CALL timestop(timer_handle)
#else
   mark_used(matrix)
   mark_used(eigenvectors)
   eigenvalues(:) = 0.0_dp

   cpabort("CP2K compiled without the ELPA library.")
#endif
END SUBROUTINE cp_fm_diag_elpa
317
318#if defined(__ELPA)
319! **************************************************************************************************
320!> \brief Actual routine that calls ELPA to diagonalize a FM matrix.
321!> \param matrix the matrix that is diagonalized
322!> \param eigenvectors eigenvectors of the input matrix
323!> \param eigenvalues eigenvalues of the input matrix
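!> \param rdinfo redistribution info; written to the output unit before aborting when a
!>               processor column of zero width is detected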
325! **************************************************************************************************
SUBROUTINE cp_fm_diag_elpa_base(matrix, eigenvectors, eigenvalues, rdinfo)
   ! Configures an ELPA object from the layout of the given full matrix and the module
   ! settings (elpa_one_stage, elpa_kernel, elpa_qr, elpa_qr_unsafe, elpa_should_print),
   ! then calls ELPA to compute eigenvalues and eigenvectors.
   !   matrix        matrix to diagonalize; completed to a full matrix before the solve
   !   eigenvectors  used as work space for the completion, then receives the eigenvectors
   !   eigenvalues   receives the first SIZE(eigenvalues) computed eigenvalues
   !   rdinfo        redistribution info, only written out for the zero-width-column abort
   ! Aborts on a processor column of zero width, on any ELPA failure, and when the optional
   ! cross-check of the QR result against a run without QR differs by more than th.

   TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
   REAL(kind=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
   TYPE(cp_fm_redistribute_info), INTENT(IN)          :: rdinfo

   CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_diag_elpa_base'

   INTEGER                                            :: handle

   CLASS(elpa_t), POINTER                             :: elpa_obj
   CHARACTER(len=default_string_length)               :: kernel_name
   TYPE(mp_comm_type)                                 :: group
   INTEGER                                            :: i, &
                                                         mypcol, myprow, n, &
                                                         n_rows, n_cols, &
                                                         nblk, neig, io_unit, &
                                                         success
   LOGICAL                                            :: use_qr, check_eigenvalues
   REAL(kind=dp), DIMENSION(:), ALLOCATABLE           :: eval, eval_noqr
   TYPE(cp_blacs_env_type), POINTER                   :: context
   TYPE(cp_fm_type)                                   :: matrix_noqr, eigenvectors_noqr
   TYPE(cp_logger_type), POINTER                      :: logger
   ! Largest tolerated absolute difference between eigenvalues with and without QR
   REAL(kind=dp), PARAMETER                           :: th = 1.0e-14_dp
   INTEGER, DIMENSION(:), POINTER                     :: ncol_locals
#if defined(__HAS_IEEE_EXCEPTIONS)
   ! Saved halting modes, restored after each ELPA solve
   LOGICAL, DIMENSION(5)                              :: halt
#endif

   CALL timeset(routinen, handle)
   NULLIFY (logger)
   NULLIFY (ncol_locals)

   check_eigenvalues = .false.

   logger => cp_get_default_logger()
   io_unit = cp_logger_get_default_io_unit(logger)

   n = matrix%matrix_struct%nrow_global
   context => matrix%matrix_struct%context
   group = matrix%matrix_struct%para_env

   myprow = context%mepos(1)
   mypcol = context%mepos(2)

   ! elpa needs the full matrix
   CALL cp_fm_uplo_to_full(matrix, eigenvectors)

   CALL cp_fm_struct_get(matrix%matrix_struct, &
                         local_leading_dimension=n_rows, &
                         ncol_local=n_cols, &
                         nrow_block=nblk, &
                         ncol_locals=ncol_locals)

   ! ELPA will fail in 'solve_tridi', with no useful error message, fail earlier
   IF (io_unit > 0 .AND. any(ncol_locals == 0)) THEN
      CALL rdinfo%write(io_unit)
      CALL cp_fm_write_info(matrix, io_unit)
      cpabort("ELPA [pre-fail]: Problem contains processor column with zero width.")
   END IF

   neig = SIZE(eigenvalues, 1)
   ! Decide if matrix is suitable for ELPA to use QR
   ! The definition of what is considered a suitable matrix depends on the ELPA version
   ! The relevant ELPA files to check are
   !     - Proper matrix order:  src/elpa2/elpa2_template.F90
   !     - Proper block size:    test/Fortran/test.F90
   ! Note that the names of these files might change in different ELPA versions
   ! Matrix order must be even
   ! NOTE(review): use_qr does not take elpa_one_stage into account although the "qr"
   ! option is only set for the 2-stage solver below. With the 1-stage solver the report
   ! can therefore state that QR is suitable, and the verification run repeats the same
   ! solve - confirm whether this is intended.
   use_qr = elpa_qr .AND. (modulo(n, 2) == 0)
   ! Matrix order and block size must be greater than or equal to 64
   IF (.NOT. elpa_qr_unsafe) &
      use_qr = use_qr .AND. (n >= 64) .AND. (nblk >= 64)

   ! Check if eigenvalues computed with elpa_qr_unsafe should be verified
   IF (use_qr .AND. elpa_qr_unsafe .AND. elpa_should_print) &
      check_eigenvalues = .true.

   CALL matrix%matrix_struct%para_env%bcast(check_eigenvalues)

   IF (check_eigenvalues) THEN
      ! Allocate and initialize needed temporaries to compute eigenvalues without ELPA QR
      ALLOCATE (eval_noqr(n))
      CALL cp_fm_create(matrix=matrix_noqr, matrix_struct=matrix%matrix_struct)
      CALL cp_fm_to_fm(matrix, matrix_noqr)
      CALL cp_fm_create(matrix=eigenvectors_noqr, matrix_struct=eigenvectors%matrix_struct)
      CALL cp_fm_uplo_to_full(matrix_noqr, eigenvectors_noqr)
   END IF

   IF (io_unit > 0 .AND. elpa_should_print) THEN
      WRITE (unit=io_unit, fmt="(/,T2,A)") &
         "ELPA| Matrix diagonalization information"

      ! Find name for given kernel id.
      ! In case of ELPA_2STAGE_REAL_DEFAULT was used it might not be in our elpa_kernel_ids list.
      kernel_name = "id: "//trim(adjustl(cp_to_string(elpa_kernel)))
      DO i = 1, SIZE(elpa_kernel_ids)
         IF (elpa_kernel_ids(i) == elpa_kernel) THEN
            kernel_name = elpa_kernel_names(i)
         END IF
      END DO

      WRITE (unit=io_unit, fmt="(T2,A,T71,I10)") &
         "ELPA| Matrix order (NA) ", n, &
         "ELPA| Matrix block size (NBLK) ", nblk, &
         "ELPA| Number of eigenvectors (NEV) ", neig, &
         "ELPA| Local rows (LOCAL_NROWS) ", n_rows, &
         "ELPA| Local columns (LOCAL_NCOLS) ", n_cols
      WRITE (unit=io_unit, fmt="(T2,A,T61,A20)") &
         "ELPA| Kernel ", adjustr(trim(kernel_name))
      IF (elpa_qr) THEN
         WRITE (unit=io_unit, fmt="(T2,A,T78,A3)") &
            "ELPA| QR step requested ", "YES"
      ELSE
         WRITE (unit=io_unit, fmt="(T2,A,T79,A2)") &
            "ELPA| QR step requested ", "NO"
      END IF

      IF (elpa_qr) THEN
         IF (use_qr) THEN
            WRITE (unit=io_unit, fmt="(T2,A,T78,A3)") &
               "ELPA| Matrix is suitable for QR ", "YES"
         ELSE
            WRITE (unit=io_unit, fmt="(T2,A,T79,A2)") &
               "ELPA| Matrix is suitable for QR ", "NO"
         END IF
         IF (.NOT. use_qr) THEN
            IF (modulo(n, 2) /= 0) THEN
               WRITE (unit=io_unit, fmt="(T2,A)") &
                  "ELPA| Matrix order is NOT even"
            END IF
            IF ((nblk < 64) .AND. (.NOT. elpa_qr_unsafe)) THEN
               WRITE (unit=io_unit, fmt="(T2,A)") &
                  "ELPA| Matrix block size is NOT 64 or greater"
            END IF
         ELSE
            IF ((nblk < 64) .AND. elpa_qr_unsafe) THEN
               WRITE (unit=io_unit, fmt="(T2,A)") &
                  "ELPA| Matrix block size check was bypassed"
            END IF
         END IF
      END IF
   END IF

   ! the full eigenvalues vector is needed
   ALLOCATE (eval(n))

   elpa_obj => elpa_allocate()

   ! Problem size and local layout
   CALL elpa_obj%set("na", n, success)
   cpassert(success == elpa_ok)

   CALL elpa_obj%set("nev", neig, success)
   cpassert(success == elpa_ok)

   CALL elpa_obj%set("local_nrows", n_rows, success)
   cpassert(success == elpa_ok)

   CALL elpa_obj%set("local_ncols", n_cols, success)
   cpassert(success == elpa_ok)

   CALL elpa_obj%set("nblk", nblk, success)
   cpassert(success == elpa_ok)

   ! Communicator and position of this process in the process grid
   CALL elpa_obj%set("mpi_comm_parent", group%get_handle(), success)
   cpassert(success == elpa_ok)

   CALL elpa_obj%set("process_row", myprow, success)
   cpassert(success == elpa_ok)

   CALL elpa_obj%set("process_col", mypcol, success)
   cpassert(success == elpa_ok)

   success = elpa_obj%setup()
   cpassert(success == elpa_ok)

   CALL elpa_obj%set("solver", &
                     merge(elpa_solver_1stage, elpa_solver_2stage, elpa_one_stage), &
                     success)
   cpassert(success == elpa_ok)

   ! enabling the GPU must happen before setting the kernel
   SELECT CASE (elpa_kernel)
   CASE (elpa_2stage_real_nvidia_gpu)
      CALL elpa_obj%set("nvidia-gpu", 1, success)
      cpassert(success == elpa_ok)
   CASE (elpa_2stage_real_amd_gpu)
      CALL elpa_obj%set("amd-gpu", 1, success)
      cpassert(success == elpa_ok)
   CASE (elpa_2stage_real_intel_gpu_sycl)
      CALL elpa_obj%set("intel-gpu", 1, success)
      cpassert(success == elpa_ok)
   END SELECT

   ! Kernel and QR are only configured for the 2-stage solver
   IF (.NOT. elpa_one_stage) THEN
      ! A rejected kernel only triggers a warning; the solve still proceeds
      CALL elpa_obj%set("real_kernel", elpa_kernel, success)
      cpwarn_if(success /= elpa_ok, "Setting real_kernel for ELPA failed")

      IF (use_qr) THEN
         CALL elpa_obj%set("qr", 1, success)
         cpassert(success == elpa_ok)
      END IF
   END IF

   ! Set number of threads only when ELPA was built with OpenMP support.
   IF (elpa_obj%can_set("omp_threads", omp_get_max_threads()) == elpa_ok) THEN
      CALL elpa_obj%set("omp_threads", omp_get_max_threads(), success)
      cpassert(success == elpa_ok)
   END IF

   ! ELPA solver: calculate the Eigenvalues/vectors
   ! IEEE halting is switched off for the duration of the solve and restored afterwards
#if defined(__HAS_IEEE_EXCEPTIONS)
   CALL ieee_get_halting_mode(ieee_all, halt)
   CALL ieee_set_halting_mode(ieee_all, .false.)
#endif
   CALL elpa_obj%eigenvectors(matrix%local_data, eval, eigenvectors%local_data, success)
#if defined(__HAS_IEEE_EXCEPTIONS)
   CALL ieee_set_halting_mode(ieee_all, halt)
#endif

   IF (success /= elpa_ok) &
      cpabort("ELPA failed to diagonalize a matrix")

   IF (check_eigenvalues) THEN
      ! run again without QR
      CALL elpa_obj%set("qr", 0, success)
      cpassert(success == elpa_ok)

      ! Apply the same IEEE halting guard as for the first solve, so that the
      ! verification run cannot trap where the production run is allowed to continue.
#if defined(__HAS_IEEE_EXCEPTIONS)
      CALL ieee_get_halting_mode(ieee_all, halt)
      CALL ieee_set_halting_mode(ieee_all, .false.)
#endif
      CALL elpa_obj%eigenvectors(matrix_noqr%local_data, eval_noqr, &
                                 eigenvectors_noqr%local_data, success)
#if defined(__HAS_IEEE_EXCEPTIONS)
      CALL ieee_set_halting_mode(ieee_all, halt)
#endif
      IF (success /= elpa_ok) &
         cpabort("ELPA failed to diagonalize a matrix even without QR decomposition")

      IF (any(abs(eval(1:neig) - eval_noqr(1:neig)) > th)) &
         cpabort("ELPA failed to calculate Eigenvalues with ELPA's QR decomposition")

      DEALLOCATE (eval_noqr)
      CALL cp_fm_release(matrix_noqr)
      CALL cp_fm_release(eigenvectors_noqr)
   END IF

   CALL elpa_deallocate(elpa_obj, success)
   cpassert(success == elpa_ok)

   ! Hand back only as many eigenvalues as the caller asked for
   eigenvalues(1:neig) = eval(1:neig)
   DEALLOCATE (eval)

   CALL timestop(handle)

END SUBROUTINE cp_fm_diag_elpa_base
575#endif
576
577END MODULE cp_fm_elpa
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
methods related to the blacs parallel environment
Basic linear algebra operations for full matrices.
subroutine, public cp_fm_uplo_to_full(matrix, work, uplo)
given a triangular matrix according to uplo, computes the corresponding full matrix
Auxiliary tools to redistribute cp_fm_type matrices before and after diagonalization....
subroutine, public cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)
Redistributes eigenvectors and eigenvalues back to the original communicator group.
subroutine, public cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, caller_is_elpa, redist_info)
Determines the optimal number of CPUs for matrix diagonalization and redistributes the input matrices...
Wrapper for ELPA.
Definition cp_fm_elpa.F:12
subroutine, public set_elpa_print(flag)
Sets a flag that determines if additional information about the ELPA diagonalization should be printe...
Definition cp_fm_elpa.F:269
subroutine, public set_elpa_kernel(requested_kernel)
Sets the active ELPA kernel.
Definition cp_fm_elpa.F:218
logical, save, public elpa_qr
Definition cp_fm_elpa.F:161
subroutine, public cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
Driver routine to diagonalize a FM matrix with the ELPA library.
Definition cp_fm_elpa.F:281
subroutine, public initialize_elpa_library(one_stage, qr)
Initialize the ELPA library.
Definition cp_fm_elpa.F:190
subroutine, public finalize_elpa_library()
Finalize the ELPA library.
Definition cp_fm_elpa.F:206
logical, save, public elpa_one_stage
Definition cp_fm_elpa.F:168
represent the structure of a full matrix
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp, nrow, ncol, set_zero)
creates a new full matrix with the given structure
subroutine, public cp_fm_write_info(matrix, io_unit)
Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
various routines to log and control the output. The idea is that decisions about where to log should ...
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public default_string_length
Definition kinds.F:57
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
integer, parameter, public machine_x86_avx
Definition machine.F:69
integer, parameter, public machine_x86_sse4
Definition machine.F:69
integer, parameter, public machine_cpu_generic
Definition machine.F:69
integer, parameter, public machine_x86_avx2
Definition machine.F:69
pure integer function, public m_cpuid()
Target architecture or instruction set extension according to CPU-check at runtime.
Definition machine.F:200
integer, parameter, public machine_x86
Definition machine.F:69
Interface to the message passing library MPI.
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
represent a full matrix
type of a logger, at the moment it contains just a print level starting at which level it should be l...