(git:0de0cc2)
cp_fm_diag_utils.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \brief Auxiliary tools to redistribute cp_fm_type matrices before and after diagonalization.
10 !> Heuristics are used to determine the optimal number of CPUs for diagonalization and the
11 !> input matrices are redistributed if necessary
12 !> \par History
13 !> - [01.2018] moved redistribution related code from cp_fm_syevd here
14 !> \author Nico Holmberg [01.2018]
15 ! **************************************************************************************************
19  cp_blacs_env_type
22  cp_fm_struct_type
23  USE cp_fm_types, ONLY: cp_fm_create,&
25  cp_fm_release,&
26  cp_fm_type
29  cp_logger_type
30  USE kinds, ONLY: dp
31  USE mathlib, ONLY: gcd
32  USE message_passing, ONLY: mp_para_env_type
33 #include "../base/base_uses.f90"
34 
35  IMPLICIT NONE
36 
37  PRIVATE
38 
39  CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_diag_utils'
40 
! Record describing one redistribution decision; every integer field defaults to -1 (unset)
   TYPE, PUBLIC :: cp_fm_redistribute_info
      ! order of the matrix (cp_fm_redistribute_start stores the global number of rows here)
      INTEGER :: matrix_order = -1
      ! number of processes before a potential redistribute
      INTEGER :: num_pe_old = -1
      ! number of processes after a potential redistribute
      INTEGER :: num_pe_new = -1
      ! optimal number of processes for the given matrix
      INTEGER :: num_pe_opt = -1
      ! the maximal number of processes such that no column has zero width,
      ! may be < 0 if ignored
      INTEGER :: num_pe_max_nz_col = -1
      ! whether or not the matrix was actually redistributed
      LOGICAL :: redistribute = .FALSE.
   CONTAINS
      ! formatted output of the fields above to a given I/O unit
      PROCEDURE, PASS(self) :: write => cp_fm_redistribute_info_write
   END TYPE cp_fm_redistribute_info
52 
! Bundles the redistribution settings with the temporary objects that exist only while a
! matrix is redistributed
   TYPE cp_fm_redistribute_type
      ! Settings: the two integers enter the rule for the optimal number of CPUs
      INTEGER :: a = -1
      INTEGER :: x = -1
      ! Settings: print information about the redistribution
      LOGICAL :: should_print = .FALSE.
      ! Settings: with ELPA, redistribute to the optimal number of CPUs when that is safe
      LOGICAL :: elpa_force_redistribute = .FALSE.
      ! Temporaries: result and input of the communicator split
      INTEGER, DIMENSION(:), POINTER :: group_distribution => NULL()
      INTEGER, DIMENSION(:), POINTER :: group_partition => NULL()
      ! Temporaries: blacs environment and parallel environment of the new process group
      TYPE(cp_blacs_env_type), POINTER :: blacs_env_new => NULL()
      TYPE(mp_para_env_type), POINTER :: para_env_new => NULL()
   END TYPE cp_fm_redistribute_type

   ! Permanent, module-wide instance of the redistribute type
   TYPE(cp_fm_redistribute_type), PRIVATE, SAVE :: work_redistribute
69 
70  ! Public subroutines
71 
72  PUBLIC :: cp_fm_redistribute_start, &
75 
76 CONTAINS
77 
! **************************************************************************************************
!> \brief Prints the content of a cp_fm_redistribute_info instance as a formatted report
!> \param self the cp_fm_redistribute_info instance to report
!> \param io_unit I/O unit that receives the report
!> \note The report is framed by an empty line before and after. The maximum number of CPUs
!>       with non-zero columns is printed as "<N/A>" when the stored value is negative, and the
!>       number of processes for the redistribution is only printed when self%redistribute is set.
! **************************************************************************************************
   SUBROUTINE cp_fm_redistribute_info_write(self, io_unit)
      CLASS(cp_fm_redistribute_info), INTENT(IN) :: self
      INTEGER, INTENT(IN) :: io_unit

      ! label starting in column 2, right-justified integer in columns 71-80
      CHARACTER(len=*), PARAMETER :: fmt_label_int = "(T2,A,T71,I10)"

      WRITE (UNIT=io_unit, FMT="(A)") ""

      WRITE (UNIT=io_unit, FMT=fmt_label_int) &
         "CP_FM_DIAG| Number of processes over which the matrix is distributed ", self%num_pe_old
      WRITE (UNIT=io_unit, FMT=fmt_label_int) &
         "CP_FM_DIAG| Matrix order ", self%matrix_order
      WRITE (UNIT=io_unit, FMT=fmt_label_int) &
         "CP_FM_DIAG| Optimal number of CPUs ", self%num_pe_opt

      IF (self%num_pe_max_nz_col >= 0) THEN
         WRITE (UNIT=io_unit, FMT=fmt_label_int) &
            "CP_FM_DIAG| Maximum number of CPUs (with non-zero columns): ", self%num_pe_max_nz_col
      ELSE
         ! a negative value means that the quantity was not determined
         WRITE (UNIT=io_unit, FMT="(T2,A,T71,A10)") &
            "CP_FM_DIAG| Maximum number of CPUs (with non-zero columns) ", "<N/A>"
      END IF

      IF (.NOT. self%redistribute) THEN
         WRITE (UNIT=io_unit, FMT="(T2,A)") &
            "CP_FM_DIAG| The matrix will NOT be redistributed"
      ELSE
         WRITE (UNIT=io_unit, FMT=fmt_label_int) &
            "CP_FM_DIAG| Number of processes for the redistribution ", self%num_pe_new
      END IF

      WRITE (UNIT=io_unit, FMT="(A)") ""

   END SUBROUTINE cp_fm_redistribute_info_write
110 
! **************************************************************************************************
!> \brief Releases the temporary storage needed when redistributing arrays
!> \param has_redistributed flag that determines if the calling process holds a part of the
!>        redistributed array; only then is the new blacs environment released
!> \note Only the temporaries of the permanent work instance are reset. The settings stored by
!>       cp_fm_redistribute_init (a, x, should_print, elpa_force_redistribute) are left untouched
!>       so that they remain valid for subsequent diagonalizations.
!> \author Nico Holmberg [01.2018]
! **************************************************************************************************
   SUBROUTINE cp_fm_redistribute_work_finalize(has_redistributed)
      LOGICAL, INTENT(IN) :: has_redistributed

      ! group_distribution is only associated while a redistribution is in flight
      IF (ASSOCIATED(work_redistribute%group_distribution)) THEN
         IF (has_redistributed) THEN
            CALL cp_blacs_env_release(work_redistribute%blacs_env_new)
         END IF
         CALL work_redistribute%para_env_new%free()
         DEALLOCATE (work_redistribute%para_env_new)
         DEALLOCATE (work_redistribute%group_distribution)
         DEALLOCATE (work_redistribute%group_partition)
      END IF

      ! Return the temporaries to their initial (disassociated) state. Assigning a
      ! default-constructed cp_fm_redistribute_type here would also wipe the settings.
      NULLIFY (work_redistribute%group_distribution)
      NULLIFY (work_redistribute%group_partition)
      NULLIFY (work_redistribute%blacs_env_new)
      NULLIFY (work_redistribute%para_env_new)

   END SUBROUTINE cp_fm_redistribute_work_finalize
133 
! **************************************************************************************************
!> \brief Initializes the parameters that determine how to calculate the optimal number of CPUs
!>        for diagonalizing a matrix. The parameters are read from the GLOBAL input section.
!> \param a integer parameter used to define the rule for determining the optimal
!>        number of CPUs for diagonalization
!> \param x integer parameter used to define the rule for determining the optimal
!>        number of CPUs for diagonalization
!> \param should_print flag that determines if information about the redistribution process
!>        should be printed
!> \param elpa_force_redistribute flag that determines if redistribution should always be
!>        performed when the ELPA diagonalization library is in use
!> \note The work instance is reset to its default state before the settings are stored.
!> \author Nico Holmberg [01.2018]
! **************************************************************************************************
   SUBROUTINE cp_fm_redistribute_init(a, x, should_print, elpa_force_redistribute)
      INTEGER, INTENT(IN) :: a, x
      LOGICAL, INTENT(IN) :: should_print, elpa_force_redistribute

      ! Init work: this has to happen BEFORE the settings are stored, because the
      ! default-constructed value also carries the default settings (a = x = -1, flags .FALSE.)
      ! and would otherwise overwrite what the caller passed in.
      work_redistribute = cp_fm_redistribute_type()

      ! Store the settings
      work_redistribute%a = a
      work_redistribute%x = x
      work_redistribute%should_print = should_print
      work_redistribute%elpa_force_redistribute = elpa_force_redistribute

   END SUBROUTINE cp_fm_redistribute_init
159 
! **************************************************************************************************
!> \brief Evaluates the heuristic for the optimal number of CPUs for diagonalizing a matrix
!> \param size the size of the diagonalized matrix
!> \return the optimal number of CPUs
!> \note With the stored settings a and x the result is ((size + a*x - 1)/(a*x))*a in integer
!>       arithmetic.
!> \author Nico Holmberg [01.2018]
! **************************************************************************************************
   PURE FUNCTION cp_fm_diag_get_optimal_ncpu(size) RESULT(ncpu)
      INTEGER, INTENT(IN) :: size
      INTEGER :: ncpu

      INTEGER :: chunk, nchunks

      ! size is divided into chunks of a*x; a partially filled chunk is counted in full
      chunk = work_redistribute%a*work_redistribute%x
      nchunks = (size + chunk - 1)/chunk
      ncpu = nchunks*work_redistribute%a

   END FUNCTION cp_fm_diag_get_optimal_ncpu
174 
175 #if defined(__SCALAPACK)
! **************************************************************************************************
!> \brief Searches for the largest number of CPUs on which a matrix can be distributed such that
!>        no processor column ends up with zero width (currently only needed for ELPA).
!> \param matrix the matrix that will be diagonalized
!> \return the maximum number of CPUs for ELPA
!> \note The current process count is returned unchanged when the present layout has no
!>       zero-width column and the count does not need to be made even. Otherwise the count is
!>       reduced step by step, using the column block size of the input matrix.
!> \author Nico Holmberg [01.2018]
! **************************************************************************************************
   FUNCTION cp_fm_max_ncpu_non_zero_column(matrix) RESULT(ncpu)
      TYPE(cp_fm_type), INTENT(IN) :: matrix
      INTEGER :: ncpu

      INTEGER :: best_gcd, icol, ncol_block, ncol_global, nempty, npcol, nproc_start, &
                 nrow_block, nrow_global, p, q, this_gcd
      INTEGER, DIMENSION(:), POINTER :: ncol_locals
      INTEGER, EXTERNAL :: numroc

      NULLIFY (ncol_locals)

      ! Start from the current layout: drop one process per zero-width column
      CALL cp_fm_get_info(matrix, ncol_locals=ncol_locals, &
                          nrow_global=nrow_global, ncol_global=ncol_global, &
                          nrow_block=nrow_block, ncol_block=ncol_block)
      nproc_start = matrix%matrix_struct%para_env%num_pe
      nempty = COUNT(ncol_locals == 0)
      ncpu = nproc_start - nempty

      ! Avoid layouts with odd number of CPUs (blacs grid layout will be square)
      IF (ncpu > 2) ncpu = ncpu - MODULO(ncpu, 2)

      ! Nothing was removed, i.e. no zero-width columns and no odd count to fix
      IF (ncpu == nproc_start) RETURN

      ! Shrink ncpu until the grid built from ncpu processes has no zero-width column
      search: DO
         IF (ncpu <= 1) EXIT search

         ! Number of process columns of the grid with ncpu CPUs: among all factorizations
         ! p*q = ncpu the last one with the largest gcd(p, q) is taken
         ! (snippet copied from cp_blacs_env.F:cp_blacs_env_create)
         best_gcd = -1
         DO p = 1, CEILING(SQRT(REAL(ncpu, dp)))
            q = ncpu/p
            IF (p*q == ncpu) THEN
               this_gcd = gcd(p, q)
               IF (this_gcd >= best_gcd) THEN
                  npcol = q
                  best_gcd = this_gcd
               END IF
            END IF
         END DO

         ! Count the process columns that would not receive any matrix column
         ! (snippet copied from cp_fm_struct.F:cp_fm_struct_create)
         nempty = 0
         DO icol = 0, npcol - 1
            IF (numroc(ncol_global, ncol_block, icol, 0, npcol) == 0) THEN
               nempty = nempty + 1
            END IF
         END DO

         IF (nempty == 0) EXIT search

         ncpu = ncpu - nempty
         IF (ncpu > 2) ncpu = ncpu - MODULO(ncpu, 2)
      END DO search

   END FUNCTION cp_fm_max_ncpu_non_zero_column
245 #endif
246 
! **************************************************************************************************
!> \brief Decides on how many processes a matrix is going to be diagonalized and, if this
!>        requires it, copies the input matrix to a newly created matrix on a process subgroup
!> \param matrix the input cp_fm_type matrix to be diagonalized
!> \param eigenvectors the cp_fm_type matrix that will hold the eigenvectors of the input matrix
!> \param matrix_new the redistributed input matrix which will subsequently be diagonalized,
!>        or the original matrix if no redistribution is required
!> \param eigenvectors_new the redistributed eigenvectors matrix, or the original
!>        matrix if no redistribution is required
!> \param caller_is_elpa flag that determines if ELPA is used for diagonalization; passing it
!>        to a build without ELPA support aborts
!> \param redist_info if present, receives the details of the redistribution decision
!> \note When a redistribution takes place, matrix_new and eigenvectors_new are only created on
!>       the processes of the first subgroup; the remaining processes merely take part in the
!>       data transfer. The temporaries created here stay in the module-level work instance.
!> \par History
!>      - [01.2018] created by moving redistribution related code from cp_fm_syevd here
!> \author Nico Holmberg [01.2018]
! **************************************************************************************************
   SUBROUTINE cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, &
                                       caller_is_elpa, redist_info)

      TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
      TYPE(cp_fm_type), INTENT(OUT) :: matrix_new, eigenvectors_new
      LOGICAL, OPTIONAL, INTENT(IN) :: caller_is_elpa
      TYPE(cp_fm_redistribute_info), OPTIONAL, INTENT(OUT) :: redist_info

      CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_redistribute_start'

      INTEGER :: handle
      LOGICAL :: is_elpa

#if defined(__SCALAPACK)
      INTEGER :: dummy_desc(9), my_rank, new_ncol_block, new_nrow_block, ngroups, &
                 out_unit, pow2_block, smallest_block
      LOGICAL :: block_changed, fewer_procs
      REAL(kind=dp) :: dummy_data(1, 1)
      TYPE(cp_fm_redistribute_info) :: info
      TYPE(cp_fm_struct_type), POINTER :: new_struct
      TYPE(cp_logger_type), POINTER :: logger
      TYPE(mp_para_env_type), POINTER :: para_env
#endif

      CALL timeset(routinen, handle)

      is_elpa = .FALSE.
      IF (PRESENT(caller_is_elpa)) THEN
#if defined(__ELPA)
         is_elpa = caller_is_elpa
#else
         cpabort("CP2K compiled without the ELPA library.")
#endif
      END IF

#if defined(__SCALAPACK)

      logger => cp_get_default_logger()
      out_unit = cp_logger_get_default_io_unit(logger)

      para_env => matrix%matrix_struct%para_env
      my_rank = para_env%mepos

      ! -1 means "not chosen here": normally the block size is also adjusted according to the
      ! optimal number of CPUs
      new_ncol_block = -1
      new_nrow_block = -1
      pow2_block = -1

      ! The optimal number of CPUs is pure heuristics (see cp_fm_diag_get_optimal_ncpu); the
      ! original authors report that timings go up sharply if too many tasks are used
      info%matrix_order = matrix%matrix_struct%nrow_global
      info%num_pe_old = para_env%num_pe
      info%num_pe_opt = cp_fm_diag_get_optimal_ncpu(info%matrix_order)
      info%num_pe_new = info%num_pe_opt
      info%num_pe_max_nz_col = -1
      info%redistribute = .FALSE.

      IF (is_elpa) THEN
         ! with ELPA we don't have to redistribute if not necessary (scales, unlike ScaLAPACK)
         info%num_pe_new = info%num_pe_old

         ! BUT: diagonalization with ELPA fails when a processor column has zero width, so the
         ! largest safe number of CPUs for the current block size is determined first
         info%num_pe_max_nz_col = cp_fm_max_ncpu_non_zero_column(matrix)

         IF (work_redistribute%elpa_force_redistribute .AND. info%num_pe_opt < info%num_pe_max_nz_col) THEN
            ! the user asked for the ScaLAPACK optimum and it is below the safe maximum
            info%num_pe_new = info%num_pe_opt
         ELSE IF (info%num_pe_old > info%num_pe_max_nz_col) THEN
            ! otherwise only shrink if we have to. The block size must NOT be adjusted
            ! automatically by cp_fm_struct_create in this case (see #578): with a larger block
            ! size than the present one there would still be empty processors
            info%num_pe_new = info%num_pe_max_nz_col
         END IF

         CALL cp_fm_get_info(matrix, ncol_block=new_ncol_block, nrow_block=new_nrow_block)

         ! On GPUs, ELPA requires the block size to be a power of 2: take the largest power
         ! of 2 that does not exceed the smaller of the two present block sizes (at least 1)
         smallest_block = MIN(new_nrow_block, new_ncol_block)
         pow2_block = 1
         DO
            IF (2*pow2_block > smallest_block) EXIT
            pow2_block = 2*pow2_block
         END DO
         new_nrow_block = pow2_block
         new_ncol_block = pow2_block
      END IF

      ! finally, only redistribute if we're going to use less CPUs than before or changed the
      ! block size
      fewer_procs = info%num_pe_old > info%num_pe_new
      block_changed = .FALSE.
      IF (pow2_block >= 0) THEN
         block_changed = (pow2_block /= matrix%matrix_struct%ncol_block) .OR. &
                         (pow2_block /= matrix%matrix_struct%nrow_block)
      END IF
      info%redistribute = fewer_procs .OR. block_changed

      IF (work_redistribute%should_print .AND. out_unit > 0) THEN
         IF (is_elpa) THEN
            IF (work_redistribute%elpa_force_redistribute) THEN
               WRITE (UNIT=out_unit, FMT="(T2,A,T78,A3)") &
                  "CP_FM_DIAG| Force redistribute (ELPA):", "YES"
            ELSE
               WRITE (UNIT=out_unit, FMT="(T2,A,T79,A2)") &
                  "CP_FM_DIAG| Force redistribute (ELPA):", "NO"
            END IF
         END IF
         CALL info%write(out_unit)
      END IF
      CALL para_env%sync()

      IF (.NOT. info%redistribute) THEN
         ! No need to redistribute, just hand back the original matrices
         matrix_new = matrix
         eigenvectors_new = eigenvectors
      ELSE
         ! split comm, the first num_pe_new tasks will do the work
         ALLOCATE (work_redistribute%group_distribution(0:info%num_pe_old - 1))
         ALLOCATE (work_redistribute%group_partition(0:1))
         work_redistribute%group_partition(0) = info%num_pe_new
         work_redistribute%group_partition(1) = info%num_pe_old - info%num_pe_new
         ALLOCATE (work_redistribute%para_env_new)
         CALL work_redistribute%para_env_new%from_split( &
            comm=para_env, ngroups=ngroups, group_distribution=work_redistribute%group_distribution, &
            n_subgroups=2, group_partition=work_redistribute%group_partition)

         IF (work_redistribute%group_distribution(my_rank) /= 0) THEN
            ! these tasks must help redistribute (they own part of the data),
            ! but need fake 'new' data, and their descriptor must indicate this with -1
            ! see also scalapack comments on pdgemr2d
            dummy_desc = -1
            CALL pdgemr2d(info%matrix_order, info%matrix_order, matrix%local_data(1, 1), 1, 1, &
                          matrix%matrix_struct%descriptor, &
                          dummy_data(1, 1), 1, 1, dummy_desc, &
                          matrix%matrix_struct%context)
         ELSE
            ! create blacs, should inherit the preferences for the layout and so on, from the
            ! higher level
            NULLIFY (work_redistribute%blacs_env_new)
            CALL cp_blacs_env_create(blacs_env=work_redistribute%blacs_env_new, &
                                     para_env=work_redistribute%para_env_new)

            ! create the structure of the new matrices; the block size is only enforced when
            ! both block sizes were chosen above
            NULLIFY (new_struct)
            IF (new_nrow_block /= -1 .AND. new_ncol_block /= -1) THEN
               CALL cp_fm_struct_create(fmstruct=new_struct, &
                                        para_env=work_redistribute%para_env_new, &
                                        context=work_redistribute%blacs_env_new, &
                                        nrow_global=info%matrix_order, ncol_global=info%matrix_order, &
                                        ncol_block=new_ncol_block, nrow_block=new_nrow_block, force_block=.TRUE.)
            ELSE
               CALL cp_fm_struct_create(fmstruct=new_struct, &
                                        para_env=work_redistribute%para_env_new, &
                                        context=work_redistribute%blacs_env_new, &
                                        nrow_global=info%matrix_order, ncol_global=info%matrix_order, &
                                        ncol_block=new_ncol_block, nrow_block=new_nrow_block)
            END IF
            CALL cp_fm_create(matrix_new, matrix_struct=new_struct, name="yevd_new_mat")
            CALL cp_fm_create(eigenvectors_new, matrix_struct=new_struct, name="yevd_new_vec")
            CALL cp_fm_struct_release(new_struct)

            ! copy the old matrix into the new layout
            CALL pdgemr2d(info%matrix_order, info%matrix_order, matrix%local_data(1, 1), 1, 1, &
                          matrix%matrix_struct%descriptor, &
                          matrix_new%local_data(1, 1), 1, 1, matrix_new%matrix_struct%descriptor, &
                          matrix%matrix_struct%context)
         END IF
      END IF

      IF (PRESENT(redist_info)) THEN
         redist_info = info
      END IF
#else

      mark_used(matrix)
      mark_used(eigenvectors)
      mark_used(matrix_new)
      mark_used(eigenvectors_new)
      mark_used(redist_info)
      cpabort("Routine called in non-parallel case.")
#endif

      CALL timestop(handle)

   END SUBROUTINE cp_fm_redistribute_start
441 
! **************************************************************************************************
!> \brief Copies the eigenvectors back to the original layout and makes the eigenvalues
!>        available on all processes of the original communicator group
!> \param matrix the input cp_fm_type matrix that was diagonalized
!> \param eigenvectors the cp_fm_type matrix that will hold the eigenvectors of the input matrix
!> \param eig global array holding the eigenvalues of the input matrix
!> \param matrix_new the redistributed input matrix that was diagonalized, or the original
!>        matrix if no redistribution was required
!> \param eigenvectors_new the redistributed eigenvectors matrix, or the original
!>        matrix if no redistribution was required
!> \note Nothing is done unless a redistribution is in flight, which is detected from the state
!>       of the module-level work instance. In that case the redistributed matrices are released
!>       on the processes that hold them and the temporary work storage is freed.
!> \par History
!>      - [01.2018] created by moving redistribution related code from cp_fm_syevd here
!> \author Nico Holmberg [01.2018]
! **************************************************************************************************
   SUBROUTINE cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)

      TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
      REAL(kind=dp), DIMENSION(:), INTENT(INOUT) :: eig
      TYPE(cp_fm_type), INTENT(INOUT) :: matrix_new, eigenvectors_new

      CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_redistribute_end'

      INTEGER :: handle
#if defined(__SCALAPACK)
      INTEGER :: dummy_desc(9), my_rank, order
      LOGICAL :: in_work_group
      REAL(kind=dp) :: dummy_data(1, 1)
      TYPE(mp_para_env_type), POINTER :: para_env
#endif

      CALL timeset(routinen, handle)

#if defined(__SCALAPACK)

      ! group_distribution is only associated if the matrix was redistributed
      IF (ASSOCIATED(work_redistribute%group_distribution)) THEN
         order = matrix%matrix_struct%nrow_global
         para_env => matrix%matrix_struct%para_env
         my_rank = para_env%mepos
         ! evaluated once, before the work storage (including group_distribution) is freed
         in_work_group = (work_redistribute%group_distribution(my_rank) == 0)

         IF (in_work_group) THEN
            ! redistribute results on CPUs that hold the redistributed matrix
            CALL pdgemr2d(order, order, &
                          eigenvectors_new%local_data(1, 1), 1, 1, eigenvectors_new%matrix_struct%descriptor, &
                          eigenvectors%local_data(1, 1), 1, 1, eigenvectors%matrix_struct%descriptor, &
                          eigenvectors%matrix_struct%context)
            CALL cp_fm_release(matrix_new)
            CALL cp_fm_release(eigenvectors_new)
         ELSE
            ! these tasks must help redistribute (they own part of the data),
            ! but need fake 'new' data, and their descriptor must indicate this with -1
            ! see also scalapack comments on pdgemr2d
            dummy_desc = -1
            CALL pdgemr2d(order, order, &
                          dummy_data(1, 1), 1, 1, dummy_desc, &
                          eigenvectors%local_data(1, 1), 1, 1, eigenvectors%matrix_struct%descriptor, &
                          eigenvectors%matrix_struct%context)
         END IF

         ! free work
         CALL cp_fm_redistribute_work_finalize(in_work_group)

         ! finally, also the eigenvalues need to end up on the non-group member tasks
         CALL para_env%bcast(eig, 0)
      END IF

#else

      mark_used(matrix)
      mark_used(eigenvectors)
      mark_used(eig)
      mark_used(matrix_new)
      mark_used(eigenvectors_new)
      cpabort("Routine called in non-parallel case.")
#endif

      CALL timestop(handle)

   END SUBROUTINE cp_fm_redistribute_end
516 
517 END MODULE cp_fm_diag_utils
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
Definition: grid_common.h:117
methods related to the blacs parallel environment
Definition: cp_blacs_env.F:15
subroutine, public cp_blacs_env_release(blacs_env)
releases the given blacs_env
Definition: cp_blacs_env.F:282
subroutine, public cp_blacs_env_create(blacs_env, para_env, blacs_grid_layout, blacs_repeatable, row_major, grid_2d)
allocates and initializes a type that represent a blacs context
Definition: cp_blacs_env.F:123
Auxiliary tools to redistribute cp_fm_type matrices before and after diagonalization....
subroutine, public cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)
Redistributes eigenvectors and eigenvalues back to the original communicator group.
subroutine, public cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, caller_is_elpa, redist_info)
Determines the optimal number of CPUs for matrix diagonalization and redistributes the input matrices...
subroutine, public cp_fm_redistribute_init(a, x, should_print, elpa_force_redistribute)
Initializes the parameters that determine how to calculate the optimal number of CPUs for diagonalizi...
represent the structure of a full matrix
Definition: cp_fm_struct.F:14
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
Definition: cp_fm_struct.F:125
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
Definition: cp_fm_struct.F:320
represent a full matrix distributed on many processors
Definition: cp_fm_types.F:15
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
Definition: cp_fm_types.F:1016
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
Definition: cp_fm_types.F:167
various routines to log and control the output. The idea is that decisions about where to log should ...
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public dp
Definition: kinds.F:34
Collection of simple mathematical functions and subroutines.
Definition: mathlib.F:15
elemental integer function, public gcd(a, b)
computes the greatest common divisor of two number
Definition: mathlib.F:1299
Interface to the message passing library MPI.