dc/de6/cp__fm__diag__utils_8F_source.html

!--------------------------------------------------------------------------------------------------!

!   CP2K: A general program to perform molecular dynamics simulations                              !

!   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !

!                                                                                                  !

!   SPDX-License-Identifier: GPL-2.0-or-later                                                      !

!--------------------------------------------------------------------------------------------------!


! **************************************************************************************************

!> \brief Auxiliary tools to redistribute cp_fm_type matrices before and after diagonalization.

!>        Heuristics are used to determine the optimal number of CPUs for diagonalization and the

!>        input matrices are redistributed if necessary

!> \par History

!>      - [01.2018] moved redistribution related code from cp_fm_syevd here

!> \author Nico Holmberg [01.2018]

! **************************************************************************************************

MODULE cp_fm_diag_utils

   USE cp_blacs_env,                    ONLY: cp_blacs_env_create,&

                                              cp_blacs_env_release,&

                                              cp_blacs_env_type

   USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&

                                              cp_fm_struct_release,&

                                              cp_fm_struct_type

   USE cp_fm_types,                     ONLY: cp_fm_create,&

                                              cp_fm_get_info,&

                                              cp_fm_release,&

                                              cp_fm_type

   USE cp_log_handling,                 ONLY: cp_get_default_logger,&

                                              cp_logger_get_default_io_unit,&

                                              cp_logger_type

   USE kinds,                           ONLY: dp

   USE mathlib,                         ONLY: gcd

   USE message_passing,                 ONLY: mp_para_env_type

#include "../base/base_uses.f90"


   IMPLICIT NONE


   PRIVATE


   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_diag_utils'


   ! Information on redistribution


   TYPE, PUBLIC :: cp_fm_redistribute_info

      INTEGER :: matrix_order = -1

      INTEGER :: num_pe_old = -1 ! number of processes before a potential redistribute

      INTEGER :: num_pe_new = -1 ! number of processes after a potential redistribute

      INTEGER :: num_pe_opt = -1 ! optimal number of processes for the given matrix

      INTEGER :: num_pe_max_nz_col = -1 ! the maximal number of processes s.t. no column has zero width, may be < 0 if ignored

      LOGICAL :: redistribute = .false. ! whether or not the matrix was actually redistributed

   CONTAINS

      PROCEDURE, pass(self) :: write => cp_fm_redistribute_info_write

   END TYPE


   ! Container for redistribution settings and temporary work structs

   TYPE cp_fm_redistribute_type

      ! Settings

      INTEGER                                  :: a = -1, x = -1

      LOGICAL                                  :: should_print = .false.

      LOGICAL                                  :: elpa_force_redistribute = .false.

      ! Temporaries

      INTEGER, DIMENSION(:), POINTER           :: group_distribution => null(), &

                                                  group_partition => null()

      TYPE(cp_blacs_env_type), POINTER         :: blacs_env_new => null()

      TYPE(mp_para_env_type), POINTER          :: para_env_new => null()

   END TYPE cp_fm_redistribute_type


   ! Permanent instance of the redistribute type

   TYPE(cp_fm_redistribute_type), PRIVATE, &

      SAVE                                     :: work_redistribute


   ! Public subroutines


   PUBLIC :: cp_fm_redistribute_start, &

             cp_fm_redistribute_end, &

             cp_fm_redistribute_init


CONTAINS


! **************************************************************************************************

!> \brief Write the redistribute info nicely formatted to the given I/O unit

!> \param self reference to the cp_fm_redistribute_info instance

!> \param io_unit I/O unit to use for writing

! **************************************************************************************************

   SUBROUTINE cp_fm_redistribute_info_write(self, io_unit)

      CLASS(cp_fm_redistribute_info), INTENT(IN) :: self

      INTEGER, INTENT(IN) :: io_unit


      WRITE (unit=io_unit, fmt="(A)") ""

      WRITE (unit=io_unit, fmt="(T2,A,T71,I10)") &

         "CP_FM_DIAG| Number of processes over which the matrix is distributed ", self%num_pe_old, &

         "CP_FM_DIAG| Matrix order ", self%matrix_order

      WRITE (unit=io_unit, fmt="(T2,A,T71,I10)") &

         "CP_FM_DIAG| Optimal number of CPUs ", self%num_pe_opt

      IF (self%num_pe_max_nz_col < 0) THEN

         WRITE (unit=io_unit, fmt="(T2,A,T71,A10)") &

            "CP_FM_DIAG| Maximum number of CPUs (with non-zero columns) ", "<N/A>"

      ELSE

         WRITE (unit=io_unit, fmt="(T2,A,T71,I10)") &

            "CP_FM_DIAG| Maximum number of CPUs (with non-zero columns): ", self%num_pe_max_nz_col

      END IF

      IF (self%redistribute) THEN

         WRITE (unit=io_unit, fmt="(T2,A,T71,I10)") &

            "CP_FM_DIAG| Number of processes for the redistribution ", self%num_pe_new

      ELSE

         WRITE (unit=io_unit, fmt="(T2,A)") &

            "CP_FM_DIAG| The matrix will NOT be redistributed"

      END IF

      WRITE (unit=io_unit, fmt="(A)") ""


   END SUBROUTINE cp_fm_redistribute_info_write


! **************************************************************************************************

!> \brief  Releases the temporary storage needed when redistributing arrays

!> \param  has_redistributed flag that determines if the processors holds a part of the

!>                           redistributed array

!> \author Nico Holmberg [01.2018]

! **************************************************************************************************

   SUBROUTINE cp_fm_redistribute_work_finalize(has_redistributed)

      LOGICAL, INTENT(IN)                                :: has_redistributed


      IF (ASSOCIATED(work_redistribute%group_distribution)) THEN

         IF (has_redistributed) THEN

            CALL cp_blacs_env_release(work_redistribute%blacs_env_new)

         END IF

         CALL work_redistribute%para_env_new%free()

         DEALLOCATE (work_redistribute%para_env_new)

         DEALLOCATE (work_redistribute%group_distribution)

         DEALLOCATE (work_redistribute%group_partition)

      END IF

      ! Return work to its initial state

      work_redistribute = cp_fm_redistribute_type()


   END SUBROUTINE cp_fm_redistribute_work_finalize


! **************************************************************************************************

!> \brief  Initializes the parameters that determine how to calculate the optimal number of CPUs

!>         for diagonalizing a matrix. The parameters are read from the GLOBAL input section.

!> \param a                integer parameter used to define the rule for determining the optimal

!>                         number of CPUs for diagonalization

!> \param x                integer parameter used to define the rule for determining the optimal

!>                         number of CPUs for diagonalization

!> \param should_print     flag that determines if information about the redistribution process

!>                         should be printed

!> \param elpa_force_redistribute  flag that if redistribution should always be performed when

!>                                 the ELPA diagonalization library is in use

!> \author Nico Holmberg [01.2018]

! **************************************************************************************************


   SUBROUTINE cp_fm_redistribute_init(a, x, should_print, elpa_force_redistribute)

      INTEGER, INTENT(IN)                                :: a, x

      LOGICAL, INTENT(IN)                                :: should_print, elpa_force_redistribute


      work_redistribute%a = a

      work_redistribute%x = x

      work_redistribute%should_print = should_print

      work_redistribute%elpa_force_redistribute = elpa_force_redistribute

      ! Init work

      work_redistribute = cp_fm_redistribute_type()


   END SUBROUTINE cp_fm_redistribute_init


! **************************************************************************************************

!> \brief  Calculates the optimal number of CPUs for diagonalizing a matrix.

!> \param  size  the size of the diagonalized matrix

!> \return the optimal number of CPUs

!> \author Nico Holmberg [01.2018]

! **************************************************************************************************

   PURE FUNCTION cp_fm_diag_get_optimal_ncpu(size) RESULT(ncpu)

      INTEGER, INTENT(IN)                                :: size

      INTEGER                                            :: ncpu


      ncpu = ((size + work_redistribute%a*work_redistribute%x - 1)/ &

              (work_redistribute%a*work_redistribute%x))*work_redistribute%a


   END FUNCTION cp_fm_diag_get_optimal_ncpu


#if defined(__SCALAPACK)

! **************************************************************************************************

!> \brief  Determines the largest number of CPUs a matrix can be distributed on without any of the

!>         processors getting a zero-width column (currently only needed for ELPA).

!> \param  matrix the matrix that will be diagonalized

!> \return the maximum number of CPUs for ELPA

!> \author Nico Holmberg [01.2018]

! **************************************************************************************************

   FUNCTION cp_fm_max_ncpu_non_zero_column(matrix) RESULT(ncpu)

      TYPE(cp_fm_type), INTENT(IN)                       :: matrix

      INTEGER                                            :: ncpu


      INTEGER                                            :: gcd_max, ipe, jpe, ncol_block, &

                                                            ncol_global, npcol, nrow_block, &

                                                            nrow_global, num_pe_old, nzero

      INTEGER, DIMENSION(:), POINTER                     :: ncol_locals

      INTEGER, EXTERNAL                                  :: numroc


      NULLIFY (ncol_locals)

      ! First check if there are any zero width columns in current layout

      CALL cp_fm_get_info(matrix, ncol_locals=ncol_locals, &

                          nrow_global=nrow_global, ncol_global=ncol_global, &

                          nrow_block=nrow_block, ncol_block=ncol_block)

      nzero = count(ncol_locals == 0)

      num_pe_old = matrix%matrix_struct%para_env%num_pe

      ncpu = num_pe_old - nzero


      ! Avoid layouts with odd number of CPUs (blacs grid layout will be square)

      IF (ncpu > 2) &

         ncpu = ncpu - modulo(ncpu, 2)


      ! if there are no zero-width columns and the number of processors was even, leave it at that

      IF (ncpu == num_pe_old) &

         RETURN


      ! Iteratively search for the maximum number of CPUs for ELPA

      ! On each step, we test whether the blacs grid created with ncpu processes

      ! contains any columns with zero width

      DO WHILE (ncpu > 1)

         ! Determine layout of new blacs grid with ncpu CPUs

         ! (snippet copied from cp_blacs_env.F:cp_blacs_env_create)

         gcd_max = -1

         DO ipe = 1, ceiling(sqrt(real(ncpu, dp)))

            jpe = ncpu/ipe

            IF (ipe*jpe .NE. ncpu) &

               cycle

            IF (gcd(ipe, jpe) >= gcd_max) THEN

               npcol = jpe

               gcd_max = gcd(ipe, jpe)

            END IF

         END DO


         ! Count the number of processors without any columns

         ! (snippet copied from cp_fm_struct.F:cp_fm_struct_create)

         nzero = 0

         DO ipe = 0, npcol - 1

            IF (numroc(ncol_global, ncol_block, ipe, 0, npcol) == 0) &

               nzero = nzero + 1

         END DO


         IF (nzero == 0) &

            EXIT


         ncpu = ncpu - nzero


         IF (ncpu > 2) &

            ncpu = ncpu - modulo(ncpu, 2)

      END DO


   END FUNCTION cp_fm_max_ncpu_non_zero_column

#endif


! **************************************************************************************************

!> \brief   Determines the optimal number of CPUs for matrix diagonalization and redistributes

!>          the input matrices if necessary

!> \param matrix           the input cp_fm_type matrix to be diagonalized

!> \param eigenvectors     the cp_fm_type matrix that will hold the eigenvectors of the input matrix

!> \param matrix_new       the redistributed input matrix which will subsequently be diagonalized,

!>                         or a pointer to the original matrix if no redistribution is required

!> \param eigenvectors_new the redistributed eigenvectors matrix, or a pointer to the original

!>                         matrix if no redistribution is required

!> \param caller_is_elpa   flag that determines if ELPA is used for diagonalization

!> \param redist_info      get info about the redistribution

!> \par History

!>      - [01.2018] created by moving redistribution related code from cp_fm_syevd here

!> \author Nico Holmberg [01.2018]

! **************************************************************************************************


   SUBROUTINE cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, &

                                       caller_is_elpa, redist_info)


      TYPE(cp_fm_type), INTENT(IN)             :: matrix, eigenvectors

      TYPE(cp_fm_type), INTENT(OUT)            :: matrix_new, eigenvectors_new

      LOGICAL, OPTIONAL, INTENT(IN)            :: caller_is_elpa


      CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_redistribute_start'


      INTEGER                                  :: handle

      LOGICAL                                  :: is_elpa

      TYPE(cp_fm_redistribute_info), OPTIONAL, INTENT(OUT) :: redist_info


#if defined(__SCALAPACK)

      REAL(kind=dp)                            :: fake_local_data(1, 1)

      INTEGER                                  :: fake_descriptor(9), mepos_old, &

                                                  io_unit, ngroups, ncol_block, blksize, nrow_block

      TYPE(cp_fm_struct_type), POINTER         :: fm_struct_new

      TYPE(mp_para_env_type), POINTER          :: para_env

      TYPE(cp_logger_type), POINTER            :: logger

      TYPE(cp_fm_redistribute_info)          :: rdinfo

#endif


      CALL timeset(routinen, handle)

      is_elpa = .false.

      IF (PRESENT(caller_is_elpa)) THEN

#if defined(__ELPA)

         is_elpa = caller_is_elpa

#else

         cpabort("CP2K compiled without the ELPA library.")

#endif

      END IF


#if defined(__SCALAPACK)


      logger => cp_get_default_logger()

      io_unit = cp_logger_get_default_io_unit(logger)


      ! first figure out the optimal number of cpus

      ! this is pure heuristics, the defaults are based on rosa timings

      ! that demonstrate that timings go up sharply if too many tasks are used

      ! we take a multiple of 4, and approximately n/60

      para_env => matrix%matrix_struct%para_env

      mepos_old = para_env%mepos

      ncol_block = -1 ! normally we also want to adjust the block size according to the optimal # of CPUs

      nrow_block = -1

      blksize = -1


      rdinfo%matrix_order = matrix%matrix_struct%nrow_global

      rdinfo%num_pe_old = para_env%num_pe

      rdinfo%num_pe_opt = cp_fm_diag_get_optimal_ncpu(rdinfo%matrix_order)

      rdinfo%num_pe_new = rdinfo%num_pe_opt

      rdinfo%num_pe_max_nz_col = -1

      rdinfo%redistribute = .false.


      IF (is_elpa) THEN

         ! with ELPA we don't have to redistribute if not necessary (scales, unlike ScaLAPACK)

         rdinfo%num_pe_new = rdinfo%num_pe_old


         ! BUT: Diagonalization with ELPA fails when a processor column has zero width

         ! Determine the maximum number of CPUs the matrix can be distributed without zero-width columns

         ! for the current block size.

         rdinfo%num_pe_max_nz_col = cp_fm_max_ncpu_non_zero_column(matrix)


         ! if the user wants to redistribute to the ScaLAPACK optimal number of CPUs anyway, let him if it's safe.

         IF (work_redistribute%elpa_force_redistribute .AND. rdinfo%num_pe_opt < rdinfo%num_pe_max_nz_col) THEN

            ! Use heuristics to determine the need for redistribution (when num_pe_opt is smaller than the safe maximum)

            ! in this case we can also take the block size used for ScaLAPACK

            rdinfo%num_pe_new = rdinfo%num_pe_opt

         ELSE IF (rdinfo%num_pe_old > rdinfo%num_pe_max_nz_col) THEN

            ! Otherwise, only redistribute if we have to

            rdinfo%num_pe_new = rdinfo%num_pe_max_nz_col

            ! do NOT let cp_fm_struct_create automatically adjust the block size because the

            ! calculated number of processors such that no block has 0 columns wouldn't match (see #578):

            ! if the automatically chosen block size is larger than the present one we would still end

            ! up with empty processors

         END IF


         CALL cp_fm_get_info(matrix, ncol_block=ncol_block, nrow_block=nrow_block)


         ! On GPUs, ELPA requires the block size to be a power of 2

         blksize = 1

         DO WHILE (2*blksize <= min(nrow_block, ncol_block))

            blksize = blksize*2

         END DO

         nrow_block = blksize

         ncol_block = blksize

      END IF


      ! finally, only redistribute if we're going to use less CPUs than before or changed the block size

      rdinfo%redistribute = (rdinfo%num_pe_old > rdinfo%num_pe_new) .OR. (blksize >= 0 .AND. &

                                   ((blksize /= matrix%matrix_struct%ncol_block) .OR. (blksize /= matrix%matrix_struct%nrow_block)))


      IF (work_redistribute%should_print .AND. io_unit > 0) THEN

         IF (is_elpa) THEN

            IF (work_redistribute%elpa_force_redistribute) THEN

               WRITE (unit=io_unit, fmt="(T2,A,T78,A3)") &

                  "CP_FM_DIAG| Force redistribute (ELPA):", "YES"

            ELSE

               WRITE (unit=io_unit, fmt="(T2,A,T79,A2)") &

                  "CP_FM_DIAG| Force redistribute (ELPA):", "NO"

            END IF

         END IF

         CALL rdinfo%write(io_unit)

      END IF

      CALL para_env%sync()


      ! if the optimal is smaller than num_pe, we will redistribute the input matrix

      IF (rdinfo%redistribute) THEN

         ! split comm, the first num_pe_new tasks will do the work

         ALLOCATE (work_redistribute%group_distribution(0:rdinfo%num_pe_old - 1))

         ALLOCATE (work_redistribute%group_partition(0:1))

         work_redistribute%group_partition = (/rdinfo%num_pe_new, rdinfo%num_pe_old - rdinfo%num_pe_new/)

         ALLOCATE (work_redistribute%para_env_new)

         CALL work_redistribute%para_env_new%from_split( &

            comm=para_env, ngroups=ngroups, group_distribution=work_redistribute%group_distribution, &

            n_subgroups=2, group_partition=work_redistribute%group_partition)


         IF (work_redistribute%group_distribution(mepos_old) == 0) THEN


            ! create blacs, should inherit the preferences for the layout and so on, from the higher level

            NULLIFY (work_redistribute%blacs_env_new)

            CALL cp_blacs_env_create(blacs_env=work_redistribute%blacs_env_new, para_env=work_redistribute%para_env_new)


            ! create new matrix

            NULLIFY (fm_struct_new)

            IF (nrow_block == -1 .OR. ncol_block == -1) THEN

               CALL cp_fm_struct_create(fmstruct=fm_struct_new, &

                                        para_env=work_redistribute%para_env_new, &

                                        context=work_redistribute%blacs_env_new, &

                                        nrow_global=rdinfo%matrix_order, ncol_global=rdinfo%matrix_order, &

                                        ncol_block=ncol_block, nrow_block=nrow_block)

            ELSE

               CALL cp_fm_struct_create(fmstruct=fm_struct_new, &

                                        para_env=work_redistribute%para_env_new, &

                                        context=work_redistribute%blacs_env_new, &

                                        nrow_global=rdinfo%matrix_order, ncol_global=rdinfo%matrix_order, &

                                        ncol_block=ncol_block, nrow_block=nrow_block, force_block=.true.)

            END IF

            CALL cp_fm_create(matrix_new, matrix_struct=fm_struct_new, name="yevd_new_mat")

            CALL cp_fm_create(eigenvectors_new, matrix_struct=fm_struct_new, name="yevd_new_vec")

            CALL cp_fm_struct_release(fm_struct_new)


            ! redistribute old

            CALL pdgemr2d(rdinfo%matrix_order, rdinfo%matrix_order, matrix%local_data(1, 1), 1, 1, &

                          matrix%matrix_struct%descriptor, &

                          matrix_new%local_data(1, 1), 1, 1, matrix_new%matrix_struct%descriptor, &

                          matrix%matrix_struct%context)

         ELSE

            ! these tasks must help redistribute (they own part of the data),

            ! but need fake 'new' data, and their descriptor must indicate this with -1

            ! see also scalapack comments on pdgemr2d

            fake_descriptor = -1

            CALL pdgemr2d(rdinfo%matrix_order, rdinfo%matrix_order, matrix%local_data(1, 1), 1, 1, &

                          matrix%matrix_struct%descriptor, &

                          fake_local_data(1, 1), 1, 1, fake_descriptor, &

                          matrix%matrix_struct%context)

         END IF

      ELSE

         ! No need to redistribute, just return pointers to the original arrays

         matrix_new = matrix

         eigenvectors_new = eigenvectors

      END IF


      IF (PRESENT(redist_info)) &

         redist_info = rdinfo

#else


      mark_used(matrix)

      mark_used(eigenvectors)

      mark_used(matrix_new)

      mark_used(eigenvectors_new)

      mark_used(redist_info)

      cpabort("Routine called in non-parallel case.")

#endif


      CALL timestop(handle)


   END SUBROUTINE cp_fm_redistribute_start


! **************************************************************************************************

!> \brief Redistributes eigenvectors and eigenvalues  back to the original communicator group

!> \param matrix           the input cp_fm_type matrix to be diagonalized

!> \param eigenvectors     the cp_fm_type matrix that will hold the eigenvectors of the input matrix

!> \param eig              global array holding the eigenvalues of the input matrixmatrix

!> \param matrix_new       the redistributed input matrix which will subsequently be diagonalized,

!>                         or a pointer to the original matrix if no redistribution is required

!> \param eigenvectors_new the redistributed eigenvectors matrix, or a pointer to the original

!>                         matrix if no redistribution is required

!> \par History

!>      - [01.2018] created by moving redistribution related code from cp_fm_syevd here

!> \author Nico Holmberg [01.2018]

! **************************************************************************************************


   SUBROUTINE cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)


      TYPE(cp_fm_type), INTENT(IN)             :: matrix, eigenvectors

      REAL(kind=dp), DIMENSION(:), INTENT(INOUT) :: eig

      TYPE(cp_fm_type), INTENT(INOUT)          :: matrix_new, eigenvectors_new


      CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_redistribute_end'


      INTEGER                                  :: handle

#if defined(__SCALAPACK)

      REAL(kind=dp)                            :: fake_local_data(1, 1)

      INTEGER                                  :: fake_descriptor(9), mepos_old, n

      TYPE(mp_para_env_type), POINTER          :: para_env

#endif


      CALL timeset(routinen, handle)


#if defined(__SCALAPACK)


      ! Check if matrix was redistributed

      IF (ASSOCIATED(work_redistribute%group_distribution)) THEN

         n = matrix%matrix_struct%nrow_global

         para_env => matrix%matrix_struct%para_env

         mepos_old = para_env%mepos


         IF (work_redistribute%group_distribution(mepos_old) == 0) THEN

            ! redistribute results on CPUs that hold the redistributed matrix

            CALL pdgemr2d(n, n, eigenvectors_new%local_data(1, 1), 1, 1, eigenvectors_new%matrix_struct%descriptor, &

                          eigenvectors%local_data(1, 1), 1, 1, eigenvectors%matrix_struct%descriptor, &

                          eigenvectors%matrix_struct%context)

            CALL cp_fm_release(matrix_new)

            CALL cp_fm_release(eigenvectors_new)

         ELSE

            ! these tasks must help redistribute (they own part of the data),

            ! but need fake 'new' data, and their descriptor must indicate this with -1

            ! see also scalapack comments on pdgemr2d

            fake_descriptor = -1

            CALL pdgemr2d(n, n, fake_local_data(1, 1), 1, 1, fake_descriptor, &

                          eigenvectors%local_data(1, 1), 1, 1, eigenvectors%matrix_struct%descriptor, &

                          eigenvectors%matrix_struct%context)

         END IF

         ! free work

         CALL cp_fm_redistribute_work_finalize(work_redistribute%group_distribution(mepos_old) == 0)


         ! finally, also the eigenvalues need to end up on the non-group member tasks

         CALL para_env%bcast(eig, 0)

      END IF


#else


      mark_used(matrix)

      mark_used(eigenvectors)

      mark_used(eig)

      mark_used(matrix_new)

      mark_used(eigenvectors_new)

      cpabort("Routine called in non-parallel case.")

#endif


      CALL timestop(handle)


   END SUBROUTINE cp_fm_redistribute_end


END MODULE cp_fm_diag_utils

modulo
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
Definition grid_common.h:117

cp_fm_types::cp_fm_release
Definition cp_fm_types.F:90

cp_blacs_env
methods related to the blacs parallel environment
Definition cp_blacs_env.F:15

cp_blacs_env::cp_blacs_env_release
subroutine, public cp_blacs_env_release(blacs_env)
releases the given blacs_env
Definition cp_blacs_env.F:282

cp_blacs_env::cp_blacs_env_create
subroutine, public cp_blacs_env_create(blacs_env, para_env, blacs_grid_layout, blacs_repeatable, row_major, grid_2d)
allocates and initializes a type that represent a blacs context
Definition cp_blacs_env.F:123

cp_fm_diag_utils
Auxiliary tools to redistribute cp_fm_type matrices before and after diagonalization....
Definition cp_fm_diag_utils.F:16

cp_fm_diag_utils::cp_fm_redistribute_end
subroutine, public cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)
Redistributes eigenvectors and eigenvalues back to the original communicator group.
Definition cp_fm_diag_utils.F:456

cp_fm_diag_utils::cp_fm_redistribute_start
subroutine, public cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, caller_is_elpa, redist_info)
Determines the optimal number of CPUs for matrix diagonalization and redistributes the input matrices...
Definition cp_fm_diag_utils.F:264

cp_fm_diag_utils::cp_fm_redistribute_init
subroutine, public cp_fm_redistribute_init(a, x, should_print, elpa_force_redistribute)
Initializes the parameters that determine how to calculate the optimal number of CPUs for diagonalizi...
Definition cp_fm_diag_utils.F:148

cp_fm_struct
represent the structure of a full matrix
Definition cp_fm_struct.F:14

cp_fm_struct::cp_fm_struct_create
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
Definition cp_fm_struct.F:125

cp_fm_struct::cp_fm_struct_release
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
Definition cp_fm_struct.F:320

cp_fm_types
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15

cp_fm_types::cp_fm_get_info
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
Definition cp_fm_types.F:1016

cp_fm_types::cp_fm_create
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
Definition cp_fm_types.F:167

cp_log_handling
various routines to log and control the output. The idea is that decisions about where to log should ...
Definition cp_log_handling.F:41

cp_log_handling::cp_logger_get_default_io_unit
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
Definition cp_log_handling.F:515

cp_log_handling::cp_get_default_logger
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
Definition cp_log_handling.F:234

kinds
Defines the basic variable types.
Definition kinds.F:23

kinds::dp
integer, parameter, public dp
Definition kinds.F:34

mathlib
Collection of simple mathematical functions and subroutines.
Definition mathlib.F:15

mathlib::gcd
elemental integer function, public gcd(a, b)
computes the greatest common divisor of two number
Definition mathlib.F:1291

message_passing
Interface to the message passing library MPI.
Definition message_passing.F:23

cp_blacs_env::cp_blacs_env_type
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
Definition cp_blacs_env.F:53

cp_fm_diag_utils::cp_fm_redistribute_info
Definition cp_fm_diag_utils.F:42

cp_fm_struct::cp_fm_struct_type
keeps the information about the structure of a full matrix
Definition cp_fm_struct.F:82

cp_fm_types::cp_fm_type
represent a full matrix
Definition cp_fm_types.F:116

cp_log_handling::cp_logger_type
type of a logger, at the moment it contains just a print level starting at which level it should be l...
Definition cp_log_handling.F:140

message_passing::mp_para_env_type
stores all the informations relevant to an mpi environment
Definition message_passing.F:763