d8/d66/arnoldi__methods_8F_source.html

!--------------------------------------------------------------------------------------------------!

!   CP2K: A general program to perform molecular dynamics simulations                              !

!   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !

!                                                                                                  !

!   SPDX-License-Identifier: GPL-2.0-or-later                                                      !

!--------------------------------------------------------------------------------------------------!


! **************************************************************************************************

!> \brief methods for arnoldi iteration

!> \par History

!>       2014.09 created [Florian Schiffmann]

!> \author Florian Schiffmann

! **************************************************************************************************


MODULE arnoldi_methods

   USE arnoldi_geev, ONLY: arnoldi_general_local_diag, &

                           arnoldi_symm_local_diag, &

                           arnoldi_tridiag_local_diag

   USE arnoldi_types, ONLY: &

      arnoldi_control_type, arnoldi_data_c_type, arnoldi_data_d_type, arnoldi_data_s_type, &

      arnoldi_data_type, arnoldi_data_z_type, get_control, get_data_c, get_data_d, get_data_s, &

      get_data_z, has_d_cmplx, has_d_real, m_x_v_vectors_type

   USE dbcsr_api, ONLY: &

      dbcsr_add, dbcsr_copy, dbcsr_get_data_p, dbcsr_get_info, dbcsr_iterator_blocks_left, &

      dbcsr_iterator_next_block, dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, &

      dbcsr_p_type, dbcsr_scale, dbcsr_type

   USE dbcsr_vector, ONLY: dbcsr_matrix_colvec_multiply

   USE kinds, ONLY: real_8

   USE message_passing, ONLY: mp_comm_type

#include "../base/base_uses.f90"


   IMPLICIT NONE


   PRIVATE


   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'arnoldi_methods'


   PUBLIC :: arnoldi_init, build_subspace, compute_evals, arnoldi_iram, &

             gev_arnoldi_init, gev_build_subspace, gev_update_data


   INTERFACE convert_matrix

      MODULE PROCEDURE convert_matrix_z_to_d

      MODULE PROCEDURE convert_matrix_d_to_z

      MODULE PROCEDURE convert_matrix_z_to_z

   END INTERFACE


CONTAINS


! **************************************************************************************************

!> \brief Interface for the routine calcualting the implicit restarts

!>        Currently all based on lapack

!> \param arnoldi_data ...

! **************************************************************************************************


   SUBROUTINE arnoldi_iram(arnoldi_data)

      TYPE(arnoldi_data_type)                            :: arnoldi_data


      CHARACTER(LEN=*), PARAMETER :: routinen = 'arnoldi_iram'


      INTEGER                                            :: handle


      CALL timeset(routinen, handle)


      IF (has_d_real(arnoldi_data)) CALL arnoldi_iram_d(arnoldi_data)

      IF (has_d_cmplx(arnoldi_data)) CALL arnoldi_iram_z(arnoldi_data)


      CALL timestop(handle)


   END SUBROUTINE arnoldi_iram


! **************************************************************************************************

!> \brief Interface to compute the eigenvalues of a nonsymmetric matrix

!>        This is only the serial version

!> \param arnoldi_data ...

! **************************************************************************************************


   SUBROUTINE compute_evals(arnoldi_data)

      TYPE(arnoldi_data_type)                            :: arnoldi_data


      CHARACTER(LEN=*), PARAMETER :: routinen = 'compute_evals'


      INTEGER                                            :: handle


      CALL timeset(routinen, handle)


      IF (has_d_real(arnoldi_data)) CALL compute_evals_d(arnoldi_data)

      IF (has_d_cmplx(arnoldi_data)) CALL compute_evals_z(arnoldi_data)


      CALL timestop(handle)


   END SUBROUTINE compute_evals


! **************************************************************************************************

!> \brief Interface for the initialization of the arnoldi subspace creation

!>        currently it can only setup a random vector but can be improved to

!>        various types of restarts easily

!> \param matrix pointer to the matrices as described in main interface

!> \param vectors work vectors for the matrix vector multiplications

!> \param arnoldi_data all data concerning the subspace

! **************************************************************************************************


   SUBROUTINE arnoldi_init(matrix, vectors, arnoldi_data)

      TYPE(dbcsr_p_type), DIMENSION(:)                   :: matrix

      TYPE(m_x_v_vectors_type)                           :: vectors

      TYPE(arnoldi_data_type)                            :: arnoldi_data


      CHARACTER(LEN=*), PARAMETER :: routinen = 'arnoldi_init'


      INTEGER                                            :: handle


      CALL timeset(routinen, handle)


      IF (has_d_real(arnoldi_data)) CALL arnoldi_init_d(matrix, vectors, arnoldi_data)

      IF (has_d_cmplx(arnoldi_data)) CALL arnoldi_init_z(matrix, vectors, arnoldi_data)


      CALL timestop(handle)


   END SUBROUTINE arnoldi_init


! **************************************************************************************************

!> \brief Interface for the initialization of the arnoldi subspace creation

!>        for the generalized eigenvalue problem

!> \param matrix pointer to the matrices as described in main interface

!> \param matrix_arnoldi ...

!> \param vectors work vectors for the matrix vector multiplications

!> \param arnoldi_data all data concerning the subspace

! **************************************************************************************************


   SUBROUTINE gev_arnoldi_init(matrix, matrix_arnoldi, vectors, arnoldi_data)

      TYPE(dbcsr_p_type), DIMENSION(:)                   :: matrix, matrix_arnoldi

      TYPE(m_x_v_vectors_type)                           :: vectors

      TYPE(arnoldi_data_type)                            :: arnoldi_data


      CHARACTER(LEN=*), PARAMETER :: routinen = 'gev_arnoldi_init'


      INTEGER                                            :: handle


      CALL timeset(routinen, handle)


      IF (has_d_real(arnoldi_data)) CALL gev_arnoldi_init_d(matrix, matrix_arnoldi, vectors, arnoldi_data)

      IF (has_d_cmplx(arnoldi_data)) CALL gev_arnoldi_init_z(matrix, matrix_arnoldi, vectors, arnoldi_data)


      CALL timestop(handle)


   END SUBROUTINE gev_arnoldi_init


! **************************************************************************************************

!> \brief here the iterations are performed and the krylov space is constructed

!> \param matrix see above

!> \param vectors see above

!> \param arnoldi_data see above

! **************************************************************************************************


   SUBROUTINE build_subspace(matrix, vectors, arnoldi_data)

      TYPE(dbcsr_p_type), DIMENSION(:)                   :: matrix

      TYPE(m_x_v_vectors_type)                           :: vectors

      TYPE(arnoldi_data_type)                            :: arnoldi_data


      CHARACTER(LEN=*), PARAMETER :: routinen = 'build_subspace'


      INTEGER                                            :: handle


      CALL timeset(routinen, handle)


      IF (has_d_real(arnoldi_data)) CALL build_subspace_d(matrix, vectors, arnoldi_data)

      IF (has_d_cmplx(arnoldi_data)) CALL build_subspace_z(matrix, vectors, arnoldi_data)


      CALL timestop(handle)


   END SUBROUTINE build_subspace


! **************************************************************************************************

!> \brief here the iterations are performed and the krylov space for the generalized

!>        eigenvalue problem is created

!> \param matrix see above

!> \param vectors see above

!> \param arnoldi_data see above

! **************************************************************************************************


   SUBROUTINE gev_build_subspace(matrix, vectors, arnoldi_data)

      TYPE(dbcsr_p_type), DIMENSION(:)                   :: matrix

      TYPE(m_x_v_vectors_type)                           :: vectors

      TYPE(arnoldi_data_type)                            :: arnoldi_data


      CHARACTER(LEN=*), PARAMETER :: routinen = 'gev_build_subspace'


      INTEGER                                            :: handle


      CALL timeset(routinen, handle)


      IF (has_d_real(arnoldi_data)) CALL gev_build_subspace_d(matrix, vectors, arnoldi_data)

      IF (has_d_cmplx(arnoldi_data)) CALL gev_build_subspace_z(matrix, vectors, arnoldi_data)


      CALL timestop(handle)


   END SUBROUTINE gev_build_subspace


! **************************************************************************************************

!> \brief in the generalized eigenvalue the matrix depende on the projection

!>        therefore the outer loop has to build a new set of matrices for the

!>        inner loop

!> \param matrix see above

!> \param matrix_arnoldi ...

!> \param vectors ...

!> \param arnoldi_data see above

! **************************************************************************************************


   SUBROUTINE gev_update_data(matrix, matrix_arnoldi, vectors, arnoldi_data)

      TYPE(dbcsr_p_type), DIMENSION(:)                   :: matrix, matrix_arnoldi

      TYPE(m_x_v_vectors_type)                           :: vectors

      TYPE(arnoldi_data_type)                            :: arnoldi_data


      CHARACTER(LEN=*), PARAMETER :: routinen = 'gev_update_data'


      INTEGER                                            :: handle


      CALL timeset(routinen, handle)


      IF (has_d_real(arnoldi_data)) CALL gev_update_data_d(matrix, matrix_arnoldi, vectors, arnoldi_data)

      IF (has_d_cmplx(arnoldi_data)) CALL gev_update_data_z(matrix, matrix_arnoldi, vectors, arnoldi_data)


      CALL timestop(handle)


   END SUBROUTINE gev_update_data


! **************************************************************************************************

!> \brief ...

!> \param m_out ...

!> \param m_in ...

! **************************************************************************************************

   SUBROUTINE convert_matrix_z_to_d(m_out, m_in)

      REAL(real_8), DIMENSION(:, :)                      :: m_out

      COMPLEX(real_8), DIMENSION(:, :)                   :: m_in


      m_out(:, :) = real(m_in(:, :), kind=real_8)

   END SUBROUTINE convert_matrix_z_to_d


! **************************************************************************************************

!> \brief ...

!> \param m_out ...

!> \param m_in ...

! **************************************************************************************************

   SUBROUTINE convert_matrix_d_to_z(m_out, m_in)

      COMPLEX(real_8), DIMENSION(:, :)                   :: m_out

      REAL(real_8), DIMENSION(:, :)                      :: m_in


      m_out(:, :) = cmplx(m_in, 0.0, kind=real_8)

   END SUBROUTINE convert_matrix_d_to_z


! I kno that one is stupid but like this it simplifies the template

! **************************************************************************************************

!> \brief ...

!> \param m_out ...

!> \param m_in ...

! **************************************************************************************************

   SUBROUTINE convert_matrix_z_to_z(m_out, m_in)

      COMPLEX(real_8), DIMENSION(:, :)                   :: m_out, m_in


      m_out(:, :) = m_in

   END SUBROUTINE convert_matrix_z_to_z


! **************************************************************************************************

!> \brief Call the correct eigensolver, in the arnoldi method only the right

!>        eigenvectors are used. Lefts are created here but dumped immediately

!> \param arnoldi_data ...

! **************************************************************************************************

      SUBROUTINE compute_evals_d (arnoldi_data)

         TYPE(arnoldi_data_type)                 :: arnoldi_data


         COMPLEX(real_8), DIMENSION(:, :), ALLOCATABLE   :: levec

         TYPE(arnoldi_data_d_type), POINTER            :: ar_data

         INTEGER                                  :: ndim

         TYPE(arnoldi_control_type), POINTER           :: control


         ar_data => get_data_d(arnoldi_data)

         control => get_control(arnoldi_data)

         ndim = control%current_step

         ALLOCATE (levec(ndim, ndim))


! Needs antoher interface as the calls to real and complex geev differ (sucks!)

! only perform the diagonalization on processors which hold data

         IF (control%generalized_ev) THEN

            CALL arnoldi_symm_local_diag('V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &

                                         ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim))

         ELSE

            IF (control%symmetric) THEN

               CALL arnoldi_tridiag_local_diag('N', 'V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &

                                               ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim), levec)

            ELSE

               CALL arnoldi_general_local_diag('N', 'V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &

                                               ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim), levec)

            END IF

         END IF


         DEALLOCATE (levec)


      END SUBROUTINE compute_evals_d


! **************************************************************************************************

!> \brief Initialization of the arnoldi vector. Here a random vector is used,

!>        might be generalized in the future

!> \param matrix ...

!> \param vectors ...

!> \param arnoldi_data ...

! **************************************************************************************************

      SUBROUTINE arnoldi_init_d (matrix, vectors, arnoldi_data)

         TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix

         TYPE(m_x_v_vectors_type)                :: vectors

         TYPE(arnoldi_data_type)                 :: arnoldi_data


         INTEGER                                  :: i, iseed(4), row_size, col_size, &

                                                     nrow_local, ncol_local, &

                                                     row, col

         REAL(real_8)                        :: rnorm

         TYPE(dbcsr_iterator_type)                     :: iter

         REAL(kind=real_8)                         :: norm

         REAL(kind=real_8), DIMENSION(:), ALLOCATABLE :: &

            v_vec, w_vec

         REAL(kind=real_8), DIMENSION(:), POINTER          :: data_vec

         LOGICAL                                  :: local_comp

         TYPE(arnoldi_data_d_type), POINTER            :: ar_data

         TYPE(arnoldi_control_type), POINTER           :: control

         TYPE(mp_comm_type)                       :: pcol_group


         control => get_control(arnoldi_data)

         pcol_group = control%pcol_group

         local_comp = control%local_comp


         ar_data => get_data_d(arnoldi_data)


         ! create a local data copy to store the vectors and make Gram Schmidt a bit simpler

         CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)

         ALLOCATE (v_vec(nrow_local))

         ALLOCATE (w_vec(nrow_local))

         v_vec = 0.0_real_8; w_vec = 0.0_real_8

         ar_data%Hessenberg = 0.0_real_8


         IF (control%has_initial_vector) THEN

            ! after calling the set routine the initial vector is stored in f_vec

            CALL transfer_local_array_to_dbcsr_d (vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)

         ELSE

            ! Setup the initial normalized random vector (sufficient if it only happens on proc_col 0)

            CALL dbcsr_iterator_start(iter, vectors%input_vec)

            DO WHILE (dbcsr_iterator_blocks_left(iter))

               CALL dbcsr_iterator_next_block(iter, row, col, data_vec, row_size=row_size, col_size=col_size)

               iseed(1) = 2; iseed(2) = mod(row, 4095); iseed(3) = mod(col, 4095); iseed(4) = 11

               CALL dlarnv(2, iseed, row_size*col_size, data_vec)

            END DO

            CALL dbcsr_iterator_stop(iter)

         END IF


         CALL transfer_dbcsr_to_local_array_d (vectors%input_vec, v_vec, nrow_local, control%local_comp)


         ! compute the vector norm of the random vectorm, get it real valued as well (rnorm)

         CALL compute_norms_d (v_vec, norm, rnorm, control%pcol_group)


         IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data

         CALL dbcsr_scale(vectors%input_vec, real(1.0, real_8)/rnorm)


         ! Everything prepared, initialize the Arnoldi iteration

         CALL transfer_dbcsr_to_local_array_d (vectors%input_vec, v_vec, nrow_local, control%local_comp)


         ! This permits to compute the subspace of a matrix which is a product of multiple matrices

         DO i = 1, SIZE(matrix)

            CALL dbcsr_matrix_colvec_multiply(matrix(i)%matrix, vectors%input_vec, vectors%result_vec, 1.0_real_8, &

                                              0.0_real_8, vectors%rep_row_vec, vectors%rep_col_vec)

            CALL dbcsr_copy(vectors%input_vec, vectors%result_vec)

         END DO


         CALL transfer_dbcsr_to_local_array_d (vectors%result_vec, w_vec, nrow_local, control%local_comp)


         ! Put the projection into the Hessenberg matrix, and make the vectors orthonormal

         ar_data%Hessenberg(1, 1) = dot_product(v_vec, w_vec)

         CALL pcol_group%sum(ar_data%Hessenberg(1, 1))

         ar_data%f_vec = w_vec - v_vec*ar_data%Hessenberg(1, 1)


         ar_data%local_history(:, 1) = v_vec(:)


         ! We did the first step in here so we should set the current step for the subspace generation accordingly

         control%current_step = 1


         DEALLOCATE (v_vec, w_vec)


      END SUBROUTINE arnoldi_init_d


! **************************************************************************************************

!> \brief Alogorithm for the implicit restarts in the arnoldi method

!>        this is an early implementation which scales subspace size^4

!>        by replacing the lapack calls with direct math the

!>        QR and  gemms can be made linear and a N^2 sacling will be acchieved

!>        however this already sets the framework but should be used with care

!> \param arnoldi_data ...

! **************************************************************************************************

      SUBROUTINE arnoldi_iram_d (arnoldi_data)

         TYPE(arnoldi_data_type)                 :: arnoldi_data


         TYPE(arnoldi_data_d_type), POINTER            :: ar_data

         TYPE(arnoldi_control_type), POINTER                     :: control

         COMPLEX(real_8), DIMENSION(:, :), ALLOCATABLE  :: tmp_mat, safe_mat, q, tmp_mat1

         COMPLEX(real_8), DIMENSION(:), ALLOCATABLE    :: work, tau, work_measure

         INTEGER                                   :: msize, lwork, i, j, info, nwant

         REAL(kind=real_8)                          :: beta, sigma

         REAL(kind=real_8), DIMENSION(:, :), ALLOCATABLE :: qdata


         ! This is just a terribly inefficient implementation but I hope it is correct and might serve as a reference

         ar_data => get_data_d(arnoldi_data)

         control => get_control(arnoldi_data)

         msize = control%current_step

         nwant = control%nval_out

         ALLOCATE (tmp_mat(msize, msize)); ALLOCATE (safe_mat(msize, msize))

         ALLOCATE (q(msize, msize)); ALLOCATE (tmp_mat1(msize, msize))

         ALLOCATE (work_measure(1))

         ALLOCATE (tau(msize)); ALLOCATE (qdata(msize, msize))

         !make Q identity

         q = cmplx(0.0, 0.0, real_8)

         DO i = 1, msize

            q(i, i) = cmplx(1.0, 0.0, real_8)

         END DO


         ! Looks a bit odd, but safe_mat will contain the result in the end, while tmpmat gets violated by lapack

         CALL convert_matrix(tmp_mat, ar_data%Hessenberg(1:msize, 1:msize))

         safe_mat(:, :) = tmp_mat(:, :)


         DO i = 1, msize

            ! A bit a strange check but in the end we only want to shift the unwanted evals

            IF (any(control%selected_ind == i)) cycle

            ! Here we shift the matrix by subtracting unwanted evals from the diagonal

            DO j = 1, msize

               tmp_mat(j, j) = tmp_mat(j, j) - ar_data%evals(i)

            END DO

            ! Now we repair the damage by QR factorizing

            lwork = -1

            CALL zgeqrf(msize, msize, tmp_mat, msize, tau, work_measure, lwork, info)

            lwork = int(work_measure(1))

            IF (ALLOCATED(work)) THEN

               IF (SIZE(work) .LT. lwork) THEN

                  DEALLOCATE (work)

               END IF

            END IF

            IF (.NOT. ALLOCATED(work)) ALLOCATE (work(lwork))

            CALL zgeqrf(msize, msize, tmp_mat, msize, tau, work, lwork, info)

            ! Ask Lapack to reconstruct Q from its own way of storing data (tmpmat will contain Q)

            CALL zungqr(msize, msize, msize, tmp_mat, msize, tau, work, lwork, info)

            ! update Q=Q*Q_current

            tmp_mat1(:, :) = q(:, :)

            CALL zgemm('N', 'N', msize, msize, msize, cmplx(1.0, 0.0, real_8), tmp_mat1, msize, tmp_mat, msize, cmplx(0.0, 0.0,&

                & real_8), &

                               q, msize)

            ! Update H=(Q*)HQ

            CALL zgemm('C', 'N', msize, msize, msize, cmplx(1.0, 0.0, real_8), tmp_mat, msize, safe_mat, msize, cmplx(0.0, 0.0,&

                & real_8), &

                               tmp_mat1, msize)

            CALL zgemm('N', 'N', msize, msize, msize, cmplx(1.0, 0.0, real_8), tmp_mat1, msize, tmp_mat, msize, cmplx(0.0, 0.0,&

                & real_8), &

                               safe_mat, msize)


            ! this one is crucial for numerics not to accumulate noise in the subdiagonals

            DO j = 1, msize

               safe_mat(j + 2:msize, j) = cmplx(0.0, 0.0, real_8)

            END DO

            tmp_mat(:, :) = safe_mat(:, :)

         END DO


         ! Now we can compute our restart quantities

         ar_data%Hessenberg = 0.0_real_8

         CALL convert_matrix(ar_data%Hessenberg(1:msize, 1:msize), safe_mat)

         CALL convert_matrix(qdata, q)


         beta = ar_data%Hessenberg(nwant + 1, nwant); sigma = qdata(msize, nwant)


         !update the residuum and the basis vectors

         IF (control%local_comp) THEN

            ar_data%f_vec = matmul(ar_data%local_history(:, 1:msize), qdata(1:msize, nwant + 1))*beta + ar_data%f_vec(:)*sigma

            ar_data%local_history(:, 1:nwant) = matmul(ar_data%local_history(:, 1:msize), qdata(1:msize, 1:nwant))

         END IF

         ! Set the current step to nwant so the subspace build knows where to start

         control%current_step = nwant


         DEALLOCATE (tmp_mat, safe_mat, q, qdata, tmp_mat1, work, tau, work_measure)


      END SUBROUTINE arnoldi_iram_d


! **************************************************************************************************

!> \brief Here we create the Krylov subspace and fill the Hessenberg matrix

!>        convergence check is only performed on subspace convergence

!>        Gram Schidt is used to orthonogonalize.

!>        If this is numericall not sufficient a Daniel, Gragg, Kaufman and Steward

!>        correction is performed

!> \param matrix ...

!> \param vectors ...

!> \param arnoldi_data ...

! **************************************************************************************************

      SUBROUTINE build_subspace_d (matrix, vectors, arnoldi_data)

         TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix

         TYPE(m_x_v_vectors_type), TARGET        :: vectors

         TYPE(arnoldi_data_type)                 :: arnoldi_data


         INTEGER                                  :: i, j, ncol_local, nrow_local

         REAL(real_8)                        :: rnorm

         TYPE(arnoldi_control_type), POINTER           :: control

         TYPE(arnoldi_data_d_type), POINTER  :: ar_data

         REAL(kind=real_8)                         :: norm

         REAL(kind=real_8), ALLOCATABLE, DIMENSION(:)      :: h_vec, s_vec, v_vec, w_vec

         TYPE(dbcsr_type), POINTER                 :: input_vec, result_vec, swap_vec


         ar_data => get_data_d(arnoldi_data)

         control => get_control(arnoldi_data)

         control%converged = .false.


         ! create the vectors required during the iterations

         CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)

         ALLOCATE (v_vec(nrow_local)); ALLOCATE (w_vec(nrow_local))

         v_vec = 0.0_real_8; w_vec = 0.0_real_8

         ALLOCATE (s_vec(control%max_iter)); ALLOCATE (h_vec(control%max_iter))


         DO j = control%current_step, control%max_iter - 1


            ! compute the vector norm of the residuum, get it real valued as well (rnorm)

            CALL compute_norms_d (ar_data%f_vec, norm, rnorm, control%pcol_group)


            ! check convergence and inform everybody about it, a bit annoying to talk to everybody because of that

            IF (control%myproc == 0) control%converged = rnorm .LT. real(control%threshold, real_8)

            CALL control%mp_group%bcast(control%converged, 0)

            IF (control%converged) EXIT


            ! transfer normalized residdum to history and its norm to the Hessenberg matrix

            IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data

            v_vec(:) = ar_data%f_vec(:)/rnorm; ar_data%local_history(:, j + 1) = v_vec(:); ar_data%Hessenberg(j + 1, j) = norm


            input_vec => vectors%input_vec

            result_vec => vectors%result_vec

            CALL transfer_local_array_to_dbcsr_d (input_vec, v_vec, nrow_local, control%local_comp)


            ! This permits to compute the subspace of a matrix which is a product of two matrices

            DO i = 1, SIZE(matrix)

               CALL dbcsr_matrix_colvec_multiply(matrix(i)%matrix, input_vec, result_vec, 1.0_real_8, &

                                                 0.0_real_8, vectors%rep_row_vec, vectors%rep_col_vec)

               swap_vec => input_vec

               input_vec => result_vec

               result_vec => swap_vec

            END DO


            CALL transfer_dbcsr_to_local_array_d (input_vec, w_vec, nrow_local, control%local_comp)


            ! Let's do the orthonormalization, to get the new f_vec. First try the Gram Schmidt scheme

            CALL gram_schmidt_ortho_d (h_vec, ar_data%f_vec, s_vec, w_vec, nrow_local, j + 1, &

                                               ar_data%local_history, ar_data%local_history, control%local_comp, control%pcol_group)


            ! A bit more expensive but simply always top up with a DGKS correction, otherwise numerics

            ! can become a problem later on, there is probably a good check whether it's necessary, but we don't perform it

            CALL dgks_ortho_d (h_vec, ar_data%f_vec, s_vec, nrow_local, j + 1, ar_data%local_history, &

                                           ar_data%local_history, control%local_comp, control%pcol_group)

            ! Finally we can put the projections into our Hessenberg matrix

            ar_data%Hessenberg(1:j + 1, j + 1) = h_vec(1:j + 1)

            control%current_step = j + 1

         END DO


         ! compute the vector norm of the final residuum and put it in to Hessenberg

         CALL compute_norms_d (ar_data%f_vec, norm, rnorm, control%pcol_group)

         ar_data%Hessenberg(control%current_step + 1, control%current_step) = norm


         ! broadcast the Hessenberg matrix so we don't need to care later on

         CALL control%mp_group%bcast(ar_data%Hessenberg, 0)


         DEALLOCATE (v_vec, w_vec, h_vec, s_vec)


      END SUBROUTINE build_subspace_d


! **************************************************************************************************

!> \brief Helper routine to transfer the all data of a dbcsr matrix to a local array

!> \param vec ...

!> \param array ...

!> \param n ...

!> \param is_local ...

! **************************************************************************************************

      SUBROUTINE transfer_dbcsr_to_local_array_d (vec, array, n, is_local)

         TYPE(dbcsr_type)                          :: vec

         REAL(kind=real_8), DIMENSION(:)           :: array

         INTEGER                                  :: n

         LOGICAL                                  :: is_local

         REAL(kind=real_8), DIMENSION(:), POINTER          :: data_vec


         data_vec => dbcsr_get_data_p(vec, select_data_type=0.0_real_8)

         IF (is_local) array(1:n) = data_vec(1:n)


      END SUBROUTINE transfer_dbcsr_to_local_array_d


! **************************************************************************************************

!> \brief The inverse routine transferring data back from an array to a dbcsr

!> \param vec ...

!> \param array ...

!> \param n ...

!> \param is_local ...

! **************************************************************************************************

      SUBROUTINE transfer_local_array_to_dbcsr_d (vec, array, n, is_local)

         TYPE(dbcsr_type)                          :: vec

         REAL(kind=real_8), DIMENSION(:)           :: array

         INTEGER                                  :: n

         LOGICAL                                  :: is_local

         REAL(kind=real_8), DIMENSION(:), POINTER          :: data_vec


         data_vec => dbcsr_get_data_p(vec, select_data_type=0.0_real_8)

         IF (is_local) data_vec(1:n) = array(1:n)


      END SUBROUTINE transfer_local_array_to_dbcsr_d


! **************************************************************************************************

!> \brief Gram-Schmidt in matrix vector form

!> \param h_vec ...

!> \param f_vec ...

!> \param s_vec ...

!> \param w_vec ...

!> \param nrow_local ...

!> \param j ...

!> \param local_history ...

!> \param reorth_mat ...

!> \param local_comp ...

!> \param pcol_group ...

! **************************************************************************************************

      SUBROUTINE gram_schmidt_ortho_d (h_vec, f_vec, s_vec, w_vec, nrow_local, &

                                                   j, local_history, reorth_mat, local_comp, pcol_group)

         REAL(kind=real_8), DIMENSION(:)      :: h_vec, s_vec, f_vec, w_vec

         REAL(kind=real_8), DIMENSION(:, :)    :: local_history, reorth_mat

         INTEGER                                          :: nrow_local, j

         TYPE(mp_comm_type), INTENT(IN)                   :: pcol_group

         LOGICAL                                          :: local_comp


         CHARACTER(LEN=*), PARAMETER :: routinen = 'Gram_Schmidt_ortho_d'

         INTEGER                                  :: handle


         CALL timeset(routinen, handle)


         ! Let's do the orthonormalization, first try the Gram Schmidt scheme

         h_vec = 0.0_real_8; f_vec = 0.0_real_8; s_vec = 0.0_real_8

         IF (local_comp) CALL dgemv('T', nrow_local, j, 1.0_real_8, local_history, &

                                                nrow_local, w_vec, 1, 0.0_real_8, h_vec, 1)

         CALL pcol_group%sum(h_vec(1:j))


         IF (local_comp) CALL dgemv('N', nrow_local, j, 1.0_real_8, reorth_mat, &

                                                nrow_local, h_vec, 1, 0.0_real_8, f_vec, 1)

         f_vec(:) = w_vec(:) - f_vec(:)


         CALL timestop(handle)


      END SUBROUTINE gram_schmidt_ortho_d


! **************************************************************************************************

!> \brief Compute the  Daniel, Gragg, Kaufman and Steward correction

!> \param h_vec ...

!> \param f_vec ...

!> \param s_vec ...

!> \param nrow_local ...

!> \param j ...

!> \param local_history ...

!> \param reorth_mat ...

!> \param local_comp ...

!> \param pcol_group ...

! **************************************************************************************************

      SUBROUTINE dgks_ortho_d (h_vec, f_vec, s_vec, nrow_local, j, &

                                           local_history, reorth_mat, local_comp, pcol_group)

         REAL(kind=real_8), DIMENSION(:)      :: h_vec, s_vec, f_vec

         REAL(kind=real_8), DIMENSION(:, :)    :: local_history, reorth_mat

         INTEGER                                          :: nrow_local, j

         TYPE(mp_comm_type), INTENT(IN)           :: pcol_group


         CHARACTER(LEN=*), PARAMETER :: routinen = 'DGKS_ortho_d'

         LOGICAL                                          :: local_comp

         INTEGER                                  :: handle


         CALL timeset(routinen, handle)


         IF (local_comp) CALL dgemv('T', nrow_local, j, 1.0_real_8, local_history, &

                                                nrow_local, f_vec, 1, 0.0_real_8, s_vec, 1)

         CALL pcol_group%sum(s_vec(1:j))

         IF (local_comp) CALL dgemv('N', nrow_local, j, -1.0_real_8, reorth_mat, &

                                                nrow_local, s_vec, 1, 1.0_real_8, f_vec, 1)

         h_vec(1:j) = h_vec(1:j) + s_vec(1:j)


         CALL timestop(handle)


      END SUBROUTINE dgks_ortho_d


! **************************************************************************************************

!> \brief Compute the norm of a vector distributed along proc_col

!>        as local arrays. Always return the real part next to the complex rep.

!> \param vec ...

!> \param norm ...

!> \param rnorm ...

!> \param pcol_group ...

! **************************************************************************************************

      SUBROUTINE compute_norms_d (vec, norm, rnorm, pcol_group)

         REAL(kind=real_8), DIMENSION(:)           :: vec

         REAL(real_8)                        :: rnorm

         REAL(kind=real_8)                         :: norm

         TYPE(mp_comm_type), INTENT(IN)             :: pcol_group


         ! the norm is computed along the processor column

         norm = dot_product(vec, vec)

         CALL pcol_group%sum(norm)

         rnorm = sqrt(real(norm, real_8))

         norm=rnorm


      END SUBROUTINE compute_norms_d


! **************************************************************************************************

!> \brief Computes the initial guess for the solution of the generalized eigenvalue

!>        using the arnoldi method

!> \param matrix ...

!> \param matrix_arnoldi ...

!> \param vectors ...

!> \param arnoldi_data ...

! **************************************************************************************************


      SUBROUTINE gev_arnoldi_init_d (matrix, matrix_arnoldi, vectors, arnoldi_data)

         TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix

         TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix_arnoldi

         TYPE(m_x_v_vectors_type)                :: vectors

         TYPE(arnoldi_data_type)                 :: arnoldi_data


         INTEGER                                  :: iseed(4), row_size, col_size, &

                                                     nrow_local, ncol_local, &

                                                     row, col

         REAL(real_8)                        :: rnorm

         TYPE(dbcsr_iterator_type)                     :: iter

         REAL(kind=real_8)                         :: norm, denom

         REAL(kind=real_8), DIMENSION(:), ALLOCATABLE :: &

            v_vec, w_vec

         REAL(kind=real_8), DIMENSION(:), POINTER          :: data_vec

         LOGICAL                                  :: local_comp

         TYPE(arnoldi_data_d_type), POINTER            :: ar_data

         TYPE(arnoldi_control_type), POINTER           :: control

         TYPE(mp_comm_type)                         :: pcol_group


         control => get_control(arnoldi_data)

         pcol_group = control%pcol_group

         local_comp = control%local_comp


         ar_data => get_data_d(arnoldi_data)


         ! create a local data copy to store the vectors and make Gram Schmidt a bit simpler

         CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)

         ALLOCATE (v_vec(nrow_local))

         ALLOCATE (w_vec(nrow_local))

         v_vec = 0.0_real_8; w_vec = 0.0_real_8

         ar_data%Hessenberg = 0.0_real_8


         IF (control%has_initial_vector) THEN

            ! after calling the set routine the initial vector is stored in f_vec

            CALL transfer_local_array_to_dbcsr_d (vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)

         ELSE

            ! Setup the initial normalized random vector (sufficient if it only happens on proc_col 0)

            CALL dbcsr_iterator_start(iter, vectors%input_vec)

            DO WHILE (dbcsr_iterator_blocks_left(iter))

               CALL dbcsr_iterator_next_block(iter, row, col, data_vec, row_size=row_size, col_size=col_size)

               iseed(1) = 2; iseed(2) = mod(row, 4095); iseed(3) = mod(col, 4095); iseed(4) = 11

               CALL dlarnv(2, iseed, row_size*col_size, data_vec)

            END DO

            CALL dbcsr_iterator_stop(iter)

         END IF


         CALL transfer_dbcsr_to_local_array_d (vectors%input_vec, v_vec, nrow_local, control%local_comp)


         ! compute the vector norm of the reandom vectorm, get it real valued as well (rnorm)

         CALL compute_norms_d (v_vec, norm, rnorm, control%pcol_group)


         IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data

         CALL dbcsr_scale(vectors%input_vec, real(1.0, real_8)/rnorm)


         CALL dbcsr_matrix_colvec_multiply(matrix(1)%matrix, vectors%input_vec, vectors%result_vec, 1.0_real_8, &

                                           0.0_real_8, vectors%rep_row_vec, vectors%rep_col_vec)


         CALL transfer_dbcsr_to_local_array_d (vectors%result_vec, w_vec, nrow_local, control%local_comp)


         ar_data%rho_scale = 0.0_real_8

         ar_data%rho_scale = dot_product(v_vec, w_vec)

         CALL pcol_group%sum(ar_data%rho_scale)


         CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, 1.0_real_8, &

                                           0.0_real_8, vectors%rep_row_vec, vectors%rep_col_vec)


         CALL transfer_dbcsr_to_local_array_d (vectors%result_vec, w_vec, nrow_local, control%local_comp)


         denom = 0.0_real_8

         denom = dot_product(v_vec, w_vec)

         CALL pcol_group%sum(denom)

         IF (control%myproc == 0) ar_data%rho_scale = ar_data%rho_scale/denom

         CALL control%mp_group%bcast(ar_data%rho_scale, 0)


         ! if the maximum ev is requested we need to optimize with -A-rho*B

         CALL dbcsr_copy(matrix_arnoldi(1)%matrix, matrix(1)%matrix)

         CALL dbcsr_add(matrix_arnoldi(1)%matrix, matrix(2)%matrix, 1.0_real_8, -ar_data%rho_scale)


         ar_data%x_vec = v_vec


      END SUBROUTINE gev_arnoldi_init_d


! **************************************************************************************************

!> \brief builds the basis rothogonal wrt. the metric.

!>        The structure looks similar to normal arnoldi but norms, vectors and

!>        matrix_vector products are very differently defined. Therefore it is

!>        cleaner to put it in a separate subroutine to avoid confusion

!> \param matrix ...

!> \param vectors ...

!> \param arnoldi_data ...

! **************************************************************************************************

      SUBROUTINE gev_build_subspace_d (matrix, vectors, arnoldi_data)

         TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix

         TYPE(m_x_v_vectors_type)                :: vectors

         TYPE(arnoldi_data_type)                 :: arnoldi_data


         INTEGER                                  :: j, ncol_local, nrow_local

         TYPE(arnoldi_control_type), POINTER           :: control

         TYPE(arnoldi_data_d_type), POINTER            :: ar_data

         REAL(kind=real_8)                         :: norm

         REAL(kind=real_8), ALLOCATABLE, DIMENSION(:)      :: h_vec, s_vec, v_vec, w_vec

         REAL(kind=real_8), ALLOCATABLE, DIMENSION(:, :)    :: zmat, czmat, bzmat

         TYPE(mp_comm_type)                           :: pcol_group


         ar_data => get_data_d(arnoldi_data)

         control => get_control(arnoldi_data)

         control%converged = .false.

         pcol_group = control%pcol_group


         ! create the vectors required during the iterations

         CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)

         ALLOCATE (v_vec(nrow_local)); ALLOCATE (w_vec(nrow_local))

         v_vec = 0.0_real_8; w_vec = 0.0_real_8

         ALLOCATE (s_vec(control%max_iter)); ALLOCATE (h_vec(control%max_iter))

         ALLOCATE (zmat(nrow_local, control%max_iter)); ALLOCATE (czmat(nrow_local, control%max_iter))

         ALLOCATE (bzmat(nrow_local, control%max_iter))


         CALL transfer_local_array_to_dbcsr_d (vectors%input_vec, ar_data%x_vec, nrow_local, control%local_comp)

         CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, 1.0_real_8, &

                                           0.0_real_8, vectors%rep_row_vec, vectors%rep_col_vec)

         CALL transfer_dbcsr_to_local_array_d (vectors%result_vec, bzmat(:, 1), nrow_local, control%local_comp)


         norm = 0.0_real_8

         norm = dot_product(ar_data%x_vec, bzmat(:, 1))

         CALL pcol_group%sum(norm)

         IF (control%local_comp) THEN

            zmat(:, 1) = ar_data%x_vec/sqrt(norm); bzmat(:, 1) = bzmat(:, 1)/sqrt(norm)

         END IF


         DO j = 1, control%max_iter

            control%current_step = j

            CALL transfer_local_array_to_dbcsr_d (vectors%input_vec, zmat(:, j), nrow_local, control%local_comp)

            CALL dbcsr_matrix_colvec_multiply(matrix(1)%matrix, vectors%input_vec, vectors%result_vec, 1.0_real_8, &

                                              0.0_real_8, vectors%rep_row_vec, vectors%rep_col_vec)

            CALL transfer_dbcsr_to_local_array_d (vectors%result_vec, czmat(:, j), nrow_local, control%local_comp)

            w_vec(:) = czmat(:, j)


            ! Let's do the orthonormalization, to get the new f_vec. First try the Gram Schmidt scheme

            CALL gram_schmidt_ortho_d (h_vec, ar_data%f_vec, s_vec, w_vec, nrow_local, j, &

                                                   bzmat, zmat, control%local_comp, control%pcol_group)


            ! A bit more expensive but simpliy always top up with a DGKS correction, otherwise numerics

            ! can becom a problem later on, there is probably a good check, but we don't perform it

            CALL dgks_ortho_d (h_vec, ar_data%f_vec, s_vec, nrow_local, j, bzmat, &

                                           zmat, control%local_comp, control%pcol_group)


            CALL transfer_local_array_to_dbcsr_d (vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)

            CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, 1.0_real_8, &

                                              0.0_real_8, vectors%rep_row_vec, vectors%rep_col_vec)

            CALL transfer_dbcsr_to_local_array_d (vectors%result_vec, v_vec, nrow_local, control%local_comp)

            norm = 0.0_real_8

            norm = dot_product(ar_data%f_vec, v_vec)

            CALL pcol_group%sum(norm)


            IF (control%myproc == 0) control%converged = real(norm, real_8) .LT. epsilon(real(1.0, real_8))

            CALL control%mp_group%bcast(control%converged, 0)

            IF (control%converged) EXIT

            IF (j == control%max_iter - 1) EXIT


            IF (control%local_comp) THEN

               zmat(:, j + 1) = ar_data%f_vec/sqrt(norm); bzmat(:, j + 1) = v_vec(:)/sqrt(norm)

            END IF

         END DO


! getting a bit more complicated here as the final matrix is again a product which has to be computed with the

! distributed vectors, therefore a sum along the first proc_col is necessary. As we want that matrix everywhere,

! we set it to zero before and compute the distributed product only on the first col and then sum over the full grid

         ar_data%Hessenberg = 0.0_real_8

         IF (control%local_comp) THEN

            ar_data%Hessenberg(1:control%current_step, 1:control%current_step) = &

               matmul(transpose(czmat(:, 1:control%current_step)), zmat(:, 1:control%current_step))

         END IF

         CALL control%mp_group%sum(ar_data%Hessenberg)


         ar_data%local_history = zmat

         ! broadcast the Hessenberg matrix so we don't need to care later on


         DEALLOCATE (v_vec); DEALLOCATE (w_vec); DEALLOCATE (s_vec); DEALLOCATE (h_vec); DEALLOCATE (czmat);

         DEALLOCATE (zmat); DEALLOCATE (bzmat)


      END SUBROUTINE gev_build_subspace_d


! **************************************************************************************************

!> \brief Updates all data after an inner loop of the generalized ev arnoldi.

!>        Updates rho and C=A-rho*B accordingly.

!>        As an update scheme is used for he ev, the output ev has to be replaced

!>        with the updated one.

!>        Furthermore a convergence check is performed. The mv product could

!>        be skiiped by making clever use of the precomputed data, However,

!>        it is most likely not worth the effort.

!> \param matrix ...

!> \param matrix_arnoldi ...

!> \param vectors ...

!> \param arnoldi_data ...

! **************************************************************************************************

      SUBROUTINE gev_update_data_d (matrix, matrix_arnoldi, vectors, arnoldi_data)

         TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix

         TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix_arnoldi

         TYPE(m_x_v_vectors_type)                :: vectors

         TYPE(arnoldi_data_type)                 :: arnoldi_data


         TYPE(arnoldi_control_type), POINTER           :: control

         TYPE(arnoldi_data_d_type), POINTER            :: ar_data

         INTEGER                                  :: ncol_local, nrow_local, ind, i

         REAL(kind=real_8), ALLOCATABLE, DIMENSION(:)      :: v_vec

         REAL(real_8)                        :: rnorm

         REAL(kind=real_8)                         :: norm

         COMPLEX(real_8)                     :: val


         control => get_control(arnoldi_data)


         ar_data => get_data_d(arnoldi_data)


! compute the new shift, hack around the problem templating the conversion

         val = ar_data%evals(control%selected_ind(1))

         ar_data%rho_scale = ar_data%rho_scale + real(val,real_8)

! compute the new eigenvector / initial guess for the next arnoldi loop

         ar_data%x_vec = 0.0_real_8

         DO i = 1, control%current_step

            val = ar_data%revec(i, control%selected_ind(1))

            ar_data%x_vec(:) = ar_data%x_vec(:) + ar_data%local_history(:, i)*real(val,real_8)

         END DO

!      ar_data%x_vec(:)=MATMUL(ar_data%local_history(:,1:control%current_step),&

!                        ar_data%revec(1:control%current_step,control%selected_ind(1)))


! update the C-matrix (A-rho*B), if the maximum value is requested we have to use -A-rho*B

         CALL dbcsr_copy(matrix_arnoldi(1)%matrix, matrix(1)%matrix)

         CALL dbcsr_add(matrix_arnoldi(1)%matrix, matrix(2)%matrix, 1.0_real_8, -ar_data%rho_scale)


! compute convergence and set the correct eigenvalue and eigenvector

         CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)

         IF (ncol_local > 0) THEN

            ALLOCATE (v_vec(nrow_local))

            CALL compute_norms_d (ar_data%x_vec, norm, rnorm, control%pcol_group)

            v_vec(:) = ar_data%x_vec(:)/rnorm

         END IF

         CALL transfer_local_array_to_dbcsr_d (vectors%input_vec, v_vec, nrow_local, control%local_comp)

         CALL dbcsr_matrix_colvec_multiply(matrix_arnoldi(1)%matrix, vectors%input_vec, vectors%result_vec, 1.0_real_8, &

                                           0.0_real_8, vectors%rep_row_vec, vectors%rep_col_vec)

         CALL transfer_dbcsr_to_local_array_d (vectors%result_vec, v_vec, nrow_local, control%local_comp)

         IF (ncol_local > 0) THEN

            CALL compute_norms_d (v_vec, norm, rnorm, control%pcol_group)

            ! check convergence

            control%converged = rnorm .LT. control%threshold

            DEALLOCATE (v_vec)

         END IF

         ! and broadcast the real eigenvalue

         CALL control%mp_group%bcast(control%converged, 0)

         ind = control%selected_ind(1)

         CALL control%mp_group%bcast(ar_data%rho_scale, 0)


! Again the maximum value request is done on -A therefore the eigenvalue needs the opposite sign

         ar_data%evals(ind) = ar_data%rho_scale


      END SUBROUTINE gev_update_data_d

! **************************************************************************************************

!> \brief Call the correct eigensolver, in the arnoldi method only the right

!>        eigenvectors are used. Lefts are created here but dumped immediately

!> \param arnoldi_data ...

! **************************************************************************************************

      SUBROUTINE compute_evals_z (arnoldi_data)

         TYPE(arnoldi_data_type)                 :: arnoldi_data


         COMPLEX(real_8), DIMENSION(:, :), ALLOCATABLE   :: levec

         TYPE(arnoldi_data_z_type), POINTER            :: ar_data

         INTEGER                                  :: ndim

         TYPE(arnoldi_control_type), POINTER           :: control


         ar_data => get_data_z(arnoldi_data)

         control => get_control(arnoldi_data)

         ndim = control%current_step

         ALLOCATE (levec(ndim, ndim))


! Needs antoher interface as the calls to real and complex geev differ (sucks!)

! only perform the diagonalization on processors which hold data

         IF (control%generalized_ev) THEN

            CALL arnoldi_symm_local_diag('V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &

                                         ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim))

         ELSE

            IF (control%symmetric) THEN

               CALL arnoldi_tridiag_local_diag('N', 'V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &

                                               ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim), levec)

            ELSE

               CALL arnoldi_general_local_diag('N', 'V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &

                                               ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim), levec)

            END IF

         END IF


         DEALLOCATE (levec)


      END SUBROUTINE compute_evals_z


! **************************************************************************************************

!> \brief Initialization of the arnoldi vector. Here a random vector is used,

!>        might be generalized in the future

!> \param matrix ...

!> \param vectors ...

!> \param arnoldi_data ...

! **************************************************************************************************

      SUBROUTINE arnoldi_init_z (matrix, vectors, arnoldi_data)

         TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix

         TYPE(m_x_v_vectors_type)                :: vectors

         TYPE(arnoldi_data_type)                 :: arnoldi_data


         INTEGER                                  :: i, iseed(4), row_size, col_size, &

                                                     nrow_local, ncol_local, &

                                                     row, col

         REAL(real_8)                        :: rnorm

         TYPE(dbcsr_iterator_type)                     :: iter

         COMPLEX(kind=real_8)                         :: norm

         COMPLEX(kind=real_8), DIMENSION(:), ALLOCATABLE :: &

            v_vec, w_vec

         COMPLEX(kind=real_8), DIMENSION(:), POINTER          :: data_vec

         LOGICAL                                  :: local_comp

         TYPE(arnoldi_data_z_type), POINTER            :: ar_data

         TYPE(arnoldi_control_type), POINTER           :: control

         TYPE(mp_comm_type)                       :: pcol_group


         control => get_control(arnoldi_data)

         pcol_group = control%pcol_group

         local_comp = control%local_comp


         ar_data => get_data_z(arnoldi_data)


         ! create a local data copy to store the vectors and make Gram Schmidt a bit simpler

         CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)

         ALLOCATE (v_vec(nrow_local))

         ALLOCATE (w_vec(nrow_local))

         v_vec = cmplx(0.0, 0.0, real_8); w_vec = cmplx(0.0, 0.0, real_8)

         ar_data%Hessenberg = cmplx(0.0, 0.0, real_8)


         IF (control%has_initial_vector) THEN

            ! after calling the set routine the initial vector is stored in f_vec

            CALL transfer_local_array_to_dbcsr_z (vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)

         ELSE

            ! Setup the initial normalized random vector (sufficient if it only happens on proc_col 0)

            CALL dbcsr_iterator_start(iter, vectors%input_vec)

            DO WHILE (dbcsr_iterator_blocks_left(iter))

               CALL dbcsr_iterator_next_block(iter, row, col, data_vec, row_size=row_size, col_size=col_size)

               iseed(1) = 2; iseed(2) = mod(row, 4095); iseed(3) = mod(col, 4095); iseed(4) = 11

               CALL zlarnv(2, iseed, row_size*col_size, data_vec)

            END DO

            CALL dbcsr_iterator_stop(iter)

         END IF


         CALL transfer_dbcsr_to_local_array_z (vectors%input_vec, v_vec, nrow_local, control%local_comp)


         ! compute the vector norm of the random vectorm, get it real valued as well (rnorm)

         CALL compute_norms_z (v_vec, norm, rnorm, control%pcol_group)


         IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data

         CALL dbcsr_scale(vectors%input_vec, real(1.0, real_8)/rnorm)


         ! Everything prepared, initialize the Arnoldi iteration

         CALL transfer_dbcsr_to_local_array_z (vectors%input_vec, v_vec, nrow_local, control%local_comp)


         ! This permits to compute the subspace of a matrix which is a product of multiple matrices

         DO i = 1, SIZE(matrix)

            CALL dbcsr_matrix_colvec_multiply(matrix(i)%matrix, vectors%input_vec, vectors%result_vec, cmplx(1.0, 0.0, real_8), &

                                              cmplx(0.0, 0.0, real_8), vectors%rep_row_vec, vectors%rep_col_vec)

            CALL dbcsr_copy(vectors%input_vec, vectors%result_vec)

         END DO


         CALL transfer_dbcsr_to_local_array_z (vectors%result_vec, w_vec, nrow_local, control%local_comp)


         ! Put the projection into the Hessenberg matrix, and make the vectors orthonormal

         ar_data%Hessenberg(1, 1) = dot_product(v_vec, w_vec)

         CALL pcol_group%sum(ar_data%Hessenberg(1, 1))

         ar_data%f_vec = w_vec - v_vec*ar_data%Hessenberg(1, 1)


         ar_data%local_history(:, 1) = v_vec(:)


         ! We did the first step in here so we should set the current step for the subspace generation accordingly

         control%current_step = 1


         DEALLOCATE (v_vec, w_vec)


      END SUBROUTINE arnoldi_init_z


! **************************************************************************************************

!> \brief Alogorithm for the implicit restarts in the arnoldi method

!>        this is an early implementation which scales subspace size^4

!>        by replacing the lapack calls with direct math the

!>        QR and  gemms can be made linear and a N^2 sacling will be acchieved

!>        however this already sets the framework but should be used with care

!> \param arnoldi_data ...

! **************************************************************************************************

      SUBROUTINE arnoldi_iram_z (arnoldi_data)

         TYPE(arnoldi_data_type)                 :: arnoldi_data


         TYPE(arnoldi_data_z_type), POINTER            :: ar_data

         TYPE(arnoldi_control_type), POINTER                     :: control

         COMPLEX(real_8), DIMENSION(:, :), ALLOCATABLE  :: tmp_mat, safe_mat, q, tmp_mat1

         COMPLEX(real_8), DIMENSION(:), ALLOCATABLE    :: work, tau, work_measure

         INTEGER                                   :: msize, lwork, i, j, info, nwant

         COMPLEX(kind=real_8)                          :: beta, sigma

         COMPLEX(kind=real_8), DIMENSION(:, :), ALLOCATABLE :: qdata


         ! This is just a terribly inefficient implementation but I hope it is correct and might serve as a reference

         ar_data => get_data_z(arnoldi_data)

         control => get_control(arnoldi_data)

         msize = control%current_step

         nwant = control%nval_out

         ALLOCATE (tmp_mat(msize, msize)); ALLOCATE (safe_mat(msize, msize))

         ALLOCATE (q(msize, msize)); ALLOCATE (tmp_mat1(msize, msize))

         ALLOCATE (work_measure(1))

         ALLOCATE (tau(msize)); ALLOCATE (qdata(msize, msize))

         !make Q identity

         q = cmplx(0.0, 0.0, real_8)

         DO i = 1, msize

            q(i, i) = cmplx(1.0, 0.0, real_8)

         END DO


         ! Looks a bit odd, but safe_mat will contain the result in the end, while tmpmat gets violated by lapack

         CALL convert_matrix(tmp_mat, ar_data%Hessenberg(1:msize, 1:msize))

         safe_mat(:, :) = tmp_mat(:, :)


         DO i = 1, msize

            ! A bit a strange check but in the end we only want to shift the unwanted evals

            IF (any(control%selected_ind == i)) cycle

            ! Here we shift the matrix by subtracting unwanted evals from the diagonal

            DO j = 1, msize

               tmp_mat(j, j) = tmp_mat(j, j) - ar_data%evals(i)

            END DO

            ! Now we repair the damage by QR factorizing

            lwork = -1

            CALL zgeqrf(msize, msize, tmp_mat, msize, tau, work_measure, lwork, info)

            lwork = int(work_measure(1))

            IF (ALLOCATED(work)) THEN

               IF (SIZE(work) .LT. lwork) THEN

                  DEALLOCATE (work)

               END IF

            END IF

            IF (.NOT. ALLOCATED(work)) ALLOCATE (work(lwork))

            CALL zgeqrf(msize, msize, tmp_mat, msize, tau, work, lwork, info)

            ! Ask Lapack to reconstruct Q from its own way of storing data (tmpmat will contain Q)

            CALL zungqr(msize, msize, msize, tmp_mat, msize, tau, work, lwork, info)

            ! update Q=Q*Q_current

            tmp_mat1(:, :) = q(:, :)

            CALL zgemm('N', 'N', msize, msize, msize, cmplx(1.0, 0.0, real_8), tmp_mat1, msize, tmp_mat, msize, cmplx(0.0, 0.0,&

                & real_8), &

                               q, msize)

            ! Update H=(Q*)HQ

            CALL zgemm('C', 'N', msize, msize, msize, cmplx(1.0, 0.0, real_8), tmp_mat, msize, safe_mat, msize, cmplx(0.0, 0.0,&

                & real_8), &

                               tmp_mat1, msize)

            CALL zgemm('N', 'N', msize, msize, msize, cmplx(1.0, 0.0, real_8), tmp_mat1, msize, tmp_mat, msize, cmplx(0.0, 0.0,&

                & real_8), &

                               safe_mat, msize)


            ! this one is crucial for numerics not to accumulate noise in the subdiagonals

            DO j = 1, msize

               safe_mat(j + 2:msize, j) = cmplx(0.0, 0.0, real_8)

            END DO

            tmp_mat(:, :) = safe_mat(:, :)

         END DO


         ! Now we can compute our restart quantities

         ar_data%Hessenberg = cmplx(0.0, 0.0, real_8)

         CALL convert_matrix(ar_data%Hessenberg(1:msize, 1:msize), safe_mat)

         CALL convert_matrix(qdata, q)


         beta = ar_data%Hessenberg(nwant + 1, nwant); sigma = qdata(msize, nwant)


         !update the residuum and the basis vectors

         IF (control%local_comp) THEN

            ar_data%f_vec = matmul(ar_data%local_history(:, 1:msize), qdata(1:msize, nwant + 1))*beta + ar_data%f_vec(:)*sigma

            ar_data%local_history(:, 1:nwant) = matmul(ar_data%local_history(:, 1:msize), qdata(1:msize, 1:nwant))

         END IF

         ! Set the current step to nwant so the subspace build knows where to start

         control%current_step = nwant


         DEALLOCATE (tmp_mat, safe_mat, q, qdata, tmp_mat1, work, tau, work_measure)


      END SUBROUTINE arnoldi_iram_z


! **************************************************************************************************

!> \brief Here we create the Krylov subspace and fill the Hessenberg matrix

!>        convergence check is only performed on subspace convergence

!>        Gram Schidt is used to orthonogonalize.

!>        If this is numericall not sufficient a Daniel, Gragg, Kaufman and Steward

!>        correction is performed

!> \param matrix ...

!> \param vectors ...

!> \param arnoldi_data ...

! **************************************************************************************************

      SUBROUTINE build_subspace_z (matrix, vectors, arnoldi_data)

         TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix

         TYPE(m_x_v_vectors_type), TARGET        :: vectors

         TYPE(arnoldi_data_type)                 :: arnoldi_data


         INTEGER                                  :: i, j, ncol_local, nrow_local

         REAL(real_8)                        :: rnorm

         TYPE(arnoldi_control_type), POINTER           :: control

         TYPE(arnoldi_data_z_type), POINTER  :: ar_data

         COMPLEX(kind=real_8)                         :: norm

         COMPLEX(kind=real_8), ALLOCATABLE, DIMENSION(:)      :: h_vec, s_vec, v_vec, w_vec

         TYPE(dbcsr_type), POINTER                 :: input_vec, result_vec, swap_vec


         ar_data => get_data_z(arnoldi_data)

         control => get_control(arnoldi_data)

         control%converged = .false.


         ! create the vectors required during the iterations

         CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)

         ALLOCATE (v_vec(nrow_local)); ALLOCATE (w_vec(nrow_local))

         v_vec = cmplx(0.0, 0.0, real_8); w_vec = cmplx(0.0, 0.0, real_8)

         ALLOCATE (s_vec(control%max_iter)); ALLOCATE (h_vec(control%max_iter))


         DO j = control%current_step, control%max_iter - 1


            ! compute the vector norm of the residuum, get it real valued as well (rnorm)

            CALL compute_norms_z (ar_data%f_vec, norm, rnorm, control%pcol_group)


            ! check convergence and inform everybody about it, a bit annoying to talk to everybody because of that

            IF (control%myproc == 0) control%converged = rnorm .LT. real(control%threshold, real_8)

            CALL control%mp_group%bcast(control%converged, 0)

            IF (control%converged) EXIT


            ! transfer normalized residdum to history and its norm to the Hessenberg matrix

            IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data

            v_vec(:) = ar_data%f_vec(:)/rnorm; ar_data%local_history(:, j + 1) = v_vec(:); ar_data%Hessenberg(j + 1, j) = norm


            input_vec => vectors%input_vec

            result_vec => vectors%result_vec

            CALL transfer_local_array_to_dbcsr_z (input_vec, v_vec, nrow_local, control%local_comp)


            ! This permits to compute the subspace of a matrix which is a product of two matrices

            DO i = 1, SIZE(matrix)

               CALL dbcsr_matrix_colvec_multiply(matrix(i)%matrix, input_vec, result_vec, cmplx(1.0, 0.0, real_8), &

                                                 cmplx(0.0, 0.0, real_8), vectors%rep_row_vec, vectors%rep_col_vec)

               swap_vec => input_vec

               input_vec => result_vec

               result_vec => swap_vec

            END DO


            CALL transfer_dbcsr_to_local_array_z (input_vec, w_vec, nrow_local, control%local_comp)


            ! Let's do the orthonormalization, to get the new f_vec. First try the Gram Schmidt scheme

            CALL gram_schmidt_ortho_z (h_vec, ar_data%f_vec, s_vec, w_vec, nrow_local, j + 1, &

                                               ar_data%local_history, ar_data%local_history, control%local_comp, control%pcol_group)


            ! A bit more expensive but simply always top up with a DGKS correction, otherwise numerics

            ! can become a problem later on, there is probably a good check whether it's necessary, but we don't perform it

            CALL dgks_ortho_z (h_vec, ar_data%f_vec, s_vec, nrow_local, j + 1, ar_data%local_history, &

                                           ar_data%local_history, control%local_comp, control%pcol_group)

            ! Finally we can put the projections into our Hessenberg matrix

            ar_data%Hessenberg(1:j + 1, j + 1) = h_vec(1:j + 1)

            control%current_step = j + 1

         END DO


         ! compute the vector norm of the final residuum and put it in to Hessenberg

         CALL compute_norms_z (ar_data%f_vec, norm, rnorm, control%pcol_group)

         ar_data%Hessenberg(control%current_step + 1, control%current_step) = norm


         ! broadcast the Hessenberg matrix so we don't need to care later on

         CALL control%mp_group%bcast(ar_data%Hessenberg, 0)


         DEALLOCATE (v_vec, w_vec, h_vec, s_vec)


      END SUBROUTINE build_subspace_z


! **************************************************************************************************

!> \brief Helper routine to transfer the all data of a dbcsr matrix to a local array

!> \param vec ...

!> \param array ...

!> \param n ...

!> \param is_local ...

! **************************************************************************************************

      SUBROUTINE transfer_dbcsr_to_local_array_z (vec, array, n, is_local)

         TYPE(dbcsr_type)                          :: vec

         COMPLEX(kind=real_8), DIMENSION(:)           :: array

         INTEGER                                  :: n

         LOGICAL                                  :: is_local

         COMPLEX(kind=real_8), DIMENSION(:), POINTER          :: data_vec


         data_vec => dbcsr_get_data_p(vec, select_data_type=cmplx(0.0, 0.0, real_8))

         IF (is_local) array(1:n) = data_vec(1:n)


      END SUBROUTINE transfer_dbcsr_to_local_array_z


! **************************************************************************************************

!> \brief The inverse routine transferring data back from an array to a dbcsr

!> \param vec ...

!> \param array ...

!> \param n ...

!> \param is_local ...

! **************************************************************************************************

      SUBROUTINE transfer_local_array_to_dbcsr_z (vec, array, n, is_local)

         TYPE(dbcsr_type)                          :: vec

         COMPLEX(kind=real_8), DIMENSION(:)           :: array

         INTEGER                                  :: n

         LOGICAL                                  :: is_local

         COMPLEX(kind=real_8), DIMENSION(:), POINTER          :: data_vec


         data_vec => dbcsr_get_data_p(vec, select_data_type=cmplx(0.0, 0.0, real_8))

         IF (is_local) data_vec(1:n) = array(1:n)


      END SUBROUTINE transfer_local_array_to_dbcsr_z


! **************************************************************************************************

!> \brief Gram-Schmidt in matrix vector form

!> \param h_vec ...

!> \param f_vec ...

!> \param s_vec ...

!> \param w_vec ...

!> \param nrow_local ...

!> \param j ...

!> \param local_history ...

!> \param reorth_mat ...

!> \param local_comp ...

!> \param pcol_group ...

! **************************************************************************************************

      SUBROUTINE gram_schmidt_ortho_z (h_vec, f_vec, s_vec, w_vec, nrow_local, &

                                                   j, local_history, reorth_mat, local_comp, pcol_group)

         COMPLEX(kind=real_8), DIMENSION(:)      :: h_vec, s_vec, f_vec, w_vec

         COMPLEX(kind=real_8), DIMENSION(:, :)    :: local_history, reorth_mat

         INTEGER                                          :: nrow_local, j

         TYPE(mp_comm_type), INTENT(IN)                   :: pcol_group

         LOGICAL                                          :: local_comp


         CHARACTER(LEN=*), PARAMETER :: routinen = 'Gram_Schmidt_ortho_z'

         INTEGER                                  :: handle


         CALL timeset(routinen, handle)


         ! Let's do the orthonormalization, first try the Gram Schmidt scheme

         h_vec = cmplx(0.0, 0.0, real_8); f_vec = cmplx(0.0, 0.0, real_8); s_vec = cmplx(0.0, 0.0, real_8)

         IF (local_comp) CALL zgemv('T', nrow_local, j, cmplx(1.0, 0.0, real_8), local_history, &

                                                nrow_local, w_vec, 1, cmplx(0.0, 0.0, real_8), h_vec, 1)

         CALL pcol_group%sum(h_vec(1:j))


         IF (local_comp) CALL zgemv('N', nrow_local, j, cmplx(1.0, 0.0, real_8), reorth_mat, &

                                                nrow_local, h_vec, 1, cmplx(0.0, 0.0, real_8), f_vec, 1)

         f_vec(:) = w_vec(:) - f_vec(:)


         CALL timestop(handle)


      END SUBROUTINE gram_schmidt_ortho_z


! **************************************************************************************************

!> \brief Compute the  Daniel, Gragg, Kaufman and Steward correction

!> \param h_vec ...

!> \param f_vec ...

!> \param s_vec ...

!> \param nrow_local ...

!> \param j ...

!> \param local_history ...

!> \param reorth_mat ...

!> \param local_comp ...

!> \param pcol_group ...

! **************************************************************************************************

      SUBROUTINE dgks_ortho_z (h_vec, f_vec, s_vec, nrow_local, j, &

                                           local_history, reorth_mat, local_comp, pcol_group)

         COMPLEX(kind=real_8), DIMENSION(:)      :: h_vec, s_vec, f_vec

         COMPLEX(kind=real_8), DIMENSION(:, :)    :: local_history, reorth_mat

         INTEGER                                          :: nrow_local, j

         TYPE(mp_comm_type), INTENT(IN)           :: pcol_group


         CHARACTER(LEN=*), PARAMETER :: routinen = 'DGKS_ortho_z'

         LOGICAL                                          :: local_comp

         INTEGER                                  :: handle


         CALL timeset(routinen, handle)


         IF (local_comp) CALL zgemv('T', nrow_local, j, cmplx(1.0, 0.0, real_8), local_history, &

                                                nrow_local, f_vec, 1, cmplx(0.0, 0.0, real_8), s_vec, 1)

         CALL pcol_group%sum(s_vec(1:j))

         IF (local_comp) CALL zgemv('N', nrow_local, j, cmplx(-1.0, 0.0, real_8), reorth_mat, &

                                                nrow_local, s_vec, 1, cmplx(1.0, 0.0, real_8), f_vec, 1)

         h_vec(1:j) = h_vec(1:j) + s_vec(1:j)


         CALL timestop(handle)


      END SUBROUTINE dgks_ortho_z


! **************************************************************************************************

!> \brief Compute the norm of a vector distributed along proc_col

!>        as local arrays. Always return the real part next to the complex rep.

!> \param vec ...

!> \param norm ...

!> \param rnorm ...

!> \param pcol_group ...

! **************************************************************************************************

      SUBROUTINE compute_norms_z (vec, norm, rnorm, pcol_group)

         COMPLEX(kind=real_8), DIMENSION(:)           :: vec

         REAL(real_8)                        :: rnorm

         COMPLEX(kind=real_8)                         :: norm

         TYPE(mp_comm_type), INTENT(IN)             :: pcol_group


         ! the norm is computed along the processor column

         norm = dot_product(vec, vec)

         CALL pcol_group%sum(norm)

         rnorm = sqrt(real(norm, real_8))

         norm=cmplx(rnorm, 0.0, real_8)


      END SUBROUTINE compute_norms_z


! **************************************************************************************************

!> \brief Computes the initial guess for the solution of the generalized eigenvalue

!>        using the arnoldi method

!> \param matrix ...

!> \param matrix_arnoldi ...

!> \param vectors ...

!> \param arnoldi_data ...

! **************************************************************************************************


      SUBROUTINE gev_arnoldi_init_z (matrix, matrix_arnoldi, vectors, arnoldi_data)

         TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix

         TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix_arnoldi

         TYPE(m_x_v_vectors_type)                :: vectors

         TYPE(arnoldi_data_type)                 :: arnoldi_data


         INTEGER                                  :: iseed(4), row_size, col_size, &

                                                     nrow_local, ncol_local, &

                                                     row, col

         REAL(real_8)                        :: rnorm

         TYPE(dbcsr_iterator_type)                     :: iter

         COMPLEX(kind=real_8)                         :: norm, denom

         COMPLEX(kind=real_8), DIMENSION(:), ALLOCATABLE :: &

            v_vec, w_vec

         COMPLEX(kind=real_8), DIMENSION(:), POINTER          :: data_vec

         LOGICAL                                  :: local_comp

         TYPE(arnoldi_data_z_type), POINTER            :: ar_data

         TYPE(arnoldi_control_type), POINTER           :: control

         TYPE(mp_comm_type)                         :: pcol_group


         control => get_control(arnoldi_data)

         pcol_group = control%pcol_group

         local_comp = control%local_comp


         ar_data => get_data_z(arnoldi_data)


         ! create a local data copy to store the vectors and make Gram Schmidt a bit simpler

         CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)

         ALLOCATE (v_vec(nrow_local))

         ALLOCATE (w_vec(nrow_local))

         v_vec = cmplx(0.0, 0.0, real_8); w_vec = cmplx(0.0, 0.0, real_8)

         ar_data%Hessenberg = cmplx(0.0, 0.0, real_8)


         IF (control%has_initial_vector) THEN

            ! after calling the set routine the initial vector is stored in f_vec

            CALL transfer_local_array_to_dbcsr_z (vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)

         ELSE

            ! Setup the initial normalized random vector (sufficient if it only happens on proc_col 0)

            CALL dbcsr_iterator_start(iter, vectors%input_vec)

            DO WHILE (dbcsr_iterator_blocks_left(iter))

               CALL dbcsr_iterator_next_block(iter, row, col, data_vec, row_size=row_size, col_size=col_size)

               iseed(1) = 2; iseed(2) = mod(row, 4095); iseed(3) = mod(col, 4095); iseed(4) = 11

               CALL zlarnv(2, iseed, row_size*col_size, data_vec)

            END DO

            CALL dbcsr_iterator_stop(iter)

         END IF


         CALL transfer_dbcsr_to_local_array_z (vectors%input_vec, v_vec, nrow_local, control%local_comp)


         ! compute the vector norm of the reandom vectorm, get it real valued as well (rnorm)

         CALL compute_norms_z (v_vec, norm, rnorm, control%pcol_group)


         IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data

         CALL dbcsr_scale(vectors%input_vec, real(1.0, real_8)/rnorm)


         CALL dbcsr_matrix_colvec_multiply(matrix(1)%matrix, vectors%input_vec, vectors%result_vec, cmplx(1.0, 0.0, real_8), &

                                           cmplx(0.0, 0.0, real_8), vectors%rep_row_vec, vectors%rep_col_vec)


         CALL transfer_dbcsr_to_local_array_z (vectors%result_vec, w_vec, nrow_local, control%local_comp)


         ar_data%rho_scale = cmplx(0.0, 0.0, real_8)

         ar_data%rho_scale = dot_product(v_vec, w_vec)

         CALL pcol_group%sum(ar_data%rho_scale)


         CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, cmplx(1.0, 0.0, real_8), &

                                           cmplx(0.0, 0.0, real_8), vectors%rep_row_vec, vectors%rep_col_vec)


         CALL transfer_dbcsr_to_local_array_z (vectors%result_vec, w_vec, nrow_local, control%local_comp)


         denom = cmplx(0.0, 0.0, real_8)

         denom = dot_product(v_vec, w_vec)

         CALL pcol_group%sum(denom)

         IF (control%myproc == 0) ar_data%rho_scale = ar_data%rho_scale/denom

         CALL control%mp_group%bcast(ar_data%rho_scale, 0)


         ! if the maximum ev is requested we need to optimize with -A-rho*B

         CALL dbcsr_copy(matrix_arnoldi(1)%matrix, matrix(1)%matrix)

         CALL dbcsr_add(matrix_arnoldi(1)%matrix, matrix(2)%matrix, cmplx(1.0, 0.0, real_8), -ar_data%rho_scale)


         ar_data%x_vec = v_vec


      END SUBROUTINE gev_arnoldi_init_z


! **************************************************************************************************

!> \brief builds the basis rothogonal wrt. the metric.

!>        The structure looks similar to normal arnoldi but norms, vectors and

!>        matrix_vector products are very differently defined. Therefore it is

!>        cleaner to put it in a separate subroutine to avoid confusion

!> \param matrix ...

!> \param vectors ...

!> \param arnoldi_data ...

! **************************************************************************************************

      SUBROUTINE gev_build_subspace_z (matrix, vectors, arnoldi_data)

         TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix

         TYPE(m_x_v_vectors_type)                :: vectors

         TYPE(arnoldi_data_type)                 :: arnoldi_data


         INTEGER                                  :: j, ncol_local, nrow_local

         TYPE(arnoldi_control_type), POINTER           :: control

         TYPE(arnoldi_data_z_type), POINTER            :: ar_data

         COMPLEX(kind=real_8)                         :: norm

         COMPLEX(kind=real_8), ALLOCATABLE, DIMENSION(:)      :: h_vec, s_vec, v_vec, w_vec

         COMPLEX(kind=real_8), ALLOCATABLE, DIMENSION(:, :)    :: zmat, czmat, bzmat

         TYPE(mp_comm_type)                           :: pcol_group


         ar_data => get_data_z(arnoldi_data)

         control => get_control(arnoldi_data)

         control%converged = .false.

         pcol_group = control%pcol_group


         ! create the vectors required during the iterations

         CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)

         ALLOCATE (v_vec(nrow_local)); ALLOCATE (w_vec(nrow_local))

         v_vec = cmplx(0.0, 0.0, real_8); w_vec = cmplx(0.0, 0.0, real_8)

         ALLOCATE (s_vec(control%max_iter)); ALLOCATE (h_vec(control%max_iter))

         ALLOCATE (zmat(nrow_local, control%max_iter)); ALLOCATE (czmat(nrow_local, control%max_iter))

         ALLOCATE (bzmat(nrow_local, control%max_iter))


         CALL transfer_local_array_to_dbcsr_z (vectors%input_vec, ar_data%x_vec, nrow_local, control%local_comp)

         CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, cmplx(1.0, 0.0, real_8), &

                                           cmplx(0.0, 0.0, real_8), vectors%rep_row_vec, vectors%rep_col_vec)

         CALL transfer_dbcsr_to_local_array_z (vectors%result_vec, bzmat(:, 1), nrow_local, control%local_comp)


         norm = cmplx(0.0, 0.0, real_8)

         norm = dot_product(ar_data%x_vec, bzmat(:, 1))

         CALL pcol_group%sum(norm)

         IF (control%local_comp) THEN

            zmat(:, 1) = ar_data%x_vec/sqrt(norm); bzmat(:, 1) = bzmat(:, 1)/sqrt(norm)

         END IF


         DO j = 1, control%max_iter

            control%current_step = j

            CALL transfer_local_array_to_dbcsr_z (vectors%input_vec, zmat(:, j), nrow_local, control%local_comp)

            CALL dbcsr_matrix_colvec_multiply(matrix(1)%matrix, vectors%input_vec, vectors%result_vec, cmplx(1.0, 0.0, real_8), &

                                              cmplx(0.0, 0.0, real_8), vectors%rep_row_vec, vectors%rep_col_vec)

            CALL transfer_dbcsr_to_local_array_z (vectors%result_vec, czmat(:, j), nrow_local, control%local_comp)

            w_vec(:) = czmat(:, j)


            ! Let's do the orthonormalization, to get the new f_vec. First try the Gram Schmidt scheme

            CALL gram_schmidt_ortho_z (h_vec, ar_data%f_vec, s_vec, w_vec, nrow_local, j, &

                                                   bzmat, zmat, control%local_comp, control%pcol_group)


            ! A bit more expensive but simpliy always top up with a DGKS correction, otherwise numerics

            ! can becom a problem later on, there is probably a good check, but we don't perform it

            CALL dgks_ortho_z (h_vec, ar_data%f_vec, s_vec, nrow_local, j, bzmat, &

                                           zmat, control%local_comp, control%pcol_group)


            CALL transfer_local_array_to_dbcsr_z (vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)

            CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, cmplx(1.0, 0.0, real_8), &

                                              cmplx(0.0, 0.0, real_8), vectors%rep_row_vec, vectors%rep_col_vec)

            CALL transfer_dbcsr_to_local_array_z (vectors%result_vec, v_vec, nrow_local, control%local_comp)

            norm = cmplx(0.0, 0.0, real_8)

            norm = dot_product(ar_data%f_vec, v_vec)

            CALL pcol_group%sum(norm)


            IF (control%myproc == 0) control%converged = real(norm, real_8) .LT. epsilon(real(1.0, real_8))

            CALL control%mp_group%bcast(control%converged, 0)

            IF (control%converged) EXIT

            IF (j == control%max_iter - 1) EXIT


            IF (control%local_comp) THEN

               zmat(:, j + 1) = ar_data%f_vec/sqrt(norm); bzmat(:, j + 1) = v_vec(:)/sqrt(norm)

            END IF

         END DO


! getting a bit more complicated here as the final matrix is again a product which has to be computed with the

! distributed vectors, therefore a sum along the first proc_col is necessary. As we want that matrix everywhere,

! we set it to zero before and compute the distributed product only on the first col and then sum over the full grid

         ar_data%Hessenberg = cmplx(0.0, 0.0, real_8)

         IF (control%local_comp) THEN

            ar_data%Hessenberg(1:control%current_step, 1:control%current_step) = &

               matmul(transpose(czmat(:, 1:control%current_step)), zmat(:, 1:control%current_step))

         END IF

         CALL control%mp_group%sum(ar_data%Hessenberg)


         ar_data%local_history = zmat

         ! broadcast the Hessenberg matrix so we don't need to care later on


         DEALLOCATE (v_vec); DEALLOCATE (w_vec); DEALLOCATE (s_vec); DEALLOCATE (h_vec); DEALLOCATE (czmat);

         DEALLOCATE (zmat); DEALLOCATE (bzmat)


      END SUBROUTINE gev_build_subspace_z


! **************************************************************************************************

!> \brief Updates all data after an inner loop of the generalized ev arnoldi.

!>        Updates rho and C=A-rho*B accordingly.

!>        As an update scheme is used for he ev, the output ev has to be replaced

!>        with the updated one.

!>        Furthermore a convergence check is performed. The mv product could

!>        be skiiped by making clever use of the precomputed data, However,

!>        it is most likely not worth the effort.

!> \param matrix ...

!> \param matrix_arnoldi ...

!> \param vectors ...

!> \param arnoldi_data ...

! **************************************************************************************************

      SUBROUTINE gev_update_data_z (matrix, matrix_arnoldi, vectors, arnoldi_data)

         TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix

         TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix_arnoldi

         TYPE(m_x_v_vectors_type)                :: vectors

         TYPE(arnoldi_data_type)                 :: arnoldi_data


         TYPE(arnoldi_control_type), POINTER           :: control

         TYPE(arnoldi_data_z_type), POINTER            :: ar_data

         INTEGER                                  :: ncol_local, nrow_local, ind, i

         COMPLEX(kind=real_8), ALLOCATABLE, DIMENSION(:)      :: v_vec

         REAL(real_8)                        :: rnorm

         COMPLEX(kind=real_8)                         :: norm

         COMPLEX(real_8)                     :: val


         control => get_control(arnoldi_data)


         ar_data => get_data_z(arnoldi_data)


! compute the new shift, hack around the problem templating the conversion

         val = ar_data%evals(control%selected_ind(1))

         ar_data%rho_scale = ar_data%rho_scale + val

! compute the new eigenvector / initial guess for the next arnoldi loop

         ar_data%x_vec = cmplx(0.0, 0.0, real_8)

         DO i = 1, control%current_step

            val = ar_data%revec(i, control%selected_ind(1))

            ar_data%x_vec(:) = ar_data%x_vec(:) + ar_data%local_history(:, i)*val

         END DO

!      ar_data%x_vec(:)=MATMUL(ar_data%local_history(:,1:control%current_step),&

!                        ar_data%revec(1:control%current_step,control%selected_ind(1)))


! update the C-matrix (A-rho*B), if the maximum value is requested we have to use -A-rho*B

         CALL dbcsr_copy(matrix_arnoldi(1)%matrix, matrix(1)%matrix)

         CALL dbcsr_add(matrix_arnoldi(1)%matrix, matrix(2)%matrix, cmplx(1.0, 0.0, real_8), -ar_data%rho_scale)


! compute convergence and set the correct eigenvalue and eigenvector

         CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)

         IF (ncol_local > 0) THEN

            ALLOCATE (v_vec(nrow_local))

            CALL compute_norms_z (ar_data%x_vec, norm, rnorm, control%pcol_group)

            v_vec(:) = ar_data%x_vec(:)/rnorm

         END IF

         CALL transfer_local_array_to_dbcsr_z (vectors%input_vec, v_vec, nrow_local, control%local_comp)

         CALL dbcsr_matrix_colvec_multiply(matrix_arnoldi(1)%matrix, vectors%input_vec, vectors%result_vec, cmplx(1.0, 0.0,&

             & real_8), &

                                           cmplx(0.0, 0.0, real_8), vectors%rep_row_vec, vectors%rep_col_vec)

         CALL transfer_dbcsr_to_local_array_z (vectors%result_vec, v_vec, nrow_local, control%local_comp)

         IF (ncol_local > 0) THEN

            CALL compute_norms_z (v_vec, norm, rnorm, control%pcol_group)

            ! check convergence

            control%converged = rnorm .LT. control%threshold

            DEALLOCATE (v_vec)

         END IF

         ! and broadcast the real eigenvalue

         CALL control%mp_group%bcast(control%converged, 0)

         ind = control%selected_ind(1)

         CALL control%mp_group%bcast(ar_data%rho_scale, 0)


! Again the maximum value request is done on -A therefore the eigenvalue needs the opposite sign

         ar_data%evals(ind) = ar_data%rho_scale


      END SUBROUTINE gev_update_data_z


END MODULE arnoldi_methods

arnoldi_geev::arnoldi_general_local_diag
Definition arnoldi_geev.F:28

arnoldi_geev::arnoldi_symm_local_diag
Definition arnoldi_geev.F:38

arnoldi_geev::arnoldi_tridiag_local_diag
Definition arnoldi_geev.F:33

dbcsr_vector::dbcsr_matrix_colvec_multiply
Definition dbcsr_vector.F:92

arnoldi_geev
provides a unified interface to lapack geev routines
Definition arnoldi_geev.F:15

arnoldi_methods
methods for arnoldi iteration
Definition arnoldi_methods.F:15

arnoldi_methods::arnoldi_init
subroutine, public arnoldi_init(matrix, vectors, arnoldi_data)
Interface for the initialization of the arnoldi subspace creation currently it can only setup a rando...
Definition arnoldi_methods.F:100

arnoldi_methods::gev_update_data
subroutine, public gev_update_data(matrix, matrix_arnoldi, vectors, arnoldi_data)
in the generalized eigenvalue the matrix depende on the projection therefore the outer loop has to bu...
Definition arnoldi_methods.F:202

arnoldi_methods::gev_build_subspace
subroutine, public gev_build_subspace(matrix, vectors, arnoldi_data)
here the iterations are performed and the krylov space for the generalized eigenvalue problem is crea...
Definition arnoldi_methods.F:175

arnoldi_methods::compute_evals
subroutine, public compute_evals(arnoldi_data)
Interface to compute the eigenvalues of a nonsymmetric matrix This is only the serial version.
Definition arnoldi_methods.F:76

arnoldi_methods::gev_arnoldi_init
subroutine, public gev_arnoldi_init(matrix, matrix_arnoldi, vectors, arnoldi_data)
Interface for the initialization of the arnoldi subspace creation for the generalized eigenvalue prob...
Definition arnoldi_methods.F:126

arnoldi_methods::build_subspace
subroutine, public build_subspace(matrix, vectors, arnoldi_data)
here the iterations are performed and the krylov space is constructed
Definition arnoldi_methods.F:150

arnoldi_methods::arnoldi_iram
subroutine, public arnoldi_iram(arnoldi_data)
Interface for the routine calcualting the implicit restarts Currently all based on lapack.
Definition arnoldi_methods.F:55

arnoldi_types
collection of types used in arnoldi
Definition arnoldi_types.F:15

arnoldi_types::get_data_c
type(arnoldi_data_c_type) function, pointer, public get_data_c(ar_data)
...
Definition arnoldi_types.F:180

arnoldi_types::get_data_s
type(arnoldi_data_s_type) function, pointer, public get_data_s(ar_data)
...
Definition arnoldi_types.F:154

arnoldi_types::get_data_z
type(arnoldi_data_z_type) function, pointer, public get_data_z(ar_data)
...
Definition arnoldi_types.F:167

arnoldi_types::has_d_real
logical function, public has_d_real(ar_data)
...
Definition arnoldi_types.F:258

arnoldi_types::get_data_d
type(arnoldi_data_d_type) function, pointer, public get_data_d(ar_data)
...
Definition arnoldi_types.F:141

arnoldi_types::get_control
type(arnoldi_control_type) function, pointer, public get_control(ar_data)
...
Definition arnoldi_types.F:245

arnoldi_types::has_d_cmplx
elemental logical function, public has_d_cmplx(ar_data)
...
Definition arnoldi_types.F:284

dbcsr_vector
operations for skinny matrices/vectors expressed in dbcsr form
Definition dbcsr_vector.F:15

kinds
Defines the basic variable types.
Definition kinds.F:23

kinds::real_8
integer, parameter, public real_8
Definition kinds.F:41

message_passing
Interface to the message passing library MPI.
Definition message_passing.F:23

arnoldi_types::arnoldi_control_type
Definition arnoldi_types.F:26

arnoldi_types::arnoldi_data_c_type
Definition arnoldi_types.F:74

arnoldi_types::arnoldi_data_d_type
Definition arnoldi_types.F:44

arnoldi_types::arnoldi_data_s_type
Definition arnoldi_types.F:54

arnoldi_types::arnoldi_data_type
Definition arnoldi_types.F:84

arnoldi_types::arnoldi_data_z_type
Definition arnoldi_types.F:64

arnoldi_types::m_x_v_vectors_type
Definition arnoldi_types.F:92

message_passing::mp_comm_type
Definition message_passing.F:189