(git:b195825)
arnoldi_methods.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \brief methods for arnoldi iteration
10 !> \par History
11 !> 2014.09 created [Florian Schiffmann]
12 !> \author Florian Schiffmann
13 ! **************************************************************************************************
14 
16  USE arnoldi_geev, ONLY: arnoldi_general_local_diag, &
17  arnoldi_symm_local_diag, &
18  arnoldi_tridiag_local_diag
19  USE arnoldi_types, ONLY: &
23  USE dbcsr_api, ONLY: &
24  dbcsr_add, dbcsr_copy, dbcsr_get_data_p, dbcsr_get_info, dbcsr_iterator_blocks_left, &
25  dbcsr_iterator_next_block, dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, &
26  dbcsr_p_type, dbcsr_scale, dbcsr_type
27  USE dbcsr_vector, ONLY: dbcsr_matrix_colvec_multiply
28  USE kinds, ONLY: real_8
29  USE message_passing, ONLY: mp_comm_type
30 #include "../base/base_uses.f90"
31 
32  IMPLICIT NONE
33 
34  PRIVATE
35 
36  CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'arnoldi_methods'
37 
40 
41  INTERFACE convert_matrix
42  MODULE PROCEDURE convert_matrix_z_to_d
43  MODULE PROCEDURE convert_matrix_d_to_z
44  MODULE PROCEDURE convert_matrix_z_to_z
45  END INTERFACE
46 
47 CONTAINS
48 
49 ! **************************************************************************************************
50 !> \brief Interface for the routine calcualting the implicit restarts
51 !> Currently all based on lapack
52 !> \param arnoldi_data ...
53 ! **************************************************************************************************
54  SUBROUTINE arnoldi_iram(arnoldi_data)
55  TYPE(arnoldi_data_type) :: arnoldi_data
56 
57  CHARACTER(LEN=*), PARAMETER :: routinen = 'arnoldi_iram'
58 
59  INTEGER :: handle
60 
61  CALL timeset(routinen, handle)
62 
63  IF (has_d_real(arnoldi_data)) CALL arnoldi_iram_d(arnoldi_data)
64  IF (has_d_cmplx(arnoldi_data)) CALL arnoldi_iram_z(arnoldi_data)
65 
66  CALL timestop(handle)
67 
68  END SUBROUTINE arnoldi_iram
69 
70 ! **************************************************************************************************
71 !> \brief Interface to compute the eigenvalues of a nonsymmetric matrix
72 !> This is only the serial version
73 !> \param arnoldi_data ...
74 ! **************************************************************************************************
75  SUBROUTINE compute_evals(arnoldi_data)
76  TYPE(arnoldi_data_type) :: arnoldi_data
77 
78  CHARACTER(LEN=*), PARAMETER :: routinen = 'compute_evals'
79 
80  INTEGER :: handle
81 
82  CALL timeset(routinen, handle)
83 
84  IF (has_d_real(arnoldi_data)) CALL compute_evals_d(arnoldi_data)
85  IF (has_d_cmplx(arnoldi_data)) CALL compute_evals_z(arnoldi_data)
86 
87  CALL timestop(handle)
88 
89  END SUBROUTINE compute_evals
90 
91 ! **************************************************************************************************
92 !> \brief Interface for the initialization of the arnoldi subspace creation
93 !> currently it can only setup a random vector but can be improved to
94 !> various types of restarts easily
95 !> \param matrix pointer to the matrices as described in main interface
96 !> \param vectors work vectors for the matrix vector multiplications
97 !> \param arnoldi_data all data concerning the subspace
98 ! **************************************************************************************************
99  SUBROUTINE arnoldi_init(matrix, vectors, arnoldi_data)
100  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
101  TYPE(m_x_v_vectors_type) :: vectors
102  TYPE(arnoldi_data_type) :: arnoldi_data
103 
104  CHARACTER(LEN=*), PARAMETER :: routinen = 'arnoldi_init'
105 
106  INTEGER :: handle
107 
108  CALL timeset(routinen, handle)
109 
110  IF (has_d_real(arnoldi_data)) CALL arnoldi_init_d(matrix, vectors, arnoldi_data)
111  IF (has_d_cmplx(arnoldi_data)) CALL arnoldi_init_z(matrix, vectors, arnoldi_data)
112 
113  CALL timestop(handle)
114 
115  END SUBROUTINE arnoldi_init
116 
117 ! **************************************************************************************************
118 !> \brief Interface for the initialization of the arnoldi subspace creation
119 !> for the generalized eigenvalue problem
120 !> \param matrix pointer to the matrices as described in main interface
121 !> \param matrix_arnoldi ...
122 !> \param vectors work vectors for the matrix vector multiplications
123 !> \param arnoldi_data all data concerning the subspace
124 ! **************************************************************************************************
125  SUBROUTINE gev_arnoldi_init(matrix, matrix_arnoldi, vectors, arnoldi_data)
126  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix, matrix_arnoldi
127  TYPE(m_x_v_vectors_type) :: vectors
128  TYPE(arnoldi_data_type) :: arnoldi_data
129 
130  CHARACTER(LEN=*), PARAMETER :: routinen = 'gev_arnoldi_init'
131 
132  INTEGER :: handle
133 
134  CALL timeset(routinen, handle)
135 
136  IF (has_d_real(arnoldi_data)) CALL gev_arnoldi_init_d(matrix, matrix_arnoldi, vectors, arnoldi_data)
137  IF (has_d_cmplx(arnoldi_data)) CALL gev_arnoldi_init_z(matrix, matrix_arnoldi, vectors, arnoldi_data)
138 
139  CALL timestop(handle)
140 
141  END SUBROUTINE gev_arnoldi_init
142 
143 ! **************************************************************************************************
144 !> \brief here the iterations are performed and the krylov space is constructed
145 !> \param matrix see above
146 !> \param vectors see above
147 !> \param arnoldi_data see above
148 ! **************************************************************************************************
149  SUBROUTINE build_subspace(matrix, vectors, arnoldi_data)
150  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
151  TYPE(m_x_v_vectors_type) :: vectors
152  TYPE(arnoldi_data_type) :: arnoldi_data
153 
154  CHARACTER(LEN=*), PARAMETER :: routinen = 'build_subspace'
155 
156  INTEGER :: handle
157 
158  CALL timeset(routinen, handle)
159 
160  IF (has_d_real(arnoldi_data)) CALL build_subspace_d(matrix, vectors, arnoldi_data)
161  IF (has_d_cmplx(arnoldi_data)) CALL build_subspace_z(matrix, vectors, arnoldi_data)
162 
163  CALL timestop(handle)
164 
165  END SUBROUTINE build_subspace
166 
167 ! **************************************************************************************************
168 !> \brief here the iterations are performed and the krylov space for the generalized
169 !> eigenvalue problem is created
170 !> \param matrix see above
171 !> \param vectors see above
172 !> \param arnoldi_data see above
173 ! **************************************************************************************************
174  SUBROUTINE gev_build_subspace(matrix, vectors, arnoldi_data)
175  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
176  TYPE(m_x_v_vectors_type) :: vectors
177  TYPE(arnoldi_data_type) :: arnoldi_data
178 
179  CHARACTER(LEN=*), PARAMETER :: routinen = 'gev_build_subspace'
180 
181  INTEGER :: handle
182 
183  CALL timeset(routinen, handle)
184 
185  IF (has_d_real(arnoldi_data)) CALL gev_build_subspace_d(matrix, vectors, arnoldi_data)
186  IF (has_d_cmplx(arnoldi_data)) CALL gev_build_subspace_z(matrix, vectors, arnoldi_data)
187 
188  CALL timestop(handle)
189 
190  END SUBROUTINE gev_build_subspace
191 
192 ! **************************************************************************************************
193 !> \brief in the generalized eigenvalue the matrix depende on the projection
194 !> therefore the outer loop has to build a new set of matrices for the
195 !> inner loop
196 !> \param matrix see above
197 !> \param matrix_arnoldi ...
198 !> \param vectors ...
199 !> \param arnoldi_data see above
200 ! **************************************************************************************************
201  SUBROUTINE gev_update_data(matrix, matrix_arnoldi, vectors, arnoldi_data)
202  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix, matrix_arnoldi
203  TYPE(m_x_v_vectors_type) :: vectors
204  TYPE(arnoldi_data_type) :: arnoldi_data
205 
206  CHARACTER(LEN=*), PARAMETER :: routinen = 'gev_update_data'
207 
208  INTEGER :: handle
209 
210  CALL timeset(routinen, handle)
211 
212  IF (has_d_real(arnoldi_data)) CALL gev_update_data_d(matrix, matrix_arnoldi, vectors, arnoldi_data)
213  IF (has_d_cmplx(arnoldi_data)) CALL gev_update_data_z(matrix, matrix_arnoldi, vectors, arnoldi_data)
214 
215  CALL timestop(handle)
216 
217  END SUBROUTINE gev_update_data
218 
219 ! **************************************************************************************************
220 !> \brief ...
221 !> \param m_out ...
222 !> \param m_in ...
223 ! **************************************************************************************************
224  SUBROUTINE convert_matrix_z_to_d(m_out, m_in)
225  REAL(real_8), DIMENSION(:, :) :: m_out
226  COMPLEX(real_8), DIMENSION(:, :) :: m_in
227 
228  m_out(:, :) = real(m_in(:, :), kind=real_8)
229  END SUBROUTINE convert_matrix_z_to_d
230 
231 ! **************************************************************************************************
232 !> \brief ...
233 !> \param m_out ...
234 !> \param m_in ...
235 ! **************************************************************************************************
236  SUBROUTINE convert_matrix_d_to_z(m_out, m_in)
237  COMPLEX(real_8), DIMENSION(:, :) :: m_out
238  REAL(real_8), DIMENSION(:, :) :: m_in
239 
240  m_out(:, :) = cmplx(m_in, 0.0, kind=real_8)
241  END SUBROUTINE convert_matrix_d_to_z
242 
243 ! I kno that one is stupid but like this it simplifies the template
244 ! **************************************************************************************************
245 !> \brief ...
246 !> \param m_out ...
247 !> \param m_in ...
248 ! **************************************************************************************************
249  SUBROUTINE convert_matrix_z_to_z(m_out, m_in)
250  COMPLEX(real_8), DIMENSION(:, :) :: m_out, m_in
251 
252  m_out(:, :) = m_in
253  END SUBROUTINE convert_matrix_z_to_z
254 
255 ! **************************************************************************************************
256 !> \brief Call the correct eigensolver, in the arnoldi method only the right
257 !> eigenvectors are used. Lefts are created here but dumped immediately
258 !> \param arnoldi_data ...
259 ! **************************************************************************************************
260  SUBROUTINE compute_evals_d (arnoldi_data)
261  TYPE(arnoldi_data_type) :: arnoldi_data
262 
263  COMPLEX(real_8), DIMENSION(:, :), ALLOCATABLE :: levec
264  TYPE(arnoldi_data_d_type), POINTER :: ar_data
265  INTEGER :: ndim
266  TYPE(arnoldi_control_type), POINTER :: control
267 
268  ar_data => get_data_d(arnoldi_data)
269  control => get_control(arnoldi_data)
270  ndim = control%current_step
271  ALLOCATE (levec(ndim, ndim))
272 
273 ! Needs antoher interface as the calls to real and complex geev differ (sucks!)
274 ! only perform the diagonalization on processors which hold data
275  IF (control%generalized_ev) THEN
276  CALL arnoldi_symm_local_diag('V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &
277  ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim))
278  ELSE
279  IF (control%symmetric) THEN
280  CALL arnoldi_tridiag_local_diag('N', 'V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &
281  ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim), levec)
282  ELSE
283  CALL arnoldi_general_local_diag('N', 'V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &
284  ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim), levec)
285  END IF
286  END IF
287 
288  DEALLOCATE (levec)
289 
290  END SUBROUTINE compute_evals_d
291 
292 ! **************************************************************************************************
293 !> \brief Initialization of the arnoldi vector. Here a random vector is used,
294 !> might be generalized in the future
295 !> \param matrix ...
296 !> \param vectors ...
297 !> \param arnoldi_data ...
298 ! **************************************************************************************************
299  SUBROUTINE arnoldi_init_d (matrix, vectors, arnoldi_data)
300  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
301  TYPE(m_x_v_vectors_type) :: vectors
302  TYPE(arnoldi_data_type) :: arnoldi_data
303 
304  INTEGER :: i, iseed(4), row_size, col_size, &
305  nrow_local, ncol_local, &
306  row, col
307  REAL(real_8) :: rnorm
308  TYPE(dbcsr_iterator_type) :: iter
309  REAL(kind=real_8) :: norm
310  REAL(kind=real_8), DIMENSION(:), ALLOCATABLE :: &
311  v_vec, w_vec
312  REAL(kind=real_8), DIMENSION(:), POINTER :: data_vec
313  LOGICAL :: local_comp
314  TYPE(arnoldi_data_d_type), POINTER :: ar_data
315  TYPE(arnoldi_control_type), POINTER :: control
316  TYPE(mp_comm_type) :: pcol_group
317 
318  control => get_control(arnoldi_data)
319  pcol_group = control%pcol_group
320  local_comp = control%local_comp
321 
322  ar_data => get_data_d(arnoldi_data)
323 
324  ! create a local data copy to store the vectors and make Gram Schmidt a bit simpler
325  CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
326  ALLOCATE (v_vec(nrow_local))
327  ALLOCATE (w_vec(nrow_local))
328  v_vec = 0.0_real_8; w_vec = 0.0_real_8
329  ar_data%Hessenberg = 0.0_real_8
330 
331  IF (control%has_initial_vector) THEN
332  ! after calling the set routine the initial vector is stored in f_vec
333  CALL transfer_local_array_to_dbcsr_d (vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)
334  ELSE
335  ! Setup the initial normalized random vector (sufficient if it only happens on proc_col 0)
336  CALL dbcsr_iterator_start(iter, vectors%input_vec)
337  DO WHILE (dbcsr_iterator_blocks_left(iter))
338  CALL dbcsr_iterator_next_block(iter, row, col, data_vec, row_size=row_size, col_size=col_size)
339  iseed(1) = 2; iseed(2) = mod(row, 4095); iseed(3) = mod(col, 4095); iseed(4) = 11
340  CALL dlarnv(2, iseed, row_size*col_size, data_vec)
341  END DO
342  CALL dbcsr_iterator_stop(iter)
343  END IF
344 
345  CALL transfer_dbcsr_to_local_array_d (vectors%input_vec, v_vec, nrow_local, control%local_comp)
346 
347  ! compute the vector norm of the random vectorm, get it real valued as well (rnorm)
348  CALL compute_norms_d (v_vec, norm, rnorm, control%pcol_group)
349 
350  IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data
351  CALL dbcsr_scale(vectors%input_vec, real(1.0, real_8)/rnorm)
352 
353  ! Everything prepared, initialize the Arnoldi iteration
354  CALL transfer_dbcsr_to_local_array_d (vectors%input_vec, v_vec, nrow_local, control%local_comp)
355 
356  ! This permits to compute the subspace of a matrix which is a product of multiple matrices
357  DO i = 1, SIZE(matrix)
358  CALL dbcsr_matrix_colvec_multiply(matrix(i)%matrix, vectors%input_vec, vectors%result_vec, 1.0_real_8, &
359  0.0_real_8, vectors%rep_row_vec, vectors%rep_col_vec)
360  CALL dbcsr_copy(vectors%input_vec, vectors%result_vec)
361  END DO
362 
363  CALL transfer_dbcsr_to_local_array_d (vectors%result_vec, w_vec, nrow_local, control%local_comp)
364 
365  ! Put the projection into the Hessenberg matrix, and make the vectors orthonormal
366  ar_data%Hessenberg(1, 1) = dot_product(v_vec, w_vec)
367  CALL pcol_group%sum(ar_data%Hessenberg(1, 1))
368  ar_data%f_vec = w_vec - v_vec*ar_data%Hessenberg(1, 1)
369 
370  ar_data%local_history(:, 1) = v_vec(:)
371 
372  ! We did the first step in here so we should set the current step for the subspace generation accordingly
373  control%current_step = 1
374 
375  DEALLOCATE (v_vec, w_vec)
376 
377  END SUBROUTINE arnoldi_init_d
378 
379 ! **************************************************************************************************
380 !> \brief Alogorithm for the implicit restarts in the arnoldi method
381 !> this is an early implementation which scales subspace size^4
382 !> by replacing the lapack calls with direct math the
383 !> QR and gemms can be made linear and a N^2 sacling will be acchieved
384 !> however this already sets the framework but should be used with care
385 !> \param arnoldi_data ...
386 ! **************************************************************************************************
387  SUBROUTINE arnoldi_iram_d (arnoldi_data)
388  TYPE(arnoldi_data_type) :: arnoldi_data
389 
390  TYPE(arnoldi_data_d_type), POINTER :: ar_data
391  TYPE(arnoldi_control_type), POINTER :: control
392  COMPLEX(real_8), DIMENSION(:, :), ALLOCATABLE :: tmp_mat, safe_mat, q, tmp_mat1
393  COMPLEX(real_8), DIMENSION(:), ALLOCATABLE :: work, tau, work_measure
394  INTEGER :: msize, lwork, i, j, info, nwant
395  REAL(kind=real_8) :: beta, sigma
396  REAL(kind=real_8), DIMENSION(:, :), ALLOCATABLE :: qdata
397 
398  ! This is just a terribly inefficient implementation but I hope it is correct and might serve as a reference
399  ar_data => get_data_d(arnoldi_data)
400  control => get_control(arnoldi_data)
401  msize = control%current_step
402  nwant = control%nval_out
403  ALLOCATE (tmp_mat(msize, msize)); ALLOCATE (safe_mat(msize, msize))
404  ALLOCATE (q(msize, msize)); ALLOCATE (tmp_mat1(msize, msize))
405  ALLOCATE (work_measure(1))
406  ALLOCATE (tau(msize)); ALLOCATE (qdata(msize, msize))
407  !make Q identity
408  q = cmplx(0.0, 0.0, real_8)
409  DO i = 1, msize
410  q(i, i) = cmplx(1.0, 0.0, real_8)
411  END DO
412 
413  ! Looks a bit odd, but safe_mat will contain the result in the end, while tmpmat gets violated by lapack
414  CALL convert_matrix(tmp_mat, ar_data%Hessenberg(1:msize, 1:msize))
415  safe_mat(:, :) = tmp_mat(:, :)
416 
417  DO i = 1, msize
418  ! A bit a strange check but in the end we only want to shift the unwanted evals
419  IF (any(control%selected_ind == i)) cycle
420  ! Here we shift the matrix by subtracting unwanted evals from the diagonal
421  DO j = 1, msize
422  tmp_mat(j, j) = tmp_mat(j, j) - ar_data%evals(i)
423  END DO
424  ! Now we repair the damage by QR factorizing
425  lwork = -1
426  CALL zgeqrf(msize, msize, tmp_mat, msize, tau, work_measure, lwork, info)
427  lwork = int(work_measure(1))
428  IF (ALLOCATED(work)) THEN
429  IF (SIZE(work) .LT. lwork) THEN
430  DEALLOCATE (work)
431  END IF
432  END IF
433  IF (.NOT. ALLOCATED(work)) ALLOCATE (work(lwork))
434  CALL zgeqrf(msize, msize, tmp_mat, msize, tau, work, lwork, info)
435  ! Ask Lapack to reconstruct Q from its own way of storing data (tmpmat will contain Q)
436  CALL zungqr(msize, msize, msize, tmp_mat, msize, tau, work, lwork, info)
437  ! update Q=Q*Q_current
438  tmp_mat1(:, :) = q(:, :)
439  CALL zgemm('N', 'N', msize, msize, msize, cmplx(1.0, 0.0, real_8), tmp_mat1, msize, tmp_mat, msize, cmplx(0.0, 0.0,&
440  & real_8), &
441  q, msize)
442  ! Update H=(Q*)HQ
443  CALL zgemm('C', 'N', msize, msize, msize, cmplx(1.0, 0.0, real_8), tmp_mat, msize, safe_mat, msize, cmplx(0.0, 0.0,&
444  & real_8), &
445  tmp_mat1, msize)
446  CALL zgemm('N', 'N', msize, msize, msize, cmplx(1.0, 0.0, real_8), tmp_mat1, msize, tmp_mat, msize, cmplx(0.0, 0.0,&
447  & real_8), &
448  safe_mat, msize)
449 
450  ! this one is crucial for numerics not to accumulate noise in the subdiagonals
451  DO j = 1, msize
452  safe_mat(j + 2:msize, j) = cmplx(0.0, 0.0, real_8)
453  END DO
454  tmp_mat(:, :) = safe_mat(:, :)
455  END DO
456 
457  ! Now we can compute our restart quantities
458  ar_data%Hessenberg = 0.0_real_8
459  CALL convert_matrix(ar_data%Hessenberg(1:msize, 1:msize), safe_mat)
460  CALL convert_matrix(qdata, q)
461 
462  beta = ar_data%Hessenberg(nwant + 1, nwant); sigma = qdata(msize, nwant)
463 
464  !update the residuum and the basis vectors
465  IF (control%local_comp) THEN
466  ar_data%f_vec = matmul(ar_data%local_history(:, 1:msize), qdata(1:msize, nwant + 1))*beta + ar_data%f_vec(:)*sigma
467  ar_data%local_history(:, 1:nwant) = matmul(ar_data%local_history(:, 1:msize), qdata(1:msize, 1:nwant))
468  END IF
469  ! Set the current step to nwant so the subspace build knows where to start
470  control%current_step = nwant
471 
472  DEALLOCATE (tmp_mat, safe_mat, q, qdata, tmp_mat1, work, tau, work_measure)
473 
474  END SUBROUTINE arnoldi_iram_d
475 
476 ! **************************************************************************************************
477 !> \brief Here we create the Krylov subspace and fill the Hessenberg matrix
478 !> convergence check is only performed on subspace convergence
479 !> Gram Schidt is used to orthonogonalize.
480 !> If this is numericall not sufficient a Daniel, Gragg, Kaufman and Steward
481 !> correction is performed
482 !> \param matrix ...
483 !> \param vectors ...
484 !> \param arnoldi_data ...
485 ! **************************************************************************************************
486  SUBROUTINE build_subspace_d (matrix, vectors, arnoldi_data)
487  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
488  TYPE(m_x_v_vectors_type), TARGET :: vectors
489  TYPE(arnoldi_data_type) :: arnoldi_data
490 
491  INTEGER :: i, j, ncol_local, nrow_local
492  REAL(real_8) :: rnorm
493  TYPE(arnoldi_control_type), POINTER :: control
494  TYPE(arnoldi_data_d_type), POINTER :: ar_data
495  REAL(kind=real_8) :: norm
496  REAL(kind=real_8), ALLOCATABLE, DIMENSION(:) :: h_vec, s_vec, v_vec, w_vec
497  TYPE(dbcsr_type), POINTER :: input_vec, result_vec, swap_vec
498 
499  ar_data => get_data_d(arnoldi_data)
500  control => get_control(arnoldi_data)
501  control%converged = .false.
502 
503  ! create the vectors required during the iterations
504  CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
505  ALLOCATE (v_vec(nrow_local)); ALLOCATE (w_vec(nrow_local))
506  v_vec = 0.0_real_8; w_vec = 0.0_real_8
507  ALLOCATE (s_vec(control%max_iter)); ALLOCATE (h_vec(control%max_iter))
508 
509  DO j = control%current_step, control%max_iter - 1
510 
511  ! compute the vector norm of the residuum, get it real valued as well (rnorm)
512  CALL compute_norms_d (ar_data%f_vec, norm, rnorm, control%pcol_group)
513 
514  ! check convergence and inform everybody about it, a bit annoying to talk to everybody because of that
515  IF (control%myproc == 0) control%converged = rnorm .LT. real(control%threshold, real_8)
516  CALL control%mp_group%bcast(control%converged, 0)
517  IF (control%converged) EXIT
518 
519  ! transfer normalized residdum to history and its norm to the Hessenberg matrix
520  IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data
521  v_vec(:) = ar_data%f_vec(:)/rnorm; ar_data%local_history(:, j + 1) = v_vec(:); ar_data%Hessenberg(j + 1, j) = norm
522 
523  input_vec => vectors%input_vec
524  result_vec => vectors%result_vec
525  CALL transfer_local_array_to_dbcsr_d (input_vec, v_vec, nrow_local, control%local_comp)
526 
527  ! This permits to compute the subspace of a matrix which is a product of two matrices
528  DO i = 1, SIZE(matrix)
529  CALL dbcsr_matrix_colvec_multiply(matrix(i)%matrix, input_vec, result_vec, 1.0_real_8, &
530  0.0_real_8, vectors%rep_row_vec, vectors%rep_col_vec)
531  swap_vec => input_vec
532  input_vec => result_vec
533  result_vec => swap_vec
534  END DO
535 
536  CALL transfer_dbcsr_to_local_array_d (input_vec, w_vec, nrow_local, control%local_comp)
537 
538  ! Let's do the orthonormalization, to get the new f_vec. First try the Gram Schmidt scheme
539  CALL gram_schmidt_ortho_d (h_vec, ar_data%f_vec, s_vec, w_vec, nrow_local, j + 1, &
540  ar_data%local_history, ar_data%local_history, control%local_comp, control%pcol_group)
541 
542  ! A bit more expensive but simply always top up with a DGKS correction, otherwise numerics
543  ! can become a problem later on, there is probably a good check whether it's necessary, but we don't perform it
544  CALL dgks_ortho_d (h_vec, ar_data%f_vec, s_vec, nrow_local, j + 1, ar_data%local_history, &
545  ar_data%local_history, control%local_comp, control%pcol_group)
546  ! Finally we can put the projections into our Hessenberg matrix
547  ar_data%Hessenberg(1:j + 1, j + 1) = h_vec(1:j + 1)
548  control%current_step = j + 1
549  END DO
550 
551  ! compute the vector norm of the final residuum and put it in to Hessenberg
552  CALL compute_norms_d (ar_data%f_vec, norm, rnorm, control%pcol_group)
553  ar_data%Hessenberg(control%current_step + 1, control%current_step) = norm
554 
555  ! broadcast the Hessenberg matrix so we don't need to care later on
556  CALL control%mp_group%bcast(ar_data%Hessenberg, 0)
557 
558  DEALLOCATE (v_vec, w_vec, h_vec, s_vec)
559 
560  END SUBROUTINE build_subspace_d
561 
562 ! **************************************************************************************************
563 !> \brief Helper routine to transfer the all data of a dbcsr matrix to a local array
564 !> \param vec ...
565 !> \param array ...
566 !> \param n ...
567 !> \param is_local ...
568 ! **************************************************************************************************
569  SUBROUTINE transfer_dbcsr_to_local_array_d (vec, array, n, is_local)
570  TYPE(dbcsr_type) :: vec
571  REAL(kind=real_8), DIMENSION(:) :: array
572  INTEGER :: n
573  LOGICAL :: is_local
574  REAL(kind=real_8), DIMENSION(:), POINTER :: data_vec
575 
576  data_vec => dbcsr_get_data_p(vec, select_data_type=0.0_real_8)
577  IF (is_local) array(1:n) = data_vec(1:n)
578 
579  END SUBROUTINE transfer_dbcsr_to_local_array_d
580 
581 ! **************************************************************************************************
582 !> \brief The inverse routine transferring data back from an array to a dbcsr
583 !> \param vec ...
584 !> \param array ...
585 !> \param n ...
586 !> \param is_local ...
587 ! **************************************************************************************************
588  SUBROUTINE transfer_local_array_to_dbcsr_d (vec, array, n, is_local)
589  TYPE(dbcsr_type) :: vec
590  REAL(kind=real_8), DIMENSION(:) :: array
591  INTEGER :: n
592  LOGICAL :: is_local
593  REAL(kind=real_8), DIMENSION(:), POINTER :: data_vec
594 
595  data_vec => dbcsr_get_data_p(vec, select_data_type=0.0_real_8)
596  IF (is_local) data_vec(1:n) = array(1:n)
597 
598  END SUBROUTINE transfer_local_array_to_dbcsr_d
599 
600 ! **************************************************************************************************
601 !> \brief Gram-Schmidt in matrix vector form
602 !> \param h_vec ...
603 !> \param f_vec ...
604 !> \param s_vec ...
605 !> \param w_vec ...
606 !> \param nrow_local ...
607 !> \param j ...
608 !> \param local_history ...
609 !> \param reorth_mat ...
610 !> \param local_comp ...
611 !> \param pcol_group ...
612 ! **************************************************************************************************
613  SUBROUTINE gram_schmidt_ortho_d (h_vec, f_vec, s_vec, w_vec, nrow_local, &
614  j, local_history, reorth_mat, local_comp, pcol_group)
615  REAL(kind=real_8), DIMENSION(:) :: h_vec, s_vec, f_vec, w_vec
616  REAL(kind=real_8), DIMENSION(:, :) :: local_history, reorth_mat
617  INTEGER :: nrow_local, j
618  TYPE(mp_comm_type), INTENT(IN) :: pcol_group
619  LOGICAL :: local_comp
620 
621  CHARACTER(LEN=*), PARAMETER :: routinen = 'Gram_Schmidt_ortho_d'
622  INTEGER :: handle
623 
624  CALL timeset(routinen, handle)
625 
626  ! Let's do the orthonormalization, first try the Gram Schmidt scheme
627  h_vec = 0.0_real_8; f_vec = 0.0_real_8; s_vec = 0.0_real_8
628  IF (local_comp) CALL dgemv('T', nrow_local, j, 1.0_real_8, local_history, &
629  nrow_local, w_vec, 1, 0.0_real_8, h_vec, 1)
630  CALL pcol_group%sum(h_vec(1:j))
631 
632  IF (local_comp) CALL dgemv('N', nrow_local, j, 1.0_real_8, reorth_mat, &
633  nrow_local, h_vec, 1, 0.0_real_8, f_vec, 1)
634  f_vec(:) = w_vec(:) - f_vec(:)
635 
636  CALL timestop(handle)
637 
638  END SUBROUTINE gram_schmidt_ortho_d
639 
640 ! **************************************************************************************************
641 !> \brief Compute the Daniel, Gragg, Kaufman and Steward correction
642 !> \param h_vec ...
643 !> \param f_vec ...
644 !> \param s_vec ...
645 !> \param nrow_local ...
646 !> \param j ...
647 !> \param local_history ...
648 !> \param reorth_mat ...
649 !> \param local_comp ...
650 !> \param pcol_group ...
651 ! **************************************************************************************************
652  SUBROUTINE dgks_ortho_d (h_vec, f_vec, s_vec, nrow_local, j, &
653  local_history, reorth_mat, local_comp, pcol_group)
654  REAL(kind=real_8), DIMENSION(:) :: h_vec, s_vec, f_vec
655  REAL(kind=real_8), DIMENSION(:, :) :: local_history, reorth_mat
656  INTEGER :: nrow_local, j
657  TYPE(mp_comm_type), INTENT(IN) :: pcol_group
658 
659  CHARACTER(LEN=*), PARAMETER :: routinen = 'DGKS_ortho_d'
660  LOGICAL :: local_comp
661  INTEGER :: handle
662 
663  CALL timeset(routinen, handle)
664 
665  IF (local_comp) CALL dgemv('T', nrow_local, j, 1.0_real_8, local_history, &
666  nrow_local, f_vec, 1, 0.0_real_8, s_vec, 1)
667  CALL pcol_group%sum(s_vec(1:j))
668  IF (local_comp) CALL dgemv('N', nrow_local, j, -1.0_real_8, reorth_mat, &
669  nrow_local, s_vec, 1, 1.0_real_8, f_vec, 1)
670  h_vec(1:j) = h_vec(1:j) + s_vec(1:j)
671 
672  CALL timestop(handle)
673 
674  END SUBROUTINE dgks_ortho_d
675 
676 ! **************************************************************************************************
677 !> \brief Compute the norm of a vector distributed along proc_col
678 !> as local arrays. Always return the real part next to the complex rep.
679 !> \param vec ...
680 !> \param norm ...
681 !> \param rnorm ...
682 !> \param pcol_group ...
683 ! **************************************************************************************************
684  SUBROUTINE compute_norms_d (vec, norm, rnorm, pcol_group)
685  REAL(kind=real_8), DIMENSION(:) :: vec
686  REAL(real_8) :: rnorm
687  REAL(kind=real_8) :: norm
688  TYPE(mp_comm_type), INTENT(IN) :: pcol_group
689 
690  ! the norm is computed along the processor column
691  norm = dot_product(vec, vec)
692  CALL pcol_group%sum(norm)
693  rnorm = sqrt(real(norm, real_8))
694  norm=rnorm
695 
696  END SUBROUTINE compute_norms_d
697 
698 ! **************************************************************************************************
699 !> \brief Computes the initial guess for the solution of the generalized eigenvalue
700 !> using the arnoldi method
701 !> \param matrix ...
702 !> \param matrix_arnoldi ...
703 !> \param vectors ...
704 !> \param arnoldi_data ...
705 ! **************************************************************************************************
706 
707  SUBROUTINE gev_arnoldi_init_d (matrix, matrix_arnoldi, vectors, arnoldi_data)
708  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
709  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix_arnoldi
710  TYPE(m_x_v_vectors_type) :: vectors
711  TYPE(arnoldi_data_type) :: arnoldi_data
712 
713  INTEGER :: iseed(4), row_size, col_size, &
714  nrow_local, ncol_local, &
715  row, col
716  REAL(real_8) :: rnorm
717  TYPE(dbcsr_iterator_type) :: iter
718  REAL(kind=real_8) :: norm, denom
719  REAL(kind=real_8), DIMENSION(:), ALLOCATABLE :: &
720  v_vec, w_vec
721  REAL(kind=real_8), DIMENSION(:), POINTER :: data_vec
722  LOGICAL :: local_comp
723  TYPE(arnoldi_data_d_type), POINTER :: ar_data
724  TYPE(arnoldi_control_type), POINTER :: control
725  TYPE(mp_comm_type) :: pcol_group
726 
727  control => get_control(arnoldi_data)
728  pcol_group = control%pcol_group
729  local_comp = control%local_comp
730 
731  ar_data => get_data_d(arnoldi_data)
732 
733  ! create a local data copy to store the vectors and make Gram Schmidt a bit simpler
734  CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
735  ALLOCATE (v_vec(nrow_local))
736  ALLOCATE (w_vec(nrow_local))
737  v_vec = 0.0_real_8; w_vec = 0.0_real_8
738  ar_data%Hessenberg = 0.0_real_8
739 
740  IF (control%has_initial_vector) THEN
741  ! after calling the set routine the initial vector is stored in f_vec
742  CALL transfer_local_array_to_dbcsr_d (vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)
743  ELSE
744  ! Setup the initial normalized random vector (sufficient if it only happens on proc_col 0)
745  CALL dbcsr_iterator_start(iter, vectors%input_vec)
746  DO WHILE (dbcsr_iterator_blocks_left(iter))
747  CALL dbcsr_iterator_next_block(iter, row, col, data_vec, row_size=row_size, col_size=col_size)
748  iseed(1) = 2; iseed(2) = mod(row, 4095); iseed(3) = mod(col, 4095); iseed(4) = 11
749  CALL dlarnv(2, iseed, row_size*col_size, data_vec)
750  END DO
751  CALL dbcsr_iterator_stop(iter)
752  END IF
753 
754  CALL transfer_dbcsr_to_local_array_d (vectors%input_vec, v_vec, nrow_local, control%local_comp)
755 
756  ! compute the vector norm of the reandom vectorm, get it real valued as well (rnorm)
757  CALL compute_norms_d (v_vec, norm, rnorm, control%pcol_group)
758 
759  IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data
760  CALL dbcsr_scale(vectors%input_vec, real(1.0, real_8)/rnorm)
761 
762  CALL dbcsr_matrix_colvec_multiply(matrix(1)%matrix, vectors%input_vec, vectors%result_vec, 1.0_real_8, &
763  0.0_real_8, vectors%rep_row_vec, vectors%rep_col_vec)
764 
765  CALL transfer_dbcsr_to_local_array_d (vectors%result_vec, w_vec, nrow_local, control%local_comp)
766 
767  ar_data%rho_scale = 0.0_real_8
768  ar_data%rho_scale = dot_product(v_vec, w_vec)
769  CALL pcol_group%sum(ar_data%rho_scale)
770 
771  CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, 1.0_real_8, &
772  0.0_real_8, vectors%rep_row_vec, vectors%rep_col_vec)
773 
774  CALL transfer_dbcsr_to_local_array_d (vectors%result_vec, w_vec, nrow_local, control%local_comp)
775 
776  denom = 0.0_real_8
777  denom = dot_product(v_vec, w_vec)
778  CALL pcol_group%sum(denom)
779  IF (control%myproc == 0) ar_data%rho_scale = ar_data%rho_scale/denom
780  CALL control%mp_group%bcast(ar_data%rho_scale, 0)
781 
782  ! if the maximum ev is requested we need to optimize with -A-rho*B
783  CALL dbcsr_copy(matrix_arnoldi(1)%matrix, matrix(1)%matrix)
784  CALL dbcsr_add(matrix_arnoldi(1)%matrix, matrix(2)%matrix, 1.0_real_8, -ar_data%rho_scale)
785 
786  ar_data%x_vec = v_vec
787 
788  END SUBROUTINE gev_arnoldi_init_d
789 
790 ! **************************************************************************************************
791 !> \brief builds the basis rothogonal wrt. the metric.
792 !> The structure looks similar to normal arnoldi but norms, vectors and
793 !> matrix_vector products are very differently defined. Therefore it is
794 !> cleaner to put it in a separate subroutine to avoid confusion
795 !> \param matrix ...
796 !> \param vectors ...
797 !> \param arnoldi_data ...
798 ! **************************************************************************************************
799  SUBROUTINE gev_build_subspace_d (matrix, vectors, arnoldi_data)
800  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
801  TYPE(m_x_v_vectors_type) :: vectors
802  TYPE(arnoldi_data_type) :: arnoldi_data
803 
804  INTEGER :: j, ncol_local, nrow_local
805  TYPE(arnoldi_control_type), POINTER :: control
806  TYPE(arnoldi_data_d_type), POINTER :: ar_data
807  REAL(kind=real_8) :: norm
808  REAL(kind=real_8), ALLOCATABLE, DIMENSION(:) :: h_vec, s_vec, v_vec, w_vec
809  REAL(kind=real_8), ALLOCATABLE, DIMENSION(:, :) :: zmat, czmat, bzmat
810  TYPE(mp_comm_type) :: pcol_group
811 
812  ar_data => get_data_d(arnoldi_data)
813  control => get_control(arnoldi_data)
814  control%converged = .false.
815  pcol_group = control%pcol_group
816 
817  ! create the vectors required during the iterations
818  CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
819  ALLOCATE (v_vec(nrow_local)); ALLOCATE (w_vec(nrow_local))
820  v_vec = 0.0_real_8; w_vec = 0.0_real_8
821  ALLOCATE (s_vec(control%max_iter)); ALLOCATE (h_vec(control%max_iter))
822  ALLOCATE (zmat(nrow_local, control%max_iter)); ALLOCATE (czmat(nrow_local, control%max_iter))
823  ALLOCATE (bzmat(nrow_local, control%max_iter))
824 
825  CALL transfer_local_array_to_dbcsr_d (vectors%input_vec, ar_data%x_vec, nrow_local, control%local_comp)
826  CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, 1.0_real_8, &
827  0.0_real_8, vectors%rep_row_vec, vectors%rep_col_vec)
828  CALL transfer_dbcsr_to_local_array_d (vectors%result_vec, bzmat(:, 1), nrow_local, control%local_comp)
829 
830  norm = 0.0_real_8
831  norm = dot_product(ar_data%x_vec, bzmat(:, 1))
832  CALL pcol_group%sum(norm)
833  IF (control%local_comp) THEN
834  zmat(:, 1) = ar_data%x_vec/sqrt(norm); bzmat(:, 1) = bzmat(:, 1)/sqrt(norm)
835  END IF
836 
837  DO j = 1, control%max_iter
838  control%current_step = j
839  CALL transfer_local_array_to_dbcsr_d (vectors%input_vec, zmat(:, j), nrow_local, control%local_comp)
840  CALL dbcsr_matrix_colvec_multiply(matrix(1)%matrix, vectors%input_vec, vectors%result_vec, 1.0_real_8, &
841  0.0_real_8, vectors%rep_row_vec, vectors%rep_col_vec)
842  CALL transfer_dbcsr_to_local_array_d (vectors%result_vec, czmat(:, j), nrow_local, control%local_comp)
843  w_vec(:) = czmat(:, j)
844 
845  ! Let's do the orthonormalization, to get the new f_vec. First try the Gram Schmidt scheme
846  CALL gram_schmidt_ortho_d (h_vec, ar_data%f_vec, s_vec, w_vec, nrow_local, j, &
847  bzmat, zmat, control%local_comp, control%pcol_group)
848 
849  ! A bit more expensive but simpliy always top up with a DGKS correction, otherwise numerics
850  ! can becom a problem later on, there is probably a good check, but we don't perform it
851  CALL dgks_ortho_d (h_vec, ar_data%f_vec, s_vec, nrow_local, j, bzmat, &
852  zmat, control%local_comp, control%pcol_group)
853 
854  CALL transfer_local_array_to_dbcsr_d (vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)
855  CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, 1.0_real_8, &
856  0.0_real_8, vectors%rep_row_vec, vectors%rep_col_vec)
857  CALL transfer_dbcsr_to_local_array_d (vectors%result_vec, v_vec, nrow_local, control%local_comp)
858  norm = 0.0_real_8
859  norm = dot_product(ar_data%f_vec, v_vec)
860  CALL pcol_group%sum(norm)
861 
862  IF (control%myproc == 0) control%converged = real(norm, real_8) .LT. epsilon(real(1.0, real_8))
863  CALL control%mp_group%bcast(control%converged, 0)
864  IF (control%converged) EXIT
865  IF (j == control%max_iter - 1) EXIT
866 
867  IF (control%local_comp) THEN
868  zmat(:, j + 1) = ar_data%f_vec/sqrt(norm); bzmat(:, j + 1) = v_vec(:)/sqrt(norm)
869  END IF
870  END DO
871 
872 ! getting a bit more complicated here as the final matrix is again a product which has to be computed with the
873 ! distributed vectors, therefore a sum along the first proc_col is necessary. As we want that matrix everywhere,
874 ! we set it to zero before and compute the distributed product only on the first col and then sum over the full grid
875  ar_data%Hessenberg = 0.0_real_8
876  IF (control%local_comp) THEN
877  ar_data%Hessenberg(1:control%current_step, 1:control%current_step) = &
878  matmul(transpose(czmat(:, 1:control%current_step)), zmat(:, 1:control%current_step))
879  END IF
880  CALL control%mp_group%sum(ar_data%Hessenberg)
881 
882  ar_data%local_history = zmat
883  ! broadcast the Hessenberg matrix so we don't need to care later on
884 
885  DEALLOCATE (v_vec); DEALLOCATE (w_vec); DEALLOCATE (s_vec); DEALLOCATE (h_vec); DEALLOCATE (czmat);
886  DEALLOCATE (zmat); DEALLOCATE (bzmat)
887 
888  END SUBROUTINE gev_build_subspace_d
889 
890 ! **************************************************************************************************
891 !> \brief Updates all data after an inner loop of the generalized ev arnoldi.
892 !> Updates rho and C=A-rho*B accordingly.
893 !> As an update scheme is used for he ev, the output ev has to be replaced
894 !> with the updated one.
895 !> Furthermore a convergence check is performed. The mv product could
896 !> be skiiped by making clever use of the precomputed data, However,
897 !> it is most likely not worth the effort.
898 !> \param matrix ...
899 !> \param matrix_arnoldi ...
900 !> \param vectors ...
901 !> \param arnoldi_data ...
902 ! **************************************************************************************************
903  SUBROUTINE gev_update_data_d (matrix, matrix_arnoldi, vectors, arnoldi_data)
904  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
905  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix_arnoldi
906  TYPE(m_x_v_vectors_type) :: vectors
907  TYPE(arnoldi_data_type) :: arnoldi_data
908 
909  TYPE(arnoldi_control_type), POINTER :: control
910  TYPE(arnoldi_data_d_type), POINTER :: ar_data
911  INTEGER :: ncol_local, nrow_local, ind, i
912  REAL(kind=real_8), ALLOCATABLE, DIMENSION(:) :: v_vec
913  REAL(real_8) :: rnorm
914  REAL(kind=real_8) :: norm
915  COMPLEX(real_8) :: val
916 
917  control => get_control(arnoldi_data)
918 
919  ar_data => get_data_d(arnoldi_data)
920 
921 ! compute the new shift, hack around the problem templating the conversion
922  val = ar_data%evals(control%selected_ind(1))
923  ar_data%rho_scale = ar_data%rho_scale + real(val,real_8)
924 ! compute the new eigenvector / initial guess for the next arnoldi loop
925  ar_data%x_vec = 0.0_real_8
926  DO i = 1, control%current_step
927  val = ar_data%revec(i, control%selected_ind(1))
928  ar_data%x_vec(:) = ar_data%x_vec(:) + ar_data%local_history(:, i)*real(val,real_8)
929  END DO
930 ! ar_data%x_vec(:)=MATMUL(ar_data%local_history(:,1:control%current_step),&
931 ! ar_data%revec(1:control%current_step,control%selected_ind(1)))
932 
933 ! update the C-matrix (A-rho*B), if the maximum value is requested we have to use -A-rho*B
934  CALL dbcsr_copy(matrix_arnoldi(1)%matrix, matrix(1)%matrix)
935  CALL dbcsr_add(matrix_arnoldi(1)%matrix, matrix(2)%matrix, 1.0_real_8, -ar_data%rho_scale)
936 
937 ! compute convergence and set the correct eigenvalue and eigenvector
938  CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
939  IF (ncol_local > 0) THEN
940  ALLOCATE (v_vec(nrow_local))
941  CALL compute_norms_d (ar_data%x_vec, norm, rnorm, control%pcol_group)
942  v_vec(:) = ar_data%x_vec(:)/rnorm
943  END IF
944  CALL transfer_local_array_to_dbcsr_d (vectors%input_vec, v_vec, nrow_local, control%local_comp)
945  CALL dbcsr_matrix_colvec_multiply(matrix_arnoldi(1)%matrix, vectors%input_vec, vectors%result_vec, 1.0_real_8, &
946  0.0_real_8, vectors%rep_row_vec, vectors%rep_col_vec)
947  CALL transfer_dbcsr_to_local_array_d (vectors%result_vec, v_vec, nrow_local, control%local_comp)
948  IF (ncol_local > 0) THEN
949  CALL compute_norms_d (v_vec, norm, rnorm, control%pcol_group)
950  ! check convergence
951  control%converged = rnorm .LT. control%threshold
952  DEALLOCATE (v_vec)
953  END IF
954  ! and broadcast the real eigenvalue
955  CALL control%mp_group%bcast(control%converged, 0)
956  ind = control%selected_ind(1)
957  CALL control%mp_group%bcast(ar_data%rho_scale, 0)
958 
959 ! Again the maximum value request is done on -A therefore the eigenvalue needs the opposite sign
960  ar_data%evals(ind) = ar_data%rho_scale
961 
962  END SUBROUTINE gev_update_data_d
963 ! **************************************************************************************************
964 !> \brief Call the correct eigensolver, in the arnoldi method only the right
965 !> eigenvectors are used. Lefts are created here but dumped immediately
966 !> \param arnoldi_data ...
967 ! **************************************************************************************************
968  SUBROUTINE compute_evals_z (arnoldi_data)
969  TYPE(arnoldi_data_type) :: arnoldi_data
970 
971  COMPLEX(real_8), DIMENSION(:, :), ALLOCATABLE :: levec
972  TYPE(arnoldi_data_z_type), POINTER :: ar_data
973  INTEGER :: ndim
974  TYPE(arnoldi_control_type), POINTER :: control
975 
976  ar_data => get_data_z(arnoldi_data)
977  control => get_control(arnoldi_data)
978  ndim = control%current_step
979  ALLOCATE (levec(ndim, ndim))
980 
981 ! Needs antoher interface as the calls to real and complex geev differ (sucks!)
982 ! only perform the diagonalization on processors which hold data
983  IF (control%generalized_ev) THEN
984  CALL arnoldi_symm_local_diag('V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &
985  ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim))
986  ELSE
987  IF (control%symmetric) THEN
988  CALL arnoldi_tridiag_local_diag('N', 'V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &
989  ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim), levec)
990  ELSE
991  CALL arnoldi_general_local_diag('N', 'V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &
992  ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim), levec)
993  END IF
994  END IF
995 
996  DEALLOCATE (levec)
997 
998  END SUBROUTINE compute_evals_z
999 
1000 ! **************************************************************************************************
1001 !> \brief Initialization of the arnoldi vector. Here a random vector is used,
1002 !> might be generalized in the future
1003 !> \param matrix ...
1004 !> \param vectors ...
1005 !> \param arnoldi_data ...
1006 ! **************************************************************************************************
1007  SUBROUTINE arnoldi_init_z (matrix, vectors, arnoldi_data)
1008  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
1009  TYPE(m_x_v_vectors_type) :: vectors
1010  TYPE(arnoldi_data_type) :: arnoldi_data
1011 
1012  INTEGER :: i, iseed(4), row_size, col_size, &
1013  nrow_local, ncol_local, &
1014  row, col
1015  REAL(real_8) :: rnorm
1016  TYPE(dbcsr_iterator_type) :: iter
1017  COMPLEX(kind=real_8) :: norm
1018  COMPLEX(kind=real_8), DIMENSION(:), ALLOCATABLE :: &
1019  v_vec, w_vec
1020  COMPLEX(kind=real_8), DIMENSION(:), POINTER :: data_vec
1021  LOGICAL :: local_comp
1022  TYPE(arnoldi_data_z_type), POINTER :: ar_data
1023  TYPE(arnoldi_control_type), POINTER :: control
1024  TYPE(mp_comm_type) :: pcol_group
1025 
1026  control => get_control(arnoldi_data)
1027  pcol_group = control%pcol_group
1028  local_comp = control%local_comp
1029 
1030  ar_data => get_data_z(arnoldi_data)
1031 
1032  ! create a local data copy to store the vectors and make Gram Schmidt a bit simpler
1033  CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
1034  ALLOCATE (v_vec(nrow_local))
1035  ALLOCATE (w_vec(nrow_local))
1036  v_vec = cmplx(0.0, 0.0, real_8); w_vec = cmplx(0.0, 0.0, real_8)
1037  ar_data%Hessenberg = cmplx(0.0, 0.0, real_8)
1038 
1039  IF (control%has_initial_vector) THEN
1040  ! after calling the set routine the initial vector is stored in f_vec
1041  CALL transfer_local_array_to_dbcsr_z (vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)
1042  ELSE
1043  ! Setup the initial normalized random vector (sufficient if it only happens on proc_col 0)
1044  CALL dbcsr_iterator_start(iter, vectors%input_vec)
1045  DO WHILE (dbcsr_iterator_blocks_left(iter))
1046  CALL dbcsr_iterator_next_block(iter, row, col, data_vec, row_size=row_size, col_size=col_size)
1047  iseed(1) = 2; iseed(2) = mod(row, 4095); iseed(3) = mod(col, 4095); iseed(4) = 11
1048  CALL zlarnv(2, iseed, row_size*col_size, data_vec)
1049  END DO
1050  CALL dbcsr_iterator_stop(iter)
1051  END IF
1052 
1053  CALL transfer_dbcsr_to_local_array_z (vectors%input_vec, v_vec, nrow_local, control%local_comp)
1054 
1055  ! compute the vector norm of the random vectorm, get it real valued as well (rnorm)
1056  CALL compute_norms_z (v_vec, norm, rnorm, control%pcol_group)
1057 
1058  IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data
1059  CALL dbcsr_scale(vectors%input_vec, real(1.0, real_8)/rnorm)
1060 
1061  ! Everything prepared, initialize the Arnoldi iteration
1062  CALL transfer_dbcsr_to_local_array_z (vectors%input_vec, v_vec, nrow_local, control%local_comp)
1063 
1064  ! This permits to compute the subspace of a matrix which is a product of multiple matrices
1065  DO i = 1, SIZE(matrix)
1066  CALL dbcsr_matrix_colvec_multiply(matrix(i)%matrix, vectors%input_vec, vectors%result_vec, cmplx(1.0, 0.0, real_8), &
1067  cmplx(0.0, 0.0, real_8), vectors%rep_row_vec, vectors%rep_col_vec)
1068  CALL dbcsr_copy(vectors%input_vec, vectors%result_vec)
1069  END DO
1070 
1071  CALL transfer_dbcsr_to_local_array_z (vectors%result_vec, w_vec, nrow_local, control%local_comp)
1072 
1073  ! Put the projection into the Hessenberg matrix, and make the vectors orthonormal
1074  ar_data%Hessenberg(1, 1) = dot_product(v_vec, w_vec)
1075  CALL pcol_group%sum(ar_data%Hessenberg(1, 1))
1076  ar_data%f_vec = w_vec - v_vec*ar_data%Hessenberg(1, 1)
1077 
1078  ar_data%local_history(:, 1) = v_vec(:)
1079 
1080  ! We did the first step in here so we should set the current step for the subspace generation accordingly
1081  control%current_step = 1
1082 
1083  DEALLOCATE (v_vec, w_vec)
1084 
1085  END SUBROUTINE arnoldi_init_z
1086 
1087 ! **************************************************************************************************
1088 !> \brief Alogorithm for the implicit restarts in the arnoldi method
1089 !> this is an early implementation which scales subspace size^4
1090 !> by replacing the lapack calls with direct math the
1091 !> QR and gemms can be made linear and a N^2 sacling will be acchieved
1092 !> however this already sets the framework but should be used with care
1093 !> \param arnoldi_data ...
1094 ! **************************************************************************************************
1095  SUBROUTINE arnoldi_iram_z (arnoldi_data)
1096  TYPE(arnoldi_data_type) :: arnoldi_data
1097 
1098  TYPE(arnoldi_data_z_type), POINTER :: ar_data
1099  TYPE(arnoldi_control_type), POINTER :: control
1100  COMPLEX(real_8), DIMENSION(:, :), ALLOCATABLE :: tmp_mat, safe_mat, q, tmp_mat1
1101  COMPLEX(real_8), DIMENSION(:), ALLOCATABLE :: work, tau, work_measure
1102  INTEGER :: msize, lwork, i, j, info, nwant
1103  COMPLEX(kind=real_8) :: beta, sigma
1104  COMPLEX(kind=real_8), DIMENSION(:, :), ALLOCATABLE :: qdata
1105 
1106  ! This is just a terribly inefficient implementation but I hope it is correct and might serve as a reference
1107  ar_data => get_data_z(arnoldi_data)
1108  control => get_control(arnoldi_data)
1109  msize = control%current_step
1110  nwant = control%nval_out
1111  ALLOCATE (tmp_mat(msize, msize)); ALLOCATE (safe_mat(msize, msize))
1112  ALLOCATE (q(msize, msize)); ALLOCATE (tmp_mat1(msize, msize))
1113  ALLOCATE (work_measure(1))
1114  ALLOCATE (tau(msize)); ALLOCATE (qdata(msize, msize))
1115  !make Q identity
1116  q = cmplx(0.0, 0.0, real_8)
1117  DO i = 1, msize
1118  q(i, i) = cmplx(1.0, 0.0, real_8)
1119  END DO
1120 
1121  ! Looks a bit odd, but safe_mat will contain the result in the end, while tmpmat gets violated by lapack
1122  CALL convert_matrix(tmp_mat, ar_data%Hessenberg(1:msize, 1:msize))
1123  safe_mat(:, :) = tmp_mat(:, :)
1124 
1125  DO i = 1, msize
1126  ! A bit a strange check but in the end we only want to shift the unwanted evals
1127  IF (any(control%selected_ind == i)) cycle
1128  ! Here we shift the matrix by subtracting unwanted evals from the diagonal
1129  DO j = 1, msize
1130  tmp_mat(j, j) = tmp_mat(j, j) - ar_data%evals(i)
1131  END DO
1132  ! Now we repair the damage by QR factorizing
1133  lwork = -1
1134  CALL zgeqrf(msize, msize, tmp_mat, msize, tau, work_measure, lwork, info)
1135  lwork = int(work_measure(1))
1136  IF (ALLOCATED(work)) THEN
1137  IF (SIZE(work) .LT. lwork) THEN
1138  DEALLOCATE (work)
1139  END IF
1140  END IF
1141  IF (.NOT. ALLOCATED(work)) ALLOCATE (work(lwork))
1142  CALL zgeqrf(msize, msize, tmp_mat, msize, tau, work, lwork, info)
1143  ! Ask Lapack to reconstruct Q from its own way of storing data (tmpmat will contain Q)
1144  CALL zungqr(msize, msize, msize, tmp_mat, msize, tau, work, lwork, info)
1145  ! update Q=Q*Q_current
1146  tmp_mat1(:, :) = q(:, :)
1147  CALL zgemm('N', 'N', msize, msize, msize, cmplx(1.0, 0.0, real_8), tmp_mat1, msize, tmp_mat, msize, cmplx(0.0, 0.0,&
1148  & real_8), &
1149  q, msize)
1150  ! Update H=(Q*)HQ
1151  CALL zgemm('C', 'N', msize, msize, msize, cmplx(1.0, 0.0, real_8), tmp_mat, msize, safe_mat, msize, cmplx(0.0, 0.0,&
1152  & real_8), &
1153  tmp_mat1, msize)
1154  CALL zgemm('N', 'N', msize, msize, msize, cmplx(1.0, 0.0, real_8), tmp_mat1, msize, tmp_mat, msize, cmplx(0.0, 0.0,&
1155  & real_8), &
1156  safe_mat, msize)
1157 
1158  ! this one is crucial for numerics not to accumulate noise in the subdiagonals
1159  DO j = 1, msize
1160  safe_mat(j + 2:msize, j) = cmplx(0.0, 0.0, real_8)
1161  END DO
1162  tmp_mat(:, :) = safe_mat(:, :)
1163  END DO
1164 
1165  ! Now we can compute our restart quantities
1166  ar_data%Hessenberg = cmplx(0.0, 0.0, real_8)
1167  CALL convert_matrix(ar_data%Hessenberg(1:msize, 1:msize), safe_mat)
1168  CALL convert_matrix(qdata, q)
1169 
1170  beta = ar_data%Hessenberg(nwant + 1, nwant); sigma = qdata(msize, nwant)
1171 
1172  !update the residuum and the basis vectors
1173  IF (control%local_comp) THEN
1174  ar_data%f_vec = matmul(ar_data%local_history(:, 1:msize), qdata(1:msize, nwant + 1))*beta + ar_data%f_vec(:)*sigma
1175  ar_data%local_history(:, 1:nwant) = matmul(ar_data%local_history(:, 1:msize), qdata(1:msize, 1:nwant))
1176  END IF
1177  ! Set the current step to nwant so the subspace build knows where to start
1178  control%current_step = nwant
1179 
1180  DEALLOCATE (tmp_mat, safe_mat, q, qdata, tmp_mat1, work, tau, work_measure)
1181 
1182  END SUBROUTINE arnoldi_iram_z
1183 
1184 ! **************************************************************************************************
1185 !> \brief Here we create the Krylov subspace and fill the Hessenberg matrix
1186 !> convergence check is only performed on subspace convergence
1187 !> Gram Schidt is used to orthonogonalize.
1188 !> If this is numericall not sufficient a Daniel, Gragg, Kaufman and Steward
1189 !> correction is performed
1190 !> \param matrix ...
1191 !> \param vectors ...
1192 !> \param arnoldi_data ...
1193 ! **************************************************************************************************
1194  SUBROUTINE build_subspace_z (matrix, vectors, arnoldi_data)
1195  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
1196  TYPE(m_x_v_vectors_type), TARGET :: vectors
1197  TYPE(arnoldi_data_type) :: arnoldi_data
1198 
1199  INTEGER :: i, j, ncol_local, nrow_local
1200  REAL(real_8) :: rnorm
1201  TYPE(arnoldi_control_type), POINTER :: control
1202  TYPE(arnoldi_data_z_type), POINTER :: ar_data
1203  COMPLEX(kind=real_8) :: norm
1204  COMPLEX(kind=real_8), ALLOCATABLE, DIMENSION(:) :: h_vec, s_vec, v_vec, w_vec
1205  TYPE(dbcsr_type), POINTER :: input_vec, result_vec, swap_vec
1206 
1207  ar_data => get_data_z(arnoldi_data)
1208  control => get_control(arnoldi_data)
1209  control%converged = .false.
1210 
1211  ! create the vectors required during the iterations
1212  CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
1213  ALLOCATE (v_vec(nrow_local)); ALLOCATE (w_vec(nrow_local))
1214  v_vec = cmplx(0.0, 0.0, real_8); w_vec = cmplx(0.0, 0.0, real_8)
1215  ALLOCATE (s_vec(control%max_iter)); ALLOCATE (h_vec(control%max_iter))
1216 
1217  DO j = control%current_step, control%max_iter - 1
1218 
1219  ! compute the vector norm of the residuum, get it real valued as well (rnorm)
1220  CALL compute_norms_z (ar_data%f_vec, norm, rnorm, control%pcol_group)
1221 
1222  ! check convergence and inform everybody about it, a bit annoying to talk to everybody because of that
1223  IF (control%myproc == 0) control%converged = rnorm .LT. real(control%threshold, real_8)
1224  CALL control%mp_group%bcast(control%converged, 0)
1225  IF (control%converged) EXIT
1226 
1227  ! transfer normalized residdum to history and its norm to the Hessenberg matrix
1228  IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data
1229  v_vec(:) = ar_data%f_vec(:)/rnorm; ar_data%local_history(:, j + 1) = v_vec(:); ar_data%Hessenberg(j + 1, j) = norm
1230 
1231  input_vec => vectors%input_vec
1232  result_vec => vectors%result_vec
1233  CALL transfer_local_array_to_dbcsr_z (input_vec, v_vec, nrow_local, control%local_comp)
1234 
1235  ! This permits to compute the subspace of a matrix which is a product of two matrices
1236  DO i = 1, SIZE(matrix)
1237  CALL dbcsr_matrix_colvec_multiply(matrix(i)%matrix, input_vec, result_vec, cmplx(1.0, 0.0, real_8), &
1238  cmplx(0.0, 0.0, real_8), vectors%rep_row_vec, vectors%rep_col_vec)
1239  swap_vec => input_vec
1240  input_vec => result_vec
1241  result_vec => swap_vec
1242  END DO
1243 
1244  CALL transfer_dbcsr_to_local_array_z (input_vec, w_vec, nrow_local, control%local_comp)
1245 
1246  ! Let's do the orthonormalization, to get the new f_vec. First try the Gram Schmidt scheme
1247  CALL gram_schmidt_ortho_z (h_vec, ar_data%f_vec, s_vec, w_vec, nrow_local, j + 1, &
1248  ar_data%local_history, ar_data%local_history, control%local_comp, control%pcol_group)
1249 
1250  ! A bit more expensive but simply always top up with a DGKS correction, otherwise numerics
1251  ! can become a problem later on, there is probably a good check whether it's necessary, but we don't perform it
1252  CALL dgks_ortho_z (h_vec, ar_data%f_vec, s_vec, nrow_local, j + 1, ar_data%local_history, &
1253  ar_data%local_history, control%local_comp, control%pcol_group)
1254  ! Finally we can put the projections into our Hessenberg matrix
1255  ar_data%Hessenberg(1:j + 1, j + 1) = h_vec(1:j + 1)
1256  control%current_step = j + 1
1257  END DO
1258 
1259  ! compute the vector norm of the final residuum and put it in to Hessenberg
1260  CALL compute_norms_z (ar_data%f_vec, norm, rnorm, control%pcol_group)
1261  ar_data%Hessenberg(control%current_step + 1, control%current_step) = norm
1262 
1263  ! broadcast the Hessenberg matrix so we don't need to care later on
1264  CALL control%mp_group%bcast(ar_data%Hessenberg, 0)
1265 
1266  DEALLOCATE (v_vec, w_vec, h_vec, s_vec)
1267 
1268  END SUBROUTINE build_subspace_z
1269 
1270 ! **************************************************************************************************
1271 !> \brief Helper routine to transfer the all data of a dbcsr matrix to a local array
1272 !> \param vec ...
1273 !> \param array ...
1274 !> \param n ...
1275 !> \param is_local ...
1276 ! **************************************************************************************************
1277  SUBROUTINE transfer_dbcsr_to_local_array_z (vec, array, n, is_local)
1278  TYPE(dbcsr_type) :: vec
1279  COMPLEX(kind=real_8), DIMENSION(:) :: array
1280  INTEGER :: n
1281  LOGICAL :: is_local
1282  COMPLEX(kind=real_8), DIMENSION(:), POINTER :: data_vec
1283 
1284  data_vec => dbcsr_get_data_p(vec, select_data_type=cmplx(0.0, 0.0, real_8))
1285  IF (is_local) array(1:n) = data_vec(1:n)
1286 
1287  END SUBROUTINE transfer_dbcsr_to_local_array_z
1288 
1289 ! **************************************************************************************************
1290 !> \brief The inverse routine transferring data back from an array to a dbcsr
1291 !> \param vec ...
1292 !> \param array ...
1293 !> \param n ...
1294 !> \param is_local ...
1295 ! **************************************************************************************************
1296  SUBROUTINE transfer_local_array_to_dbcsr_z (vec, array, n, is_local)
1297  TYPE(dbcsr_type) :: vec
1298  COMPLEX(kind=real_8), DIMENSION(:) :: array
1299  INTEGER :: n
1300  LOGICAL :: is_local
1301  COMPLEX(kind=real_8), DIMENSION(:), POINTER :: data_vec
1302 
1303  data_vec => dbcsr_get_data_p(vec, select_data_type=cmplx(0.0, 0.0, real_8))
1304  IF (is_local) data_vec(1:n) = array(1:n)
1305 
1306  END SUBROUTINE transfer_local_array_to_dbcsr_z
1307 
1308 ! **************************************************************************************************
1309 !> \brief Gram-Schmidt in matrix vector form
1310 !> \param h_vec ...
1311 !> \param f_vec ...
1312 !> \param s_vec ...
1313 !> \param w_vec ...
1314 !> \param nrow_local ...
1315 !> \param j ...
1316 !> \param local_history ...
1317 !> \param reorth_mat ...
1318 !> \param local_comp ...
1319 !> \param pcol_group ...
1320 ! **************************************************************************************************
1321  SUBROUTINE gram_schmidt_ortho_z (h_vec, f_vec, s_vec, w_vec, nrow_local, &
1322  j, local_history, reorth_mat, local_comp, pcol_group)
1323  COMPLEX(kind=real_8), DIMENSION(:) :: h_vec, s_vec, f_vec, w_vec
1324  COMPLEX(kind=real_8), DIMENSION(:, :) :: local_history, reorth_mat
1325  INTEGER :: nrow_local, j
1326  TYPE(mp_comm_type), INTENT(IN) :: pcol_group
1327  LOGICAL :: local_comp
1328 
1329  CHARACTER(LEN=*), PARAMETER :: routinen = 'Gram_Schmidt_ortho_z'
1330  INTEGER :: handle
1331 
1332  CALL timeset(routinen, handle)
1333 
1334  ! Let's do the orthonormalization, first try the Gram Schmidt scheme
1335  h_vec = cmplx(0.0, 0.0, real_8); f_vec = cmplx(0.0, 0.0, real_8); s_vec = cmplx(0.0, 0.0, real_8)
1336  IF (local_comp) CALL zgemv('T', nrow_local, j, cmplx(1.0, 0.0, real_8), local_history, &
1337  nrow_local, w_vec, 1, cmplx(0.0, 0.0, real_8), h_vec, 1)
1338  CALL pcol_group%sum(h_vec(1:j))
1339 
1340  IF (local_comp) CALL zgemv('N', nrow_local, j, cmplx(1.0, 0.0, real_8), reorth_mat, &
1341  nrow_local, h_vec, 1, cmplx(0.0, 0.0, real_8), f_vec, 1)
1342  f_vec(:) = w_vec(:) - f_vec(:)
1343 
1344  CALL timestop(handle)
1345 
1346  END SUBROUTINE gram_schmidt_ortho_z
1347 
1348 ! **************************************************************************************************
1349 !> \brief Compute the Daniel, Gragg, Kaufman and Steward correction
1350 !> \param h_vec ...
1351 !> \param f_vec ...
1352 !> \param s_vec ...
1353 !> \param nrow_local ...
1354 !> \param j ...
1355 !> \param local_history ...
1356 !> \param reorth_mat ...
1357 !> \param local_comp ...
1358 !> \param pcol_group ...
1359 ! **************************************************************************************************
1360  SUBROUTINE dgks_ortho_z (h_vec, f_vec, s_vec, nrow_local, j, &
1361  local_history, reorth_mat, local_comp, pcol_group)
1362  COMPLEX(kind=real_8), DIMENSION(:) :: h_vec, s_vec, f_vec
1363  COMPLEX(kind=real_8), DIMENSION(:, :) :: local_history, reorth_mat
1364  INTEGER :: nrow_local, j
1365  TYPE(mp_comm_type), INTENT(IN) :: pcol_group
1366 
1367  CHARACTER(LEN=*), PARAMETER :: routinen = 'DGKS_ortho_z'
1368  LOGICAL :: local_comp
1369  INTEGER :: handle
1370 
1371  CALL timeset(routinen, handle)
1372 
1373  IF (local_comp) CALL zgemv('T', nrow_local, j, cmplx(1.0, 0.0, real_8), local_history, &
1374  nrow_local, f_vec, 1, cmplx(0.0, 0.0, real_8), s_vec, 1)
1375  CALL pcol_group%sum(s_vec(1:j))
1376  IF (local_comp) CALL zgemv('N', nrow_local, j, cmplx(-1.0, 0.0, real_8), reorth_mat, &
1377  nrow_local, s_vec, 1, cmplx(1.0, 0.0, real_8), f_vec, 1)
1378  h_vec(1:j) = h_vec(1:j) + s_vec(1:j)
1379 
1380  CALL timestop(handle)
1381 
1382  END SUBROUTINE dgks_ortho_z
1383 
1384 ! **************************************************************************************************
1385 !> \brief Compute the norm of a vector distributed along proc_col
1386 !> as local arrays. Always return the real part next to the complex rep.
1387 !> \param vec ...
1388 !> \param norm ...
1389 !> \param rnorm ...
1390 !> \param pcol_group ...
1391 ! **************************************************************************************************
1392  SUBROUTINE compute_norms_z (vec, norm, rnorm, pcol_group)
1393  COMPLEX(kind=real_8), DIMENSION(:) :: vec
1394  REAL(real_8) :: rnorm
1395  COMPLEX(kind=real_8) :: norm
1396  TYPE(mp_comm_type), INTENT(IN) :: pcol_group
1397 
1398  ! the norm is computed along the processor column
1399  norm = dot_product(vec, vec)
1400  CALL pcol_group%sum(norm)
1401  rnorm = sqrt(real(norm, real_8))
1402  norm=cmplx(rnorm, 0.0, real_8)
1403 
1404  END SUBROUTINE compute_norms_z
1405 
1406 ! **************************************************************************************************
1407 !> \brief Computes the initial guess for the solution of the generalized eigenvalue
1408 !> using the arnoldi method
1409 !> \param matrix ...
1410 !> \param matrix_arnoldi ...
1411 !> \param vectors ...
1412 !> \param arnoldi_data ...
1413 ! **************************************************************************************************
1414 
1415  SUBROUTINE gev_arnoldi_init_z (matrix, matrix_arnoldi, vectors, arnoldi_data)
1416  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
1417  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix_arnoldi
1418  TYPE(m_x_v_vectors_type) :: vectors
1419  TYPE(arnoldi_data_type) :: arnoldi_data
1420 
1421  INTEGER :: iseed(4), row_size, col_size, &
1422  nrow_local, ncol_local, &
1423  row, col
1424  REAL(real_8) :: rnorm
1425  TYPE(dbcsr_iterator_type) :: iter
1426  COMPLEX(kind=real_8) :: norm, denom
1427  COMPLEX(kind=real_8), DIMENSION(:), ALLOCATABLE :: &
1428  v_vec, w_vec
1429  COMPLEX(kind=real_8), DIMENSION(:), POINTER :: data_vec
1430  LOGICAL :: local_comp
1431  TYPE(arnoldi_data_z_type), POINTER :: ar_data
1432  TYPE(arnoldi_control_type), POINTER :: control
1433  TYPE(mp_comm_type) :: pcol_group
1434 
1435  control => get_control(arnoldi_data)
1436  pcol_group = control%pcol_group
1437  local_comp = control%local_comp
1438 
1439  ar_data => get_data_z(arnoldi_data)
1440 
1441  ! create a local data copy to store the vectors and make Gram Schmidt a bit simpler
1442  CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
1443  ALLOCATE (v_vec(nrow_local))
1444  ALLOCATE (w_vec(nrow_local))
1445  v_vec = cmplx(0.0, 0.0, real_8); w_vec = cmplx(0.0, 0.0, real_8)
1446  ar_data%Hessenberg = cmplx(0.0, 0.0, real_8)
1447 
1448  IF (control%has_initial_vector) THEN
1449  ! after calling the set routine the initial vector is stored in f_vec
1450  CALL transfer_local_array_to_dbcsr_z (vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)
1451  ELSE
1452  ! Setup the initial normalized random vector (sufficient if it only happens on proc_col 0)
1453  CALL dbcsr_iterator_start(iter, vectors%input_vec)
1454  DO WHILE (dbcsr_iterator_blocks_left(iter))
1455  CALL dbcsr_iterator_next_block(iter, row, col, data_vec, row_size=row_size, col_size=col_size)
1456  iseed(1) = 2; iseed(2) = mod(row, 4095); iseed(3) = mod(col, 4095); iseed(4) = 11
1457  CALL zlarnv(2, iseed, row_size*col_size, data_vec)
1458  END DO
1459  CALL dbcsr_iterator_stop(iter)
1460  END IF
1461 
1462  CALL transfer_dbcsr_to_local_array_z (vectors%input_vec, v_vec, nrow_local, control%local_comp)
1463 
1464  ! compute the vector norm of the reandom vectorm, get it real valued as well (rnorm)
1465  CALL compute_norms_z (v_vec, norm, rnorm, control%pcol_group)
1466 
1467  IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data
1468  CALL dbcsr_scale(vectors%input_vec, real(1.0, real_8)/rnorm)
1469 
1470  CALL dbcsr_matrix_colvec_multiply(matrix(1)%matrix, vectors%input_vec, vectors%result_vec, cmplx(1.0, 0.0, real_8), &
1471  cmplx(0.0, 0.0, real_8), vectors%rep_row_vec, vectors%rep_col_vec)
1472 
1473  CALL transfer_dbcsr_to_local_array_z (vectors%result_vec, w_vec, nrow_local, control%local_comp)
1474 
1475  ar_data%rho_scale = cmplx(0.0, 0.0, real_8)
1476  ar_data%rho_scale = dot_product(v_vec, w_vec)
1477  CALL pcol_group%sum(ar_data%rho_scale)
1478 
1479  CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, cmplx(1.0, 0.0, real_8), &
1480  cmplx(0.0, 0.0, real_8), vectors%rep_row_vec, vectors%rep_col_vec)
1481 
1482  CALL transfer_dbcsr_to_local_array_z (vectors%result_vec, w_vec, nrow_local, control%local_comp)
1483 
1484  denom = cmplx(0.0, 0.0, real_8)
1485  denom = dot_product(v_vec, w_vec)
1486  CALL pcol_group%sum(denom)
1487  IF (control%myproc == 0) ar_data%rho_scale = ar_data%rho_scale/denom
1488  CALL control%mp_group%bcast(ar_data%rho_scale, 0)
1489 
1490  ! if the maximum ev is requested we need to optimize with -A-rho*B
1491  CALL dbcsr_copy(matrix_arnoldi(1)%matrix, matrix(1)%matrix)
1492  CALL dbcsr_add(matrix_arnoldi(1)%matrix, matrix(2)%matrix, cmplx(1.0, 0.0, real_8), -ar_data%rho_scale)
1493 
1494  ar_data%x_vec = v_vec
1495 
1496  END SUBROUTINE gev_arnoldi_init_z
1497 
1498 ! **************************************************************************************************
1499 !> \brief builds the basis rothogonal wrt. the metric.
1500 !> The structure looks similar to normal arnoldi but norms, vectors and
1501 !> matrix_vector products are very differently defined. Therefore it is
1502 !> cleaner to put it in a separate subroutine to avoid confusion
1503 !> \param matrix ...
1504 !> \param vectors ...
1505 !> \param arnoldi_data ...
1506 ! **************************************************************************************************
1507  SUBROUTINE gev_build_subspace_z (matrix, vectors, arnoldi_data)
1508  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
1509  TYPE(m_x_v_vectors_type) :: vectors
1510  TYPE(arnoldi_data_type) :: arnoldi_data
1511 
1512  INTEGER :: j, ncol_local, nrow_local
1513  TYPE(arnoldi_control_type), POINTER :: control
1514  TYPE(arnoldi_data_z_type), POINTER :: ar_data
1515  COMPLEX(kind=real_8) :: norm
1516  COMPLEX(kind=real_8), ALLOCATABLE, DIMENSION(:) :: h_vec, s_vec, v_vec, w_vec
1517  COMPLEX(kind=real_8), ALLOCATABLE, DIMENSION(:, :) :: zmat, czmat, bzmat
1518  TYPE(mp_comm_type) :: pcol_group
1519 
1520  ar_data => get_data_z(arnoldi_data)
1521  control => get_control(arnoldi_data)
1522  control%converged = .false.
1523  pcol_group = control%pcol_group
1524 
1525  ! create the vectors required during the iterations
1526  CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
1527  ALLOCATE (v_vec(nrow_local)); ALLOCATE (w_vec(nrow_local))
1528  v_vec = cmplx(0.0, 0.0, real_8); w_vec = cmplx(0.0, 0.0, real_8)
1529  ALLOCATE (s_vec(control%max_iter)); ALLOCATE (h_vec(control%max_iter))
1530  ALLOCATE (zmat(nrow_local, control%max_iter)); ALLOCATE (czmat(nrow_local, control%max_iter))
1531  ALLOCATE (bzmat(nrow_local, control%max_iter))
1532 
1533  CALL transfer_local_array_to_dbcsr_z (vectors%input_vec, ar_data%x_vec, nrow_local, control%local_comp)
1534  CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, cmplx(1.0, 0.0, real_8), &
1535  cmplx(0.0, 0.0, real_8), vectors%rep_row_vec, vectors%rep_col_vec)
1536  CALL transfer_dbcsr_to_local_array_z (vectors%result_vec, bzmat(:, 1), nrow_local, control%local_comp)
1537 
1538  norm = cmplx(0.0, 0.0, real_8)
1539  norm = dot_product(ar_data%x_vec, bzmat(:, 1))
1540  CALL pcol_group%sum(norm)
1541  IF (control%local_comp) THEN
1542  zmat(:, 1) = ar_data%x_vec/sqrt(norm); bzmat(:, 1) = bzmat(:, 1)/sqrt(norm)
1543  END IF
1544 
1545  DO j = 1, control%max_iter
1546  control%current_step = j
1547  CALL transfer_local_array_to_dbcsr_z (vectors%input_vec, zmat(:, j), nrow_local, control%local_comp)
1548  CALL dbcsr_matrix_colvec_multiply(matrix(1)%matrix, vectors%input_vec, vectors%result_vec, cmplx(1.0, 0.0, real_8), &
1549  cmplx(0.0, 0.0, real_8), vectors%rep_row_vec, vectors%rep_col_vec)
1550  CALL transfer_dbcsr_to_local_array_z (vectors%result_vec, czmat(:, j), nrow_local, control%local_comp)
1551  w_vec(:) = czmat(:, j)
1552 
1553  ! Let's do the orthonormalization, to get the new f_vec. First try the Gram Schmidt scheme
1554  CALL gram_schmidt_ortho_z (h_vec, ar_data%f_vec, s_vec, w_vec, nrow_local, j, &
1555  bzmat, zmat, control%local_comp, control%pcol_group)
1556 
1557  ! A bit more expensive but simpliy always top up with a DGKS correction, otherwise numerics
1558  ! can becom a problem later on, there is probably a good check, but we don't perform it
1559  CALL dgks_ortho_z (h_vec, ar_data%f_vec, s_vec, nrow_local, j, bzmat, &
1560  zmat, control%local_comp, control%pcol_group)
1561 
1562  CALL transfer_local_array_to_dbcsr_z (vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)
1563  CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, cmplx(1.0, 0.0, real_8), &
1564  cmplx(0.0, 0.0, real_8), vectors%rep_row_vec, vectors%rep_col_vec)
1565  CALL transfer_dbcsr_to_local_array_z (vectors%result_vec, v_vec, nrow_local, control%local_comp)
1566  norm = cmplx(0.0, 0.0, real_8)
1567  norm = dot_product(ar_data%f_vec, v_vec)
1568  CALL pcol_group%sum(norm)
1569 
1570  IF (control%myproc == 0) control%converged = real(norm, real_8) .LT. epsilon(real(1.0, real_8))
1571  CALL control%mp_group%bcast(control%converged, 0)
1572  IF (control%converged) EXIT
1573  IF (j == control%max_iter - 1) EXIT
1574 
1575  IF (control%local_comp) THEN
1576  zmat(:, j + 1) = ar_data%f_vec/sqrt(norm); bzmat(:, j + 1) = v_vec(:)/sqrt(norm)
1577  END IF
1578  END DO
1579 
1580 ! getting a bit more complicated here as the final matrix is again a product which has to be computed with the
1581 ! distributed vectors, therefore a sum along the first proc_col is necessary. As we want that matrix everywhere,
1582 ! we set it to zero before and compute the distributed product only on the first col and then sum over the full grid
1583  ar_data%Hessenberg = cmplx(0.0, 0.0, real_8)
1584  IF (control%local_comp) THEN
1585  ar_data%Hessenberg(1:control%current_step, 1:control%current_step) = &
1586  matmul(transpose(czmat(:, 1:control%current_step)), zmat(:, 1:control%current_step))
1587  END IF
1588  CALL control%mp_group%sum(ar_data%Hessenberg)
1589 
1590  ar_data%local_history = zmat
1591  ! broadcast the Hessenberg matrix so we don't need to care later on
1592 
1593  DEALLOCATE (v_vec); DEALLOCATE (w_vec); DEALLOCATE (s_vec); DEALLOCATE (h_vec); DEALLOCATE (czmat);
1594  DEALLOCATE (zmat); DEALLOCATE (bzmat)
1595 
1596  END SUBROUTINE gev_build_subspace_z
1597 
1598 ! **************************************************************************************************
1599 !> \brief Updates all data after an inner loop of the generalized ev arnoldi.
1600 !> Updates rho and C=A-rho*B accordingly.
1601 !> As an update scheme is used for he ev, the output ev has to be replaced
1602 !> with the updated one.
1603 !> Furthermore a convergence check is performed. The mv product could
1604 !> be skiiped by making clever use of the precomputed data, However,
1605 !> it is most likely not worth the effort.
1606 !> \param matrix ...
1607 !> \param matrix_arnoldi ...
1608 !> \param vectors ...
1609 !> \param arnoldi_data ...
1610 ! **************************************************************************************************
1611  SUBROUTINE gev_update_data_z (matrix, matrix_arnoldi, vectors, arnoldi_data)
1612  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
1613  TYPE(dbcsr_p_type), DIMENSION(:) :: matrix_arnoldi
1614  TYPE(m_x_v_vectors_type) :: vectors
1615  TYPE(arnoldi_data_type) :: arnoldi_data
1616 
1617  TYPE(arnoldi_control_type), POINTER :: control
1618  TYPE(arnoldi_data_z_type), POINTER :: ar_data
1619  INTEGER :: ncol_local, nrow_local, ind, i
1620  COMPLEX(kind=real_8), ALLOCATABLE, DIMENSION(:) :: v_vec
1621  REAL(real_8) :: rnorm
1622  COMPLEX(kind=real_8) :: norm
1623  COMPLEX(real_8) :: val
1624 
1625  control => get_control(arnoldi_data)
1626 
1627  ar_data => get_data_z(arnoldi_data)
1628 
1629 ! compute the new shift, hack around the problem templating the conversion
1630  val = ar_data%evals(control%selected_ind(1))
1631  ar_data%rho_scale = ar_data%rho_scale + val
1632 ! compute the new eigenvector / initial guess for the next arnoldi loop
1633  ar_data%x_vec = cmplx(0.0, 0.0, real_8)
1634  DO i = 1, control%current_step
1635  val = ar_data%revec(i, control%selected_ind(1))
1636  ar_data%x_vec(:) = ar_data%x_vec(:) + ar_data%local_history(:, i)*val
1637  END DO
1638 ! ar_data%x_vec(:)=MATMUL(ar_data%local_history(:,1:control%current_step),&
1639 ! ar_data%revec(1:control%current_step,control%selected_ind(1)))
1640 
1641 ! update the C-matrix (A-rho*B), if the maximum value is requested we have to use -A-rho*B
1642  CALL dbcsr_copy(matrix_arnoldi(1)%matrix, matrix(1)%matrix)
1643  CALL dbcsr_add(matrix_arnoldi(1)%matrix, matrix(2)%matrix, cmplx(1.0, 0.0, real_8), -ar_data%rho_scale)
1644 
1645 ! compute convergence and set the correct eigenvalue and eigenvector
1646  CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
1647  IF (ncol_local > 0) THEN
1648  ALLOCATE (v_vec(nrow_local))
1649  CALL compute_norms_z (ar_data%x_vec, norm, rnorm, control%pcol_group)
1650  v_vec(:) = ar_data%x_vec(:)/rnorm
1651  END IF
1652  CALL transfer_local_array_to_dbcsr_z (vectors%input_vec, v_vec, nrow_local, control%local_comp)
1653  CALL dbcsr_matrix_colvec_multiply(matrix_arnoldi(1)%matrix, vectors%input_vec, vectors%result_vec, cmplx(1.0, 0.0,&
1654  & real_8), &
1655  cmplx(0.0, 0.0, real_8), vectors%rep_row_vec, vectors%rep_col_vec)
1656  CALL transfer_dbcsr_to_local_array_z (vectors%result_vec, v_vec, nrow_local, control%local_comp)
1657  IF (ncol_local > 0) THEN
1658  CALL compute_norms_z (v_vec, norm, rnorm, control%pcol_group)
1659  ! check convergence
1660  control%converged = rnorm .LT. control%threshold
1661  DEALLOCATE (v_vec)
1662  END IF
1663  ! and broadcast the real eigenvalue
1664  CALL control%mp_group%bcast(control%converged, 0)
1665  ind = control%selected_ind(1)
1666  CALL control%mp_group%bcast(ar_data%rho_scale, 0)
1667 
1668 ! Again the maximum value request is done on -A therefore the eigenvalue needs the opposite sign
1669  ar_data%evals(ind) = ar_data%rho_scale
1670 
1671  END SUBROUTINE gev_update_data_z
1672 
1673 END MODULE arnoldi_methods
provides a unified interface to lapack geev routines
Definition: arnoldi_geev.F:15
methods for arnoldi iteration
subroutine, public arnoldi_init(matrix, vectors, arnoldi_data)
Interface for the initialization of the arnoldi subspace creation currently it can only setup a rando...
subroutine, public gev_update_data(matrix, matrix_arnoldi, vectors, arnoldi_data)
in the generalized eigenvalue the matrix depende on the projection therefore the outer loop has to bu...
subroutine, public gev_build_subspace(matrix, vectors, arnoldi_data)
here the iterations are performed and the krylov space for the generalized eigenvalue problem is crea...
subroutine, public compute_evals(arnoldi_data)
Interface to compute the eigenvalues of a nonsymmetric matrix This is only the serial version.
subroutine, public gev_arnoldi_init(matrix, matrix_arnoldi, vectors, arnoldi_data)
Interface for the initialization of the arnoldi subspace creation for the generalized eigenvalue prob...
subroutine, public build_subspace(matrix, vectors, arnoldi_data)
here the iterations are performed and the krylov space is constructed
subroutine, public arnoldi_iram(arnoldi_data)
Interface for the routine calcualting the implicit restarts Currently all based on lapack.
collection of types used in arnoldi
Definition: arnoldi_types.F:15
type(arnoldi_data_c_type) function, pointer, public get_data_c(ar_data)
...
type(arnoldi_data_s_type) function, pointer, public get_data_s(ar_data)
...
type(arnoldi_data_z_type) function, pointer, public get_data_z(ar_data)
...
logical function, public has_d_real(ar_data)
...
type(arnoldi_data_d_type) function, pointer, public get_data_d(ar_data)
...
type(arnoldi_control_type) function, pointer, public get_control(ar_data)
...
elemental logical function, public has_d_cmplx(ar_data)
...
operations for skinny matrices/vectors expressed in dbcsr form
Definition: dbcsr_vector.F:15
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public real_8
Definition: kinds.F:41
Interface to the message passing library MPI.