(git:ccc2433)
qs_scf_lanczos.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \brief module that contains the algorithms to perform an itrative
10 !> diagonalization by the block-Lanczos approach
11 !> \par History
12 !> 05.2009 created [MI]
13 !> \author fawzi
14 ! **************************************************************************************************
16 
26  cp_fm_struct_type
27  USE cp_fm_types, ONLY: cp_fm_create,&
29  cp_fm_release,&
32  cp_fm_to_fm,&
33  cp_fm_type,&
35  USE cp_log_handling, ONLY: cp_to_string
36  USE kinds, ONLY: dp
37  USE parallel_gemm_api, ONLY: parallel_gemm
38  USE qs_mo_types, ONLY: get_mo_set,&
39  mo_set_type
40  USE qs_scf_types, ONLY: krylov_space_type
41  USE scf_control_types, ONLY: scf_control_type
42 #include "./base/base_uses.f90"
43 
44  IMPLICIT NONE
45  PRIVATE
46  CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_scf_lanczos'
47 
49 
50 CONTAINS
51 
52 ! **************************************************************************************************
53 
54 ! **************************************************************************************************
55 !> \brief allocates matrices and vectros used in the construction of
56 !> the krylov space and for the lanczos refinement
57 !> \param krylov_space ...
58 !> \param scf_control ...
59 !> \param mos ...
60 !> \param
61 !> \par History
62 !> 05.2009 created [MI]
63 ! **************************************************************************************************
64 
65  SUBROUTINE krylov_space_allocate(krylov_space, scf_control, mos)
66 
67  TYPE(krylov_space_type), INTENT(INOUT) :: krylov_space
68  TYPE(scf_control_type), POINTER :: scf_control
69  TYPE(mo_set_type), DIMENSION(:), INTENT(IN) :: mos
70 
71  CHARACTER(LEN=*), PARAMETER :: routinen = 'krylov_space_allocate'
72 
73  INTEGER :: handle, ik, ispin, max_nmo, nao, nblock, &
74  ndim, nk, nmo, nspin
75  TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
76  TYPE(cp_fm_type), POINTER :: mo_coeff
77 
78  CALL timeset(routinen, handle)
79 
80  IF (.NOT. ASSOCIATED(krylov_space%mo_conv)) THEN
81  NULLIFY (fm_struct_tmp, mo_coeff)
82 
83  krylov_space%nkrylov = scf_control%diagonalization%nkrylov
84  krylov_space%nblock = scf_control%diagonalization%nblock_krylov
85 
86  nk = krylov_space%nkrylov
87  nblock = krylov_space%nblock
88  nspin = SIZE(mos, 1)
89 
90  ALLOCATE (krylov_space%mo_conv(nspin))
91  ALLOCATE (krylov_space%mo_refine(nspin))
92  ALLOCATE (krylov_space%chc_mat(nspin))
93  ALLOCATE (krylov_space%c_vec(nspin))
94  max_nmo = 0
95  DO ispin = 1, nspin
96  CALL get_mo_set(mos(ispin), mo_coeff=mo_coeff, nao=nao, nmo=nmo)
97  CALL cp_fm_create(krylov_space%mo_conv(ispin), mo_coeff%matrix_struct)
98  CALL cp_fm_create(krylov_space%mo_refine(ispin), mo_coeff%matrix_struct)
99  NULLIFY (fm_struct_tmp)
100  CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmo, ncol_global=nmo, &
101  para_env=mo_coeff%matrix_struct%para_env, &
102  context=mo_coeff%matrix_struct%context)
103  CALL cp_fm_create(krylov_space%chc_mat(ispin), fm_struct_tmp, "chc")
104  CALL cp_fm_create(krylov_space%c_vec(ispin), fm_struct_tmp, "vec")
105  CALL cp_fm_struct_release(fm_struct_tmp)
106  max_nmo = max(max_nmo, nmo)
107  END DO
108 
109  !the use of max_nmo might not be ok, in this case allocate nspin matrices
110  ALLOCATE (krylov_space%c_eval(max_nmo))
111 
112  ALLOCATE (krylov_space%v_mat(nk))
113 
114  NULLIFY (fm_struct_tmp)
115  CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nblock, &
116  para_env=mo_coeff%matrix_struct%para_env, &
117  context=mo_coeff%matrix_struct%context)
118  DO ik = 1, nk
119  CALL cp_fm_create(krylov_space%v_mat(ik), matrix_struct=fm_struct_tmp, &
120  name="v_mat_"//trim(adjustl(cp_to_string(ik))))
121  END DO
122  ALLOCATE (krylov_space%tmp_mat)
123  CALL cp_fm_create(krylov_space%tmp_mat, matrix_struct=fm_struct_tmp, &
124  name="tmp_mat")
125  CALL cp_fm_struct_release(fm_struct_tmp)
126 
127  ! NOTE: the following matrices are small and could be defined
128 ! as standard array rather than istributed fm
129  NULLIFY (fm_struct_tmp)
130  CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nblock, ncol_global=nblock, &
131  para_env=mo_coeff%matrix_struct%para_env, &
132  context=mo_coeff%matrix_struct%context)
133  ALLOCATE (krylov_space%block1_mat)
134  CALL cp_fm_create(krylov_space%block1_mat, matrix_struct=fm_struct_tmp, &
135  name="a_mat_"//trim(adjustl(cp_to_string(ik))))
136  ALLOCATE (krylov_space%block2_mat)
137  CALL cp_fm_create(krylov_space%block2_mat, matrix_struct=fm_struct_tmp, &
138  name="b_mat_"//trim(adjustl(cp_to_string(ik))))
139  ALLOCATE (krylov_space%block3_mat)
140  CALL cp_fm_create(krylov_space%block3_mat, matrix_struct=fm_struct_tmp, &
141  name="b2_mat_"//trim(adjustl(cp_to_string(ik))))
142  CALL cp_fm_struct_release(fm_struct_tmp)
143 
144  ndim = nblock*nk
145  NULLIFY (fm_struct_tmp)
146  CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ndim, ncol_global=ndim, &
147  para_env=mo_coeff%matrix_struct%para_env, &
148  context=mo_coeff%matrix_struct%context)
149  ALLOCATE (krylov_space%block4_mat)
150  CALL cp_fm_create(krylov_space%block4_mat, matrix_struct=fm_struct_tmp, &
151  name="t_mat")
152  ALLOCATE (krylov_space%block5_mat)
153  CALL cp_fm_create(krylov_space%block5_mat, matrix_struct=fm_struct_tmp, &
154  name="t_vec")
155  CALL cp_fm_struct_release(fm_struct_tmp)
156  ALLOCATE (krylov_space%t_eval(ndim))
157 
158  ELSE
159  !Nothing should be done
160  END IF
161 
162  CALL timestop(handle)
163 
164  END SUBROUTINE krylov_space_allocate
165 
166 ! **************************************************************************************************
167 !> \brief lanczos refinement by blocks of not-converged MOs
168 !> \param krylov_space ...
169 !> \param ks ...
170 !> \param c0 ...
171 !> \param c1 ...
172 !> \param eval ...
173 !> \param nao ...
174 !> \param eps_iter ...
175 !> \param ispin ...
176 !> \param check_moconv_only ...
177 !> \param
178 !> \par History
179 !> 05.2009 created [MI]
180 ! **************************************************************************************************
181 
182  SUBROUTINE lanczos_refinement(krylov_space, ks, c0, c1, eval, nao, &
183  eps_iter, ispin, check_moconv_only)
184 
185  TYPE(krylov_space_type), POINTER :: krylov_space
186  TYPE(cp_fm_type), INTENT(IN) :: ks, c0, c1
187  REAL(dp), DIMENSION(:), POINTER :: eval
188  INTEGER, INTENT(IN) :: nao
189  REAL(dp), INTENT(IN) :: eps_iter
190  INTEGER, INTENT(IN) :: ispin
191  LOGICAL, INTENT(IN), OPTIONAL :: check_moconv_only
192 
193  CHARACTER(LEN=*), PARAMETER :: routinen = 'lanczos_refinement'
194  REAL(kind=dp), PARAMETER :: rmone = -1.0_dp, rone = 1.0_dp, &
195  rzero = 0.0_dp
196 
197  INTEGER :: hand1, hand2, hand3, hand4, hand5, handle, ib, ik, imo, imo_low, imo_up, it, jt, &
198  nblock, ndim, nmo, nmo_converged, nmo_nc, nmob, num_blocks
199  INTEGER, ALLOCATABLE, DIMENSION(:) :: itaken
200  LOGICAL :: my_check_moconv_only
201  REAL(dp) :: max_norm, min_norm, vmax
202  REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: q_mat, tblock, tvblock
203  REAL(dp), DIMENSION(:), POINTER :: c_res, t_eval
204  TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
205  TYPE(cp_fm_type) :: c2_tmp, c3_tmp, c_tmp, hc
206  TYPE(cp_fm_type), DIMENSION(:), POINTER :: v_mat
207  TYPE(cp_fm_type), POINTER :: a_mat, b2_mat, b_mat, chc, evec, t_mat, &
208  t_vec
209 
210  CALL timeset(routinen, handle)
211 
212  NULLIFY (fm_struct_tmp)
213  NULLIFY (chc, evec)
214  NULLIFY (c_res, t_eval)
215  NULLIFY (t_mat, t_vec)
216  NULLIFY (a_mat, b_mat, b2_mat, v_mat)
217 
218  nmo = SIZE(eval, 1)
219  my_check_moconv_only = .false.
220  IF (PRESENT(check_moconv_only)) my_check_moconv_only = check_moconv_only
221 
222  chc => krylov_space%chc_mat(ispin)
223  evec => krylov_space%c_vec(ispin)
224  c_res => krylov_space%c_eval
225  t_eval => krylov_space%t_eval
226 
227  NULLIFY (fm_struct_tmp)
228  CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmo, &
229  para_env=c0%matrix_struct%para_env, &
230  context=c0%matrix_struct%context)
231  CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
232  name="c_tmp")
233  CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
234  name="hc")
235  CALL cp_fm_struct_release(fm_struct_tmp)
236 
237  !Compute (C^t)HC
238  CALL parallel_gemm('N', 'N', nao, nmo, nao, rone, ks, c0, rzero, hc)
239  CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, hc, rzero, chc)
240 
241  !Diagonalize (C^t)HC
242  CALL timeset(routinen//"diag_chc", hand1)
243  CALL choose_eigv_solver(chc, evec, eval)
244  CALL timestop(hand1)
245 
246  !Rotate the C vectors
247  CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, c0, evec, rzero, c1)
248 
249  !Check for converged states
250  CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, hc, evec, rzero, c_tmp)
251  CALL cp_fm_to_fm(c1, c0, nmo, 1, 1)
252  CALL cp_fm_column_scale(c1, eval)
253  CALL cp_fm_scale_and_add(1.0_dp, c_tmp, rmone, c1)
254  CALL cp_fm_vectorsnorm(c_tmp, c_res)
255 
256  nmo_converged = 0
257  nmo_nc = 0
258  max_norm = 0.0_dp
259  min_norm = 1.e10_dp
260  CALL cp_fm_set_all(c1, rzero)
261  DO imo = 1, nmo
262  max_norm = max(max_norm, c_res(imo))
263  min_norm = min(min_norm, c_res(imo))
264  END DO
265  DO imo = 1, nmo
266  IF (c_res(imo) <= eps_iter) THEN
267  nmo_converged = nmo_converged + 1
268  ELSE
269  nmo_nc = nmo - nmo_converged
270  EXIT
271  END IF
272  END DO
273 
274  nblock = krylov_space%nblock
275  num_blocks = nmo_nc/nblock
276 
277  krylov_space%nmo_nc = nmo_nc
278  krylov_space%nmo_conv = nmo_converged
279  krylov_space%max_res_norm = max_norm
280  krylov_space%min_res_norm = min_norm
281 
282  IF (my_check_moconv_only) THEN
283  CALL cp_fm_release(c_tmp)
284  CALL cp_fm_release(hc)
285  CALL timestop(handle)
286  RETURN
287  ELSE IF (krylov_space%nmo_nc > 0) THEN
288 
289  CALL cp_fm_to_fm(c0, c1, nmo_nc, nmo_converged + 1, 1)
290 
291  nblock = krylov_space%nblock
292  IF (modulo(nmo_nc, nblock) > 0.0_dp) THEN
293  num_blocks = nmo_nc/nblock + 1
294  ELSE
295  num_blocks = nmo_nc/nblock
296  END IF
297 
298  DO ib = 1, num_blocks
299 
300  imo_low = (ib - 1)*nblock + 1
301  imo_up = min(ib*nblock, nmo_nc)
302  nmob = imo_up - imo_low + 1
303  ndim = krylov_space%nkrylov*nmob
304 
305  NULLIFY (fm_struct_tmp)
306  CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=ndim, &
307  para_env=c0%matrix_struct%para_env, &
308  context=c0%matrix_struct%context)
309  CALL cp_fm_create(c2_tmp, matrix_struct=fm_struct_tmp, &
310  name="c2_tmp")
311  CALL cp_fm_struct_release(fm_struct_tmp)
312  NULLIFY (fm_struct_tmp)
313  CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=ndim, &
314  para_env=c0%matrix_struct%para_env, &
315  context=c0%matrix_struct%context)
316  CALL cp_fm_create(c3_tmp, matrix_struct=fm_struct_tmp, &
317  name="c3_tmp")
318  CALL cp_fm_struct_release(fm_struct_tmp)
319 
320  ! Create local matrix of right size
321  IF (nmob /= nblock) THEN
322  NULLIFY (a_mat, b_mat, b2_mat, t_mat, t_vec, v_mat)
323  ALLOCATE (a_mat, b_mat, b2_mat, t_mat, t_vec)
324  NULLIFY (fm_struct_tmp)
325  CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=nmob, &
326  para_env=chc%matrix_struct%para_env, &
327  context=chc%matrix_struct%context)
328  CALL cp_fm_create(a_mat, matrix_struct=fm_struct_tmp, &
329  name="a_mat")
330  CALL cp_fm_create(b_mat, matrix_struct=fm_struct_tmp, &
331  name="b_mat")
332  CALL cp_fm_create(b2_mat, matrix_struct=fm_struct_tmp, &
333  name="b2_mat")
334  CALL cp_fm_struct_release(fm_struct_tmp)
335  NULLIFY (fm_struct_tmp)
336  CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ndim, ncol_global=ndim, &
337  para_env=chc%matrix_struct%para_env, &
338  context=chc%matrix_struct%context)
339  CALL cp_fm_create(t_mat, matrix_struct=fm_struct_tmp, &
340  name="t_mat")
341  CALL cp_fm_create(t_vec, matrix_struct=fm_struct_tmp, &
342  name="t_vec")
343  CALL cp_fm_struct_release(fm_struct_tmp)
344  NULLIFY (fm_struct_tmp)
345  CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
346  para_env=c0%matrix_struct%para_env, &
347  context=c0%matrix_struct%context)
348  ALLOCATE (v_mat(krylov_space%nkrylov))
349  DO ik = 1, krylov_space%nkrylov
350  CALL cp_fm_create(v_mat(ik), matrix_struct=fm_struct_tmp, &
351  name="v_mat")
352  END DO
353  CALL cp_fm_struct_release(fm_struct_tmp)
354  ELSE
355  a_mat => krylov_space%block1_mat
356  b_mat => krylov_space%block2_mat
357  b2_mat => krylov_space%block3_mat
358  t_mat => krylov_space%block4_mat
359  t_vec => krylov_space%block5_mat
360  v_mat => krylov_space%v_mat
361  END IF
362 
363  ALLOCATE (tblock(nmob, nmob))
364  ALLOCATE (tvblock(nmob, ndim))
365 
366  CALL timeset(routinen//"_kry_loop", hand2)
367  CALL cp_fm_set_all(b_mat, rzero)
368  CALL cp_fm_set_all(t_mat, rzero)
369  CALL cp_fm_to_fm(c1, v_mat(1), nmob, imo_low, 1)
370 
371  !Compute A =(V^t)HV
372  CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(1), rzero, hc)
373  CALL parallel_gemm('T', 'N', nmob, nmob, nao, rone, v_mat(1), hc, &
374  rzero, a_mat)
375 
376  !Compute the residual matrix R for next
377  !factorisation
378  CALL parallel_gemm('N', 'N', nao, nmob, nmob, rone, v_mat(1), a_mat, &
379  rzero, c_tmp)
380  CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
381 
382  ! Build the block tridiagonal matrix
383  CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
384  CALL cp_fm_set_submatrix(t_mat, tblock, 1, 1, nmob, nmob)
385 
386  DO ik = 2, krylov_space%nkrylov
387 
388  ! Call lapack for QR factorization
389  CALL cp_fm_set_all(b_mat, rzero)
390  CALL cp_fm_to_fm(c_tmp, v_mat(ik), nmob, 1, 1)
391  CALL cp_fm_qr_factorization(c_tmp, b_mat, nao, nmob, 1, 1)
392 
393  CALL cp_fm_triangular_multiply(b_mat, v_mat(ik), side="R", invert_tr=.true., &
394  n_rows=nao, n_cols=nmob)
395 
396  !Compute A =(V^t)HV
397  CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(ik), rzero, hc)
398  CALL parallel_gemm('T', 'N', nmob, nmob, nao, rone, v_mat(ik), hc, rzero, a_mat)
399 
400  !Compute the !residual matrix R !for next !factorisation
401  CALL parallel_gemm('N', 'N', nao, nmob, nmob, rone, v_mat(ik), a_mat, &
402  rzero, c_tmp)
403  CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
404  CALL cp_fm_to_fm(v_mat(ik - 1), hc, nmob, 1, 1)
405  CALL cp_fm_triangular_multiply(b_mat, hc, side='R', transpose_tr=.true., &
406  n_rows=nao, n_cols=nmob, alpha=rmone)
407  CALL cp_fm_scale_and_add(rone, c_tmp, rone, hc)
408 
409  ! Build the block tridiagonal matrix
410  it = (ik - 2)*nmob + 1
411  jt = (ik - 1)*nmob + 1
412 
413  CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
414  CALL cp_fm_set_submatrix(t_mat, tblock, jt, jt, nmob, nmob)
415  CALL cp_fm_transpose(b_mat, a_mat)
416  CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
417  CALL cp_fm_set_submatrix(t_mat, tblock, it, jt, nmob, nmob)
418 
419  END DO ! ik
420  CALL timestop(hand2)
421 
422  DEALLOCATE (tblock)
423 
424  CALL timeset(routinen//"_diag_tri", hand3)
425 
426  CALL choose_eigv_solver(t_mat, t_vec, t_eval)
427  ! Diagonalize the block-tridiagonal matrix
428  CALL timestop(hand3)
429 
430  CALL timeset(routinen//"_build_cnew", hand4)
431 ! !Compute the refined vectors
432  CALL cp_fm_set_all(c2_tmp, rzero)
433  DO ik = 1, krylov_space%nkrylov
434  jt = (ik - 1)*nmob
435  CALL parallel_gemm('N', 'N', nao, ndim, nmob, rone, v_mat(ik), t_vec, rone, c2_tmp, &
436  b_first_row=(jt + 1))
437  END DO
438  DEALLOCATE (tvblock)
439 
440  CALL cp_fm_set_all(c3_tmp, rzero)
441  CALL parallel_gemm('T', 'N', nmob, ndim, nao, rone, v_mat(1), c2_tmp, rzero, c3_tmp)
442 
443  !Try to avoid linear dependencies
444  ALLOCATE (q_mat(nmob, ndim))
445  !get max
446  CALL cp_fm_get_submatrix(c3_tmp, q_mat, 1, 1, nmob, ndim)
447 
448  ALLOCATE (itaken(ndim))
449  itaken = 0
450  DO it = 1, nmob
451  vmax = 0.0_dp
452  !select index ik
453  DO jt = 1, ndim
454  IF (itaken(jt) == 0 .AND. abs(q_mat(it, jt)) > vmax) THEN
455  vmax = abs(q_mat(it, jt))
456  ik = jt
457  END IF
458  END DO
459  itaken(ik) = 1
460 
461  CALL cp_fm_to_fm(c2_tmp, v_mat(1), 1, ik, it)
462  END DO
463  DEALLOCATE (itaken)
464  DEALLOCATE (q_mat)
465 
466  !Copy in the converged set to enlarge the converged subspace
467  CALL cp_fm_to_fm(v_mat(1), c0, nmob, 1, (nmo_converged + imo_low))
468  CALL timestop(hand4)
469 
470  IF (nmob < nblock) THEN
471  CALL cp_fm_release(a_mat)
472  CALL cp_fm_release(b_mat)
473  CALL cp_fm_release(b2_mat)
474  CALL cp_fm_release(t_mat)
475  CALL cp_fm_release(t_vec)
476  DEALLOCATE (a_mat, b_mat, b2_mat, t_mat, t_vec)
477  CALL cp_fm_release(v_mat)
478  END IF
479  CALL cp_fm_release(c2_tmp)
480  CALL cp_fm_release(c3_tmp)
481  END DO ! ib
482 
483  CALL timeset(routinen//"_ortho", hand5)
484  CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, c0, rzero, chc)
485 
486  CALL cp_fm_cholesky_decompose(chc, nmo)
487  CALL cp_fm_triangular_multiply(chc, c0, 'R', invert_tr=.true.)
488  CALL timestop(hand5)
489 
490  CALL cp_fm_release(c_tmp)
491  CALL cp_fm_release(hc)
492  ELSE
493  CALL cp_fm_release(c_tmp)
494  CALL cp_fm_release(hc)
495  CALL timestop(handle)
496  RETURN
497  END IF
498 
499  CALL timestop(handle)
500 
501  END SUBROUTINE lanczos_refinement
502 
503 !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
504 
505 ! **************************************************************************************************
506 !> \brief ...
507 !> \param krylov_space ...
508 !> \param ks ...
509 !> \param c0 ...
510 !> \param c1 ...
511 !> \param eval ...
512 !> \param nao ...
513 !> \param eps_iter ...
514 !> \param ispin ...
515 !> \param check_moconv_only ...
516 ! **************************************************************************************************
517  SUBROUTINE lanczos_refinement_2v(krylov_space, ks, c0, c1, eval, nao, &
518  eps_iter, ispin, check_moconv_only)
519 
520  TYPE(krylov_space_type), POINTER :: krylov_space
521  TYPE(cp_fm_type), INTENT(IN) :: ks, c0, c1
522  REAL(dp), DIMENSION(:), POINTER :: eval
523  INTEGER, INTENT(IN) :: nao
524  REAL(dp), INTENT(IN) :: eps_iter
525  INTEGER, INTENT(IN) :: ispin
526  LOGICAL, INTENT(IN), OPTIONAL :: check_moconv_only
527 
528  CHARACTER(LEN=*), PARAMETER :: routinen = 'lanczos_refinement_2v'
529  REAL(kind=dp), PARAMETER :: rmone = -1.0_dp, rone = 1.0_dp, &
530  rzero = 0.0_dp
531 
532  INTEGER :: hand1, hand2, hand3, hand4, hand5, hand6, handle, i, ia, ib, ik, imo, imo_low, &
533  imo_up, info, it, j, jt, liwork, lwork, nblock, ndim, nmo, nmo_converged, nmo_nc, nmob, &
534  num_blocks
535  INTEGER, ALLOCATABLE, DIMENSION(:) :: itaken
536  INTEGER, DIMENSION(:), POINTER :: iwork
537  LOGICAL :: my_check_moconv_only
538  REAL(dp) :: max_norm, min_norm, vmax
539  REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: a_block, b_block, q_mat, t_mat
540  REAL(dp), DIMENSION(:), POINTER :: c_res, t_eval
541  REAL(kind=dp), DIMENSION(:), POINTER :: work
542  REAL(kind=dp), DIMENSION(:, :), POINTER :: a_loc, b_loc
543  TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
544  TYPE(cp_fm_type) :: b_mat, c2_tmp, c_tmp, hc, v_tmp
545  TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: v_mat
546  TYPE(cp_fm_type), POINTER :: chc, evec
547 
548  CALL timeset(routinen, handle)
549 
550  NULLIFY (fm_struct_tmp)
551  NULLIFY (chc, evec)
552  NULLIFY (c_res, t_eval)
553  NULLIFY (b_loc, a_loc)
554 
555  nmo = SIZE(eval, 1)
556  my_check_moconv_only = .false.
557  IF (PRESENT(check_moconv_only)) my_check_moconv_only = check_moconv_only
558 
559  chc => krylov_space%chc_mat(ispin)
560  evec => krylov_space%c_vec(ispin)
561  c_res => krylov_space%c_eval
562  t_eval => krylov_space%t_eval
563 
564  NULLIFY (fm_struct_tmp)
565  CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmo, &
566  para_env=c0%matrix_struct%para_env, &
567  context=c0%matrix_struct%context)
568  CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
569  name="c_tmp")
570  CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
571  name="hc")
572  CALL cp_fm_struct_release(fm_struct_tmp)
573 
574  !Compute (C^t)HC
575  CALL parallel_gemm('N', 'N', nao, nmo, nao, rone, ks, c0, rzero, hc)
576  CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, hc, rzero, chc)
577 
578  !Diagonalize (C^t)HC
579  CALL timeset(routinen//"diag_chc", hand1)
580  CALL choose_eigv_solver(chc, evec, eval)
581 
582  CALL timestop(hand1)
583 
584  CALL timeset(routinen//"check_conv", hand6)
585  !Rotate the C vectors
586  CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, c0, evec, rzero, c1)
587 
588  !Check for converged states
589  CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, hc, evec, rzero, c_tmp)
590  CALL cp_fm_to_fm(c1, c0, nmo, 1, 1)
591  CALL cp_fm_column_scale(c1, eval)
592  CALL cp_fm_scale_and_add(1.0_dp, c_tmp, rmone, c1)
593  CALL cp_fm_vectorsnorm(c_tmp, c_res)
594 
595  nmo_converged = 0
596  nmo_nc = 0
597  max_norm = 0.0_dp
598  min_norm = 1.e10_dp
599  CALL cp_fm_set_all(c1, rzero)
600  DO imo = 1, nmo
601  max_norm = max(max_norm, c_res(imo))
602  min_norm = min(min_norm, c_res(imo))
603  END DO
604  DO imo = 1, nmo
605  IF (c_res(imo) <= eps_iter) THEN
606  nmo_converged = nmo_converged + 1
607  ELSE
608  nmo_nc = nmo - nmo_converged
609  EXIT
610  END IF
611  END DO
612  CALL timestop(hand6)
613 
614  CALL cp_fm_release(c_tmp)
615  CALL cp_fm_release(hc)
616 
617  krylov_space%nmo_nc = nmo_nc
618  krylov_space%nmo_conv = nmo_converged
619  krylov_space%max_res_norm = max_norm
620  krylov_space%min_res_norm = min_norm
621 
622  IF (my_check_moconv_only) THEN
623  ! Do nothing
624  ELSE IF (krylov_space%nmo_nc > 0) THEN
625 
626  CALL cp_fm_to_fm(c0, c1, nmo_nc, nmo_converged + 1, 1)
627 
628  nblock = krylov_space%nblock
629  IF (modulo(nmo_nc, nblock) > 0.0_dp) THEN
630  num_blocks = nmo_nc/nblock + 1
631  ELSE
632  num_blocks = nmo_nc/nblock
633  END IF
634 
635  DO ib = 1, num_blocks
636 
637  imo_low = (ib - 1)*nblock + 1
638  imo_up = min(ib*nblock, nmo_nc)
639  nmob = imo_up - imo_low + 1
640  ndim = krylov_space%nkrylov*nmob
641 
642  ! Allocation
643  CALL timeset(routinen//"alloc", hand6)
644  ALLOCATE (a_block(nmob, nmob))
645  ALLOCATE (b_block(nmob, nmob))
646  ALLOCATE (t_mat(ndim, ndim))
647 
648  NULLIFY (fm_struct_tmp)
649  ! by forcing ncol_block=nmo, the needed part of the matrix is distributed on a subset of processes
650  ! this is due to the use of two-dimensional grids of processes
651  ! nrow_global is distributed over num_pe(1)
652  ! a local_data array is anyway allocated for the processes non included
653  ! this should have a minimum size
654  ! with ncol_local=1.
655  CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
656  ncol_block=nmob, &
657  para_env=c0%matrix_struct%para_env, &
658  context=c0%matrix_struct%context, &
659  force_block=.true.)
660  CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
661  name="c_tmp")
662  CALL cp_fm_set_all(c_tmp, rzero)
663  CALL cp_fm_create(v_tmp, matrix_struct=fm_struct_tmp, &
664  name="v_tmp")
665  CALL cp_fm_set_all(v_tmp, rzero)
666  CALL cp_fm_struct_release(fm_struct_tmp)
667  NULLIFY (fm_struct_tmp)
668  ALLOCATE (v_mat(krylov_space%nkrylov))
669  CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
670  ncol_block=nmob, &
671  para_env=c0%matrix_struct%para_env, &
672  context=c0%matrix_struct%context, &
673  force_block=.true.)
674  DO ik = 1, krylov_space%nkrylov
675  CALL cp_fm_create(v_mat(ik), matrix_struct=fm_struct_tmp, &
676  name="v_mat")
677  END DO
678  CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
679  name="hc")
680  CALL cp_fm_struct_release(fm_struct_tmp)
681  NULLIFY (fm_struct_tmp)
682  CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=ndim, &
683  ncol_block=ndim, &
684  para_env=c0%matrix_struct%para_env, &
685  context=c0%matrix_struct%context, &
686  force_block=.true.)
687  CALL cp_fm_create(c2_tmp, matrix_struct=fm_struct_tmp, &
688  name="c2_tmp")
689  CALL cp_fm_struct_release(fm_struct_tmp)
690 
691  NULLIFY (fm_struct_tmp)
692  CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=nmob, &
693  para_env=c0%matrix_struct%para_env, &
694  context=c0%matrix_struct%context)
695  CALL cp_fm_create(b_mat, matrix_struct=fm_struct_tmp, &
696  name="b_mat")
697  CALL cp_fm_struct_release(fm_struct_tmp)
698  CALL timestop(hand6)
699  !End allocation
700 
701  CALL cp_fm_set_all(b_mat, rzero)
702  CALL cp_fm_to_fm(c1, v_mat(1), nmob, imo_low, 1)
703 
704  ! Here starts the construction of krylov space
705  CALL timeset(routinen//"_kry_loop", hand2)
706  !Compute A =(V^t)HV
707  CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(1), rzero, hc)
708 
709  a_block = 0.0_dp
710  a_loc => v_mat(1)%local_data
711  b_loc => hc%local_data
712 
713  IF (SIZE(hc%local_data, 2) == nmob) THEN
714  ! this is a work around to avoid problems due to the two dimensional grid of processes
715  CALL dgemm('T', 'N', nmob, nmob, SIZE(hc%local_data, 1), 1.0_dp, a_loc(1, 1), &
716  SIZE(hc%local_data, 1), b_loc(1, 1), SIZE(hc%local_data, 1), 0.0_dp, a_block(1, 1), nmob)
717  END IF
718  CALL hc%matrix_struct%para_env%sum(a_block)
719 
720  !Compute the residual matrix R for next
721  !factorisation
722  c_tmp%local_data = 0.0_dp
723  IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
724  b_loc => c_tmp%local_data
725  CALL dgemm('N', 'N', SIZE(c_tmp%local_data, 1), nmob, nmob, 1.0_dp, a_loc(1, 1), &
726  SIZE(c_tmp%local_data, 1), a_block(1, 1), nmob, 0.0_dp, &
727  b_loc(1, 1), SIZE(c_tmp%local_data, 1))
728  END IF
729  CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
730 
731  ! Build the block tridiagonal matrix
732  t_mat = 0.0_dp
733  DO i = 1, nmob
734  t_mat(1:nmob, i) = a_block(1:nmob, i)
735  END DO
736 
737  DO ik = 2, krylov_space%nkrylov
738  ! Call lapack for QR factorization
739  CALL cp_fm_set_all(b_mat, rzero)
740  CALL cp_fm_to_fm(c_tmp, v_mat(ik), nmob, 1, 1)
741  CALL cp_fm_qr_factorization(c_tmp, b_mat, nao, nmob, 1, 1)
742 
743  CALL cp_fm_triangular_multiply(b_mat, v_mat(ik), side="R", invert_tr=.true., &
744  n_rows=nao, n_cols=nmob)
745 
746  CALL cp_fm_get_submatrix(b_mat, b_block, 1, 1, nmob, nmob)
747 
748  !Compute A =(V^t)HV
749  CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(ik), rzero, hc)
750 
751  a_block = 0.0_dp
752  IF (SIZE(hc%local_data, 2) == nmob) THEN
753  a_loc => v_mat(ik)%local_data
754  b_loc => hc%local_data
755  CALL dgemm('T', 'N', nmob, nmob, SIZE(hc%local_data, 1), 1.0_dp, a_loc(1, 1), &
756  SIZE(hc%local_data, 1), b_loc(1, 1), SIZE(hc%local_data, 1), 0.0_dp, a_block(1, 1), nmob)
757  END IF
758  CALL hc%matrix_struct%para_env%sum(a_block)
759 
760  !Compute the residual matrix R for next
761  !factorisation
762  c_tmp%local_data = 0.0_dp
763  IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
764  a_loc => v_mat(ik)%local_data
765  b_loc => c_tmp%local_data
766  CALL dgemm('N', 'N', SIZE(c_tmp%local_data, 1), nmob, nmob, 1.0_dp, a_loc(1, 1), &
767  SIZE(c_tmp%local_data, 1), a_block(1, 1), nmob, 0.0_dp, &
768  b_loc(1, 1), SIZE(c_tmp%local_data, 1))
769  END IF
770  CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
771 
772  IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
773  a_loc => v_mat(ik - 1)%local_data
774  DO j = 1, nmob
775  DO i = 1, j
776  DO ia = 1, SIZE(c_tmp%local_data, 1)
777  b_loc(ia, i) = b_loc(ia, i) - a_loc(ia, j)*b_block(i, j)
778  END DO
779  END DO
780  END DO
781  END IF
782 
783  ! Build the block tridiagonal matrix
784  it = (ik - 2)*nmob
785  jt = (ik - 1)*nmob
786  DO j = 1, nmob
787  t_mat(jt + 1:jt + nmob, jt + j) = a_block(1:nmob, j)
788  DO i = 1, nmob
789  t_mat(it + i, jt + j) = b_block(j, i)
790  t_mat(jt + j, it + i) = b_block(j, i)
791  END DO
792  END DO
793  END DO ! ik
794  CALL timestop(hand2)
795 
796  CALL timeset(routinen//"_diag_tri", hand3)
797  lwork = 1 + 6*ndim + 2*ndim**2 + 5000
798  liwork = 5*ndim + 3
799  ALLOCATE (work(lwork))
800  ALLOCATE (iwork(liwork))
801 
802  ! Diagonalize the block-tridiagonal matrix
803  CALL dsyevd('V', 'U', ndim, t_mat(1, 1), ndim, t_eval(1), &
804  work(1), lwork, iwork(1), liwork, info)
805  DEALLOCATE (work)
806  DEALLOCATE (iwork)
807  CALL timestop(hand3)
808 
809  CALL timeset(routinen//"_build_cnew", hand4)
810 ! !Compute the refined vectors
811 
812  c2_tmp%local_data = 0.0_dp
813  ALLOCATE (q_mat(nmob, ndim))
814  q_mat = 0.0_dp
815  b_loc => c2_tmp%local_data
816  DO ik = 1, krylov_space%nkrylov
817  CALL cp_fm_to_fm(v_mat(ik), v_tmp, nmob, 1, 1)
818  IF (SIZE(c2_tmp%local_data, 2) == ndim) THEN
819 ! a_loc => v_mat(ik)%local_data
820  a_loc => v_tmp%local_data
821  it = (ik - 1)*nmob
822  CALL dgemm('N', 'N', SIZE(c2_tmp%local_data, 1), ndim, nmob, 1.0_dp, a_loc(1, 1), &
823  SIZE(c2_tmp%local_data, 1), t_mat(it + 1, 1), ndim, 1.0_dp, &
824  b_loc(1, 1), SIZE(c2_tmp%local_data, 1))
825  END IF
826  END DO !ik
827 
828  !Try to avoid linear dependencies
829  CALL cp_fm_to_fm(v_mat(1), v_tmp, nmob, 1, 1)
830  IF (SIZE(c2_tmp%local_data, 2) == ndim) THEN
831 ! a_loc => v_mat(1)%local_data
832  a_loc => v_tmp%local_data
833  b_loc => c2_tmp%local_data
834  CALL dgemm('T', 'N', nmob, ndim, SIZE(v_tmp%local_data, 1), 1.0_dp, a_loc(1, 1), &
835  SIZE(v_tmp%local_data, 1), b_loc(1, 1), SIZE(v_tmp%local_data, 1), &
836  0.0_dp, q_mat(1, 1), nmob)
837  END IF
838  CALL hc%matrix_struct%para_env%sum(q_mat)
839 
840  ALLOCATE (itaken(ndim))
841  itaken = 0
842  DO it = 1, nmob
843  vmax = 0.0_dp
844  !select index ik
845  DO jt = 1, ndim
846  IF (itaken(jt) == 0 .AND. abs(q_mat(it, jt)) > vmax) THEN
847  vmax = abs(q_mat(it, jt))
848  ik = jt
849  END IF
850  END DO
851  itaken(ik) = 1
852 
853  CALL cp_fm_to_fm(c2_tmp, v_mat(1), 1, ik, it)
854  END DO
855  DEALLOCATE (itaken)
856  DEALLOCATE (q_mat)
857 
858  !Copy in the converged set to enlarge the converged subspace
859  CALL cp_fm_to_fm(v_mat(1), c0, nmob, 1, (nmo_converged + imo_low))
860  CALL timestop(hand4)
861 
862  CALL cp_fm_release(c2_tmp)
863  CALL cp_fm_release(c_tmp)
864  CALL cp_fm_release(hc)
865  CALL cp_fm_release(v_tmp)
866  CALL cp_fm_release(b_mat)
867 
868  DEALLOCATE (t_mat)
869  DEALLOCATE (a_block)
870  DEALLOCATE (b_block)
871 
872  CALL cp_fm_release(v_mat)
873 
874  END DO ! ib
875 
876  CALL timeset(routinen//"_ortho", hand5)
877  CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, c0, rzero, chc)
878 
879  CALL cp_fm_cholesky_decompose(chc, nmo)
880  CALL cp_fm_triangular_multiply(chc, c0, 'R', invert_tr=.true.)
881  CALL timestop(hand5)
882  ELSE
883  ! Do nothing
884  END IF
885 
886  CALL timestop(handle)
887  END SUBROUTINE lanczos_refinement_2v
888 
889 END MODULE qs_scf_lanczos
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
Definition: grid_common.h:117
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
basic linear algebra operations for full matrices
subroutine, public cp_fm_column_scale(matrixa, scaling)
scales column i of matrix a with scaling(i)
subroutine, public cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col)
performs a QR factorization of the input rectangular matrix A or of a submatrix of A the computed upp...
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_decompose(matrix, n, info_out)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
Definition: cp_fm_diag.F:17
subroutine, public choose_eigv_solver(matrix, eigenvectors, eigenvalues, info)
Choose the Eigensolver depending on which library is available ELPA seems to be unstable for small sy...
Definition: cp_fm_diag.F:208
represent the structure of a full matrix
Definition: cp_fm_struct.F:14
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
Definition: cp_fm_struct.F:125
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
Definition: cp_fm_struct.F:320
represent a full matrix distributed on many processors
Definition: cp_fm_types.F:15
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
Definition: cp_fm_types.F:768
subroutine, public cp_fm_vectorsnorm(matrix, norm_array)
find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
Definition: cp_fm_types.F:1207
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
Definition: cp_fm_types.F:535
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
Definition: cp_fm_types.F:901
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
Definition: cp_fm_types.F:167
various routines to log and control the output. The idea is that decisions about where to log should ...
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public dp
Definition: kinds.F:34
basic linear algebra operations for full matrixes
Definition and initialisation of the mo data type.
Definition: qs_mo_types.F:22
subroutine, public get_mo_set(mo_set, maxocc, homo, lfomo, nao, nelectron, n_el_f, nmo, eigenvalues, occupation_numbers, mo_coeff, mo_coeff_b, uniform_occupation, kTS, mu, flexible_electron_count)
Get the components of a MO set data structure.
Definition: qs_mo_types.F:397
module that contains the algorithms to perform an itrative diagonalization by the block-Lanczos appro...
subroutine, public krylov_space_allocate(krylov_space, scf_control, mos)
allocates matrices and vectros used in the construction of the krylov space and for the lanczos refin...
subroutine, public lanczos_refinement(krylov_space, ks, c0, c1, eval, nao, eps_iter, ispin, check_moconv_only)
lanczos refinement by blocks of not-converged MOs
subroutine, public lanczos_refinement_2v(krylov_space, ks, c0, c1, eval, nao, eps_iter, ispin, check_moconv_only)
...
module that contains the definitions of the scf types
Definition: qs_scf_types.F:14
parameters that control an scf iteration