(git:ab76537)
Loading...
Searching...
No Matches
qs_scf_lanczos.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief module that contains the algorithms to perform an iterative
10!> diagonalization by the block-Lanczos approach
11!> \par History
12!> 05.2009 created [MI]
13!> \author fawzi
14! **************************************************************************************************
16
27 USE cp_fm_types, ONLY: cp_fm_create,&
36 USE kinds, ONLY: dp
38 USE qs_mo_types, ONLY: get_mo_set,&
42#include "./base/base_uses.f90"
43
44 IMPLICIT NONE
45 PRIVATE
46 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_scf_lanczos'
47
49
50CONTAINS
51
52! **************************************************************************************************
53
54! **************************************************************************************************
55!> \brief allocates matrices and vectors used in the construction of
56!> the krylov space and for the lanczos refinement
57!> \param krylov_space ...
58!> \param scf_control ...
59!> \param mos ...
60!> \param
61!> \par History
62!> 05.2009 created [MI]
63! **************************************************************************************************
64
65 SUBROUTINE krylov_space_allocate(krylov_space, scf_control, mos)
66
67 TYPE(krylov_space_type), INTENT(INOUT) :: krylov_space
68 TYPE(scf_control_type), POINTER :: scf_control
69 TYPE(mo_set_type), DIMENSION(:), INTENT(IN) :: mos
70
71 CHARACTER(LEN=*), PARAMETER :: routinen = 'krylov_space_allocate'
72
73 INTEGER :: handle, ik, ispin, max_nmo, nao, nblock, &
74 ndim, nk, nmo, nspin
75 TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
76 TYPE(cp_fm_type), POINTER :: mo_coeff
77
78 CALL timeset(routinen, handle)
79
80 IF (.NOT. ASSOCIATED(krylov_space%mo_conv)) THEN
81 NULLIFY (fm_struct_tmp, mo_coeff)
82
83 krylov_space%nkrylov = scf_control%diagonalization%nkrylov
84 krylov_space%nblock = scf_control%diagonalization%nblock_krylov
85
86 nk = krylov_space%nkrylov
87 nblock = krylov_space%nblock
88 nspin = SIZE(mos, 1)
89
90 ALLOCATE (krylov_space%mo_conv(nspin))
91 ALLOCATE (krylov_space%mo_refine(nspin))
92 ALLOCATE (krylov_space%chc_mat(nspin))
93 ALLOCATE (krylov_space%c_vec(nspin))
94 max_nmo = 0
95 DO ispin = 1, nspin
96 CALL get_mo_set(mos(ispin), mo_coeff=mo_coeff, nao=nao, nmo=nmo)
97 CALL cp_fm_create(krylov_space%mo_conv(ispin), mo_coeff%matrix_struct)
98 CALL cp_fm_create(krylov_space%mo_refine(ispin), mo_coeff%matrix_struct)
99 NULLIFY (fm_struct_tmp)
100 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmo, ncol_global=nmo, &
101 para_env=mo_coeff%matrix_struct%para_env, &
102 context=mo_coeff%matrix_struct%context)
103 CALL cp_fm_create(krylov_space%chc_mat(ispin), fm_struct_tmp, "chc")
104 CALL cp_fm_create(krylov_space%c_vec(ispin), fm_struct_tmp, "vec")
105 CALL cp_fm_struct_release(fm_struct_tmp)
106 max_nmo = max(max_nmo, nmo)
107 END DO
108
109 !the use of max_nmo might not be ok, in this case allocate nspin matrices
110 ALLOCATE (krylov_space%c_eval(max_nmo))
111
112 ALLOCATE (krylov_space%v_mat(nk))
113
114 NULLIFY (fm_struct_tmp)
115 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nblock, &
116 para_env=mo_coeff%matrix_struct%para_env, &
117 context=mo_coeff%matrix_struct%context)
118 DO ik = 1, nk
119 CALL cp_fm_create(krylov_space%v_mat(ik), matrix_struct=fm_struct_tmp, &
120 name="v_mat_"//trim(adjustl(cp_to_string(ik))))
121 END DO
122 ALLOCATE (krylov_space%tmp_mat)
123 CALL cp_fm_create(krylov_space%tmp_mat, matrix_struct=fm_struct_tmp, &
124 name="tmp_mat")
125 CALL cp_fm_struct_release(fm_struct_tmp)
126
127 ! NOTE: the following matrices are small and could be defined
128! as standard array rather than distributed fm
129 NULLIFY (fm_struct_tmp)
130 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nblock, ncol_global=nblock, &
131 para_env=mo_coeff%matrix_struct%para_env, &
132 context=mo_coeff%matrix_struct%context)
133 ALLOCATE (krylov_space%block1_mat)
134 CALL cp_fm_create(krylov_space%block1_mat, matrix_struct=fm_struct_tmp, &
135 name="a_mat_"//trim(adjustl(cp_to_string(ik))))
136 ALLOCATE (krylov_space%block2_mat)
137 CALL cp_fm_create(krylov_space%block2_mat, matrix_struct=fm_struct_tmp, &
138 name="b_mat_"//trim(adjustl(cp_to_string(ik))))
139 ALLOCATE (krylov_space%block3_mat)
140 CALL cp_fm_create(krylov_space%block3_mat, matrix_struct=fm_struct_tmp, &
141 name="b2_mat_"//trim(adjustl(cp_to_string(ik))))
142 CALL cp_fm_struct_release(fm_struct_tmp)
143
144 ndim = nblock*nk
145 NULLIFY (fm_struct_tmp)
146 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ndim, ncol_global=ndim, &
147 para_env=mo_coeff%matrix_struct%para_env, &
148 context=mo_coeff%matrix_struct%context)
149 ALLOCATE (krylov_space%block4_mat)
150 CALL cp_fm_create(krylov_space%block4_mat, matrix_struct=fm_struct_tmp, &
151 name="t_mat")
152 ALLOCATE (krylov_space%block5_mat)
153 CALL cp_fm_create(krylov_space%block5_mat, matrix_struct=fm_struct_tmp, &
154 name="t_vec")
155 CALL cp_fm_struct_release(fm_struct_tmp)
156 ALLOCATE (krylov_space%t_eval(ndim))
157
158 ELSE
159 !Nothing should be done
160 END IF
161
162 CALL timestop(handle)
163
164 END SUBROUTINE krylov_space_allocate
165
166! **************************************************************************************************
167!> \brief lanczos refinement by blocks of non-converged MOs
168!> \param krylov_space ...
169!> \param ks ...
170!> \param c0 ...
171!> \param c1 ...
172!> \param eval ...
173!> \param nao ...
174!> \param eps_iter ...
175!> \param ispin ...
176!> \param check_moconv_only ...
177!> \param
178!> \par History
179!> 05.2009 created [MI]
180! **************************************************************************************************
181
182 SUBROUTINE lanczos_refinement(krylov_space, ks, c0, c1, eval, nao, &
183 eps_iter, ispin, check_moconv_only)
184
185 TYPE(krylov_space_type), POINTER :: krylov_space
186 TYPE(cp_fm_type), INTENT(IN) :: ks, c0, c1
187 REAL(dp), DIMENSION(:), POINTER :: eval
188 INTEGER, INTENT(IN) :: nao
189 REAL(dp), INTENT(IN) :: eps_iter
190 INTEGER, INTENT(IN) :: ispin
191 LOGICAL, INTENT(IN), OPTIONAL :: check_moconv_only
192
193 CHARACTER(LEN=*), PARAMETER :: routinen = 'lanczos_refinement'
194 REAL(kind=dp), PARAMETER :: rmone = -1.0_dp, rone = 1.0_dp, &
195 rzero = 0.0_dp
196
197 INTEGER :: hand1, hand2, hand3, hand4, hand5, handle, ib, ik, imo, imo_low, imo_up, it, jt, &
198 nblock, ndim, nmo, nmo_converged, nmo_nc, nmob, num_blocks
199 INTEGER, ALLOCATABLE, DIMENSION(:) :: itaken
200 LOGICAL :: my_check_moconv_only
201 REAL(dp) :: max_norm, min_norm, vmax
202 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: q_mat, tblock, tvblock
203 REAL(dp), DIMENSION(:), POINTER :: c_res, t_eval
204 TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
205 TYPE(cp_fm_type) :: c2_tmp, c3_tmp, c_tmp, hc
206 TYPE(cp_fm_type), DIMENSION(:), POINTER :: v_mat
207 TYPE(cp_fm_type), POINTER :: a_mat, b2_mat, b_mat, chc, evec, t_mat, &
208 t_vec
209
210 CALL timeset(routinen, handle)
211
212 NULLIFY (fm_struct_tmp)
213 NULLIFY (chc, evec)
214 NULLIFY (c_res, t_eval)
215 NULLIFY (t_mat, t_vec)
216 NULLIFY (a_mat, b_mat, b2_mat, v_mat)
217
218 nmo = SIZE(eval, 1)
219 my_check_moconv_only = .false.
220 IF (PRESENT(check_moconv_only)) my_check_moconv_only = check_moconv_only
221
222 chc => krylov_space%chc_mat(ispin)
223 evec => krylov_space%c_vec(ispin)
224 c_res => krylov_space%c_eval
225 t_eval => krylov_space%t_eval
226
227 NULLIFY (fm_struct_tmp)
228 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmo, &
229 para_env=c0%matrix_struct%para_env, &
230 context=c0%matrix_struct%context)
231 CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
232 name="c_tmp")
233 CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
234 name="hc")
235 CALL cp_fm_struct_release(fm_struct_tmp)
236
237 !Compute (C^t)HC
238 CALL parallel_gemm('N', 'N', nao, nmo, nao, rone, ks, c0, rzero, hc)
239 CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, hc, rzero, chc)
240
241 !Diagonalize (C^t)HC
242 CALL timeset(routinen//"diag_chc", hand1)
243 CALL choose_eigv_solver(chc, evec, eval)
244 CALL timestop(hand1)
245
246 !Rotate the C vectors
247 CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, c0, evec, rzero, c1)
248
249 !Check for converged states
250 CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, hc, evec, rzero, c_tmp)
251 CALL cp_fm_to_fm(c1, c0, nmo, 1, 1)
252 CALL cp_fm_column_scale(c1, eval)
253 CALL cp_fm_scale_and_add(1.0_dp, c_tmp, rmone, c1)
254 CALL cp_fm_vectorsnorm(c_tmp, c_res)
255
256 nmo_converged = 0
257 nmo_nc = 0
258 max_norm = 0.0_dp
259 min_norm = 1.e10_dp
260 CALL cp_fm_set_all(c1, rzero)
261 DO imo = 1, nmo
262 max_norm = max(max_norm, c_res(imo))
263 min_norm = min(min_norm, c_res(imo))
264 END DO
265 DO imo = 1, nmo
266 IF (c_res(imo) <= eps_iter) THEN
267 nmo_converged = nmo_converged + 1
268 ELSE
269 nmo_nc = nmo - nmo_converged
270 EXIT
271 END IF
272 END DO
273
274 nblock = krylov_space%nblock
275 num_blocks = nmo_nc/nblock
276
277 krylov_space%nmo_nc = nmo_nc
278 krylov_space%nmo_conv = nmo_converged
279 krylov_space%max_res_norm = max_norm
280 krylov_space%min_res_norm = min_norm
281
282 IF (my_check_moconv_only) THEN
283 CALL cp_fm_release(c_tmp)
284 CALL cp_fm_release(hc)
285 CALL timestop(handle)
286 RETURN
287 ELSE IF (krylov_space%nmo_nc > 0) THEN
288
289 CALL cp_fm_to_fm(c0, c1, nmo_nc, nmo_converged + 1, 1)
290
291 nblock = krylov_space%nblock
292 IF (modulo(nmo_nc, nblock) > 0.0_dp) THEN
293 num_blocks = nmo_nc/nblock + 1
294 ELSE
295 num_blocks = nmo_nc/nblock
296 END IF
297
298 DO ib = 1, num_blocks
299
300 imo_low = (ib - 1)*nblock + 1
301 imo_up = min(ib*nblock, nmo_nc)
302 nmob = imo_up - imo_low + 1
303 ndim = krylov_space%nkrylov*nmob
304
305 NULLIFY (fm_struct_tmp)
306 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=ndim, &
307 para_env=c0%matrix_struct%para_env, &
308 context=c0%matrix_struct%context)
309 CALL cp_fm_create(c2_tmp, matrix_struct=fm_struct_tmp, &
310 name="c2_tmp")
311 CALL cp_fm_struct_release(fm_struct_tmp)
312 NULLIFY (fm_struct_tmp)
313 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=ndim, &
314 para_env=c0%matrix_struct%para_env, &
315 context=c0%matrix_struct%context)
316 CALL cp_fm_create(c3_tmp, matrix_struct=fm_struct_tmp, &
317 name="c3_tmp")
318 CALL cp_fm_struct_release(fm_struct_tmp)
319
320 ! Create local matrix of right size
321 IF (nmob /= nblock) THEN
322 NULLIFY (a_mat, b_mat, b2_mat, t_mat, t_vec, v_mat)
323 ALLOCATE (a_mat, b_mat, b2_mat, t_mat, t_vec)
324 NULLIFY (fm_struct_tmp)
325 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=nmob, &
326 para_env=chc%matrix_struct%para_env, &
327 context=chc%matrix_struct%context)
328 CALL cp_fm_create(a_mat, matrix_struct=fm_struct_tmp, &
329 name="a_mat")
330 CALL cp_fm_create(b_mat, matrix_struct=fm_struct_tmp, &
331 name="b_mat")
332 CALL cp_fm_create(b2_mat, matrix_struct=fm_struct_tmp, &
333 name="b2_mat")
334 CALL cp_fm_struct_release(fm_struct_tmp)
335 NULLIFY (fm_struct_tmp)
336 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ndim, ncol_global=ndim, &
337 para_env=chc%matrix_struct%para_env, &
338 context=chc%matrix_struct%context)
339 CALL cp_fm_create(t_mat, matrix_struct=fm_struct_tmp, &
340 name="t_mat")
341 CALL cp_fm_create(t_vec, matrix_struct=fm_struct_tmp, &
342 name="t_vec")
343 CALL cp_fm_struct_release(fm_struct_tmp)
344 NULLIFY (fm_struct_tmp)
345 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
346 para_env=c0%matrix_struct%para_env, &
347 context=c0%matrix_struct%context)
348 ALLOCATE (v_mat(krylov_space%nkrylov))
349 DO ik = 1, krylov_space%nkrylov
350 CALL cp_fm_create(v_mat(ik), matrix_struct=fm_struct_tmp, &
351 name="v_mat")
352 END DO
353 CALL cp_fm_struct_release(fm_struct_tmp)
354 ELSE
355 a_mat => krylov_space%block1_mat
356 b_mat => krylov_space%block2_mat
357 b2_mat => krylov_space%block3_mat
358 t_mat => krylov_space%block4_mat
359 t_vec => krylov_space%block5_mat
360 v_mat => krylov_space%v_mat
361 END IF
362
363 ALLOCATE (tblock(nmob, nmob))
364 ALLOCATE (tvblock(nmob, ndim))
365
366 CALL timeset(routinen//"_kry_loop", hand2)
367 CALL cp_fm_set_all(b_mat, rzero)
368 CALL cp_fm_set_all(t_mat, rzero)
369 CALL cp_fm_to_fm(c1, v_mat(1), nmob, imo_low, 1)
370
371 !Compute A =(V^t)HV
372 CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(1), rzero, hc)
373 CALL parallel_gemm('T', 'N', nmob, nmob, nao, rone, v_mat(1), hc, &
374 rzero, a_mat)
375
376 !Compute the residual matrix R for next
377 !factorisation
378 CALL parallel_gemm('N', 'N', nao, nmob, nmob, rone, v_mat(1), a_mat, &
379 rzero, c_tmp)
380 CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
381
382 ! Build the block tridiagonal matrix
383 CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
384 CALL cp_fm_set_submatrix(t_mat, tblock, 1, 1, nmob, nmob)
385
386 DO ik = 2, krylov_space%nkrylov
387
388 ! Call lapack for QR factorization
389 CALL cp_fm_set_all(b_mat, rzero)
390 CALL cp_fm_to_fm(c_tmp, v_mat(ik), nmob, 1, 1)
391 CALL cp_fm_qr_factorization(c_tmp, b_mat, nao, nmob, 1, 1)
392
393 CALL cp_fm_triangular_multiply(b_mat, v_mat(ik), side="R", invert_tr=.true., &
394 n_rows=nao, n_cols=nmob)
395
396 !Compute A =(V^t)HV
397 CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(ik), rzero, hc)
398 CALL parallel_gemm('T', 'N', nmob, nmob, nao, rone, v_mat(ik), hc, rzero, a_mat)
399
400 !Compute the !residual matrix R !for next !factorisation
401 CALL parallel_gemm('N', 'N', nao, nmob, nmob, rone, v_mat(ik), a_mat, &
402 rzero, c_tmp)
403 CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
404 CALL cp_fm_to_fm(v_mat(ik - 1), hc, nmob, 1, 1)
405 CALL cp_fm_triangular_multiply(b_mat, hc, side='R', transpose_tr=.true., &
406 n_rows=nao, n_cols=nmob, alpha=rmone)
407 CALL cp_fm_scale_and_add(rone, c_tmp, rone, hc)
408
409 ! Build the block tridiagonal matrix
410 it = (ik - 2)*nmob + 1
411 jt = (ik - 1)*nmob + 1
412
413 CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
414 CALL cp_fm_set_submatrix(t_mat, tblock, jt, jt, nmob, nmob)
415 CALL cp_fm_transpose(b_mat, a_mat)
416 CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
417 CALL cp_fm_set_submatrix(t_mat, tblock, it, jt, nmob, nmob)
418
419 END DO ! ik
420 CALL timestop(hand2)
421
422 DEALLOCATE (tblock)
423
424 CALL timeset(routinen//"_diag_tri", hand3)
425
426 CALL choose_eigv_solver(t_mat, t_vec, t_eval)
427 ! Diagonalize the block-tridiagonal matrix
428 CALL timestop(hand3)
429
430 CALL timeset(routinen//"_build_cnew", hand4)
431! !Compute the refined vectors
432 CALL cp_fm_set_all(c2_tmp, rzero)
433 DO ik = 1, krylov_space%nkrylov
434 jt = (ik - 1)*nmob
435 CALL parallel_gemm('N', 'N', nao, ndim, nmob, rone, v_mat(ik), t_vec, rone, c2_tmp, &
436 b_first_row=(jt + 1))
437 END DO
438 DEALLOCATE (tvblock)
439
440 CALL cp_fm_set_all(c3_tmp, rzero)
441 CALL parallel_gemm('T', 'N', nmob, ndim, nao, rone, v_mat(1), c2_tmp, rzero, c3_tmp)
442
443 !Try to avoid linear dependencies
444 ALLOCATE (q_mat(nmob, ndim))
445 !get max
446 CALL cp_fm_get_submatrix(c3_tmp, q_mat, 1, 1, nmob, ndim)
447
448 ALLOCATE (itaken(ndim))
449 itaken = 0
450 DO it = 1, nmob
451 vmax = 0.0_dp
452 !select index ik
453 DO jt = 1, ndim
454 IF (itaken(jt) == 0 .AND. abs(q_mat(it, jt)) > vmax) THEN
455 vmax = abs(q_mat(it, jt))
456 ik = jt
457 END IF
458 END DO
459 itaken(ik) = 1
460
461 CALL cp_fm_to_fm(c2_tmp, v_mat(1), 1, ik, it)
462 END DO
463 DEALLOCATE (itaken)
464 DEALLOCATE (q_mat)
465
466 !Copy in the converged set to enlarge the converged subspace
467 CALL cp_fm_to_fm(v_mat(1), c0, nmob, 1, (nmo_converged + imo_low))
468 CALL timestop(hand4)
469
470 IF (nmob < nblock) THEN
471 CALL cp_fm_release(a_mat)
472 CALL cp_fm_release(b_mat)
473 CALL cp_fm_release(b2_mat)
474 CALL cp_fm_release(t_mat)
475 CALL cp_fm_release(t_vec)
476 DEALLOCATE (a_mat, b_mat, b2_mat, t_mat, t_vec)
477 CALL cp_fm_release(v_mat)
478 END IF
479 CALL cp_fm_release(c2_tmp)
480 CALL cp_fm_release(c3_tmp)
481 END DO ! ib
482
483 CALL timeset(routinen//"_ortho", hand5)
484 CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, c0, rzero, chc)
485
486 CALL cp_fm_cholesky_decompose(chc, nmo)
487 CALL cp_fm_triangular_multiply(chc, c0, 'R', invert_tr=.true.)
488 CALL timestop(hand5)
489
490 CALL cp_fm_release(c_tmp)
491 CALL cp_fm_release(hc)
492 ELSE
493 CALL cp_fm_release(c_tmp)
494 CALL cp_fm_release(hc)
495 CALL timestop(handle)
496 RETURN
497 END IF
498
499 CALL timestop(handle)
500
501 END SUBROUTINE lanczos_refinement
502
503!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
504
505! **************************************************************************************************
506!> \brief ...
507!> \param krylov_space ...
508!> \param ks ...
509!> \param c0 ...
510!> \param c1 ...
511!> \param eval ...
512!> \param nao ...
513!> \param eps_iter ...
514!> \param ispin ...
515!> \param check_moconv_only ...
516! **************************************************************************************************
517 SUBROUTINE lanczos_refinement_2v(krylov_space, ks, c0, c1, eval, nao, &
518 eps_iter, ispin, check_moconv_only)
519
520 TYPE(krylov_space_type), POINTER :: krylov_space
521 TYPE(cp_fm_type), INTENT(IN) :: ks, c0, c1
522 REAL(dp), DIMENSION(:), POINTER :: eval
523 INTEGER, INTENT(IN) :: nao
524 REAL(dp), INTENT(IN) :: eps_iter
525 INTEGER, INTENT(IN) :: ispin
526 LOGICAL, INTENT(IN), OPTIONAL :: check_moconv_only
527
528 CHARACTER(LEN=*), PARAMETER :: routinen = 'lanczos_refinement_2v'
529 REAL(kind=dp), PARAMETER :: rmone = -1.0_dp, rone = 1.0_dp, &
530 rzero = 0.0_dp
531
532 INTEGER :: hand1, hand2, hand3, hand4, hand5, hand6, handle, i, ia, ib, ik, imo, imo_low, &
533 imo_up, info, it, j, jt, liwork, lwork, nblock, ndim, nmo, nmo_converged, nmo_nc, nmob, &
534 num_blocks
535 INTEGER, ALLOCATABLE, DIMENSION(:) :: itaken
536 INTEGER, DIMENSION(:), POINTER :: iwork
537 LOGICAL :: my_check_moconv_only
538 REAL(dp) :: max_norm, min_norm, vmax
539 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: a_block, b_block, q_mat, t_mat
540 REAL(dp), DIMENSION(:), POINTER :: c_res, t_eval
541 REAL(kind=dp), DIMENSION(:), POINTER :: work
542 REAL(kind=dp), DIMENSION(:, :), POINTER :: a_loc, b_loc
543 TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
544 TYPE(cp_fm_type) :: b_mat, c2_tmp, c_tmp, hc, v_tmp
545 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: v_mat
546 TYPE(cp_fm_type), POINTER :: chc, evec
547
548 CALL timeset(routinen, handle)
549
550 NULLIFY (fm_struct_tmp)
551 NULLIFY (chc, evec)
552 NULLIFY (c_res, t_eval)
553 NULLIFY (b_loc, a_loc)
554
555 nmo = SIZE(eval, 1)
556 my_check_moconv_only = .false.
557 IF (PRESENT(check_moconv_only)) my_check_moconv_only = check_moconv_only
558
559 chc => krylov_space%chc_mat(ispin)
560 evec => krylov_space%c_vec(ispin)
561 c_res => krylov_space%c_eval
562 t_eval => krylov_space%t_eval
563
564 NULLIFY (fm_struct_tmp)
565 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmo, &
566 para_env=c0%matrix_struct%para_env, &
567 context=c0%matrix_struct%context)
568 CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
569 name="c_tmp")
570 CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
571 name="hc")
572 CALL cp_fm_struct_release(fm_struct_tmp)
573
574 !Compute (C^t)HC
575 CALL parallel_gemm('N', 'N', nao, nmo, nao, rone, ks, c0, rzero, hc)
576 CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, hc, rzero, chc)
577
578 !Diagonalize (C^t)HC
579 CALL timeset(routinen//"diag_chc", hand1)
580 CALL choose_eigv_solver(chc, evec, eval)
581
582 CALL timestop(hand1)
583
584 CALL timeset(routinen//"check_conv", hand6)
585 !Rotate the C vectors
586 CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, c0, evec, rzero, c1)
587
588 !Check for converged states
589 CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, hc, evec, rzero, c_tmp)
590 CALL cp_fm_to_fm(c1, c0, nmo, 1, 1)
591 CALL cp_fm_column_scale(c1, eval)
592 CALL cp_fm_scale_and_add(1.0_dp, c_tmp, rmone, c1)
593 CALL cp_fm_vectorsnorm(c_tmp, c_res)
594
595 nmo_converged = 0
596 nmo_nc = 0
597 max_norm = 0.0_dp
598 min_norm = 1.e10_dp
599 CALL cp_fm_set_all(c1, rzero)
600 DO imo = 1, nmo
601 max_norm = max(max_norm, c_res(imo))
602 min_norm = min(min_norm, c_res(imo))
603 END DO
604 DO imo = 1, nmo
605 IF (c_res(imo) <= eps_iter) THEN
606 nmo_converged = nmo_converged + 1
607 ELSE
608 nmo_nc = nmo - nmo_converged
609 EXIT
610 END IF
611 END DO
612 CALL timestop(hand6)
613
614 CALL cp_fm_release(c_tmp)
615 CALL cp_fm_release(hc)
616
617 krylov_space%nmo_nc = nmo_nc
618 krylov_space%nmo_conv = nmo_converged
619 krylov_space%max_res_norm = max_norm
620 krylov_space%min_res_norm = min_norm
621
622 IF (my_check_moconv_only) THEN
623 ! Do nothing
624 ELSE IF (krylov_space%nmo_nc > 0) THEN
625
626 CALL cp_fm_to_fm(c0, c1, nmo_nc, nmo_converged + 1, 1)
627
628 nblock = krylov_space%nblock
629 IF (modulo(nmo_nc, nblock) > 0.0_dp) THEN
630 num_blocks = nmo_nc/nblock + 1
631 ELSE
632 num_blocks = nmo_nc/nblock
633 END IF
634
635 DO ib = 1, num_blocks
636
637 imo_low = (ib - 1)*nblock + 1
638 imo_up = min(ib*nblock, nmo_nc)
639 nmob = imo_up - imo_low + 1
640 ndim = krylov_space%nkrylov*nmob
641
642 ! Allocation
643 CALL timeset(routinen//"alloc", hand6)
644 ALLOCATE (a_block(nmob, nmob))
645 ALLOCATE (b_block(nmob, nmob))
646 ALLOCATE (t_mat(ndim, ndim))
647
648 NULLIFY (fm_struct_tmp)
649 ! by forcing ncol_block=nmo, the needed part of the matrix is distributed on a subset of processes
650 ! this is due to the use of two-dimensional grids of processes
651 ! nrow_global is distributed over num_pe(1)
652 ! a local_data array is anyway allocated for the processes non included
653 ! this should have a minimum size
654 ! with ncol_local=1.
655 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
656 ncol_block=nmob, &
657 para_env=c0%matrix_struct%para_env, &
658 context=c0%matrix_struct%context, &
659 force_block=.true.)
660 CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
661 name="c_tmp")
662 CALL cp_fm_set_all(c_tmp, rzero)
663 CALL cp_fm_create(v_tmp, matrix_struct=fm_struct_tmp, &
664 name="v_tmp")
665 CALL cp_fm_set_all(v_tmp, rzero)
666 CALL cp_fm_struct_release(fm_struct_tmp)
667 NULLIFY (fm_struct_tmp)
668 ALLOCATE (v_mat(krylov_space%nkrylov))
669 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
670 ncol_block=nmob, &
671 para_env=c0%matrix_struct%para_env, &
672 context=c0%matrix_struct%context, &
673 force_block=.true.)
674 DO ik = 1, krylov_space%nkrylov
675 CALL cp_fm_create(v_mat(ik), matrix_struct=fm_struct_tmp, &
676 name="v_mat")
677 END DO
678 CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
679 name="hc")
680 CALL cp_fm_struct_release(fm_struct_tmp)
681 NULLIFY (fm_struct_tmp)
682 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=ndim, &
683 ncol_block=ndim, &
684 para_env=c0%matrix_struct%para_env, &
685 context=c0%matrix_struct%context, &
686 force_block=.true.)
687 CALL cp_fm_create(c2_tmp, matrix_struct=fm_struct_tmp, &
688 name="c2_tmp")
689 CALL cp_fm_struct_release(fm_struct_tmp)
690
691 NULLIFY (fm_struct_tmp)
692 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=nmob, &
693 para_env=c0%matrix_struct%para_env, &
694 context=c0%matrix_struct%context)
695 CALL cp_fm_create(b_mat, matrix_struct=fm_struct_tmp, &
696 name="b_mat")
697 CALL cp_fm_struct_release(fm_struct_tmp)
698 CALL timestop(hand6)
699 !End allocation
700
701 CALL cp_fm_set_all(b_mat, rzero)
702 CALL cp_fm_to_fm(c1, v_mat(1), nmob, imo_low, 1)
703
704 ! Here starts the construction of krylov space
705 CALL timeset(routinen//"_kry_loop", hand2)
706 !Compute A =(V^t)HV
707 CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(1), rzero, hc)
708
709 a_block = 0.0_dp
710 a_loc => v_mat(1)%local_data
711 b_loc => hc%local_data
712
713 IF (SIZE(hc%local_data, 2) == nmob) THEN
714 ! this is a work around to avoid problems due to the two dimensional grid of processes
715 CALL dgemm('T', 'N', nmob, nmob, SIZE(hc%local_data, 1), 1.0_dp, a_loc(1, 1), &
716 SIZE(hc%local_data, 1), b_loc(1, 1), SIZE(hc%local_data, 1), 0.0_dp, a_block(1, 1), nmob)
717 END IF
718 CALL hc%matrix_struct%para_env%sum(a_block)
719
720 !Compute the residual matrix R for next
721 !factorisation
722 c_tmp%local_data = 0.0_dp
723 IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
724 b_loc => c_tmp%local_data
725 CALL dgemm('N', 'N', SIZE(c_tmp%local_data, 1), nmob, nmob, 1.0_dp, a_loc(1, 1), &
726 SIZE(c_tmp%local_data, 1), a_block(1, 1), nmob, 0.0_dp, &
727 b_loc(1, 1), SIZE(c_tmp%local_data, 1))
728 END IF
729 CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
730
731 ! Build the block tridiagonal matrix
732 t_mat = 0.0_dp
733 DO i = 1, nmob
734 t_mat(1:nmob, i) = a_block(1:nmob, i)
735 END DO
736
737 DO ik = 2, krylov_space%nkrylov
738 ! Call lapack for QR factorization
739 CALL cp_fm_set_all(b_mat, rzero)
740 CALL cp_fm_to_fm(c_tmp, v_mat(ik), nmob, 1, 1)
741 CALL cp_fm_qr_factorization(c_tmp, b_mat, nao, nmob, 1, 1)
742
743 CALL cp_fm_triangular_multiply(b_mat, v_mat(ik), side="R", invert_tr=.true., &
744 n_rows=nao, n_cols=nmob)
745
746 CALL cp_fm_get_submatrix(b_mat, b_block, 1, 1, nmob, nmob)
747
748 !Compute A =(V^t)HV
749 CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(ik), rzero, hc)
750
751 a_block = 0.0_dp
752 IF (SIZE(hc%local_data, 2) == nmob) THEN
753 a_loc => v_mat(ik)%local_data
754 b_loc => hc%local_data
755 CALL dgemm('T', 'N', nmob, nmob, SIZE(hc%local_data, 1), 1.0_dp, a_loc(1, 1), &
756 SIZE(hc%local_data, 1), b_loc(1, 1), SIZE(hc%local_data, 1), 0.0_dp, a_block(1, 1), nmob)
757 END IF
758 CALL hc%matrix_struct%para_env%sum(a_block)
759
760 !Compute the residual matrix R for next
761 !factorisation
762 c_tmp%local_data = 0.0_dp
763 IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
764 a_loc => v_mat(ik)%local_data
765 b_loc => c_tmp%local_data
766 CALL dgemm('N', 'N', SIZE(c_tmp%local_data, 1), nmob, nmob, 1.0_dp, a_loc(1, 1), &
767 SIZE(c_tmp%local_data, 1), a_block(1, 1), nmob, 0.0_dp, &
768 b_loc(1, 1), SIZE(c_tmp%local_data, 1))
769 END IF
770 CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
771
772 IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
773 a_loc => v_mat(ik - 1)%local_data
774 DO j = 1, nmob
775 DO i = 1, j
776 DO ia = 1, SIZE(c_tmp%local_data, 1)
777 b_loc(ia, i) = b_loc(ia, i) - a_loc(ia, j)*b_block(i, j)
778 END DO
779 END DO
780 END DO
781 END IF
782
783 ! Build the block tridiagonal matrix
784 it = (ik - 2)*nmob
785 jt = (ik - 1)*nmob
786 DO j = 1, nmob
787 t_mat(jt + 1:jt + nmob, jt + j) = a_block(1:nmob, j)
788 DO i = 1, nmob
789 t_mat(it + i, jt + j) = b_block(j, i)
790 t_mat(jt + j, it + i) = b_block(j, i)
791 END DO
792 END DO
793 END DO ! ik
794 CALL timestop(hand2)
795
796 CALL timeset(routinen//"_diag_tri", hand3)
797 lwork = 1 + 6*ndim + 2*ndim**2 + 5000
798 liwork = 5*ndim + 3
799 ALLOCATE (work(lwork))
800 ALLOCATE (iwork(liwork))
801
802 ! Diagonalize the block-tridiagonal matrix
803 CALL dsyevd('V', 'U', ndim, t_mat(1, 1), ndim, t_eval(1), &
804 work(1), lwork, iwork(1), liwork, info)
805 DEALLOCATE (work)
806 DEALLOCATE (iwork)
807 CALL timestop(hand3)
808
809 CALL timeset(routinen//"_build_cnew", hand4)
810! !Compute the refined vectors
811
812 c2_tmp%local_data = 0.0_dp
813 ALLOCATE (q_mat(nmob, ndim))
814 q_mat = 0.0_dp
815 b_loc => c2_tmp%local_data
816 DO ik = 1, krylov_space%nkrylov
817 CALL cp_fm_to_fm(v_mat(ik), v_tmp, nmob, 1, 1)
818 IF (SIZE(c2_tmp%local_data, 2) == ndim) THEN
819! a_loc => v_mat(ik)%local_data
820 a_loc => v_tmp%local_data
821 it = (ik - 1)*nmob
822 CALL dgemm('N', 'N', SIZE(c2_tmp%local_data, 1), ndim, nmob, 1.0_dp, a_loc(1, 1), &
823 SIZE(c2_tmp%local_data, 1), t_mat(it + 1, 1), ndim, 1.0_dp, &
824 b_loc(1, 1), SIZE(c2_tmp%local_data, 1))
825 END IF
826 END DO !ik
827
828 !Try to avoid linear dependencies
829 CALL cp_fm_to_fm(v_mat(1), v_tmp, nmob, 1, 1)
830 IF (SIZE(c2_tmp%local_data, 2) == ndim) THEN
831! a_loc => v_mat(1)%local_data
832 a_loc => v_tmp%local_data
833 b_loc => c2_tmp%local_data
834 CALL dgemm('T', 'N', nmob, ndim, SIZE(v_tmp%local_data, 1), 1.0_dp, a_loc(1, 1), &
835 SIZE(v_tmp%local_data, 1), b_loc(1, 1), SIZE(v_tmp%local_data, 1), &
836 0.0_dp, q_mat(1, 1), nmob)
837 END IF
838 CALL hc%matrix_struct%para_env%sum(q_mat)
839
840 ALLOCATE (itaken(ndim))
841 itaken = 0
842 DO it = 1, nmob
843 vmax = 0.0_dp
844 !select index ik
845 DO jt = 1, ndim
846 IF (itaken(jt) == 0 .AND. abs(q_mat(it, jt)) > vmax) THEN
847 vmax = abs(q_mat(it, jt))
848 ik = jt
849 END IF
850 END DO
851 itaken(ik) = 1
852
853 CALL cp_fm_to_fm(c2_tmp, v_mat(1), 1, ik, it)
854 END DO
855 DEALLOCATE (itaken)
856 DEALLOCATE (q_mat)
857
858 !Copy in the converged set to enlarge the converged subspace
859 CALL cp_fm_to_fm(v_mat(1), c0, nmob, 1, (nmo_converged + imo_low))
860 CALL timestop(hand4)
861
862 CALL cp_fm_release(c2_tmp)
863 CALL cp_fm_release(c_tmp)
864 CALL cp_fm_release(hc)
865 CALL cp_fm_release(v_tmp)
866 CALL cp_fm_release(b_mat)
867
868 DEALLOCATE (t_mat)
869 DEALLOCATE (a_block)
870 DEALLOCATE (b_block)
871
872 CALL cp_fm_release(v_mat)
873
874 END DO ! ib
875
876 CALL timeset(routinen//"_ortho", hand5)
877 CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, c0, rzero, chc)
878
879 CALL cp_fm_cholesky_decompose(chc, nmo)
880 CALL cp_fm_triangular_multiply(chc, c0, 'R', invert_tr=.true.)
881 CALL timestop(hand5)
882 ELSE
883 ! Do nothing
884 END IF
885
886 CALL timestop(handle)
887 END SUBROUTINE lanczos_refinement_2v
888
889END MODULE qs_scf_lanczos
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
Basic linear algebra operations for full matrices.
subroutine, public cp_fm_column_scale(matrixa, scaling)
scales column i of matrix a with scaling(i)
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col, uplo)
performs a QR factorization of the input rectangular matrix A or of a submatrix of A the computed tri...
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_decompose(matrix, n, info_out)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
Definition cp_fm_diag.F:17
subroutine, public choose_eigv_solver(matrix, eigenvectors, eigenvalues, info)
Choose the Eigensolver depending on which library is available ELPA seems to be unstable for small sy...
Definition cp_fm_diag.F:234
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp, nrow, ncol, set_zero)
creates a new full matrix with the given structure
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_vectorsnorm(matrix, norm_array)
find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
various routines to log and control the output. The idea is that decisions about where to log should ...
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
basic linear algebra operations for full matrixes
Definition and initialisation of the mo data type.
Definition qs_mo_types.F:22
subroutine, public get_mo_set(mo_set, maxocc, homo, lfomo, nao, nelectron, n_el_f, nmo, eigenvalues, occupation_numbers, mo_coeff, mo_coeff_b, uniform_occupation, kts, mu, flexible_electron_count)
Get the components of a MO set data structure.
module that contains the algorithms to perform an iterative diagonalization by the block-Lanczos appr...
subroutine, public krylov_space_allocate(krylov_space, scf_control, mos)
allocates matrices and vectors used in the construction of the krylov space and for the lanczos refin...
subroutine, public lanczos_refinement(krylov_space, ks, c0, c1, eval, nao, eps_iter, ispin, check_moconv_only)
lanczos refinement by blocks of non-converged MOs
subroutine, public lanczos_refinement_2v(krylov_space, ks, c0, c1, eval, nao, eps_iter, ispin, check_moconv_only)
...
module that contains the definitions of the scf types
parameters that control an scf iteration
keeps the information about the structure of a full matrix
represent a full matrix
wrapper for temporary and cached objects used in the scf iteration