(git:374b731)
Loading...
Searching...
No Matches
qs_scf_lanczos.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief module that contains the algorithms to perform an itrative
10!> diagonalization by the block-Lanczos approach
11!> \par History
12!> 05.2009 created [MI]
13!> \author fawzi
14! **************************************************************************************************
16
27 USE cp_fm_types, ONLY: cp_fm_create,&
36 USE kinds, ONLY: dp
38 USE qs_mo_types, ONLY: get_mo_set,&
42#include "./base/base_uses.f90"
43
44 IMPLICIT NONE
45 PRIVATE
46 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_scf_lanczos'
47
49
50CONTAINS
51
52! **************************************************************************************************
53
54! **************************************************************************************************
55!> \brief allocates matrices and vectros used in the construction of
56!> the krylov space and for the lanczos refinement
57!> \param krylov_space ...
58!> \param scf_control ...
59!> \param mos ...
60!> \param
61!> \par History
62!> 05.2009 created [MI]
63! **************************************************************************************************
64
65 SUBROUTINE krylov_space_allocate(krylov_space, scf_control, mos)
66
67 TYPE(krylov_space_type), INTENT(INOUT) :: krylov_space
68 TYPE(scf_control_type), POINTER :: scf_control
69 TYPE(mo_set_type), DIMENSION(:), INTENT(IN) :: mos
70
71 CHARACTER(LEN=*), PARAMETER :: routinen = 'krylov_space_allocate'
72
73 INTEGER :: handle, ik, ispin, max_nmo, nao, nblock, &
74 ndim, nk, nmo, nspin
75 TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
76 TYPE(cp_fm_type), POINTER :: mo_coeff
77
78 CALL timeset(routinen, handle)
79
80 IF (.NOT. ASSOCIATED(krylov_space%mo_conv)) THEN
81 NULLIFY (fm_struct_tmp, mo_coeff)
82
83 krylov_space%nkrylov = scf_control%diagonalization%nkrylov
84 krylov_space%nblock = scf_control%diagonalization%nblock_krylov
85
86 nk = krylov_space%nkrylov
87 nblock = krylov_space%nblock
88 nspin = SIZE(mos, 1)
89
90 ALLOCATE (krylov_space%mo_conv(nspin))
91 ALLOCATE (krylov_space%mo_refine(nspin))
92 ALLOCATE (krylov_space%chc_mat(nspin))
93 ALLOCATE (krylov_space%c_vec(nspin))
94 max_nmo = 0
95 DO ispin = 1, nspin
96 CALL get_mo_set(mos(ispin), mo_coeff=mo_coeff, nao=nao, nmo=nmo)
97 CALL cp_fm_create(krylov_space%mo_conv(ispin), mo_coeff%matrix_struct)
98 CALL cp_fm_create(krylov_space%mo_refine(ispin), mo_coeff%matrix_struct)
99 NULLIFY (fm_struct_tmp)
100 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmo, ncol_global=nmo, &
101 para_env=mo_coeff%matrix_struct%para_env, &
102 context=mo_coeff%matrix_struct%context)
103 CALL cp_fm_create(krylov_space%chc_mat(ispin), fm_struct_tmp, "chc")
104 CALL cp_fm_create(krylov_space%c_vec(ispin), fm_struct_tmp, "vec")
105 CALL cp_fm_struct_release(fm_struct_tmp)
106 max_nmo = max(max_nmo, nmo)
107 END DO
108
109 !the use of max_nmo might not be ok, in this case allocate nspin matrices
110 ALLOCATE (krylov_space%c_eval(max_nmo))
111
112 ALLOCATE (krylov_space%v_mat(nk))
113
114 NULLIFY (fm_struct_tmp)
115 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nblock, &
116 para_env=mo_coeff%matrix_struct%para_env, &
117 context=mo_coeff%matrix_struct%context)
118 DO ik = 1, nk
119 CALL cp_fm_create(krylov_space%v_mat(ik), matrix_struct=fm_struct_tmp, &
120 name="v_mat_"//trim(adjustl(cp_to_string(ik))))
121 END DO
122 ALLOCATE (krylov_space%tmp_mat)
123 CALL cp_fm_create(krylov_space%tmp_mat, matrix_struct=fm_struct_tmp, &
124 name="tmp_mat")
125 CALL cp_fm_struct_release(fm_struct_tmp)
126
127 ! NOTE: the following matrices are small and could be defined
128! as standard array rather than istributed fm
129 NULLIFY (fm_struct_tmp)
130 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nblock, ncol_global=nblock, &
131 para_env=mo_coeff%matrix_struct%para_env, &
132 context=mo_coeff%matrix_struct%context)
133 ALLOCATE (krylov_space%block1_mat)
134 CALL cp_fm_create(krylov_space%block1_mat, matrix_struct=fm_struct_tmp, &
135 name="a_mat_"//trim(adjustl(cp_to_string(ik))))
136 ALLOCATE (krylov_space%block2_mat)
137 CALL cp_fm_create(krylov_space%block2_mat, matrix_struct=fm_struct_tmp, &
138 name="b_mat_"//trim(adjustl(cp_to_string(ik))))
139 ALLOCATE (krylov_space%block3_mat)
140 CALL cp_fm_create(krylov_space%block3_mat, matrix_struct=fm_struct_tmp, &
141 name="b2_mat_"//trim(adjustl(cp_to_string(ik))))
142 CALL cp_fm_struct_release(fm_struct_tmp)
143
144 ndim = nblock*nk
145 NULLIFY (fm_struct_tmp)
146 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ndim, ncol_global=ndim, &
147 para_env=mo_coeff%matrix_struct%para_env, &
148 context=mo_coeff%matrix_struct%context)
149 ALLOCATE (krylov_space%block4_mat)
150 CALL cp_fm_create(krylov_space%block4_mat, matrix_struct=fm_struct_tmp, &
151 name="t_mat")
152 ALLOCATE (krylov_space%block5_mat)
153 CALL cp_fm_create(krylov_space%block5_mat, matrix_struct=fm_struct_tmp, &
154 name="t_vec")
155 CALL cp_fm_struct_release(fm_struct_tmp)
156 ALLOCATE (krylov_space%t_eval(ndim))
157
158 ELSE
159 !Nothing should be done
160 END IF
161
162 CALL timestop(handle)
163
164 END SUBROUTINE krylov_space_allocate
165
166! **************************************************************************************************
167!> \brief lanczos refinement by blocks of not-converged MOs
168!> \param krylov_space ...
169!> \param ks ...
170!> \param c0 ...
171!> \param c1 ...
172!> \param eval ...
173!> \param nao ...
174!> \param eps_iter ...
175!> \param ispin ...
176!> \param check_moconv_only ...
177!> \param
178!> \par History
179!> 05.2009 created [MI]
180! **************************************************************************************************
181
182 SUBROUTINE lanczos_refinement(krylov_space, ks, c0, c1, eval, nao, &
183 eps_iter, ispin, check_moconv_only)
184
185 TYPE(krylov_space_type), POINTER :: krylov_space
186 TYPE(cp_fm_type), INTENT(IN) :: ks, c0, c1
187 REAL(dp), DIMENSION(:), POINTER :: eval
188 INTEGER, INTENT(IN) :: nao
189 REAL(dp), INTENT(IN) :: eps_iter
190 INTEGER, INTENT(IN) :: ispin
191 LOGICAL, INTENT(IN), OPTIONAL :: check_moconv_only
192
193 CHARACTER(LEN=*), PARAMETER :: routinen = 'lanczos_refinement'
194 REAL(kind=dp), PARAMETER :: rmone = -1.0_dp, rone = 1.0_dp, &
195 rzero = 0.0_dp
196
197 INTEGER :: hand1, hand2, hand3, hand4, hand5, handle, ib, ik, imo, imo_low, imo_up, it, jt, &
198 nblock, ndim, nmo, nmo_converged, nmo_nc, nmob, num_blocks
199 INTEGER, ALLOCATABLE, DIMENSION(:) :: itaken
200 LOGICAL :: my_check_moconv_only
201 REAL(dp) :: max_norm, min_norm, vmax
202 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: q_mat, tblock, tvblock
203 REAL(dp), DIMENSION(:), POINTER :: c_res, t_eval
204 TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
205 TYPE(cp_fm_type) :: c2_tmp, c3_tmp, c_tmp, hc
206 TYPE(cp_fm_type), DIMENSION(:), POINTER :: v_mat
207 TYPE(cp_fm_type), POINTER :: a_mat, b2_mat, b_mat, chc, evec, t_mat, &
208 t_vec
209
210 CALL timeset(routinen, handle)
211
212 NULLIFY (fm_struct_tmp)
213 NULLIFY (chc, evec)
214 NULLIFY (c_res, t_eval)
215 NULLIFY (t_mat, t_vec)
216 NULLIFY (a_mat, b_mat, b2_mat, v_mat)
217
218 nmo = SIZE(eval, 1)
219 my_check_moconv_only = .false.
220 IF (PRESENT(check_moconv_only)) my_check_moconv_only = check_moconv_only
221
222 chc => krylov_space%chc_mat(ispin)
223 evec => krylov_space%c_vec(ispin)
224 c_res => krylov_space%c_eval
225 t_eval => krylov_space%t_eval
226
227 NULLIFY (fm_struct_tmp)
228 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmo, &
229 para_env=c0%matrix_struct%para_env, &
230 context=c0%matrix_struct%context)
231 CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
232 name="c_tmp")
233 CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
234 name="hc")
235 CALL cp_fm_struct_release(fm_struct_tmp)
236
237 !Compute (C^t)HC
238 CALL parallel_gemm('N', 'N', nao, nmo, nao, rone, ks, c0, rzero, hc)
239 CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, hc, rzero, chc)
240
241 !Diagonalize (C^t)HC
242 CALL timeset(routinen//"diag_chc", hand1)
243 CALL choose_eigv_solver(chc, evec, eval)
244 CALL timestop(hand1)
245
246 !Rotate the C vectors
247 CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, c0, evec, rzero, c1)
248
249 !Check for converged states
250 CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, hc, evec, rzero, c_tmp)
251 CALL cp_fm_to_fm(c1, c0, nmo, 1, 1)
252 CALL cp_fm_column_scale(c1, eval)
253 CALL cp_fm_scale_and_add(1.0_dp, c_tmp, rmone, c1)
254 CALL cp_fm_vectorsnorm(c_tmp, c_res)
255
256 nmo_converged = 0
257 nmo_nc = 0
258 max_norm = 0.0_dp
259 min_norm = 1.e10_dp
260 CALL cp_fm_set_all(c1, rzero)
261 DO imo = 1, nmo
262 max_norm = max(max_norm, c_res(imo))
263 min_norm = min(min_norm, c_res(imo))
264 END DO
265 DO imo = 1, nmo
266 IF (c_res(imo) <= eps_iter) THEN
267 nmo_converged = nmo_converged + 1
268 ELSE
269 nmo_nc = nmo - nmo_converged
270 EXIT
271 END IF
272 END DO
273
274 nblock = krylov_space%nblock
275 num_blocks = nmo_nc/nblock
276
277 krylov_space%nmo_nc = nmo_nc
278 krylov_space%nmo_conv = nmo_converged
279 krylov_space%max_res_norm = max_norm
280 krylov_space%min_res_norm = min_norm
281
282 IF (my_check_moconv_only) THEN
283 CALL cp_fm_release(c_tmp)
284 CALL cp_fm_release(hc)
285 CALL timestop(handle)
286 RETURN
287 ELSE IF (krylov_space%nmo_nc > 0) THEN
288
289 CALL cp_fm_to_fm(c0, c1, nmo_nc, nmo_converged + 1, 1)
290
291 nblock = krylov_space%nblock
292 IF (modulo(nmo_nc, nblock) > 0.0_dp) THEN
293 num_blocks = nmo_nc/nblock + 1
294 ELSE
295 num_blocks = nmo_nc/nblock
296 END IF
297
298 DO ib = 1, num_blocks
299
300 imo_low = (ib - 1)*nblock + 1
301 imo_up = min(ib*nblock, nmo_nc)
302 nmob = imo_up - imo_low + 1
303 ndim = krylov_space%nkrylov*nmob
304
305 NULLIFY (fm_struct_tmp)
306 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=ndim, &
307 para_env=c0%matrix_struct%para_env, &
308 context=c0%matrix_struct%context)
309 CALL cp_fm_create(c2_tmp, matrix_struct=fm_struct_tmp, &
310 name="c2_tmp")
311 CALL cp_fm_struct_release(fm_struct_tmp)
312 NULLIFY (fm_struct_tmp)
313 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=ndim, &
314 para_env=c0%matrix_struct%para_env, &
315 context=c0%matrix_struct%context)
316 CALL cp_fm_create(c3_tmp, matrix_struct=fm_struct_tmp, &
317 name="c3_tmp")
318 CALL cp_fm_struct_release(fm_struct_tmp)
319
320 ! Create local matrix of right size
321 IF (nmob /= nblock) THEN
322 NULLIFY (a_mat, b_mat, b2_mat, t_mat, t_vec, v_mat)
323 ALLOCATE (a_mat, b_mat, b2_mat, t_mat, t_vec)
324 NULLIFY (fm_struct_tmp)
325 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=nmob, &
326 para_env=chc%matrix_struct%para_env, &
327 context=chc%matrix_struct%context)
328 CALL cp_fm_create(a_mat, matrix_struct=fm_struct_tmp, &
329 name="a_mat")
330 CALL cp_fm_create(b_mat, matrix_struct=fm_struct_tmp, &
331 name="b_mat")
332 CALL cp_fm_create(b2_mat, matrix_struct=fm_struct_tmp, &
333 name="b2_mat")
334 CALL cp_fm_struct_release(fm_struct_tmp)
335 NULLIFY (fm_struct_tmp)
336 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ndim, ncol_global=ndim, &
337 para_env=chc%matrix_struct%para_env, &
338 context=chc%matrix_struct%context)
339 CALL cp_fm_create(t_mat, matrix_struct=fm_struct_tmp, &
340 name="t_mat")
341 CALL cp_fm_create(t_vec, matrix_struct=fm_struct_tmp, &
342 name="t_vec")
343 CALL cp_fm_struct_release(fm_struct_tmp)
344 NULLIFY (fm_struct_tmp)
345 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
346 para_env=c0%matrix_struct%para_env, &
347 context=c0%matrix_struct%context)
348 ALLOCATE (v_mat(krylov_space%nkrylov))
349 DO ik = 1, krylov_space%nkrylov
350 CALL cp_fm_create(v_mat(ik), matrix_struct=fm_struct_tmp, &
351 name="v_mat")
352 END DO
353 CALL cp_fm_struct_release(fm_struct_tmp)
354 ELSE
355 a_mat => krylov_space%block1_mat
356 b_mat => krylov_space%block2_mat
357 b2_mat => krylov_space%block3_mat
358 t_mat => krylov_space%block4_mat
359 t_vec => krylov_space%block5_mat
360 v_mat => krylov_space%v_mat
361 END IF
362
363 ALLOCATE (tblock(nmob, nmob))
364 ALLOCATE (tvblock(nmob, ndim))
365
366 CALL timeset(routinen//"_kry_loop", hand2)
367 CALL cp_fm_set_all(b_mat, rzero)
368 CALL cp_fm_set_all(t_mat, rzero)
369 CALL cp_fm_to_fm(c1, v_mat(1), nmob, imo_low, 1)
370
371 !Compute A =(V^t)HV
372 CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(1), rzero, hc)
373 CALL parallel_gemm('T', 'N', nmob, nmob, nao, rone, v_mat(1), hc, &
374 rzero, a_mat)
375
376 !Compute the residual matrix R for next
377 !factorisation
378 CALL parallel_gemm('N', 'N', nao, nmob, nmob, rone, v_mat(1), a_mat, &
379 rzero, c_tmp)
380 CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
381
382 ! Build the block tridiagonal matrix
383 CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
384 CALL cp_fm_set_submatrix(t_mat, tblock, 1, 1, nmob, nmob)
385
386 DO ik = 2, krylov_space%nkrylov
387
388 ! Call lapack for QR factorization
389 CALL cp_fm_set_all(b_mat, rzero)
390 CALL cp_fm_to_fm(c_tmp, v_mat(ik), nmob, 1, 1)
391 CALL cp_fm_qr_factorization(c_tmp, b_mat, nao, nmob, 1, 1)
392
393 CALL cp_fm_triangular_multiply(b_mat, v_mat(ik), side="R", invert_tr=.true., &
394 n_rows=nao, n_cols=nmob)
395
396 !Compute A =(V^t)HV
397 CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(ik), rzero, hc)
398 CALL parallel_gemm('T', 'N', nmob, nmob, nao, rone, v_mat(ik), hc, rzero, a_mat)
399
400 !Compute the !residual matrix R !for next !factorisation
401 CALL parallel_gemm('N', 'N', nao, nmob, nmob, rone, v_mat(ik), a_mat, &
402 rzero, c_tmp)
403 CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
404 CALL cp_fm_to_fm(v_mat(ik - 1), hc, nmob, 1, 1)
405 CALL cp_fm_triangular_multiply(b_mat, hc, side='R', transpose_tr=.true., &
406 n_rows=nao, n_cols=nmob, alpha=rmone)
407 CALL cp_fm_scale_and_add(rone, c_tmp, rone, hc)
408
409 ! Build the block tridiagonal matrix
410 it = (ik - 2)*nmob + 1
411 jt = (ik - 1)*nmob + 1
412
413 CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
414 CALL cp_fm_set_submatrix(t_mat, tblock, jt, jt, nmob, nmob)
415 CALL cp_fm_transpose(b_mat, a_mat)
416 CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
417 CALL cp_fm_set_submatrix(t_mat, tblock, it, jt, nmob, nmob)
418
419 END DO ! ik
420 CALL timestop(hand2)
421
422 DEALLOCATE (tblock)
423
424 CALL timeset(routinen//"_diag_tri", hand3)
425
426 CALL choose_eigv_solver(t_mat, t_vec, t_eval)
427 ! Diagonalize the block-tridiagonal matrix
428 CALL timestop(hand3)
429
430 CALL timeset(routinen//"_build_cnew", hand4)
431! !Compute the refined vectors
432 CALL cp_fm_set_all(c2_tmp, rzero)
433 DO ik = 1, krylov_space%nkrylov
434 jt = (ik - 1)*nmob
435 CALL parallel_gemm('N', 'N', nao, ndim, nmob, rone, v_mat(ik), t_vec, rone, c2_tmp, &
436 b_first_row=(jt + 1))
437 END DO
438 DEALLOCATE (tvblock)
439
440 CALL cp_fm_set_all(c3_tmp, rzero)
441 CALL parallel_gemm('T', 'N', nmob, ndim, nao, rone, v_mat(1), c2_tmp, rzero, c3_tmp)
442
443 !Try to avoid linear dependencies
444 ALLOCATE (q_mat(nmob, ndim))
445 !get max
446 CALL cp_fm_get_submatrix(c3_tmp, q_mat, 1, 1, nmob, ndim)
447
448 ALLOCATE (itaken(ndim))
449 itaken = 0
450 DO it = 1, nmob
451 vmax = 0.0_dp
452 !select index ik
453 DO jt = 1, ndim
454 IF (itaken(jt) == 0 .AND. abs(q_mat(it, jt)) > vmax) THEN
455 vmax = abs(q_mat(it, jt))
456 ik = jt
457 END IF
458 END DO
459 itaken(ik) = 1
460
461 CALL cp_fm_to_fm(c2_tmp, v_mat(1), 1, ik, it)
462 END DO
463 DEALLOCATE (itaken)
464 DEALLOCATE (q_mat)
465
466 !Copy in the converged set to enlarge the converged subspace
467 CALL cp_fm_to_fm(v_mat(1), c0, nmob, 1, (nmo_converged + imo_low))
468 CALL timestop(hand4)
469
470 IF (nmob < nblock) THEN
471 CALL cp_fm_release(a_mat)
472 CALL cp_fm_release(b_mat)
473 CALL cp_fm_release(b2_mat)
474 CALL cp_fm_release(t_mat)
475 CALL cp_fm_release(t_vec)
476 DEALLOCATE (a_mat, b_mat, b2_mat, t_mat, t_vec)
477 CALL cp_fm_release(v_mat)
478 END IF
479 CALL cp_fm_release(c2_tmp)
480 CALL cp_fm_release(c3_tmp)
481 END DO ! ib
482
483 CALL timeset(routinen//"_ortho", hand5)
484 CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, c0, rzero, chc)
485
486 CALL cp_fm_cholesky_decompose(chc, nmo)
487 CALL cp_fm_triangular_multiply(chc, c0, 'R', invert_tr=.true.)
488 CALL timestop(hand5)
489
490 CALL cp_fm_release(c_tmp)
491 CALL cp_fm_release(hc)
492 ELSE
493 CALL cp_fm_release(c_tmp)
494 CALL cp_fm_release(hc)
495 CALL timestop(handle)
496 RETURN
497 END IF
498
499 CALL timestop(handle)
500
501 END SUBROUTINE lanczos_refinement
502
503!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
504
505! **************************************************************************************************
506!> \brief ...
507!> \param krylov_space ...
508!> \param ks ...
509!> \param c0 ...
510!> \param c1 ...
511!> \param eval ...
512!> \param nao ...
513!> \param eps_iter ...
514!> \param ispin ...
515!> \param check_moconv_only ...
516! **************************************************************************************************
517 SUBROUTINE lanczos_refinement_2v(krylov_space, ks, c0, c1, eval, nao, &
518 eps_iter, ispin, check_moconv_only)
519
520 TYPE(krylov_space_type), POINTER :: krylov_space
521 TYPE(cp_fm_type), INTENT(IN) :: ks, c0, c1
522 REAL(dp), DIMENSION(:), POINTER :: eval
523 INTEGER, INTENT(IN) :: nao
524 REAL(dp), INTENT(IN) :: eps_iter
525 INTEGER, INTENT(IN) :: ispin
526 LOGICAL, INTENT(IN), OPTIONAL :: check_moconv_only
527
528 CHARACTER(LEN=*), PARAMETER :: routinen = 'lanczos_refinement_2v'
529 REAL(kind=dp), PARAMETER :: rmone = -1.0_dp, rone = 1.0_dp, &
530 rzero = 0.0_dp
531
532 INTEGER :: hand1, hand2, hand3, hand4, hand5, hand6, handle, i, ia, ib, ik, imo, imo_low, &
533 imo_up, info, it, j, jt, liwork, lwork, nblock, ndim, nmo, nmo_converged, nmo_nc, nmob, &
534 num_blocks
535 INTEGER, ALLOCATABLE, DIMENSION(:) :: itaken
536 INTEGER, DIMENSION(:), POINTER :: iwork
537 LOGICAL :: my_check_moconv_only
538 REAL(dp) :: max_norm, min_norm, vmax
539 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: a_block, b_block, q_mat, t_mat
540 REAL(dp), DIMENSION(:), POINTER :: c_res, t_eval
541 REAL(kind=dp), DIMENSION(:), POINTER :: work
542 REAL(kind=dp), DIMENSION(:, :), POINTER :: a_loc, b_loc
543 TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
544 TYPE(cp_fm_type) :: b_mat, c2_tmp, c_tmp, hc, v_tmp
545 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: v_mat
546 TYPE(cp_fm_type), POINTER :: chc, evec
547
548 CALL timeset(routinen, handle)
549
550 NULLIFY (fm_struct_tmp)
551 NULLIFY (chc, evec)
552 NULLIFY (c_res, t_eval)
553 NULLIFY (b_loc, a_loc)
554
555 nmo = SIZE(eval, 1)
556 my_check_moconv_only = .false.
557 IF (PRESENT(check_moconv_only)) my_check_moconv_only = check_moconv_only
558
559 chc => krylov_space%chc_mat(ispin)
560 evec => krylov_space%c_vec(ispin)
561 c_res => krylov_space%c_eval
562 t_eval => krylov_space%t_eval
563
564 NULLIFY (fm_struct_tmp)
565 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmo, &
566 para_env=c0%matrix_struct%para_env, &
567 context=c0%matrix_struct%context)
568 CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
569 name="c_tmp")
570 CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
571 name="hc")
572 CALL cp_fm_struct_release(fm_struct_tmp)
573
574 !Compute (C^t)HC
575 CALL parallel_gemm('N', 'N', nao, nmo, nao, rone, ks, c0, rzero, hc)
576 CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, hc, rzero, chc)
577
578 !Diagonalize (C^t)HC
579 CALL timeset(routinen//"diag_chc", hand1)
580 CALL choose_eigv_solver(chc, evec, eval)
581
582 CALL timestop(hand1)
583
584 CALL timeset(routinen//"check_conv", hand6)
585 !Rotate the C vectors
586 CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, c0, evec, rzero, c1)
587
588 !Check for converged states
589 CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, hc, evec, rzero, c_tmp)
590 CALL cp_fm_to_fm(c1, c0, nmo, 1, 1)
591 CALL cp_fm_column_scale(c1, eval)
592 CALL cp_fm_scale_and_add(1.0_dp, c_tmp, rmone, c1)
593 CALL cp_fm_vectorsnorm(c_tmp, c_res)
594
595 nmo_converged = 0
596 nmo_nc = 0
597 max_norm = 0.0_dp
598 min_norm = 1.e10_dp
599 CALL cp_fm_set_all(c1, rzero)
600 DO imo = 1, nmo
601 max_norm = max(max_norm, c_res(imo))
602 min_norm = min(min_norm, c_res(imo))
603 END DO
604 DO imo = 1, nmo
605 IF (c_res(imo) <= eps_iter) THEN
606 nmo_converged = nmo_converged + 1
607 ELSE
608 nmo_nc = nmo - nmo_converged
609 EXIT
610 END IF
611 END DO
612 CALL timestop(hand6)
613
614 CALL cp_fm_release(c_tmp)
615 CALL cp_fm_release(hc)
616
617 krylov_space%nmo_nc = nmo_nc
618 krylov_space%nmo_conv = nmo_converged
619 krylov_space%max_res_norm = max_norm
620 krylov_space%min_res_norm = min_norm
621
622 IF (my_check_moconv_only) THEN
623 ! Do nothing
624 ELSE IF (krylov_space%nmo_nc > 0) THEN
625
626 CALL cp_fm_to_fm(c0, c1, nmo_nc, nmo_converged + 1, 1)
627
628 nblock = krylov_space%nblock
629 IF (modulo(nmo_nc, nblock) > 0.0_dp) THEN
630 num_blocks = nmo_nc/nblock + 1
631 ELSE
632 num_blocks = nmo_nc/nblock
633 END IF
634
635 DO ib = 1, num_blocks
636
637 imo_low = (ib - 1)*nblock + 1
638 imo_up = min(ib*nblock, nmo_nc)
639 nmob = imo_up - imo_low + 1
640 ndim = krylov_space%nkrylov*nmob
641
642 ! Allocation
643 CALL timeset(routinen//"alloc", hand6)
644 ALLOCATE (a_block(nmob, nmob))
645 ALLOCATE (b_block(nmob, nmob))
646 ALLOCATE (t_mat(ndim, ndim))
647
648 NULLIFY (fm_struct_tmp)
649 ! by forcing ncol_block=nmo, the needed part of the matrix is distributed on a subset of processes
650 ! this is due to the use of two-dimensional grids of processes
651 ! nrow_global is distributed over num_pe(1)
652 ! a local_data array is anyway allocated for the processes non included
653 ! this should have a minimum size
654 ! with ncol_local=1.
655 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
656 ncol_block=nmob, &
657 para_env=c0%matrix_struct%para_env, &
658 context=c0%matrix_struct%context, &
659 force_block=.true.)
660 CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
661 name="c_tmp")
662 CALL cp_fm_set_all(c_tmp, rzero)
663 CALL cp_fm_create(v_tmp, matrix_struct=fm_struct_tmp, &
664 name="v_tmp")
665 CALL cp_fm_set_all(v_tmp, rzero)
666 CALL cp_fm_struct_release(fm_struct_tmp)
667 NULLIFY (fm_struct_tmp)
668 ALLOCATE (v_mat(krylov_space%nkrylov))
669 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
670 ncol_block=nmob, &
671 para_env=c0%matrix_struct%para_env, &
672 context=c0%matrix_struct%context, &
673 force_block=.true.)
674 DO ik = 1, krylov_space%nkrylov
675 CALL cp_fm_create(v_mat(ik), matrix_struct=fm_struct_tmp, &
676 name="v_mat")
677 END DO
678 CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
679 name="hc")
680 CALL cp_fm_struct_release(fm_struct_tmp)
681 NULLIFY (fm_struct_tmp)
682 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=ndim, &
683 ncol_block=ndim, &
684 para_env=c0%matrix_struct%para_env, &
685 context=c0%matrix_struct%context, &
686 force_block=.true.)
687 CALL cp_fm_create(c2_tmp, matrix_struct=fm_struct_tmp, &
688 name="c2_tmp")
689 CALL cp_fm_struct_release(fm_struct_tmp)
690
691 NULLIFY (fm_struct_tmp)
692 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=nmob, &
693 para_env=c0%matrix_struct%para_env, &
694 context=c0%matrix_struct%context)
695 CALL cp_fm_create(b_mat, matrix_struct=fm_struct_tmp, &
696 name="b_mat")
697 CALL cp_fm_struct_release(fm_struct_tmp)
698 CALL timestop(hand6)
699 !End allocation
700
701 CALL cp_fm_set_all(b_mat, rzero)
702 CALL cp_fm_to_fm(c1, v_mat(1), nmob, imo_low, 1)
703
704 ! Here starts the construction of krylov space
705 CALL timeset(routinen//"_kry_loop", hand2)
706 !Compute A =(V^t)HV
707 CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(1), rzero, hc)
708
709 a_block = 0.0_dp
710 a_loc => v_mat(1)%local_data
711 b_loc => hc%local_data
712
713 IF (SIZE(hc%local_data, 2) == nmob) THEN
714 ! this is a work around to avoid problems due to the two dimensional grid of processes
715 CALL dgemm('T', 'N', nmob, nmob, SIZE(hc%local_data, 1), 1.0_dp, a_loc(1, 1), &
716 SIZE(hc%local_data, 1), b_loc(1, 1), SIZE(hc%local_data, 1), 0.0_dp, a_block(1, 1), nmob)
717 END IF
718 CALL hc%matrix_struct%para_env%sum(a_block)
719
720 !Compute the residual matrix R for next
721 !factorisation
722 c_tmp%local_data = 0.0_dp
723 IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
724 b_loc => c_tmp%local_data
725 CALL dgemm('N', 'N', SIZE(c_tmp%local_data, 1), nmob, nmob, 1.0_dp, a_loc(1, 1), &
726 SIZE(c_tmp%local_data, 1), a_block(1, 1), nmob, 0.0_dp, &
727 b_loc(1, 1), SIZE(c_tmp%local_data, 1))
728 END IF
729 CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
730
731 ! Build the block tridiagonal matrix
732 t_mat = 0.0_dp
733 DO i = 1, nmob
734 t_mat(1:nmob, i) = a_block(1:nmob, i)
735 END DO
736
737 DO ik = 2, krylov_space%nkrylov
738 ! Call lapack for QR factorization
739 CALL cp_fm_set_all(b_mat, rzero)
740 CALL cp_fm_to_fm(c_tmp, v_mat(ik), nmob, 1, 1)
741 CALL cp_fm_qr_factorization(c_tmp, b_mat, nao, nmob, 1, 1)
742
743 CALL cp_fm_triangular_multiply(b_mat, v_mat(ik), side="R", invert_tr=.true., &
744 n_rows=nao, n_cols=nmob)
745
746 CALL cp_fm_get_submatrix(b_mat, b_block, 1, 1, nmob, nmob)
747
748 !Compute A =(V^t)HV
749 CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(ik), rzero, hc)
750
751 a_block = 0.0_dp
752 IF (SIZE(hc%local_data, 2) == nmob) THEN
753 a_loc => v_mat(ik)%local_data
754 b_loc => hc%local_data
755 CALL dgemm('T', 'N', nmob, nmob, SIZE(hc%local_data, 1), 1.0_dp, a_loc(1, 1), &
756 SIZE(hc%local_data, 1), b_loc(1, 1), SIZE(hc%local_data, 1), 0.0_dp, a_block(1, 1), nmob)
757 END IF
758 CALL hc%matrix_struct%para_env%sum(a_block)
759
760 !Compute the residual matrix R for next
761 !factorisation
762 c_tmp%local_data = 0.0_dp
763 IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
764 a_loc => v_mat(ik)%local_data
765 b_loc => c_tmp%local_data
766 CALL dgemm('N', 'N', SIZE(c_tmp%local_data, 1), nmob, nmob, 1.0_dp, a_loc(1, 1), &
767 SIZE(c_tmp%local_data, 1), a_block(1, 1), nmob, 0.0_dp, &
768 b_loc(1, 1), SIZE(c_tmp%local_data, 1))
769 END IF
770 CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
771
772 IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
773 a_loc => v_mat(ik - 1)%local_data
774 DO j = 1, nmob
775 DO i = 1, j
776 DO ia = 1, SIZE(c_tmp%local_data, 1)
777 b_loc(ia, i) = b_loc(ia, i) - a_loc(ia, j)*b_block(i, j)
778 END DO
779 END DO
780 END DO
781 END IF
782
783 ! Build the block tridiagonal matrix
784 it = (ik - 2)*nmob
785 jt = (ik - 1)*nmob
786 DO j = 1, nmob
787 t_mat(jt + 1:jt + nmob, jt + j) = a_block(1:nmob, j)
788 DO i = 1, nmob
789 t_mat(it + i, jt + j) = b_block(j, i)
790 t_mat(jt + j, it + i) = b_block(j, i)
791 END DO
792 END DO
793 END DO ! ik
794 CALL timestop(hand2)
795
796 CALL timeset(routinen//"_diag_tri", hand3)
797 lwork = 1 + 6*ndim + 2*ndim**2 + 5000
798 liwork = 5*ndim + 3
799 ALLOCATE (work(lwork))
800 ALLOCATE (iwork(liwork))
801
802 ! Diagonalize the block-tridiagonal matrix
803 CALL dsyevd('V', 'U', ndim, t_mat(1, 1), ndim, t_eval(1), &
804 work(1), lwork, iwork(1), liwork, info)
805 DEALLOCATE (work)
806 DEALLOCATE (iwork)
807 CALL timestop(hand3)
808
809 CALL timeset(routinen//"_build_cnew", hand4)
810! !Compute the refined vectors
811
812 c2_tmp%local_data = 0.0_dp
813 ALLOCATE (q_mat(nmob, ndim))
814 q_mat = 0.0_dp
815 b_loc => c2_tmp%local_data
816 DO ik = 1, krylov_space%nkrylov
817 CALL cp_fm_to_fm(v_mat(ik), v_tmp, nmob, 1, 1)
818 IF (SIZE(c2_tmp%local_data, 2) == ndim) THEN
819! a_loc => v_mat(ik)%local_data
820 a_loc => v_tmp%local_data
821 it = (ik - 1)*nmob
822 CALL dgemm('N', 'N', SIZE(c2_tmp%local_data, 1), ndim, nmob, 1.0_dp, a_loc(1, 1), &
823 SIZE(c2_tmp%local_data, 1), t_mat(it + 1, 1), ndim, 1.0_dp, &
824 b_loc(1, 1), SIZE(c2_tmp%local_data, 1))
825 END IF
826 END DO !ik
827
828 !Try to avoid linear dependencies
829 CALL cp_fm_to_fm(v_mat(1), v_tmp, nmob, 1, 1)
830 IF (SIZE(c2_tmp%local_data, 2) == ndim) THEN
831! a_loc => v_mat(1)%local_data
832 a_loc => v_tmp%local_data
833 b_loc => c2_tmp%local_data
834 CALL dgemm('T', 'N', nmob, ndim, SIZE(v_tmp%local_data, 1), 1.0_dp, a_loc(1, 1), &
835 SIZE(v_tmp%local_data, 1), b_loc(1, 1), SIZE(v_tmp%local_data, 1), &
836 0.0_dp, q_mat(1, 1), nmob)
837 END IF
838 CALL hc%matrix_struct%para_env%sum(q_mat)
839
840 ALLOCATE (itaken(ndim))
841 itaken = 0
842 DO it = 1, nmob
843 vmax = 0.0_dp
844 !select index ik
845 DO jt = 1, ndim
846 IF (itaken(jt) == 0 .AND. abs(q_mat(it, jt)) > vmax) THEN
847 vmax = abs(q_mat(it, jt))
848 ik = jt
849 END IF
850 END DO
851 itaken(ik) = 1
852
853 CALL cp_fm_to_fm(c2_tmp, v_mat(1), 1, ik, it)
854 END DO
855 DEALLOCATE (itaken)
856 DEALLOCATE (q_mat)
857
858 !Copy in the converged set to enlarge the converged subspace
859 CALL cp_fm_to_fm(v_mat(1), c0, nmob, 1, (nmo_converged + imo_low))
860 CALL timestop(hand4)
861
862 CALL cp_fm_release(c2_tmp)
863 CALL cp_fm_release(c_tmp)
864 CALL cp_fm_release(hc)
865 CALL cp_fm_release(v_tmp)
866 CALL cp_fm_release(b_mat)
867
868 DEALLOCATE (t_mat)
869 DEALLOCATE (a_block)
870 DEALLOCATE (b_block)
871
872 CALL cp_fm_release(v_mat)
873
874 END DO ! ib
875
876 CALL timeset(routinen//"_ortho", hand5)
877 CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, c0, rzero, chc)
878
879 CALL cp_fm_cholesky_decompose(chc, nmo)
880 CALL cp_fm_triangular_multiply(chc, c0, 'R', invert_tr=.true.)
881 CALL timestop(hand5)
882 ELSE
883 ! Do nothing
884 END IF
885
886 CALL timestop(handle)
887 END SUBROUTINE lanczos_refinement_2v
888
889END MODULE qs_scf_lanczos
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
basic linear algebra operations for full matrices
subroutine, public cp_fm_column_scale(matrixa, scaling)
scales column i of matrix a with scaling(i)
subroutine, public cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col)
performs a QR factorization of the input rectangular matrix A or of a submatrix of A the computed upp...
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_decompose(matrix, n, info_out)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
Definition cp_fm_diag.F:17
subroutine, public choose_eigv_solver(matrix, eigenvectors, eigenvalues, info)
Choose the Eigensolver depending on which library is available ELPA seems to be unstable for small sy...
Definition cp_fm_diag.F:208
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_vectorsnorm(matrix, norm_array)
find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
various routines to log and control the output. The idea is that decisions about where to log should ...
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
basic linear algebra operations for full matrixes
Definition and initialisation of the mo data type.
Definition qs_mo_types.F:22
subroutine, public get_mo_set(mo_set, maxocc, homo, lfomo, nao, nelectron, n_el_f, nmo, eigenvalues, occupation_numbers, mo_coeff, mo_coeff_b, uniform_occupation, kts, mu, flexible_electron_count)
Get the components of a MO set data structure.
module that contains the algorithms to perform an itrative diagonalization by the block-Lanczos appro...
subroutine, public krylov_space_allocate(krylov_space, scf_control, mos)
allocates matrices and vectros used in the construction of the krylov space and for the lanczos refin...
subroutine, public lanczos_refinement(krylov_space, ks, c0, c1, eval, nao, eps_iter, ispin, check_moconv_only)
lanczos refinement by blocks of not-converged MOs
subroutine, public lanczos_refinement_2v(krylov_space, ks, c0, c1, eval, nao, eps_iter, ispin, check_moconv_only)
...
module that contains the definitions of the scf types
parameters that control an scf iteration
keeps the information about the structure of a full matrix
represent a full matrix
wrapper for temporary and cached objects used in the scf iteration