(git:d43dca4)
Loading...
Searching...
No Matches
qs_localization_methods.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Localization methods such as 2x2 Jacobi rotations
10!> Steepest Decents
11!> Conjugate Gradient
12!> \par History
13!> Initial parallellization of jacobi (JVDV 07.2003)
14!> direct minimization using exponential parametrization (JVDV 09.2003)
15!> crazy rotations go fast (JVDV 10.2003)
16!> \author CJM (04.2003)
17! **************************************************************************************************
19 USE bibliography, ONLY: schreder2024_2,&
20 cite_reference
21 USE cell_types, ONLY: cell_type
31 USE cp_cfm_diag, ONLY: cp_cfm_heevd
32 USE cp_cfm_types, ONLY: &
36 USE cp_dbcsr_api, ONLY: dbcsr_p_type
54 USE cp_fm_types, ONLY: &
60 USE kahan_sum, ONLY: accurate_sum
61 USE kinds, ONLY: dp
62 USE machine, ONLY: m_flush,&
64 USE mathconstants, ONLY: gaussi,&
65 pi,&
66 twopi,&
67 z_one,&
68 z_zero
69 USE matrix_exp, ONLY: exp_pade_real,&
73#include "./base/base_uses.f90"
74
75 IMPLICIT NONE
79
80 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_localization_methods'
81
82 PRIVATE
83
84 TYPE set_c_1d_type
85 COMPLEX(KIND=dp), POINTER, DIMENSION(:) :: c_array => null()
86 END TYPE set_c_1d_type
87
88 TYPE set_c_2d_type
89 COMPLEX(KIND=dp), POINTER, DIMENSION(:, :) :: c_array => null()
90 END TYPE set_c_2d_type
91
92CONTAINS
93! **************************************************************************************************
94!> \brief ...
95!> \param C ...
96!> \param iterations ...
97!> \param eps ...
98!> \param converged ...
99!> \param sweeps ...
100! **************************************************************************************************
101 SUBROUTINE approx_l1_norm_sd(C, iterations, eps, converged, sweeps)
102 TYPE(cp_fm_type), INTENT(IN) :: c
103 INTEGER, INTENT(IN) :: iterations
104 REAL(kind=dp), INTENT(IN) :: eps
105 LOGICAL, INTENT(INOUT) :: converged
106 INTEGER, INTENT(INOUT) :: sweeps
107
108 CHARACTER(len=*), PARAMETER :: routinen = 'approx_l1_norm_sd'
109 INTEGER, PARAMETER :: taylor_order = 100
110 REAL(kind=dp), PARAMETER :: alpha = 0.1_dp, f2_eps = 0.01_dp
111
112 INTEGER :: handle, i, istep, k, n, ncol_local, &
113 nrow_local, output_unit, p
114 REAL(kind=dp) :: expfactor, f2, f2old, gnorm, tnorm
115 TYPE(cp_blacs_env_type), POINTER :: context
116 TYPE(cp_fm_struct_type), POINTER :: fm_struct_k_k
117 TYPE(cp_fm_type) :: ctmp, g, gp1, gp2, u
118 TYPE(mp_para_env_type), POINTER :: para_env
119
120 CALL timeset(routinen, handle)
121
122 NULLIFY (context, para_env, fm_struct_k_k)
123
124 output_unit = cp_logger_get_default_io_unit()
125
126 CALL cp_fm_struct_get(c%matrix_struct, nrow_global=n, ncol_global=k, &
127 nrow_local=nrow_local, ncol_local=ncol_local, &
128 para_env=para_env, context=context)
129 CALL cp_fm_struct_create(fm_struct_k_k, para_env=para_env, context=context, &
130 nrow_global=k, ncol_global=k)
131 CALL cp_fm_create(ctmp, c%matrix_struct)
132 CALL cp_fm_create(u, fm_struct_k_k)
133 CALL cp_fm_create(g, fm_struct_k_k)
134 CALL cp_fm_create(gp1, fm_struct_k_k)
135 CALL cp_fm_create(gp2, fm_struct_k_k)
136 !
137 ! printing
138 IF (output_unit > 0) THEN
139 WRITE (output_unit, '(1X)')
140 WRITE (output_unit, '(2X,A)') '-----------------------------------------------------------------------------'
141 WRITE (output_unit, '(A,I5)') ' Nbr iterations =', iterations
142 WRITE (output_unit, '(A,E10.2)') ' eps convergence =', eps
143 WRITE (output_unit, '(A,I5)') ' Max Taylor order =', taylor_order
144 WRITE (output_unit, '(A,E10.2)') ' f2 eps =', f2_eps
145 WRITE (output_unit, '(A,E10.2)') ' alpha =', alpha
146 WRITE (output_unit, '(A)') ' iteration approx_l1_norm g_norm rel_err'
147 END IF
148 !
149 f2old = 0.0_dp
150 converged = .false.
151 !
152 ! Start the steepest descent
153 DO istep = 1, iterations
154 !
155 !-------------------------------------------------------------------
156 ! compute f_2
157 ! f_2(x)=(x^2+eps)^1/2
158 f2 = 0.0_dp
159 DO p = 1, ncol_local ! p
160 DO i = 1, nrow_local ! i
161 f2 = f2 + sqrt(c%local_data(i, p)**2 + f2_eps)
162 END DO
163 END DO
164 CALL c%matrix_struct%para_env%sum(f2)
165 !write(*,*) 'qs_localize: f_2=',f2
166 !-------------------------------------------------------------------
167 ! compute the derivative of f_2
168 ! f_2(x)=(x^2+eps)^1/2
169 DO p = 1, ncol_local ! p
170 DO i = 1, nrow_local ! i
171 ctmp%local_data(i, p) = c%local_data(i, p)/sqrt(c%local_data(i, p)**2 + f2_eps)
172 END DO
173 END DO
174 CALL parallel_gemm('T', 'N', k, k, n, 1.0_dp, ctmp, c, 0.0_dp, g)
175 ! antisymmetrize
176 CALL cp_fm_transpose(g, u)
177 CALL cp_fm_scale_and_add(-0.5_dp, g, 0.5_dp, u)
178 !
179 !-------------------------------------------------------------------
180 !
181 gnorm = cp_fm_frobenius_norm(g)
182 !write(*,*) 'qs_localize: norm(G)=',gnorm
183 !
184 ! rescale for steepest descent
185 CALL cp_fm_scale(-alpha, g)
186 !
187 ! compute unitary transform
188 ! zeroth order
189 CALL cp_fm_set_all(u, 0.0_dp, 1.0_dp)
190 ! first order
191 expfactor = 1.0_dp
192 CALL cp_fm_scale_and_add(1.0_dp, u, expfactor, g)
193 tnorm = cp_fm_frobenius_norm(g)
194 !write(*,*) 'Taylor expansion i=',1,' norm(X^i)/i!=',tnorm
195 IF (tnorm > 1.0e-10_dp) THEN
196 ! other orders
197 CALL cp_fm_to_fm(g, gp1)
198 DO i = 2, taylor_order
199 ! new power of G
200 CALL parallel_gemm('N', 'N', k, k, k, 1.0_dp, g, gp1, 0.0_dp, gp2)
201 CALL cp_fm_to_fm(gp2, gp1)
202 ! add to the taylor expansion so far
203 expfactor = expfactor/real(i, kind=dp)
204 CALL cp_fm_scale_and_add(1.0_dp, u, expfactor, gp1)
205 tnorm = cp_fm_frobenius_norm(gp1)
206 !write(*,*) 'Taylor expansion i=',i,' norm(X^i)/i!=',tnorm*expfactor
207 IF (tnorm*expfactor < 1.0e-10_dp) EXIT
208 END DO
209 END IF
210 !
211 ! incrementaly rotate the MOs
212 CALL parallel_gemm('N', 'N', n, k, k, 1.0_dp, c, u, 0.0_dp, ctmp)
213 CALL cp_fm_to_fm(ctmp, c)
214 !
215 ! printing
216 IF (output_unit > 0) THEN
217 WRITE (output_unit, '(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, abs((f2 - f2old)/f2)
218 END IF
219 !
220 ! Are we done?
221 sweeps = istep
222 !IF(gnorm<=grad_thresh.AND.ABS((f2-f2old)/f2)<=f2_thresh.AND.istep>1) THEN
223 IF (abs((f2 - f2old)/f2) <= eps .AND. istep > 1) THEN
224 converged = .true.
225 EXIT
226 END IF
227 f2old = f2
228 END DO
229 !
230 ! here we should do one refine step to enforce C'*S*C=1 for any case
231 !
232 ! Print the final result
233 IF (output_unit > 0) WRITE (output_unit, '(A,E16.10)') ' sparseness function f2 = ', f2
234 !
235 ! sparsity
236 !DO i=1,size(thresh,1)
237 ! gnorm = 0.0_dp
238 ! DO o=1,ncol_local
239 ! DO p=1,nrow_local
240 ! IF(ABS(C%local_data(p,o))>thresh(i)) THEN
241 ! gnorm = gnorm + 1.0_dp
242 ! ENDIF
243 ! ENDDO
244 ! ENDDO
245 ! CALL C%matrix_struct%para_env%sum(gnorm)
246 ! IF(output_unit>0) THEN
247 ! WRITE(output_unit,*) 'qs_localize: ratio2=',gnorm / ( REAL(k,KIND=dp)*REAL(n,KIND=dp) ),thresh(i)
248 ! ENDIF
249 !ENDDO
250 !
251 ! deallocate
252 CALL cp_fm_struct_release(fm_struct_k_k)
253 CALL cp_fm_release(ctmp)
254 CALL cp_fm_release(u)
255 CALL cp_fm_release(g)
256 CALL cp_fm_release(gp1)
257 CALL cp_fm_release(gp2)
258
259 CALL timestop(handle)
260
261 END SUBROUTINE approx_l1_norm_sd
262! **************************************************************************************************
263!> \brief ...
264!> \param cell ...
265!> \param weights ...
266! **************************************************************************************************
267 SUBROUTINE initialize_weights(cell, weights)
268
269 TYPE(cell_type), POINTER :: cell
270 REAL(kind=dp), DIMENSION(:) :: weights
271
272 REAL(kind=dp), DIMENSION(3, 3) :: metric
273
274 cpassert(ASSOCIATED(cell))
275
276 metric = 0.0_dp
277 CALL dgemm('T', 'N', 3, 3, 3, 1._dp, cell%hmat(:, :), 3, cell%hmat(:, :), 3, 0.0_dp, metric(:, :), 3)
278
279 weights(1) = metric(1, 1) - metric(1, 2) - metric(1, 3)
280 weights(2) = metric(2, 2) - metric(1, 2) - metric(2, 3)
281 weights(3) = metric(3, 3) - metric(1, 3) - metric(2, 3)
282 weights(4) = metric(1, 2)
283 weights(5) = metric(1, 3)
284 weights(6) = metric(2, 3)
285
286 END SUBROUTINE initialize_weights
287
288! **************************************************************************************************
289!> \brief wrapper for the jacobi routines, should be removed if jacobi_rot_para
290!> can deal with serial para_envs.
291!> \param weights ...
292!> \param zij ...
293!> \param vectors ...
294!> \param para_env ...
295!> \param max_iter ...
296!> \param eps_localization ...
297!> \param sweeps ...
298!> \param out_each ...
299!> \param target_time ...
300!> \param start_time ...
301!> \param restricted ...
302!> \par History
303!> \author Joost VandeVondele (02.2010)
304! **************************************************************************************************
305 SUBROUTINE jacobi_rotations(weights, zij, vectors, para_env, max_iter, &
306 eps_localization, sweeps, out_each, target_time, start_time, restricted)
307
308 REAL(kind=dp), INTENT(IN) :: weights(:)
309 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
310 TYPE(mp_para_env_type), POINTER :: para_env
311 INTEGER, INTENT(IN) :: max_iter
312 REAL(kind=dp), INTENT(IN) :: eps_localization
313 INTEGER :: sweeps
314 INTEGER, INTENT(IN) :: out_each
315 REAL(dp) :: target_time, start_time
316 INTEGER :: restricted
317
318 IF (para_env%num_pe == 1) THEN
319 CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, &
320 sweeps, out_each, restricted=restricted)
321 ELSE
322 CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
323 sweeps, out_each, target_time, start_time, restricted=restricted)
324 END IF
325
326 END SUBROUTINE jacobi_rotations
327
328! **************************************************************************************************
329!> \brief this routine, private to the module is a serial backup, till we have jacobi_rot_para to work in serial
330!> while the routine below works in parallel, it is too slow to be useful
331!> \param weights ...
332!> \param zij ...
333!> \param vectors ...
334!> \param max_iter ...
335!> \param eps_localization ...
336!> \param sweeps ...
337!> \param out_each ...
338!> \param restricted ...
339! **************************************************************************************************
340 SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, &
341 out_each, restricted)
342 REAL(kind=dp), INTENT(IN) :: weights(:)
343 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
344 INTEGER, INTENT(IN) :: max_iter
345 REAL(kind=dp), INTENT(IN) :: eps_localization
346 INTEGER :: sweeps
347 INTEGER, INTENT(IN) :: out_each
348 INTEGER :: restricted
349
350 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rotations_serial'
351
352 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
353 INTEGER :: dim2, handle, idim, istate, jstate, &
354 nstate, unit_nr
355 REAL(kind=dp) :: ct, st, t1, t2, theta, tolerance
356 TYPE(cp_cfm_type) :: c_rmat
357 TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
358 TYPE(cp_fm_type) :: rmat
359
360 CALL timeset(routinen, handle)
361
362 dim2 = SIZE(zij, 2)
363 ALLOCATE (c_zij(dim2))
364 NULLIFY (mii, mij, mjj)
365 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
366
367 CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
368 CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
369
370 CALL cp_cfm_create(c_rmat, zij(1, 1)%matrix_struct)
371 CALL cp_cfm_set_all(c_rmat, (0._dp, 0._dp), (1._dp, 0._dp))
372 DO idim = 1, dim2
373 CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
374 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
375 zij(2, idim)%local_data, dp)
376 END DO
377
378 CALL cp_fm_get_info(rmat, nrow_global=nstate)
379 tolerance = 1.0e10_dp
380
381 sweeps = 0
382 unit_nr = -1
383 IF (rmat%matrix_struct%para_env%is_source()) THEN
385 WRITE (unit_nr, '(T4,A )') " Localization by iterative Jacobi rotation"
386 END IF
387
388 IF (restricted > 0) THEN
390 WRITE (unit_nr, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
391 nstate = nstate - restricted
392 END IF
393
394 ! do jacobi sweeps until converged
395 DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
396 sweeps = sweeps + 1
397 t1 = m_walltime()
398
399 DO istate = 1, nstate
400 DO jstate = istate + 1, nstate
401 DO idim = 1, dim2
402 CALL cp_cfm_get_element(c_zij(idim), istate, istate, mii(idim))
403 CALL cp_cfm_get_element(c_zij(idim), istate, jstate, mij(idim))
404 CALL cp_cfm_get_element(c_zij(idim), jstate, jstate, mjj(idim))
405 END DO
406 CALL get_angle(mii, mjj, mij, weights, theta)
407 st = sin(theta)
408 ct = cos(theta)
409 CALL rotate_zij(istate, jstate, st, ct, c_zij)
410
411 CALL rotate_rmat(istate, jstate, st, ct, c_rmat)
412 END DO
413 END DO
414
415 CALL check_tolerance(c_zij, weights, tolerance)
416
417 t2 = m_walltime()
418 IF (unit_nr > 0 .AND. modulo(sweeps, out_each) == 0) THEN
419 WRITE (unit_nr, '(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') &
420 "Iteration:", sweeps, "Tolerance:", tolerance, "Time:", t2 - t1
421 CALL m_flush(unit_nr)
422 END IF
423
424 END DO
425
426 DO idim = 1, dim2
427 zij(1, idim)%local_data = real(c_zij(idim)%local_data, dp)
428 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
429 CALL cp_cfm_release(c_zij(idim))
430 END DO
431 DEALLOCATE (c_zij)
432 DEALLOCATE (mii, mij, mjj)
433 rmat%local_data = real(c_rmat%local_data, dp)
434
435 CALL rotate_orbitals(rmat, vectors)
436
437 CALL cp_cfm_release(c_rmat)
438 CALL cp_fm_release(rmat)
439
440 CALL timestop(handle)
441
442 END SUBROUTINE jacobi_rotations_serial
443! **************************************************************************************************
444!> \brief very similar to jacobi_rotations_serial with some extra output options
445!> \param weights ...
446!> \param c_zij ...
447!> \param max_iter ...
448!> \param c_rmat ...
449!> \param eps_localization ...
450!> \param tol_out ...
451!> \param jsweeps ...
452!> \param out_each ...
453!> \param c_zij_out ...
454!> \param grad_final ...
455! **************************************************************************************************
456 SUBROUTINE jacobi_rotations_serial_1(weights, c_zij, max_iter, c_rmat, eps_localization, &
457 tol_out, jsweeps, out_each, c_zij_out, grad_final)
458 REAL(kind=dp), INTENT(IN) :: weights(:)
459 TYPE(cp_cfm_type), INTENT(IN) :: c_zij(:)
460 INTEGER, INTENT(IN) :: max_iter
461 TYPE(cp_cfm_type), INTENT(IN) :: c_rmat
462 REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_localization
463 REAL(kind=dp), INTENT(OUT), OPTIONAL :: tol_out
464 INTEGER, INTENT(OUT), OPTIONAL :: jsweeps
465 INTEGER, INTENT(IN), OPTIONAL :: out_each
466 TYPE(cp_cfm_type), INTENT(IN), OPTIONAL :: c_zij_out(:)
467 TYPE(cp_fm_type), INTENT(OUT), OPTIONAL, POINTER :: grad_final
468
469 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rotations_serial_1'
470
471 COMPLEX(KIND=dp) :: mzii
472 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
473 INTEGER :: dim2, handle, idim, istate, jstate, &
474 nstate, sweeps, unit_nr
475 REAL(kind=dp) :: alpha, avg_spread_ii, ct, spread_ii, st, &
476 sum_spread_ii, t1, t2, theta, tolerance
477 TYPE(cp_cfm_type) :: c_rmat_local
478 TYPE(cp_cfm_type), ALLOCATABLE :: c_zij_local(:)
479
480 CALL timeset(routinen, handle)
481
482 dim2 = SIZE(c_zij)
483 NULLIFY (mii, mij, mjj)
484 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
485
486 ALLOCATE (c_zij_local(dim2))
487 CALL cp_cfm_create(c_rmat_local, c_rmat%matrix_struct)
488 CALL cp_cfm_set_all(c_rmat_local, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
489 DO idim = 1, dim2
490 CALL cp_cfm_create(c_zij_local(idim), c_zij(idim)%matrix_struct)
491 c_zij_local(idim)%local_data = c_zij(idim)%local_data
492 END DO
493
494 CALL cp_cfm_get_info(c_rmat_local, nrow_global=nstate)
495 tolerance = 1.0e10_dp
496
497 IF (PRESENT(grad_final)) CALL cp_fm_set_all(grad_final, 0.0_dp)
498
499 sweeps = 0
500 IF (PRESENT(out_each)) THEN
501 unit_nr = -1
502 IF (c_rmat_local%matrix_struct%para_env%is_source()) THEN
504 END IF
505 alpha = 0.0_dp
506 DO idim = 1, dim2
507 alpha = alpha + weights(idim)
508 END DO
509 END IF
510
511 ! do jacobi sweeps until converged
512 DO WHILE (sweeps < max_iter)
513 sweeps = sweeps + 1
514 IF (PRESENT(eps_localization)) THEN
515 IF (tolerance < eps_localization) EXIT
516 END IF
517 IF (PRESENT(out_each)) t1 = m_walltime()
518
519 DO istate = 1, nstate
520 DO jstate = istate + 1, nstate
521 DO idim = 1, dim2
522 CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mii(idim))
523 CALL cp_cfm_get_element(c_zij_local(idim), istate, jstate, mij(idim))
524 CALL cp_cfm_get_element(c_zij_local(idim), jstate, jstate, mjj(idim))
525 END DO
526 CALL get_angle(mii, mjj, mij, weights, theta)
527 st = sin(theta)
528 ct = cos(theta)
529 CALL rotate_zij(istate, jstate, st, ct, c_zij_local)
530
531 CALL rotate_rmat(istate, jstate, st, ct, c_rmat_local)
532 END DO
533 END DO
534
535 IF (PRESENT(grad_final)) THEN
536 CALL check_tolerance(c_zij_local, weights, tolerance, grad=grad_final)
537 ELSE
538 CALL check_tolerance(c_zij_local, weights, tolerance)
539 END IF
540 IF (PRESENT(tol_out)) tol_out = tolerance
541
542 IF (PRESENT(out_each)) THEN
543 t2 = m_walltime()
544 IF (unit_nr > 0 .AND. modulo(sweeps, out_each) == 0) THEN
545 sum_spread_ii = 0.0_dp
546 DO istate = 1, nstate
547 spread_ii = 0.0_dp
548 DO idim = 1, dim2
549 CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mzii)
550 spread_ii = spread_ii + weights(idim)* &
551 abs(mzii)**2/twopi/twopi
552 END DO
553 sum_spread_ii = sum_spread_ii + spread_ii
554 END DO
555 sum_spread_ii = alpha*nstate/twopi/twopi - sum_spread_ii
556 avg_spread_ii = sum_spread_ii/nstate
557 WRITE (unit_nr, '(T4,A,T26,A,T48,A,T64,A)') &
558 "Iteration", "Avg. Spread_ii", "Tolerance", "Time"
559 WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4,T60,F8.3)') &
560 sweeps, avg_spread_ii, tolerance, t2 - t1
561 CALL m_flush(unit_nr)
562 END IF
563 IF (PRESENT(jsweeps)) jsweeps = sweeps
564 END IF
565
566 END DO
567
568 IF (PRESENT(c_zij_out)) THEN
569 DO idim = 1, dim2
570 CALL cp_cfm_to_cfm(c_zij_local(idim), c_zij_out(idim))
571 END DO
572 END IF
573 CALL cp_cfm_to_cfm(c_rmat_local, c_rmat)
574
575 DEALLOCATE (mii, mij, mjj)
576 DO idim = 1, dim2
577 CALL cp_cfm_release(c_zij_local(idim))
578 END DO
579 DEALLOCATE (c_zij_local)
580 CALL cp_cfm_release(c_rmat_local)
581
582 CALL timestop(handle)
583
584 END SUBROUTINE jacobi_rotations_serial_1
585! **************************************************************************************************
586!> \brief combine jacobi rotations (serial) and conjugate gradient with golden section line search
587!> for partially occupied wannier functions
588!> \param para_env ...
589!> \param weights ...
590!> \param zij ...
591!> \param vectors ...
592!> \param max_iter ...
593!> \param eps_localization ...
594!> \param iter ...
595!> \param out_each ...
596!> \param nextra ...
597!> \param do_cg ...
598!> \param nmo ...
599!> \param vectors_2 ...
600!> \param mos_guess ...
601! **************************************************************************************************
602 SUBROUTINE jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, &
603 iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
604 TYPE(mp_para_env_type), POINTER :: para_env
605 REAL(kind=dp), INTENT(IN) :: weights(:)
606 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
607 INTEGER, INTENT(IN) :: max_iter
608 REAL(kind=dp), INTENT(IN) :: eps_localization
609 INTEGER :: iter
610 INTEGER, INTENT(IN) :: out_each, nextra
611 LOGICAL, INTENT(IN) :: do_cg
612 INTEGER, INTENT(IN), OPTIONAL :: nmo
613 TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: vectors_2, mos_guess
614
615 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_cg_edf_ls'
616 COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
617 czero = (0.0_dp, 0.0_dp)
618 REAL(kind=dp), PARAMETER :: gold_sec = 0.3819_dp
619
620 COMPLEX(KIND=dp) :: cnorm2_gct, cnorm2_gct_cross, mzii
621 COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_cmat
622 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: arr_zii
623 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: matrix_zii
624 INTEGER :: dim2, handle, icinit, idim, istate, line_search_count, line_searches, lsl, lsm, &
625 lsr, miniter, nao, ndummy, nocc, norextra, northo, nstate, unit_nr
626 INTEGER, DIMENSION(1) :: iloc
627 LOGICAL :: do_cinit_mo, do_cinit_random, &
628 do_u_guess_mo, new_direction
629 REAL(kind=dp) :: alpha, avg_spread_ii, beta, beta_pr, ds, ds_min, mintol, norm, norm2_gct, &
630 norm2_gct_cross, norm2_old, spread_ii, spread_sum, sum_spread_ii, t1, tol, tolc, weight
631 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: sum_spread
632 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_mat, tmp_mat_1
633 REAL(kind=dp), DIMENSION(50) :: energy, pos
634 REAL(kind=dp), DIMENSION(:), POINTER :: tmp_arr
635 TYPE(cp_blacs_env_type), POINTER :: context
636 TYPE(cp_cfm_type) :: c_tilde, ctrans_lambda, gct_old, &
637 grad_ctilde, skc, tmp_cfm, tmp_cfm_1, &
638 tmp_cfm_2, u, ul, v, vl, zdiag
639 TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij, zij_0
640 TYPE(cp_fm_struct_type), POINTER :: tmp_fm_struct
641 TYPE(cp_fm_type) :: id_nextra, matrix_u, matrix_v, &
642 matrix_v_all, rmat, tmp_fm, vectors_all
643
644 CALL timeset(routinen, handle)
645
646 dim2 = SIZE(zij, 2)
647 NULLIFY (context)
648 NULLIFY (matrix_zii, arr_zii)
649 NULLIFY (tmp_fm_struct)
650 NULLIFY (tmp_arr)
651
652 ALLOCATE (c_zij(dim2))
653
654 CALL cp_fm_get_info(zij(1, 1), nrow_global=nstate)
655
656 ALLOCATE (sum_spread(nstate))
657 ALLOCATE (matrix_zii(nstate, dim2))
658 matrix_zii = czero
659 sum_spread = 0.0_dp
660
661 alpha = 0.0_dp
662 DO idim = 1, dim2
663 alpha = alpha + weights(idim)
664 CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
665 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
666 zij(2, idim)%local_data, dp)
667 END DO
668
669 ALLOCATE (zij_0(dim2))
670
671 CALL cp_cfm_create(u, zij(1, 1)%matrix_struct)
672 CALL cp_fm_create(matrix_u, zij(1, 1)%matrix_struct)
673
674 CALL cp_cfm_set_all(u, czero, cone)
675 CALL cp_fm_set_all(matrix_u, 0.0_dp, 1.0_dp)
676
677 CALL cp_fm_get_info(vectors, nrow_global=nao)
678 IF (nextra > 0) THEN
679 IF (PRESENT(mos_guess)) THEN
680 do_cinit_random = .false.
681 do_cinit_mo = .true.
682 CALL cp_fm_get_info(mos_guess, ncol_global=ndummy)
683 ELSE
684 do_cinit_random = .true.
685 do_cinit_mo = .false.
686 ndummy = nstate
687 END IF
688
689 IF (do_cinit_random) THEN
690 icinit = 1
691 do_u_guess_mo = .false.
692 ELSEIF (do_cinit_mo) THEN
693 icinit = 2
694 do_u_guess_mo = .true.
695 END IF
696
697 nocc = nstate - nextra
698 northo = nmo - nocc
699 norextra = nmo - nstate
700 CALL cp_fm_struct_get(zij(1, 1)%matrix_struct, context=context)
701
702 ALLOCATE (tmp_cmat(nstate, nstate))
703 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nmo, &
704 para_env=para_env, context=context)
705 DO idim = 1, dim2
706 CALL cp_cfm_create(zij_0(idim), tmp_fm_struct)
707 CALL cp_cfm_set_all(zij_0(idim), czero, cone)
708 CALL cp_cfm_get_submatrix(c_zij(idim), tmp_cmat)
709 CALL cp_cfm_set_submatrix(zij_0(idim), tmp_cmat)
710 END DO
711 CALL cp_fm_struct_release(tmp_fm_struct)
712 DEALLOCATE (tmp_cmat)
713
714 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nstate, &
715 para_env=para_env, context=context)
716 CALL cp_cfm_create(v, tmp_fm_struct)
717 CALL cp_fm_create(matrix_v, tmp_fm_struct)
718 CALL cp_cfm_create(zdiag, tmp_fm_struct)
719 CALL cp_fm_create(rmat, tmp_fm_struct)
720 CALL cp_fm_struct_release(tmp_fm_struct)
721 CALL cp_cfm_set_all(v, czero, cone)
722 CALL cp_fm_set_all(matrix_v, 0.0_dp, 1.0_dp)
723
724 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=ndummy, &
725 para_env=para_env, context=context)
726 CALL cp_fm_create(matrix_v_all, tmp_fm_struct)
727 CALL cp_fm_struct_release(tmp_fm_struct)
728 CALL cp_fm_set_all(matrix_v_all, 0._dp, 1._dp)
729
730 ALLOCATE (arr_zii(nstate))
731
732 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
733 para_env=para_env, context=context)
734 CALL cp_cfm_create(c_tilde, tmp_fm_struct)
735 CALL cp_cfm_create(grad_ctilde, tmp_fm_struct)
736 CALL cp_cfm_create(gct_old, tmp_fm_struct)
737 CALL cp_cfm_create(skc, tmp_fm_struct)
738 CALL cp_fm_struct_release(tmp_fm_struct)
739 CALL cp_cfm_set_all(c_tilde, czero)
740 CALL cp_cfm_set_all(gct_old, czero)
741 CALL cp_cfm_set_all(skc, czero)
742
743 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nstate, &
744 para_env=para_env, context=context)
745 CALL cp_cfm_create(vl, tmp_fm_struct)
746 CALL cp_cfm_set_all(vl, czero)
747 CALL cp_fm_struct_release(tmp_fm_struct)
748
749 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nextra, &
750 para_env=para_env, context=context)
751 CALL cp_fm_create(id_nextra, tmp_fm_struct)
752 CALL cp_cfm_create(ctrans_lambda, tmp_fm_struct)
753 CALL cp_fm_struct_release(tmp_fm_struct)
754 CALL cp_cfm_set_all(ctrans_lambda, czero)
755 CALL cp_fm_set_all(id_nextra, 0.0_dp, 1.0_dp)
756
757 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nstate, &
758 para_env=para_env, context=context)
759 CALL cp_cfm_create(ul, tmp_fm_struct)
760 CALL cp_fm_struct_release(tmp_fm_struct)
761 CALL cp_cfm_set_all(ul, czero)
762
763 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nao, ncol_global=nmo, &
764 para_env=para_env, context=context)
765 CALL cp_fm_create(vectors_all, tmp_fm_struct)
766 CALL cp_fm_struct_release(tmp_fm_struct)
767 ALLOCATE (tmp_mat(nao, nstate))
768 CALL cp_fm_get_submatrix(vectors, tmp_mat)
769 CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, 1, nao, nstate)
770 DEALLOCATE (tmp_mat)
771 ALLOCATE (tmp_mat(nao, norextra))
772 CALL cp_fm_get_submatrix(vectors_2, tmp_mat)
773 CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, nstate + 1, nao, norextra)
774 DEALLOCATE (tmp_mat)
775
776 ! initialize c_tilde
777 SELECT CASE (icinit)
778 CASE (1) ! random coefficients
779 !WRITE (*, *) "RANDOM INITIAL GUESS FOR C"
780 CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
781 CALL cp_fm_init_random(tmp_fm, nextra)
782 CALL ortho_vectors(tmp_fm)
783 c_tilde%local_data = tmp_fm%local_data
784 CALL cp_fm_release(tmp_fm)
785 ALLOCATE (tmp_cmat(northo, nextra))
786 CALL cp_cfm_get_submatrix(c_tilde, tmp_cmat)
787 CALL cp_cfm_set_submatrix(v, tmp_cmat, nocc + 1, nocc + 1, northo, nextra)
788 DEALLOCATE (tmp_cmat)
789 CASE (2) ! MO based coeffs
790 CALL parallel_gemm("T", "N", nmo, ndummy, nao, 1.0_dp, vectors_all, mos_guess, 0.0_dp, matrix_v_all)
791 ALLOCATE (tmp_arr(nmo))
792 ALLOCATE (tmp_mat(nmo, ndummy))
793 ALLOCATE (tmp_mat_1(nmo, nstate))
794 ! normalize matrix_V_all
795 CALL cp_fm_get_submatrix(matrix_v_all, tmp_mat)
796 DO istate = 1, ndummy
797 tmp_arr(:) = tmp_mat(:, istate)
798 norm = norm2(tmp_arr)
799 tmp_arr(:) = tmp_arr(:)/norm
800 tmp_mat(:, istate) = tmp_arr(:)
801 END DO
802 CALL cp_fm_set_submatrix(matrix_v_all, tmp_mat)
803 CALL cp_fm_get_submatrix(matrix_v_all, tmp_mat_1, 1, 1, nmo, nstate)
804 CALL cp_fm_set_submatrix(matrix_v, tmp_mat_1)
805 DEALLOCATE (tmp_arr, tmp_mat, tmp_mat_1)
806 CALL cp_fm_to_cfm(msourcer=matrix_v, mtarget=v)
807 ALLOCATE (tmp_mat(northo, ndummy))
808 ALLOCATE (tmp_mat_1(northo, nextra))
809 CALL cp_fm_get_submatrix(matrix_v_all, tmp_mat, nocc + 1, 1, northo, ndummy)
810 ALLOCATE (tmp_arr(ndummy))
811 tmp_arr = 0.0_dp
812 DO istate = 1, ndummy
813 tmp_arr(istate) = norm2(tmp_mat(:, istate))
814 END DO
815 ! find edfs
816 DO istate = 1, nextra
817 iloc = maxloc(tmp_arr)
818 tmp_mat_1(:, istate) = tmp_mat(:, iloc(1))
819 tmp_arr(iloc(1)) = 0.0_dp
820 END DO
821
822 DEALLOCATE (tmp_arr, tmp_mat)
823
824 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
825 para_env=para_env, context=context)
826 CALL cp_fm_create(tmp_fm, tmp_fm_struct)
827 CALL cp_fm_struct_release(tmp_fm_struct)
828 CALL cp_fm_set_submatrix(tmp_fm, tmp_mat_1)
829 DEALLOCATE (tmp_mat_1)
830 CALL ortho_vectors(tmp_fm)
831 CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
832 CALL cp_fm_release(tmp_fm)
833 ! initialize U
834 IF (do_u_guess_mo) THEN
835 ALLOCATE (tmp_cmat(nocc, nstate))
836 CALL cp_cfm_get_submatrix(v, tmp_cmat, 1, 1, nocc, nstate)
837 CALL cp_cfm_set_submatrix(u, tmp_cmat, 1, 1, nocc, nstate)
838 DEALLOCATE (tmp_cmat)
839 ALLOCATE (tmp_cmat(northo, nstate))
840 CALL cp_cfm_get_submatrix(v, tmp_cmat, nocc + 1, 1, northo, nstate)
841 CALL cp_cfm_set_submatrix(vl, tmp_cmat, 1, 1, northo, nstate)
842 DEALLOCATE (tmp_cmat)
843 CALL parallel_gemm("C", "N", nextra, nstate, northo, cone, c_tilde, vl, czero, ul)
844 ALLOCATE (tmp_cmat(nextra, nstate))
845 CALL cp_cfm_get_submatrix(ul, tmp_cmat, 1, 1, nextra, nstate)
846 CALL cp_cfm_set_submatrix(u, tmp_cmat, nocc + 1, 1, nextra, nstate)
847 DEALLOCATE (tmp_cmat)
848 CALL cp_fm_create(tmp_fm, u%matrix_struct)
849 tmp_fm%local_data = real(u%local_data, kind=dp)
850 CALL ortho_vectors(tmp_fm)
851 CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=u)
852 CALL cp_fm_release(tmp_fm)
853 CALL cp_cfm_to_fm(u, matrix_u)
854 END IF
855 ! reevaluate V
856 ALLOCATE (tmp_cmat(nocc, nstate))
857 CALL cp_cfm_get_submatrix(u, tmp_cmat, 1, 1, nocc, nstate)
858 CALL cp_cfm_set_submatrix(v, tmp_cmat, 1, 1, nocc, nstate)
859 DEALLOCATE (tmp_cmat)
860 ALLOCATE (tmp_cmat(nextra, nstate))
861 CALL cp_cfm_get_submatrix(u, tmp_cmat, nocc + 1, 1, nextra, nstate)
862 CALL cp_cfm_set_submatrix(ul, tmp_cmat, 1, 1, nextra, nstate)
863 DEALLOCATE (tmp_cmat)
864 CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
865 ALLOCATE (tmp_cmat(northo, nstate))
866 CALL cp_cfm_get_submatrix(vl, tmp_cmat)
867 CALL cp_cfm_set_submatrix(v, tmp_cmat, nocc + 1, 1, northo, nstate)
868 DEALLOCATE (tmp_cmat)
869 END SELECT
870 ELSE
871 DO idim = 1, dim2
872 CALL cp_cfm_create(zij_0(idim), zij(1, 1)%matrix_struct)
873 CALL cp_cfm_to_cfm(c_zij(idim), zij_0(idim))
874 END DO
875 CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
876 CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
877 END IF
878
879 unit_nr = -1
880 IF (rmat%matrix_struct%para_env%is_source()) THEN
882 WRITE (unit_nr, '(T4,A )') " Localization by combined Jacobi rotations and Non-Linear Conjugate Gradient"
883 END IF
884
885 norm2_old = 1.0e30_dp
886 ds_min = 1.0_dp
887 new_direction = .true.
888 iter = 0
889 line_searches = 0
890 line_search_count = 0
891 tol = 1.0e+20_dp
892 mintol = 1.0e+10_dp
893 miniter = 0
894
895 !IF (nextra > 0) WRITE(*,*) 'random_guess, MO_guess, U_guess, conjugate_gradient: ', &
896 ! do_cinit_random, do_cinit_mo, do_U_guess_mo, do_cg
897
898 ! do conjugate gradient until converged
899 DO WHILE (iter < max_iter)
900 iter = iter + 1
901 !WRITE(*,*) 'iter = ', iter
902 t1 = m_walltime()
903
904 IF (iter > 1) THEN
905 ! comput U
906 CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
907 CALL cp_cfm_create(tmp_cfm_2, zij(1, 1)%matrix_struct)
908 IF (para_env%num_pe == 1) THEN
909 CALL jacobi_rotations_serial_1(weights, c_zij, 1, tmp_cfm_2, tol_out=tol)
910 ELSE
911 CALL jacobi_rot_para_1(weights, c_zij, para_env, 1, tmp_cfm_2, tol_out=tol)
912 END IF
913 CALL parallel_gemm('N', 'N', nstate, nstate, nstate, cone, u, tmp_cfm_2, czero, tmp_cfm)
914 CALL cp_cfm_to_cfm(tmp_cfm, u)
915 CALL cp_cfm_release(tmp_cfm)
916 CALL cp_cfm_release(tmp_cfm_2)
917 END IF
918
919 IF (nextra > 0) THEN
920 ALLOCATE (tmp_cmat(nextra, nstate))
921 CALL cp_cfm_get_submatrix(u, tmp_cmat, nocc + 1, 1, nextra, nstate)
922 CALL cp_cfm_set_submatrix(ul, tmp_cmat)
923 DEALLOCATE (tmp_cmat)
924 IF (iter > 1) THEN
925 ! orthonormalize c_tilde
926 CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
927 tmp_fm%local_data = real(c_tilde%local_data, kind=dp)
928 CALL ortho_vectors(tmp_fm)
929 CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
930 CALL cp_fm_release(tmp_fm)
931
932 ALLOCATE (tmp_cmat(nocc, nstate))
933 CALL cp_cfm_get_submatrix(u, tmp_cmat, 1, 1, nocc, nstate)
934 CALL cp_cfm_set_submatrix(v, tmp_cmat, 1, 1, nocc, nstate)
935 DEALLOCATE (tmp_cmat)
936 CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
937 ALLOCATE (tmp_cmat(northo, nstate))
938 CALL cp_cfm_get_submatrix(vl, tmp_cmat)
939 CALL cp_cfm_set_submatrix(v, tmp_cmat, nocc + 1, 1, northo, nstate)
940 DEALLOCATE (tmp_cmat)
941 END IF
942
943 ! reset if new_direction
944 IF (new_direction .AND. mod(line_searches, 20) == 5) THEN
945 CALL cp_cfm_set_all(skc, czero)
946 CALL cp_cfm_set_all(gct_old, czero)
947 norm2_old = 1.0e30_dp
948 END IF
949
950 CALL cp_cfm_create(tmp_cfm, v%matrix_struct)
951 CALL cp_cfm_to_cfm(v, tmp_cfm)
952 CALL cp_cfm_create(tmp_cfm_1, v%matrix_struct)
953 ndummy = nmo
954 ELSE
955 CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
956 CALL cp_cfm_to_cfm(u, tmp_cfm)
957 CALL cp_cfm_create(tmp_cfm_1, zij(1, 1)%matrix_struct)
958 ndummy = nstate
959 END IF
960 ! update z_ij
961 DO idim = 1, dim2
962 ! 'tmp_cfm_1 = zij_0*tmp_cfm'
963 CALL parallel_gemm("N", "N", ndummy, nstate, ndummy, cone, zij_0(idim), &
964 tmp_cfm, czero, tmp_cfm_1)
965 ! 'c_zij = tmp_cfm_dagg*tmp_cfm_1'
966 CALL parallel_gemm("C", "N", nstate, nstate, ndummy, cone, tmp_cfm, tmp_cfm_1, &
967 czero, c_zij(idim))
968 END DO
969 CALL cp_cfm_release(tmp_cfm)
970 CALL cp_cfm_release(tmp_cfm_1)
971 ! compute spread
972 DO istate = 1, nstate
973 spread_ii = 0.0_dp
974 DO idim = 1, dim2
975 CALL cp_cfm_get_element(c_zij(idim), istate, istate, mzii)
976 spread_ii = spread_ii + weights(idim)* &
977 abs(mzii)**2/twopi/twopi
978 matrix_zii(istate, idim) = mzii
979 END DO
980 !WRITE(*,*) 'spread_ii', spread_ii
981 sum_spread(istate) = spread_ii
982 END DO
983 CALL c_zij(1)%matrix_struct%para_env%sum(spread_ii)
984 spread_sum = accurate_sum(sum_spread)
985
986 IF (nextra > 0) THEN
987 ! update c_tilde
988 CALL cp_cfm_set_all(zdiag, czero)
989 CALL cp_cfm_set_all(grad_ctilde, czero)
990 CALL cp_cfm_create(tmp_cfm, v%matrix_struct)
991 CALL cp_cfm_set_all(tmp_cfm, czero)
992 CALL cp_cfm_create(tmp_cfm_1, v%matrix_struct)
993 CALL cp_cfm_set_all(tmp_cfm_1, czero)
994 ALLOCATE (tmp_cmat(northo, nstate))
995 DO idim = 1, dim2
996 weight = weights(idim)
997 arr_zii = matrix_zii(:, idim)
998 ! tmp_cfm = zij_0*V
999 CALL parallel_gemm("N", "N", nmo, nstate, nmo, cone, &
1000 zij_0(idim), v, czero, tmp_cfm)
1001 ! tmp_cfm = tmp_cfm*diag_zij_dagg
1002 CALL cp_cfm_column_scale(tmp_cfm, conjg(arr_zii))
1003 ! tmp_cfm_1 = tmp_cfm*U_dagg
1004 CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
1005 u, czero, tmp_cfm_1)
1006 CALL cp_cfm_scale(weight, tmp_cfm_1)
1007 ! zdiag = zdiag + tmp_cfm_1'
1008 CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
1009
1010 ! tmp_cfm = zij_0_dagg*V
1011 CALL parallel_gemm("C", "N", nmo, nstate, nmo, cone, &
1012 zij_0(idim), v, czero, tmp_cfm)
1013
1014 ! tmp_cfm = tmp_cfm*diag_zij
1015 CALL cp_cfm_column_scale(tmp_cfm, arr_zii)
1016 ! tmp_cfm_1 = tmp_cfm*U_dagg
1017 CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
1018 u, czero, tmp_cfm_1)
1019 CALL cp_cfm_scale(weight, tmp_cfm_1)
1020 ! zdiag = zdiag + tmp_cfm_1'
1021 CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
1022 END DO ! idim
1023 CALL cp_cfm_release(tmp_cfm)
1024 CALL cp_cfm_release(tmp_cfm_1)
1025 DEALLOCATE (tmp_cmat)
1026 ALLOCATE (tmp_cmat(northo, nextra))
1027 CALL cp_cfm_get_submatrix(zdiag, tmp_cmat, nocc + 1, nocc + 1, &
1028 northo, nextra, .false.)
1029 ! 'grad_ctilde'
1030 CALL cp_cfm_set_submatrix(grad_ctilde, tmp_cmat)
1031 DEALLOCATE (tmp_cmat)
1032 ! ctrans_lambda = c_tilde_dagg*grad_ctilde
1033 CALL parallel_gemm("C", "N", nextra, nextra, northo, cone, c_tilde, grad_ctilde, czero, ctrans_lambda)
1034 !WRITE(*,*) "norm(ctrans_lambda) = ", cp_cfm_norm(ctrans_lambda, "F")
1035 ! 'grad_ctilde = - c_tilde*ctrans_lambda + grad_ctilde'
1036 CALL parallel_gemm("N", "N", northo, nextra, nextra, -cone, c_tilde, ctrans_lambda, cone, grad_ctilde)
1037 END IF ! nextra > 0
1038
1039 ! tolerance
1040 IF (nextra > 0) THEN
1041 tolc = 0.0_dp
1042 CALL cp_fm_create(tmp_fm, grad_ctilde%matrix_struct)
1043 CALL cp_cfm_to_fm(grad_ctilde, tmp_fm)
1044 CALL cp_fm_maxabsval(tmp_fm, tolc)
1045 CALL cp_fm_release(tmp_fm)
1046 !WRITE(*,*) 'tolc = ', tolc
1047 tol = tol + tolc
1048 END IF
1049 !WRITE(*,*) 'tol = ', tol
1050
1051 IF (nextra > 0) THEN
1052 !WRITE(*,*) 'new_direction: ', new_direction
1053 IF (new_direction) THEN
1054 line_searches = line_searches + 1
1055 IF (mintol > tol) THEN
1056 mintol = tol
1057 miniter = iter
1058 END IF
1059
1060 IF (unit_nr > 0 .AND. modulo(iter, out_each) == 0) THEN
1061 sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1062 avg_spread_ii = sum_spread_ii/nstate
1063 WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1064 "Iteration", "Avg. Spread_ii", "Tolerance"
1065 WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1066 iter, avg_spread_ii, tol
1067 CALL m_flush(unit_nr)
1068 END IF
1069 IF (tol < eps_localization) EXIT
1070
1071 IF (do_cg) THEN
1072 cnorm2_gct = czero
1073 cnorm2_gct_cross = czero
1074 CALL cp_cfm_trace(grad_ctilde, gct_old, cnorm2_gct_cross)
1075 norm2_gct_cross = real(cnorm2_gct_cross, kind=dp)
1076 gct_old%local_data = grad_ctilde%local_data
1077 CALL cp_cfm_trace(grad_ctilde, gct_old, cnorm2_gct)
1078 norm2_gct = real(cnorm2_gct, kind=dp)
1079 ! compute beta_pr
1080 beta_pr = (norm2_gct - norm2_gct_cross)/norm2_old
1081 norm2_old = norm2_gct
1082 beta = max(0.0_dp, beta_pr)
1083 !WRITE(*,*) 'beta = ', beta
1084 ! compute skc / ska = beta * skc / ska + grad_ctilde / G
1085 CALL cp_cfm_scale(beta, skc)
1086 CALL cp_cfm_scale_and_add(cone, skc, cone, gct_old)
1087 CALL cp_cfm_trace(skc, gct_old, cnorm2_gct_cross)
1088 norm2_gct_cross = real(cnorm2_gct_cross, kind=dp)
1089 IF (norm2_gct_cross <= 0.0_dp) THEN ! back to steepest ascent
1090 CALL cp_cfm_scale_and_add(czero, skc, cone, gct_old)
1091 END IF
1092 ELSE
1093 CALL cp_cfm_scale_and_add(czero, skc, cone, grad_ctilde)
1094 END IF
1095 line_search_count = 0
1096 END IF
1097
1098 line_search_count = line_search_count + 1
1099 !WRITE(*,*) 'line_search_count = ', line_search_count
1100 energy(line_search_count) = spread_sum
1101
1102 ! gold line search
1103 new_direction = .false.
1104 IF (line_search_count == 1) THEN
1105 lsl = 1
1106 lsr = 0
1107 lsm = 1
1108 pos(1) = 0.0_dp
1109 pos(2) = ds_min/gold_sec
1110 ds = pos(2)
1111 ELSE
1112 IF (line_search_count == 50) THEN
1113 IF (abs(energy(line_search_count) - energy(line_search_count - 1)) < 1.0e-4_dp) THEN
1114 cpwarn("Line search failed to converge properly")
1115 ds_min = 0.1_dp
1116 new_direction = .true.
1117 ds = pos(line_search_count)
1118 line_search_count = 0
1119 ELSE
1120 cpabort("No. of line searches exceeds 50")
1121 END IF
1122 ELSE
1123 IF (lsr == 0) THEN
1124 IF (energy(line_search_count - 1) > energy(line_search_count)) THEN
1125 lsr = line_search_count
1126 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
1127 ELSE
1128 lsl = lsm
1129 lsm = line_search_count
1130 pos(line_search_count + 1) = pos(line_search_count)/gold_sec
1131 END IF
1132 ELSE
1133 IF (pos(line_search_count) < pos(lsm)) THEN
1134 IF (energy(line_search_count) > energy(lsm)) THEN
1135 lsr = lsm
1136 lsm = line_search_count
1137 ELSE
1138 lsl = line_search_count
1139 END IF
1140 ELSE
1141 IF (energy(line_search_count) > energy(lsm)) THEN
1142 lsl = lsm
1143 lsm = line_search_count
1144 ELSE
1145 lsr = line_search_count
1146 END IF
1147 END IF
1148 IF (pos(lsr) - pos(lsm) > pos(lsm) - pos(lsl)) THEN
1149 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
1150 ELSE
1151 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
1152 END IF
1153 IF ((pos(lsr) - pos(lsl)) < 1.0e-3_dp*pos(lsr)) THEN
1154 new_direction = .true.
1155 END IF
1156 END IF ! lsr .eq. 0
1157 END IF ! line_search_count .eq. 50
1158 ! now go to the suggested point
1159 ds = pos(line_search_count + 1) - pos(line_search_count)
1160 !WRITE(*,*) 'lsl, lsr, lsm, ds = ', lsl, lsr, lsm, ds
1161 IF ((abs(ds) < 1.0e-10_dp) .AND. (lsl == 1)) THEN
1162 new_direction = .true.
1163 ds_min = 0.5_dp/alpha
1164 ELSEIF (abs(ds) > 10.0_dp) THEN
1165 new_direction = .true.
1166 ds_min = 0.5_dp/alpha
1167 ELSE
1168 ds_min = pos(line_search_count + 1)
1169 END IF
1170 END IF ! first step
1171 ! 'c_tilde = c_tilde + d*skc'
1172 CALL cp_cfm_scale(ds, skc)
1173 CALL cp_cfm_scale_and_add(cone, c_tilde, cone, skc)
1174 ELSE
1175 IF (mintol > tol) THEN
1176 mintol = tol
1177 miniter = iter
1178 END IF
1179 IF (unit_nr > 0 .AND. modulo(iter, out_each) == 0) THEN
1180 sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1181 avg_spread_ii = sum_spread_ii/nstate
1182 WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1183 "Iteration", "Avg. Spread_ii", "Tolerance"
1184 WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1185 iter, avg_spread_ii, tol
1186 CALL m_flush(unit_nr)
1187 END IF
1188 IF (tol < eps_localization) EXIT
1189 END IF ! nextra > 0
1190
1191 END DO ! iteration
1192
1193 IF ((unit_nr > 0) .AND. (iter == max_iter)) THEN
1194 WRITE (unit_nr, '(T4,A,T4,A)') "Min. Itr.", "Min. Tol."
1195 WRITE (unit_nr, '(T4,I7,T4,E12.4)') miniter, mintol
1196 CALL m_flush(unit_nr)
1197 END IF
1198
1199 CALL cp_cfm_to_fm(u, matrix_u)
1200
1201 IF (nextra > 0) THEN
1202 rmat%local_data = real(v%local_data, kind=dp)
1203 CALL rotate_orbitals_edf(rmat, vectors_all, vectors)
1204
1205 CALL cp_cfm_release(c_tilde)
1206 CALL cp_cfm_release(grad_ctilde)
1207 CALL cp_cfm_release(gct_old)
1208 CALL cp_cfm_release(skc)
1209 CALL cp_cfm_release(ul)
1210 CALL cp_cfm_release(zdiag)
1211 CALL cp_cfm_release(ctrans_lambda)
1212 CALL cp_fm_release(id_nextra)
1213 CALL cp_fm_release(vectors_all)
1214 CALL cp_cfm_release(v)
1215 CALL cp_fm_release(matrix_v)
1216 CALL cp_fm_release(matrix_v_all)
1217 CALL cp_cfm_release(vl)
1218 DEALLOCATE (arr_zii)
1219 ELSE
1220 rmat%local_data = matrix_u%local_data
1221 CALL rotate_orbitals(rmat, vectors)
1222 END IF
1223 DO idim = 1, dim2
1224 CALL cp_cfm_release(zij_0(idim))
1225 END DO
1226 DEALLOCATE (zij_0)
1227
1228 DO idim = 1, dim2
1229 zij(1, idim)%local_data = real(c_zij(idim)%local_data, dp)
1230 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
1231 CALL cp_cfm_release(c_zij(idim))
1232 END DO
1233 DEALLOCATE (c_zij)
1234 CALL cp_fm_release(rmat)
1235 CALL cp_cfm_release(u)
1236 CALL cp_fm_release(matrix_u)
1237 DEALLOCATE (matrix_zii, sum_spread)
1238
1239 CALL timestop(handle)
1240
1241 END SUBROUTINE jacobi_cg_edf_ls
1242
1243! **************************************************************************************************
1244!> \brief ...
1245!> \param vmatrix ...
1246! **************************************************************************************************
1247 SUBROUTINE ortho_vectors(vmatrix)
1248
1249 TYPE(cp_fm_type), INTENT(IN) :: vmatrix
1250
1251 CHARACTER(LEN=*), PARAMETER :: routinen = 'ortho_vectors'
1252
1253 INTEGER :: handle, n, ncol
1254 TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
1255 TYPE(cp_fm_type) :: overlap_vv
1256
1257 CALL timeset(routinen, handle)
1258
1259 NULLIFY (fm_struct_tmp)
1260
1261 CALL cp_fm_get_info(matrix=vmatrix, nrow_global=n, ncol_global=ncol)
1262
1263 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ncol, ncol_global=ncol, &
1264 para_env=vmatrix%matrix_struct%para_env, &
1265 context=vmatrix%matrix_struct%context)
1266 CALL cp_fm_create(overlap_vv, fm_struct_tmp, "overlap_vv")
1267 CALL cp_fm_struct_release(fm_struct_tmp)
1268
1269 CALL parallel_gemm('T', 'N', ncol, ncol, n, 1.0_dp, vmatrix, vmatrix, 0.0_dp, overlap_vv)
1270 CALL cp_fm_cholesky_decompose(overlap_vv)
1271 CALL cp_fm_triangular_multiply(overlap_vv, vmatrix, n_cols=ncol, side='R', invert_tr=.true.)
1272
1273 CALL cp_fm_release(overlap_vv)
1274
1275 CALL timestop(handle)
1276
1277 END SUBROUTINE ortho_vectors
1278
1279! **************************************************************************************************
1280!> \brief ...
1281!> \param istate ...
1282!> \param jstate ...
1283!> \param st ...
1284!> \param ct ...
1285!> \param zij ...
1286! **************************************************************************************************
1287 SUBROUTINE rotate_zij(istate, jstate, st, ct, zij)
1288 INTEGER, INTENT(IN) :: istate, jstate
1289 REAL(kind=dp), INTENT(IN) :: st, ct
1290 TYPE(cp_cfm_type) :: zij(:)
1291
1292 INTEGER :: id
1293
1294! Locals
1295
1296 DO id = 1, SIZE(zij, 1)
1297 CALL cp_cfm_rot_cols(zij(id), istate, jstate, ct, st)
1298 CALL cp_cfm_rot_rows(zij(id), istate, jstate, ct, st)
1299 END DO
1300
1301 END SUBROUTINE rotate_zij
1302! **************************************************************************************************
1303!> \brief ...
1304!> \param istate ...
1305!> \param jstate ...
1306!> \param st ...
1307!> \param ct ...
1308!> \param rmat ...
1309! **************************************************************************************************
1310 SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat)
1311 INTEGER, INTENT(IN) :: istate, jstate
1312 REAL(kind=dp), INTENT(IN) :: st, ct
1313 TYPE(cp_cfm_type), INTENT(IN) :: rmat
1314
1315 CALL cp_cfm_rot_cols(rmat, istate, jstate, ct, st)
1316
1317 END SUBROUTINE rotate_rmat
1318! **************************************************************************************************
1319!> \brief ...
1320!> \param mii ...
1321!> \param mjj ...
1322!> \param mij ...
1323!> \param weights ...
1324!> \param theta ...
1325!> \param grad_ij ...
1326!> \param step ...
1327! **************************************************************************************************
1328 SUBROUTINE get_angle(mii, mjj, mij, weights, theta, grad_ij, step)
1329 COMPLEX(KIND=dp), POINTER :: mii(:), mjj(:), mij(:)
1330 REAL(kind=dp), INTENT(IN) :: weights(:)
1331 REAL(kind=dp), INTENT(OUT) :: theta
1332 REAL(kind=dp), INTENT(IN), OPTIONAL :: grad_ij, step
1333
1334 COMPLEX(KIND=dp) :: z11, z12, z22
1335 INTEGER :: dim_m, idim
1336 REAL(kind=dp) :: a12, b12, d2, ratio
1337
1338 a12 = 0.0_dp
1339 b12 = 0.0_dp
1340 dim_m = SIZE(mii)
1341 DO idim = 1, dim_m
1342 z11 = mii(idim)
1343 z22 = mjj(idim)
1344 z12 = mij(idim)
1345 a12 = a12 + weights(idim)*real(conjg(z12)*(z11 - z22), kind=dp)
1346 b12 = b12 + weights(idim)*real((z12*conjg(z12) - &
1347 0.25_dp*(z11 - z22)*(conjg(z11) - conjg(z22))), kind=dp)
1348 END DO
1349 IF (abs(b12) > 1.e-10_dp) THEN
1350 ratio = -a12/b12
1351 theta = 0.25_dp*atan(ratio)
1352 ELSEIF (abs(b12) < 1.e-10_dp) THEN
1353 b12 = 0.0_dp
1354 theta = 0.0_dp
1355 ELSE
1356 theta = 0.25_dp*pi
1357 END IF
1358 IF (PRESENT(grad_ij)) theta = theta + step*grad_ij
1359! Check second derivative info
1360 d2 = a12*sin(4._dp*theta) - b12*cos(4._dp*theta)
1361 IF (d2 <= 0._dp) THEN ! go to the maximum, not the minimum
1362 IF (theta > 0.0_dp) THEN ! make theta as small as possible
1363 theta = theta - 0.25_dp*pi
1364 ELSE
1365 theta = theta + 0.25_dp*pi
1366 END IF
1367 END IF
1368 END SUBROUTINE get_angle
1369! **************************************************************************************************
1370!> \brief ...
1371!> \param zij ...
1372!> \param weights ...
1373!> \param tolerance ...
1374!> \param grad ...
1375! **************************************************************************************************
1376 SUBROUTINE check_tolerance(zij, weights, tolerance, grad)
1377 TYPE(cp_cfm_type) :: zij(:)
1378 REAL(kind=dp), INTENT(IN) :: weights(:)
1379 REAL(kind=dp), INTENT(OUT) :: tolerance
1380 TYPE(cp_fm_type), INTENT(OUT), OPTIONAL :: grad
1381
1382 CHARACTER(len=*), PARAMETER :: routinen = 'check_tolerance'
1383
1384 INTEGER :: handle
1385 TYPE(cp_fm_type) :: force
1386
1387 CALL timeset(routinen, handle)
1388
1389! compute gradient at t=0
1390
1391 CALL cp_fm_create(force, zij(1)%matrix_struct)
1392 CALL cp_fm_set_all(force, 0._dp)
1393 CALL grad_at_0(zij, weights, force)
1394 CALL cp_fm_maxabsval(force, tolerance)
1395 IF (PRESENT(grad)) CALL cp_fm_to_fm(force, grad)
1396 CALL cp_fm_release(force)
1397
1398 CALL timestop(handle)
1399
1400 END SUBROUTINE check_tolerance
1401
1402! **************************************************************************************************
1403!> \brief ...
1404!> \param rmat ...
1405!> \param vectors ...
1406! **************************************************************************************************
1407 SUBROUTINE rotate_orbitals(rmat, vectors)
1408 TYPE(cp_fm_type), INTENT(IN) :: rmat, vectors
1409
1410 INTEGER :: k, n
1411 TYPE(cp_fm_type) :: wf
1412
1413 CALL cp_fm_create(wf, vectors%matrix_struct)
1414 CALL cp_fm_get_info(vectors, nrow_global=n, ncol_global=k)
1415 CALL parallel_gemm("N", "N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf)
1416 CALL cp_fm_to_fm(wf, vectors)
1417 CALL cp_fm_release(wf)
1418 END SUBROUTINE rotate_orbitals
1419
1420! **************************************************************************************************
1421!> \brief ...
1422!> \param rmat ...
1423!> \param vectors ...
1424! **************************************************************************************************
1425 SUBROUTINE rotate_orbitals_cfm(rmat, vectors)
1426 TYPE(cp_cfm_type), INTENT(IN) :: rmat, vectors
1427
1428 INTEGER :: k, n
1429 TYPE(cp_cfm_type) :: wf
1430
1431 CALL cp_cfm_create(wf, vectors%matrix_struct)
1432 CALL cp_cfm_get_info(vectors, nrow_global=n, ncol_global=k)
1433 CALL parallel_gemm("N", "N", n, k, k, z_one, vectors, rmat, z_zero, wf)
1434 CALL cp_cfm_to_cfm(wf, vectors)
1435 CALL cp_cfm_release(wf)
1436 END SUBROUTINE rotate_orbitals_cfm
1437
1438! **************************************************************************************************
1439!> \brief ...
1440!> \param rmat ...
1441!> \param vec_all ...
1442!> \param vectors ...
1443! **************************************************************************************************
1444 SUBROUTINE rotate_orbitals_edf(rmat, vec_all, vectors)
1445 TYPE(cp_fm_type), INTENT(IN) :: rmat, vec_all, vectors
1446
1447 INTEGER :: k, l, n
1448 TYPE(cp_fm_type) :: wf
1449
1450 CALL cp_fm_create(wf, vectors%matrix_struct)
1451 CALL cp_fm_get_info(vec_all, nrow_global=n, ncol_global=k)
1452 CALL cp_fm_get_info(rmat, ncol_global=l)
1453
1454 CALL parallel_gemm("N", "N", n, l, k, 1.0_dp, vec_all, rmat, 0.0_dp, wf)
1455 CALL cp_fm_to_fm(wf, vectors)
1456 CALL cp_fm_release(wf)
1457 END SUBROUTINE rotate_orbitals_edf
1458! **************************************************************************************************
1459!> \brief ...
1460!> \param diag ...
1461!> \param weights ...
1462!> \param matrix ...
1463!> \param ndim ...
1464! **************************************************************************************************
1465 SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim)
1466 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1467 REAL(kind=dp), INTENT(IN) :: weights(:)
1468 TYPE(cp_fm_type), INTENT(IN) :: matrix
1469 INTEGER, INTENT(IN) :: ndim
1470
1471 COMPLEX(KIND=dp) :: zii, zjj
1472 INTEGER :: idim, istate, jstate, ncol_local, &
1473 nrow_global, nrow_local
1474 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1475 REAL(kind=dp) :: gradsq_ij
1476
1477 CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1478 ncol_local=ncol_local, nrow_global=nrow_global, &
1479 row_indices=row_indices, col_indices=col_indices)
1480
1481 DO istate = 1, nrow_local
1482 DO jstate = 1, ncol_local
1483! get real and imaginary parts
1484 gradsq_ij = 0.0_dp
1485 DO idim = 1, ndim
1486 zii = diag(row_indices(istate), idim)
1487 zjj = diag(col_indices(jstate), idim)
1488 gradsq_ij = gradsq_ij + weights(idim)* &
1489 4.0_dp*real((conjg(zii)*zii + conjg(zjj)*zjj), kind=dp)
1490 END DO
1491 matrix%local_data(istate, jstate) = gradsq_ij
1492 END DO
1493 END DO
1494 END SUBROUTINE gradsq_at_0
1495! **************************************************************************************************
1496!> \brief ...
1497!> \param matrix_p ...
1498!> \param weights ...
1499!> \param matrix ...
1500! **************************************************************************************************
1501 SUBROUTINE grad_at_0(matrix_p, weights, matrix)
1502 TYPE(cp_cfm_type) :: matrix_p(:)
1503 REAL(kind=dp), INTENT(IN) :: weights(:)
1504 TYPE(cp_fm_type), INTENT(IN) :: matrix
1505
1506 COMPLEX(KIND=dp) :: zii, zij, zjj
1507 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1508 INTEGER :: dim_m, idim, istate, jstate, ncol_local, &
1509 nrow_global, nrow_local
1510 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1511 REAL(kind=dp) :: grad_ij
1512
1513 NULLIFY (diag)
1514 CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1515 ncol_local=ncol_local, nrow_global=nrow_global, &
1516 row_indices=row_indices, col_indices=col_indices)
1517 dim_m = SIZE(matrix_p, 1)
1518 ALLOCATE (diag(nrow_global, dim_m))
1519
1520 DO idim = 1, dim_m
1521 DO istate = 1, nrow_global
1522 CALL cp_cfm_get_element(matrix_p(idim), istate, istate, diag(istate, idim))
1523 END DO
1524 END DO
1525
1526 DO istate = 1, nrow_local
1527 DO jstate = 1, ncol_local
1528! get real and imaginary parts
1529 grad_ij = 0.0_dp
1530 DO idim = 1, dim_m
1531 zii = diag(row_indices(istate), idim)
1532 zjj = diag(col_indices(jstate), idim)
1533 zij = matrix_p(idim)%local_data(istate, jstate)
1534 grad_ij = grad_ij + weights(idim)* &
1535 REAL(4.0_dp*conjg(zij)*(zjj - zii), dp)
1536 END DO
1537 matrix%local_data(istate, jstate) = grad_ij
1538 END DO
1539 END DO
1540 DEALLOCATE (diag)
1541 END SUBROUTINE grad_at_0
1542
1543! return energy and maximum gradient in the current point
1544! **************************************************************************************************
1545!> \brief ...
1546!> \param weights ...
1547!> \param zij ...
1548!> \param tolerance ...
1549!> \param value ...
1550! **************************************************************************************************
1551 SUBROUTINE check_tolerance_new(weights, zij, tolerance, value)
1552 REAL(kind=dp), INTENT(IN) :: weights(:)
1553 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :)
1554 REAL(kind=dp) :: tolerance, value
1555
1556 COMPLEX(KIND=dp) :: kii, kij, kjj
1557 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1558 INTEGER :: idim, istate, jstate, ncol_local, &
1559 nrow_global, nrow_local
1560 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1561 REAL(kind=dp) :: grad_ij, ra, rb
1562
1563 NULLIFY (diag)
1564 CALL cp_fm_get_info(zij(1, 1), nrow_local=nrow_local, &
1565 ncol_local=ncol_local, nrow_global=nrow_global, &
1566 row_indices=row_indices, col_indices=col_indices)
1567 ALLOCATE (diag(nrow_global, SIZE(zij, 2)))
1568 value = 0.0_dp
1569 DO idim = 1, SIZE(zij, 2)
1570 DO istate = 1, nrow_global
1571 CALL cp_fm_get_element(zij(1, idim), istate, istate, ra)
1572 CALL cp_fm_get_element(zij(2, idim), istate, istate, rb)
1573 diag(istate, idim) = cmplx(ra, rb, dp)
1574 value = value + weights(idim) - weights(idim)*abs(diag(istate, idim))**2
1575 END DO
1576 END DO
1577 tolerance = 0.0_dp
1578 DO istate = 1, nrow_local
1579 DO jstate = 1, ncol_local
1580 grad_ij = 0.0_dp
1581 DO idim = 1, SIZE(zij, 2)
1582 kii = diag(row_indices(istate), idim)
1583 kjj = diag(col_indices(jstate), idim)
1584 ra = zij(1, idim)%local_data(istate, jstate)
1585 rb = zij(2, idim)%local_data(istate, jstate)
1586 kij = cmplx(ra, rb, dp)
1587 grad_ij = grad_ij + weights(idim)* &
1588 REAL(4.0_dp*conjg(kij)*(kjj - kii), dp)
1589 END DO
1590 tolerance = max(abs(grad_ij), tolerance)
1591 END DO
1592 END DO
1593 CALL zij(1, 1)%matrix_struct%para_env%max(tolerance)
1594
1595 DEALLOCATE (diag)
1596
1597 END SUBROUTINE check_tolerance_new
1598
1599! **************************************************************************************************
1600!> \brief yet another crazy try, computes the angles needed to rotate the orbitals first
1601!> and rotates them all at the same time (hoping for the best of course)
1602!> \param weights ...
1603!> \param zij ...
1604!> \param vectors ...
1605!> \param max_iter ...
1606!> \param max_crazy_angle ...
1607!> \param crazy_scale ...
1608!> \param crazy_use_diag ...
1609!> \param eps_localization ...
1610!> \param iterations ...
1611!> \param converged ...
1612! **************************************************************************************************
1613 SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, &
1614 eps_localization, iterations, converged)
1615 REAL(kind=dp), INTENT(IN) :: weights(:)
1616 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1617 INTEGER, INTENT(IN) :: max_iter
1618 REAL(kind=dp), INTENT(IN) :: max_crazy_angle
1619 REAL(kind=dp) :: crazy_scale
1620 LOGICAL :: crazy_use_diag
1621 REAL(kind=dp), INTENT(IN) :: eps_localization
1622 INTEGER :: iterations
1623 LOGICAL, INTENT(out), OPTIONAL :: converged
1624
1625 CHARACTER(len=*), PARAMETER :: routinen = 'crazy_rotations'
1626 COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1627 czero = (0.0_dp, 0.0_dp)
1628
1629 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1630 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1631 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
1632 INTEGER :: dim2, handle, i, icol, idim, irow, &
1633 method, ncol_global, ncol_local, &
1634 norder, nrow_global, nrow_local, &
1635 nsquare, unit_nr
1636 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1637 LOGICAL :: do_emd
1638 REAL(kind=dp) :: eps_exp, limit_crazy_angle, maxeval, &
1639 norm, ra, rb, theta, tolerance, value
1640 REAL(kind=dp), DIMENSION(:), POINTER :: evals
1641 TYPE(cp_cfm_type) :: cmat_a, cmat_r, cmat_t1
1642 TYPE(cp_fm_type) :: mat_r, mat_t, mat_theta, mat_u
1643
1644 CALL timeset(routinen, handle)
1645 NULLIFY (row_indices, col_indices)
1646 CALL cp_fm_get_info(zij(1, 1), nrow_global=nrow_global, &
1647 ncol_global=ncol_global, &
1648 row_indices=row_indices, col_indices=col_indices, &
1649 nrow_local=nrow_local, ncol_local=ncol_local)
1650
1651 limit_crazy_angle = max_crazy_angle
1652
1653 NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj)
1654 dim2 = SIZE(zij, 2)
1655 ALLOCATE (diag_z(nrow_global, dim2))
1656 ALLOCATE (evals(nrow_global))
1657 ALLOCATE (evals_exp(nrow_global))
1658
1659 CALL cp_cfm_create(cmat_a, zij(1, 1)%matrix_struct)
1660 CALL cp_cfm_create(cmat_r, zij(1, 1)%matrix_struct)
1661 CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1662
1663 CALL cp_fm_create(mat_u, zij(1, 1)%matrix_struct)
1664 CALL cp_fm_create(mat_t, zij(1, 1)%matrix_struct)
1665 CALL cp_fm_create(mat_r, zij(1, 1)%matrix_struct)
1666
1667 CALL cp_fm_create(mat_theta, zij(1, 1)%matrix_struct)
1668
1669 CALL cp_fm_set_all(mat_r, 0.0_dp, 1.0_dp)
1670 CALL cp_fm_set_all(mat_t, 0.0_dp)
1671 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
1672 DO idim = 1, dim2
1673 CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(1, idim))
1674 CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(2, idim))
1675 END DO
1676 CALL cp_fm_syevd(mat_t, mat_u, evals)
1677 DO idim = 1, dim2
1678 ! rotate z's
1679 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1680 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1681 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1682 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1683 END DO
1684 ! collect rotation matrix
1685 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1686 CALL cp_fm_to_fm(mat_t, mat_r)
1687
1688 unit_nr = -1
1689 IF (cmat_a%matrix_struct%para_env%is_source()) THEN
1691 WRITE (unit_nr, '(T2,A7,A6,1X,A20,A12,A12,A12)') &
1692 "CRAZY| ", "Iter", "value ", "gradient", "Max. eval", "limit"
1693 END IF
1694
1695 iterations = 0
1696 tolerance = 1.0_dp
1697
1698 DO
1699 iterations = iterations + 1
1700 DO idim = 1, dim2
1701 DO i = 1, nrow_global
1702 CALL cp_fm_get_element(zij(1, idim), i, i, ra)
1703 CALL cp_fm_get_element(zij(2, idim), i, i, rb)
1704 diag_z(i, idim) = cmplx(ra, rb, dp)
1705 END DO
1706 END DO
1707 DO irow = 1, nrow_local
1708 DO icol = 1, ncol_local
1709 DO idim = 1, dim2
1710 ra = zij(1, idim)%local_data(irow, icol)
1711 rb = zij(2, idim)%local_data(irow, icol)
1712 mij(idim) = cmplx(ra, rb, dp)
1713 mii(idim) = diag_z(row_indices(irow), idim)
1714 mjj(idim) = diag_z(col_indices(icol), idim)
1715 END DO
1716 IF (row_indices(irow) /= col_indices(icol)) THEN
1717 CALL get_angle(mii, mjj, mij, weights, theta)
1718 theta = crazy_scale*theta
1719 IF (theta > limit_crazy_angle) theta = limit_crazy_angle
1720 IF (theta < -limit_crazy_angle) theta = -limit_crazy_angle
1721 IF (crazy_use_diag) THEN
1722 cmat_a%local_data(irow, icol) = -cmplx(0.0_dp, theta, dp)
1723 ELSE
1724 mat_theta%local_data(irow, icol) = -theta
1725 END IF
1726 ELSE
1727 IF (crazy_use_diag) THEN
1728 cmat_a%local_data(irow, icol) = czero
1729 ELSE
1730 mat_theta%local_data(irow, icol) = 0.0_dp
1731 END IF
1732 END IF
1733 END DO
1734 END DO
1735
1736 ! construct rotation matrix U based on A using diagonalization
1737 ! alternatively, exp based on repeated squaring could be faster
1738 IF (crazy_use_diag) THEN
1739 CALL cp_cfm_heevd(cmat_a, cmat_r, evals)
1740 maxeval = maxval(abs(evals))
1741 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1742 CALL cp_cfm_to_cfm(cmat_r, cmat_t1)
1743 CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1744 CALL parallel_gemm('N', 'C', nrow_global, nrow_global, nrow_global, cone, &
1745 cmat_t1, cmat_r, czero, cmat_a)
1746 mat_u%local_data = real(cmat_a%local_data, kind=dp) ! U is a real matrix
1747 ELSE
1748 do_emd = .false.
1749 method = 2
1750 eps_exp = 1.0_dp*epsilon(eps_exp)
1751 CALL cp_fm_maxabsrownorm(mat_theta, norm)
1752 maxeval = norm ! an upper bound
1753 CALL get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
1754 CALL exp_pade_real(mat_u, mat_theta, nsquare, norder)
1755 END IF
1756
1757 DO idim = 1, dim2
1758 ! rotate z's
1759 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1760 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1761 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1762 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1763 END DO
1764 ! collect rotation matrix
1765 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1766 CALL cp_fm_to_fm(mat_t, mat_r)
1767
1768 CALL check_tolerance_new(weights, zij, tolerance, value)
1769
1770 IF (unit_nr > 0) THEN
1771 WRITE (unit_nr, '(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') &
1772 "CRAZY| ", iterations, value, tolerance, maxeval, limit_crazy_angle
1773 CALL m_flush(unit_nr)
1774 END IF
1775 IF (tolerance < eps_localization .OR. iterations >= max_iter) EXIT
1776 END DO
1777
1778 IF (PRESENT(converged)) converged = (tolerance < eps_localization)
1779
1780 CALL cp_cfm_release(cmat_a)
1781 CALL cp_cfm_release(cmat_r)
1782 CALL cp_cfm_release(cmat_t1)
1783
1784 CALL cp_fm_release(mat_u)
1785 CALL cp_fm_release(mat_t)
1786 CALL cp_fm_release(mat_theta)
1787
1788 CALL rotate_orbitals(mat_r, vectors)
1789
1790 CALL cp_fm_release(mat_r)
1791 DEALLOCATE (evals_exp, evals, diag_z)
1792 DEALLOCATE (mii, mij, mjj)
1793
1794 CALL timestop(handle)
1795
1796 END SUBROUTINE crazy_rotations
1797
1798! **************************************************************************************************
1799!> \brief use the exponential parametrization as described in to perform a direct mini
1800!> Gerd Berghold et al. PRB 61 (15), pag. 10040 (2000)
1801!> none of the input is modified for the time being, just finds the rotations
1802!> that minimizes, and throws it away afterwards :-)
1803!> apart from being expensive and not cleaned, this works fine
1804!> useful to try different spread functionals
1805!> \param weights ...
1806!> \param zij ...
1807!> \param vectors ...
1808!> \param max_iter ...
1809!> \param eps_localization ...
1810!> \param iterations ...
1811! **************************************************************************************************
1812 SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
1813 REAL(kind=dp), INTENT(IN) :: weights(:)
1814 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1815 INTEGER, INTENT(IN) :: max_iter
1816 REAL(kind=dp), INTENT(IN) :: eps_localization
1817 INTEGER :: iterations
1818
1819 CHARACTER(len=*), PARAMETER :: routinen = 'direct_mini'
1820 COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1821 czero = (0.0_dp, 0.0_dp)
1822 REAL(kind=dp), PARAMETER :: gold_sec = 0.3819_dp
1823
1824 COMPLEX(KIND=dp) :: lk, ll, tmp
1825 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1826 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1827 INTEGER :: handle, i, icol, idim, irow, &
1828 line_search_count, line_searches, lsl, &
1829 lsm, lsr, n, ncol_local, ndim, &
1830 nrow_local, output_unit
1831 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1832 LOGICAL :: new_direction
1833 REAL(kind=dp) :: a, b, beta_pr, c, denom, ds, ds_min, fa, &
1834 fb, fc, nom, normg, normg_cross, &
1835 normg_old, npos, omega, tol, val, x0, &
1836 x1, xa, xb, xc
1837 REAL(kind=dp), DIMENSION(150) :: energy, grad, pos
1838 REAL(kind=dp), DIMENSION(:), POINTER :: evals, fval, fvald
1839 TYPE(cp_cfm_type) :: cmat_a, cmat_b, cmat_m, cmat_r, cmat_t1, &
1840 cmat_t2, cmat_u
1841 TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
1842 TYPE(cp_fm_type) :: matrix_a, matrix_g, matrix_g_old, &
1843 matrix_g_search, matrix_h, matrix_r, &
1844 matrix_t
1845
1846 NULLIFY (evals, evals_exp, diag_z, fval, fvald)
1847
1848 CALL timeset(routinen, handle)
1849 output_unit = cp_logger_get_default_io_unit()
1850
1851 n = zij(1, 1)%matrix_struct%nrow_global
1852 ndim = (SIZE(zij, 2))
1853
1854 IF (output_unit > 0) THEN
1855 WRITE (output_unit, '(T4,A )') "Localization by direct minimization of the functional; "
1856 WRITE (output_unit, '(T5,2A13,A20,A20,A10 )') " Line search ", " Iteration ", " Functional ", " Tolerance ", " ds Min "
1857 END IF
1858
1859 ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n))
1860 ALLOCATE (c_zij(ndim))
1861
1862 ! create the three complex matrices Z
1863 DO idim = 1, ndim
1864 CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
1865 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
1866 zij(2, idim)%local_data, dp)
1867 END DO
1868
1869 CALL cp_fm_create(matrix_a, zij(1, 1)%matrix_struct)
1870 CALL cp_fm_create(matrix_g, zij(1, 1)%matrix_struct)
1871 CALL cp_fm_create(matrix_t, zij(1, 1)%matrix_struct)
1872 CALL cp_fm_create(matrix_h, zij(1, 1)%matrix_struct)
1873 CALL cp_fm_create(matrix_g_search, zij(1, 1)%matrix_struct)
1874 CALL cp_fm_create(matrix_g_old, zij(1, 1)%matrix_struct)
1875 CALL cp_fm_create(matrix_r, zij(1, 1)%matrix_struct)
1876 CALL cp_fm_set_all(matrix_r, 0.0_dp, 1.0_dp)
1877
1878 CALL cp_fm_set_all(matrix_a, 0.0_dp)
1879! CALL cp_fm_init_random ( matrix_A )
1880
1881 CALL cp_cfm_create(cmat_a, zij(1, 1)%matrix_struct)
1882 CALL cp_cfm_create(cmat_u, zij(1, 1)%matrix_struct)
1883 CALL cp_cfm_create(cmat_r, zij(1, 1)%matrix_struct)
1884 CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1885 CALL cp_cfm_create(cmat_t2, zij(1, 1)%matrix_struct)
1886 CALL cp_cfm_create(cmat_b, zij(1, 1)%matrix_struct)
1887 CALL cp_cfm_create(cmat_m, zij(1, 1)%matrix_struct)
1888
1889 CALL cp_cfm_get_info(cmat_b, nrow_local=nrow_local, ncol_local=ncol_local, &
1890 row_indices=row_indices, col_indices=col_indices)
1891
1892 CALL cp_fm_set_all(matrix_g_old, 0.0_dp)
1893 CALL cp_fm_set_all(matrix_g_search, 0.0_dp)
1894 normg_old = 1.0e30_dp
1895 ds_min = 1.0_dp
1896 new_direction = .true.
1897 iterations = 0
1898 line_searches = 0
1899 line_search_count = 0
1900 DO
1901 iterations = iterations + 1
1902 ! compute U,R,evals given A
1903 cmat_a%local_data = cmplx(0.0_dp, matrix_a%local_data, dp) ! cmat_A is hermitian, evals are reals
1904 CALL cp_cfm_heevd(cmat_a, cmat_r, evals)
1905 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1906 CALL cp_cfm_to_cfm(cmat_r, cmat_t1)
1907 CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1908 CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_u)
1909 cmat_u%local_data = real(cmat_u%local_data, kind=dp) ! enforce numerics, U is a real matrix
1910
1911 IF (new_direction .AND. mod(line_searches, 20) == 5) THEN ! reset with A .eq. 0
1912 DO idim = 1, ndim
1913 CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1)
1914 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_u, cmat_t1, czero, c_zij(idim))
1915 END DO
1916 ! collect rotation matrix
1917 matrix_h%local_data = real(cmat_u%local_data, kind=dp)
1918 CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
1919 CALL cp_fm_to_fm(matrix_t, matrix_r)
1920
1921 CALL cp_cfm_set_all(cmat_u, czero, cone)
1922 CALL cp_cfm_set_all(cmat_r, czero, cone)
1923 CALL cp_cfm_set_all(cmat_a, czero)
1924 CALL cp_fm_set_all(matrix_a, 0.0_dp)
1925 evals(:) = 0.0_dp
1926 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1927 CALL cp_fm_set_all(matrix_g_old, 0.0_dp)
1928 CALL cp_fm_set_all(matrix_g_search, 0.0_dp)
1929 normg_old = 1.0e30_dp
1930 END IF
1931
1932 ! compute Omega and M
1933 CALL cp_cfm_set_all(cmat_m, czero)
1934 omega = 0.0_dp
1935 DO idim = 1, ndim
1936 CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1) ! t1=ZU
1937 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_u, cmat_t1, czero, cmat_t2) ! t2=(U^T)ZU
1938 DO i = 1, n
1939 CALL cp_cfm_get_element(cmat_t2, i, i, diag_z(i, idim))
1940 SELECT CASE (2) ! allows for selection of different spread functionals
1941 CASE (1)
1942 fval(i) = -weights(idim)*log(abs(diag_z(i, idim))**2)
1943 fvald(i) = -weights(idim)/(abs(diag_z(i, idim))**2)
1944 CASE (2) ! corresponds to the jacobi setup
1945 fval(i) = weights(idim) - weights(idim)*abs(diag_z(i, idim))**2
1946 fvald(i) = -weights(idim)
1947 END SELECT
1948 omega = omega + fval(i)
1949 END DO
1950 DO icol = 1, ncol_local
1951 DO irow = 1, nrow_local
1952 tmp = cmat_t1%local_data(irow, icol)*conjg(diag_z(col_indices(icol), idim))
1953 cmat_m%local_data(irow, icol) = cmat_m%local_data(irow, icol) &
1954 + 4.0_dp*fvald(col_indices(icol))*real(tmp, kind=dp)
1955 END DO
1956 END DO
1957 END DO
1958
1959 ! compute Hessian diagonal approximation for the preconditioner
1960 IF (.true.) THEN
1961 CALL gradsq_at_0(diag_z, weights, matrix_h, ndim)
1962 ELSE
1963 CALL cp_fm_set_all(matrix_h, 1.0_dp)
1964 END IF
1965
1966 ! compute B
1967 DO icol = 1, ncol_local
1968 DO irow = 1, nrow_local
1969 ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow))
1970 lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol))
1971 IF (abs(ll - lk) < 0.5_dp) THEN ! use a series expansion to avoid loss of precision
1972 tmp = 1.0_dp
1973 cmat_b%local_data(irow, icol) = 0.0_dp
1974 DO i = 1, 16
1975 cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol) + tmp
1976 tmp = tmp*(ll - lk)/(i + 1)
1977 END DO
1978 cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol)*exp(lk)
1979 ELSE
1980 cmat_b%local_data(irow, icol) = (exp(lk) - exp(ll))/(lk - ll)
1981 END IF
1982 END DO
1983 END DO
1984 ! compute gradient matrix_G
1985
1986 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_m, cmat_r, czero, cmat_t1) ! t1=(M^T)(R^T)
1987 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_r, cmat_t1, czero, cmat_t2) ! t2=(R)t1
1988 CALL cp_cfm_schur_product(cmat_t2, cmat_b, cmat_t1)
1989 CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_t2)
1990 CALL parallel_gemm('N', 'N', n, n, n, cone, cmat_r, cmat_t2, czero, cmat_t1)
1991 matrix_g%local_data = real(cmat_t1%local_data, kind=dp)
1992 CALL cp_fm_transpose(matrix_g, matrix_t)
1993 CALL cp_fm_scale_and_add(-1.0_dp, matrix_g, 1.0_dp, matrix_t)
1994 CALL cp_fm_maxabsval(matrix_g, tol)
1995
1996 ! from here on, minimizing technology
1997 IF (new_direction) THEN
1998 ! energy converged up to machine precision ?
1999 line_searches = line_searches + 1
2000 ! DO i=1,line_search_count
2001 ! write(15,*) pos(i),energy(i)
2002 ! ENDDO
2003 ! write(15,*) ""
2004 ! CALL m_flush(15)
2005 !write(16,*) evals(:)
2006 !write(17,*) matrix_A%local_data(:,:)
2007 !write(18,*) matrix_G%local_data(:,:)
2008 IF (output_unit > 0) THEN
2009 WRITE (output_unit, '(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, iterations, omega, tol, ds_min
2010 CALL m_flush(output_unit)
2011 END IF
2012 IF (tol < eps_localization .OR. iterations > max_iter) EXIT
2013
2014 IF (.true.) THEN ! do conjugate gradient CG
2015 CALL cp_fm_trace(matrix_g, matrix_g_old, normg_cross)
2016 normg_cross = normg_cross*0.5_dp ! takes into account the fact that A is antisymmetric
2017 ! apply the preconditioner
2018 DO icol = 1, ncol_local
2019 DO irow = 1, nrow_local
2020 matrix_g_old%local_data(irow, icol) = matrix_g%local_data(irow, icol)/matrix_h%local_data(irow, icol)
2021 END DO
2022 END DO
2023 CALL cp_fm_trace(matrix_g, matrix_g_old, normg)
2024 normg = normg*0.5_dp
2025 beta_pr = (normg - normg_cross)/normg_old
2026 normg_old = normg
2027 beta_pr = max(beta_pr, 0.0_dp)
2028 CALL cp_fm_scale_and_add(beta_pr, matrix_g_search, -1.0_dp, matrix_g_old)
2029 CALL cp_fm_trace(matrix_g_search, matrix_g_old, normg_cross)
2030 IF (normg_cross >= 0) THEN ! back to SD
2031 IF (matrix_a%matrix_struct%para_env%is_source()) THEN
2032 WRITE (cp_logger_get_default_unit_nr(), *) "!"
2033 END IF
2034 beta_pr = 0.0_dp
2035 CALL cp_fm_scale_and_add(beta_pr, matrix_g_search, -1.0_dp, matrix_g_old)
2036 END IF
2037 ELSE ! SD
2038 CALL cp_fm_scale_and_add(0.0_dp, matrix_g_search, -1.0_dp, matrix_g)
2039 END IF
2040 ! ds_min=1.0E-4_dp
2041 line_search_count = 0
2042 END IF
2043 line_search_count = line_search_count + 1
2044 energy(line_search_count) = omega
2045
2046 ! line search section
2047 SELECT CASE (3)
2048 CASE (1) ! two point line search
2049 SELECT CASE (line_search_count)
2050 CASE (1)
2051 pos(1) = 0.0_dp
2052 pos(2) = ds_min
2053 CALL cp_fm_trace(matrix_g, matrix_g_search, grad(1))
2054 grad(1) = grad(1)/2.0_dp
2055 new_direction = .false.
2056 CASE (2)
2057 new_direction = .true.
2058 x0 = pos(1) ! 0.0_dp
2059 c = energy(1)
2060 b = grad(1)
2061 x1 = pos(2)
2062 a = (energy(2) - b*x1 - c)/(x1**2)
2063 IF (a <= 0.0_dp) a = 1.0e-15_dp
2064 npos = -b/(2.0_dp*a)
2065 val = a*npos**2 + b*npos + c
2066 IF (val < energy(1) .AND. val <= energy(2)) THEN
2067 ! we go to a minimum, but ...
2068 ! we take a guard against too large steps
2069 pos(3) = min(npos, maxval(pos(1:2))*4.0_dp)
2070 ELSE ! just take an extended step
2071 pos(3) = maxval(pos(1:2))*2.0_dp
2072 END IF
2073 END SELECT
2074 CASE (2) ! 3 point line search
2075 SELECT CASE (line_search_count)
2076 CASE (1)
2077 new_direction = .false.
2078 pos(1) = 0.0_dp
2079 pos(2) = ds_min*0.8_dp
2080 CASE (2)
2081 new_direction = .false.
2082 IF (energy(2) > energy(1)) THEN
2083 pos(3) = ds_min*0.7_dp
2084 ELSE
2085 pos(3) = ds_min*1.4_dp
2086 END IF
2087 CASE (3)
2088 new_direction = .true.
2089 xa = pos(1)
2090 xb = pos(2)
2091 xc = pos(3)
2092 fa = energy(1)
2093 fb = energy(2)
2094 fc = energy(3)
2095 nom = (xb - xa)**2*(fb - fc) - (xb - xc)**2*(fb - fa)
2096 denom = (xb - xa)*(fb - fc) - (xb - xc)*(fb - fa)
2097 IF (abs(denom) <= 1.0e-18_dp*max(abs(fb - fc), abs(fb - fa))) THEN
2098 npos = xb
2099 ELSE
2100 npos = xb - 0.5_dp*nom/denom ! position of the stationary point
2101 END IF
2102 val = (npos - xa)*(npos - xb)*fc/((xc - xa)*(xc - xb)) + &
2103 (npos - xb)*(npos - xc)*fa/((xa - xb)*(xa - xc)) + &
2104 (npos - xc)*(npos - xa)*fb/((xb - xc)*(xb - xa))
2105 IF (val < fa .AND. val <= fb .AND. val <= fc) THEN ! OK, we go to a minimum
2106 ! we take a guard against too large steps
2107 pos(4) = max(maxval(pos(1:3))*0.01_dp, &
2108 min(npos, maxval(pos(1:3))*4.0_dp))
2109 ELSE ! just take an extended step
2110 pos(4) = maxval(pos(1:3))*2.0_dp
2111 END IF
2112 END SELECT
2113 CASE (3) ! golden section hunt
2114 new_direction = .false.
2115 IF (line_search_count == 1) THEN
2116 lsl = 1
2117 lsr = 0
2118 lsm = 1
2119 pos(1) = 0.0_dp
2120 pos(2) = ds_min/gold_sec
2121 ELSE
2122 IF (line_search_count == 150) cpabort("Too many")
2123 IF (lsr == 0) THEN
2124 IF (energy(line_search_count - 1) < energy(line_search_count)) THEN
2125 lsr = line_search_count
2126 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
2127 ELSE
2128 lsl = lsm
2129 lsm = line_search_count
2130 pos(line_search_count + 1) = pos(line_search_count)/gold_sec
2131 END IF
2132 ELSE
2133 IF (pos(line_search_count) < pos(lsm)) THEN
2134 IF (energy(line_search_count) < energy(lsm)) THEN
2135 lsr = lsm
2136 lsm = line_search_count
2137 ELSE
2138 lsl = line_search_count
2139 END IF
2140 ELSE
2141 IF (energy(line_search_count) < energy(lsm)) THEN
2142 lsl = lsm
2143 lsm = line_search_count
2144 ELSE
2145 lsr = line_search_count
2146 END IF
2147 END IF
2148 IF (pos(lsr) - pos(lsm) > pos(lsm) - pos(lsl)) THEN
2149 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
2150 ELSE
2151 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
2152 END IF
2153 IF ((pos(lsr) - pos(lsl)) < 1.0e-3_dp*pos(lsr)) THEN
2154 new_direction = .true.
2155 END IF
2156 END IF ! lsr .eq. 0
2157 END IF ! first step
2158 END SELECT
2159 ! now go to the suggested point
2160 ds_min = pos(line_search_count + 1)
2161 ds = pos(line_search_count + 1) - pos(line_search_count)
2162 CALL cp_fm_scale_and_add(1.0_dp, matrix_a, ds, matrix_g_search)
2163 END DO
2164
2165 ! collect rotation matrix
2166 matrix_h%local_data = real(cmat_u%local_data, kind=dp)
2167 CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
2168 CALL cp_fm_to_fm(matrix_t, matrix_r)
2169 CALL rotate_orbitals(matrix_r, vectors)
2170 CALL cp_fm_release(matrix_r)
2171
2172 CALL cp_fm_release(matrix_a)
2173 CALL cp_fm_release(matrix_g)
2174 CALL cp_fm_release(matrix_h)
2175 CALL cp_fm_release(matrix_t)
2176 CALL cp_fm_release(matrix_g_search)
2177 CALL cp_fm_release(matrix_g_old)
2178 CALL cp_cfm_release(cmat_a)
2179 CALL cp_cfm_release(cmat_u)
2180 CALL cp_cfm_release(cmat_r)
2181 CALL cp_cfm_release(cmat_t1)
2182 CALL cp_cfm_release(cmat_t2)
2183 CALL cp_cfm_release(cmat_b)
2184 CALL cp_cfm_release(cmat_m)
2185
2186 DEALLOCATE (evals, evals_exp, fval, fvald)
2187
2188 DO idim = 1, SIZE(c_zij)
2189 zij(1, idim)%local_data = real(c_zij(idim)%local_data, dp)
2190 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
2191 CALL cp_cfm_release(c_zij(idim))
2192 END DO
2193 DEALLOCATE (c_zij)
2194 DEALLOCATE (diag_z)
2195
2196 CALL timestop(handle)
2197
2198 END SUBROUTINE direct_mini
2199
2200! **************************************************************************************************
2201!> \brief Parallel algorithm for jacobi rotations
2202!> \param weights ...
2203!> \param zij ...
2204!> \param vectors ...
2205!> \param para_env ...
2206!> \param max_iter ...
2207!> \param eps_localization ...
2208!> \param sweeps ...
2209!> \param out_each ...
2210!> \param target_time ...
2211!> \param start_time ...
2212!> \param restricted ...
2213!> \par History
2214!> use allgather for improved performance
2215!> \author MI (11.2009)
2216! **************************************************************************************************
2217 SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
2218 sweeps, out_each, target_time, start_time, restricted)
2219
2220 REAL(kind=dp), INTENT(IN) :: weights(:)
2221 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
2222 TYPE(mp_para_env_type), POINTER :: para_env
2223 INTEGER, INTENT(IN) :: max_iter
2224 REAL(kind=dp), INTENT(IN) :: eps_localization
2225 INTEGER :: sweeps
2226 INTEGER, INTENT(IN) :: out_each
2227 REAL(dp) :: target_time, start_time
2228 INTEGER :: restricted
2229
2230 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rot_para'
2231
2232 INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2233 nblock, nblock_max, ns_me, nstate, &
2234 output_unit
2235 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2236 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_im, z_ij_loc_re
2237 REAL(kind=dp) :: xstate
2238 TYPE(cp_fm_type) :: rmat
2239 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2240
2241 CALL timeset(routinen, handle)
2242
2243 output_unit = cp_logger_get_default_io_unit()
2244
2245 NULLIFY (cz_ij_loc)
2246
2247 dim2 = SIZE(zij, 2)
2248
2249 CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
2250 CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
2251
2252 CALL cp_fm_get_info(rmat, nrow_global=nstate)
2253
2254 IF (restricted > 0) THEN
2255 IF (output_unit > 0) THEN
2256 WRITE (output_unit, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
2257 END IF
2258 nstate = nstate - restricted
2259 END IF
2260
2261 ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2262 xstate = real(nstate, dp)/real(para_env%num_pe, dp)
2263 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2264 DO ip = 1, para_env%num_pe
2265 ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2266 ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2267 END DO
2268 nblock_max = 0
2269 DO ip = 0, para_env%num_pe - 1
2270 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2271 nblock_max = max(nblock_max, nblock)
2272 END DO
2273
2274 ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2275 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2276 ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2277 ALLOCATE (cz_ij_loc(dim2))
2278 DO idim = 1, dim2
2279 DO ip = 0, para_env%num_pe - 1
2280 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2281 CALL cp_fm_get_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2282 CALL cp_fm_get_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2283 IF (para_env%mepos == ip) THEN
2284 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, nblock))
2285 DO i = 1, nblock
2286 DO j = 1, nstate
2287 cz_ij_loc(idim)%c_array(j, i) = cmplx(z_ij_loc_re(j, i), z_ij_loc_im(j, i), dp)
2288 END DO
2289 END DO
2290 END IF
2291 END DO ! ip
2292 END DO
2293 DEALLOCATE (z_ij_loc_re)
2294 DEALLOCATE (z_ij_loc_im)
2295
2296 ALLOCATE (rotmat(nstate, 2*nblock_max))
2297
2298 CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, ns_bound, &
2299 cz_ij_loc, rotmat, output_unit, eps_localization=eps_localization, &
2300 target_time=target_time, start_time=start_time)
2301
2302 ilow1 = ns_bound(para_env%mepos, 1)
2303 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2304 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2305 ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2306 DO idim = 1, dim2
2307 DO ip = 0, para_env%num_pe - 1
2308 z_ij_loc_re = 0.0_dp
2309 z_ij_loc_im = 0.0_dp
2310 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2311 IF (ip == para_env%mepos) THEN
2312 ns_me = nblock
2313 DO i = 1, ns_me
2314 ii = ilow1 + i - 1
2315 DO j = 1, nstate
2316 z_ij_loc_re(j, i) = real(cz_ij_loc(idim)%c_array(j, i), dp)
2317 z_ij_loc_im(j, i) = aimag(cz_ij_loc(idim)%c_array(j, i))
2318 END DO
2319 END DO
2320 END IF
2321 CALL para_env%bcast(z_ij_loc_re, ip)
2322 CALL para_env%bcast(z_ij_loc_im, ip)
2323 CALL cp_fm_set_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2324 CALL cp_fm_set_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2325 END DO ! ip
2326 END DO
2327
2328 DO ip = 0, para_env%num_pe - 1
2329 z_ij_loc_re = 0.0_dp
2330 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2331 IF (ip == para_env%mepos) THEN
2332 ns_me = nblock
2333 DO i = 1, ns_me
2334 ii = ilow1 + i - 1
2335 DO j = 1, nstate
2336 z_ij_loc_re(j, i) = rotmat(j, i)
2337 END DO
2338 END DO
2339 END IF
2340 CALL para_env%bcast(z_ij_loc_re, ip)
2341 CALL cp_fm_set_submatrix(rmat, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2342 END DO
2343
2344 DEALLOCATE (z_ij_loc_re)
2345 DEALLOCATE (z_ij_loc_im)
2346 DO idim = 1, dim2
2347 DEALLOCATE (cz_ij_loc(idim)%c_array)
2348 END DO
2349 DEALLOCATE (cz_ij_loc)
2350
2351 CALL para_env%sync()
2352 CALL rotate_orbitals(rmat, vectors)
2353 CALL cp_fm_release(rmat)
2354
2355 DEALLOCATE (rotmat)
2356 DEALLOCATE (ns_bound)
2357
2358 CALL timestop(handle)
2359
2360 END SUBROUTINE jacobi_rot_para
2361
2362! **************************************************************************************************
2363!> \brief almost identical to 'jacobi_rot_para' but with different inout variables
2364!> \param weights ...
2365!> \param czij ...
2366!> \param para_env ...
2367!> \param max_iter ...
2368!> \param rmat ...
2369!> \param tol_out ...
2370!> \author Soumya Ghosh (08/21)
2371! **************************************************************************************************
2372 SUBROUTINE jacobi_rot_para_1(weights, czij, para_env, max_iter, rmat, tol_out)
2373
2374 REAL(kind=dp), INTENT(IN) :: weights(:)
2375 TYPE(cp_cfm_type), INTENT(IN) :: czij(:)
2376 TYPE(mp_para_env_type), POINTER :: para_env
2377 INTEGER, INTENT(IN) :: max_iter
2378 TYPE(cp_cfm_type), INTENT(IN) :: rmat
2379 REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2380
2381 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rot_para_1'
2382
2383 COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: czij_array
2384 INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2385 nblock, nblock_max, ns_me, nstate, &
2386 sweeps
2387 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2388 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_re
2389 REAL(kind=dp) :: xstate
2390 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2391
2392 CALL timeset(routinen, handle)
2393
2394 dim2 = SIZE(czij)
2395
2396 CALL cp_cfm_set_all(rmat, cmplx(0._dp, 0._dp, dp), cmplx(1._dp, 0._dp, dp))
2397
2398 CALL cp_cfm_get_info(rmat, nrow_global=nstate)
2399
2400 ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2401 xstate = real(nstate, dp)/real(para_env%num_pe, dp)
2402 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2403 DO ip = 1, para_env%num_pe
2404 ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2405 ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2406 END DO
2407 nblock_max = 0
2408 DO ip = 0, para_env%num_pe - 1
2409 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2410 nblock_max = max(nblock_max, nblock)
2411 END DO
2412
2413 ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2414 ALLOCATE (czij_array(nstate, nblock_max))
2415 ALLOCATE (cz_ij_loc(dim2))
2416 DO idim = 1, dim2
2417 DO ip = 0, para_env%num_pe - 1
2418 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2419 ! cfm --> allocatable
2420 CALL cp_cfm_get_submatrix(czij(idim), czij_array, 1, ns_bound(ip, 1), nstate, nblock)
2421 IF (para_env%mepos == ip) THEN
2422 ns_me = nblock
2423 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me))
2424 DO i = 1, ns_me
2425 DO j = 1, nstate
2426 cz_ij_loc(idim)%c_array(j, i) = czij_array(j, i)
2427 END DO
2428 END DO
2429 END IF
2430 END DO ! ip
2431 END DO
2432 DEALLOCATE (czij_array)
2433
2434 ALLOCATE (rotmat(nstate, 2*nblock_max))
2435
2436 CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, 1, dim2, nstate, nblock_max, ns_bound, &
2437 cz_ij_loc, rotmat, 0, tol_out=tol_out)
2438
2439 ilow1 = ns_bound(para_env%mepos, 1)
2440 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2441 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2442
2443 DO ip = 0, para_env%num_pe - 1
2444 z_ij_loc_re = 0.0_dp
2445 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2446 IF (ip == para_env%mepos) THEN
2447 ns_me = nblock
2448 DO i = 1, ns_me
2449 ii = ilow1 + i - 1
2450 DO j = 1, nstate
2451 z_ij_loc_re(j, i) = rotmat(j, i)
2452 END DO
2453 END DO
2454 END IF
2455 CALL para_env%bcast(z_ij_loc_re, ip)
2456 CALL cp_cfm_set_submatrix(rmat, cmplx(z_ij_loc_re, 0.0_dp, dp), 1, ns_bound(ip, 1), nstate, nblock)
2457 END DO
2458
2459 DEALLOCATE (z_ij_loc_re)
2460 DO idim = 1, dim2
2461 DEALLOCATE (cz_ij_loc(idim)%c_array)
2462 END DO
2463 DEALLOCATE (cz_ij_loc)
2464
2465 CALL para_env%sync()
2466
2467 DEALLOCATE (rotmat)
2468 DEALLOCATE (ns_bound)
2469
2470 CALL timestop(handle)
2471
2472 END SUBROUTINE jacobi_rot_para_1
2473
2474! **************************************************************************************************
2475!> \brief Parallel algorithm for jacobi rotations
2476!> \param weights ...
2477!> \param para_env ...
2478!> \param max_iter ...
2479!> \param sweeps ...
2480!> \param out_each ...
2481!> \param dim2 ...
2482!> \param nstate ...
2483!> \param nblock_max ...
2484!> \param ns_bound ...
2485!> \param cz_ij_loc ...
2486!> \param rotmat ...
2487!> \param output_unit ...
2488!> \param tol_out ...
2489!> \param eps_localization ...
2490!> \param target_time ...
2491!> \param start_time ...
2492!> \par History
2493!> split out to reuse with different input types
2494!> \author HF (05.2022)
2495! **************************************************************************************************
2496 SUBROUTINE jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, &
2497 ns_bound, cz_ij_loc, rotmat, output_unit, tol_out, eps_localization, target_time, start_time)
2498
2499 REAL(kind=dp), INTENT(IN) :: weights(:)
2500 TYPE(mp_para_env_type), POINTER :: para_env
2501 INTEGER, INTENT(IN) :: max_iter
2502 INTEGER, INTENT(OUT) :: sweeps
2503 INTEGER, INTENT(IN) :: out_each, dim2, nstate, nblock_max
2504 INTEGER, DIMENSION(0:, :), INTENT(IN) :: ns_bound
2505 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2506 REAL(dp), DIMENSION(:, :), INTENT(OUT) :: rotmat
2507 INTEGER, INTENT(IN) :: output_unit
2508 REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2509 REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_localization
2510 REAL(dp), OPTIONAL :: target_time, start_time
2511
2512 COMPLEX(KIND=dp) :: zi, zj
2513 COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: c_array_me, c_array_partner
2514 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
2515 INTEGER :: i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, ip, ip_has_i, &
2516 ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, iu1, iu2, iup1, &
2517 iup2, j, jj, jstate, k, kk, lsweep, n1, n2, npair, nperm, ns_me, ns_partner, &
2518 ns_recv_from, ns_recv_partner
2519 INTEGER, ALLOCATABLE, DIMENSION(:) :: rcount, rdispl
2520 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: list_pair
2521 LOGICAL :: should_stop
2522 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: gmat, rmat_loc, rmat_recv, rmat_send
2523 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: rmat_recv_all
2524 REAL(kind=dp) :: ct, func, gmax, grad, ri, rj, st, t1, &
2525 t2, theta, tolerance, zc, zr
2526 TYPE(set_c_1d_type), DIMENSION(:), POINTER :: zdiag_all, zdiag_me
2527 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: xyz_mix, xyz_mix_ns
2528
2529 NULLIFY (zdiag_all, zdiag_me)
2530 NULLIFY (xyz_mix, xyz_mix_ns)
2531 NULLIFY (mii, mij, mjj)
2532
2533 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
2534
2535 ALLOCATE (rcount(para_env%num_pe), stat=istat)
2536 ALLOCATE (rdispl(para_env%num_pe), stat=istat)
2537
2538 tolerance = 1.0e10_dp
2539 sweeps = 0
2540
2541 ! number of processor pairs and number of permutations
2542 npair = (para_env%num_pe + 1)/2
2543 nperm = para_env%num_pe - mod(para_env%num_pe + 1, 2)
2544 ALLOCATE (list_pair(2, npair))
2545
2546 ! initialize rotation matrix
2547 rotmat = 0.0_dp
2548 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2549 ii = i - ns_bound(para_env%mepos, 1) + 1
2550 rotmat(i, ii) = 1.0_dp
2551 END DO
2552
2553 ALLOCATE (xyz_mix(dim2))
2554 ALLOCATE (xyz_mix_ns(dim2))
2555 ALLOCATE (zdiag_me(dim2))
2556 ALLOCATE (zdiag_all(dim2))
2557
2558 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2559 IF (ns_me /= 0) THEN
2560 ALLOCATE (c_array_me(nstate, ns_me, dim2))
2561 DO idim = 1, dim2
2562 ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me))
2563 END DO
2564 ALLOCATE (gmat(nstate, ns_me))
2565 END IF
2566
2567 DO idim = 1, dim2
2568 ALLOCATE (zdiag_me(idim)%c_array(nblock_max))
2569 zdiag_me(idim)%c_array = z_zero
2570 ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max))
2571 zdiag_all(idim)%c_array = z_zero
2572 END DO
2573 ALLOCATE (rmat_recv(nblock_max*2, nblock_max))
2574 ALLOCATE (rmat_send(nblock_max*2, nblock_max))
2575
2576 ! buffer for message passing
2577 ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1))
2578
2579 IF (output_unit > 0) THEN
2580 WRITE (output_unit, '(T4,A )') " Localization by iterative distributed Jacobi rotation"
2581 WRITE (output_unit, '(T20,A12,T32, A22,T60, A12,A8 )') "Iteration", "Functional", "Tolerance", " Time "
2582 END IF
2583
2584 DO lsweep = 1, max_iter + 1
2585 sweeps = lsweep
2586 IF (sweeps == max_iter + 1) THEN
2587 IF (output_unit > 0) THEN
2588 WRITE (output_unit, *) ' LOCALIZATION! loop did not converge within the maximum number of iterations.'
2589 WRITE (output_unit, *) ' Present Max. gradient = ', tolerance
2590 END IF
2591 EXIT
2592 END IF
2593 t1 = m_walltime()
2594
2595 DO iperm = 1, nperm
2596
2597 ! fix partners for this permutation, and get the number of states
2598 CALL eberlein(iperm, para_env, list_pair)
2599 ip_partner = -1
2600 ns_partner = 0
2601 DO ipair = 1, npair
2602 IF (list_pair(1, ipair) == para_env%mepos) THEN
2603 ip_partner = list_pair(2, ipair)
2604 EXIT
2605 ELSE IF (list_pair(2, ipair) == para_env%mepos) THEN
2606 ip_partner = list_pair(1, ipair)
2607 EXIT
2608 END IF
2609 END DO
2610 IF (ip_partner >= 0) THEN
2611 ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1
2612 ELSE
2613 ns_partner = 0
2614 END IF
2615
2616 ! if there is a non-zero block connecting two partners, jacobi-sweep it.
2617 IF (ns_partner*ns_me /= 0) THEN
2618
2619 ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner))
2620 rmat_loc = 0.0_dp
2621 DO i = 1, ns_me + ns_partner
2622 rmat_loc(i, i) = 1.0_dp
2623 END DO
2624
2625 ALLOCATE (c_array_partner(nstate, ns_partner, dim2))
2626
2627 DO idim = 1, dim2
2628 ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner))
2629 DO i = 1, ns_me
2630 c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i)
2631 END DO
2632 END DO
2633
2634 CALL para_env%sendrecv(msgin=c_array_me, dest=ip_partner, &
2635 msgout=c_array_partner, source=ip_partner)
2636
2637 n1 = ns_me
2638 n2 = ns_partner
2639 ilow1 = ns_bound(para_env%mepos, 1)
2640 iup1 = ns_bound(para_env%mepos, 1) + n1 - 1
2641 ilow2 = ns_bound(ip_partner, 1)
2642 iup2 = ns_bound(ip_partner, 1) + n2 - 1
2643 IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1)) THEN
2644 il1 = 1
2645 iu1 = n1
2646 iu1 = n1
2647 il2 = 1 + n1
2648 iu2 = n1 + n2
2649 ELSE
2650 il1 = 1 + n2
2651 iu1 = n1 + n2
2652 iu1 = n1 + n2
2653 il2 = 1
2654 iu2 = n2
2655 END IF
2656
2657 DO idim = 1, dim2
2658 DO i = 1, n1
2659 xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim)
2660 xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim)
2661 END DO
2662 DO i = 1, n2
2663 xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim)
2664 xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim)
2665 END DO
2666 END DO
2667
2668 DO istate = 1, n1 + n2
2669 DO jstate = istate + 1, n1 + n2
2670 DO idim = 1, dim2
2671 mii(idim) = xyz_mix(idim)%c_array(istate, istate)
2672 mij(idim) = xyz_mix(idim)%c_array(istate, jstate)
2673 mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate)
2674 END DO
2675 CALL get_angle(mii, mjj, mij, weights, theta)
2676 st = sin(theta)
2677 ct = cos(theta)
2678 DO idim = 1, dim2
2679 DO i = 1, n1 + n2
2680 zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate)
2681 zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate)
2682 xyz_mix(idim)%c_array(i, istate) = zi
2683 xyz_mix(idim)%c_array(i, jstate) = zj
2684 END DO
2685 DO i = 1, n1 + n2
2686 zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i)
2687 zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i)
2688 xyz_mix(idim)%c_array(istate, i) = zi
2689 xyz_mix(idim)%c_array(jstate, i) = zj
2690 END DO
2691 END DO
2692
2693 DO i = 1, n1 + n2
2694 ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate)
2695 rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate)
2696 rmat_loc(i, istate) = ri
2697 rmat_loc(i, jstate) = rj
2698 END DO
2699 END DO
2700 END DO
2701
2702 k = nblock_max + 1
2703 CALL para_env%sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, &
2704 rotmat(1:nstate, k:k + n2 - 1), ip_partner)
2705
2706 IF (ilow1 < ilow2) THEN
2707 ! no longer compiles in official sdgb:
2708 !CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1, k), nstate, rmat_loc(1 + n1, 1), n1 + n2, 0.0_dp, gmat, nstate)
2709 ! probably inefficient:
2710 CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, rmat_loc(1 + n1:, 1:n1), &
2711 n2, 0.0_dp, gmat(:, :), nstate)
2712 CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(1:, 1:), &
2713 n1 + n2, 1.0_dp, gmat(:, :), nstate)
2714 ELSE
2715 CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, &
2716 rmat_loc(1:, n2 + 1:), n1 + n2, 0.0_dp, gmat(:, :), nstate)
2717 ! no longer compiles in official sdgb:
2718 !CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1, 1), nstate, rmat_loc(n2 + 1, n2 + 1), n1 + n2, 1.0_dp, gmat, nstate)
2719 ! probably inefficient:
2720 CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(n2 + 1:, n2 + 1:), &
2721 n1, 1.0_dp, gmat(:, :), nstate)
2722 END IF
2723
2724 CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1)
2725
2726 DO idim = 1, dim2
2727 DO i = 1, n1
2728 xyz_mix_ns(idim)%c_array(1:nstate, i) = z_zero
2729 END DO
2730
2731 DO istate = 1, n1
2732 DO jstate = 1, nstate
2733 DO i = 1, n2
2734 xyz_mix_ns(idim)%c_array(jstate, istate) = &
2735 xyz_mix_ns(idim)%c_array(jstate, istate) + &
2736 c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1)
2737 END DO
2738 END DO
2739 END DO
2740 DO istate = 1, n1
2741 DO jstate = 1, nstate
2742 DO i = 1, n1
2743 xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + &
2744 c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1)
2745 END DO
2746 END DO
2747 END DO
2748 END DO ! idim
2749
2750 DEALLOCATE (c_array_partner)
2751
2752 ELSE ! save my data
2753 DO idim = 1, dim2
2754 DO i = 1, ns_me
2755 xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i)
2756 END DO
2757 END DO
2758 END IF
2759
2760 DO idim = 1, dim2
2761 DO i = 1, ns_me
2762 cz_ij_loc(idim)%c_array(1:nstate, i) = z_zero
2763 END DO
2764 END DO
2765
2766 IF (ns_partner*ns_me /= 0) THEN
2767 ! transpose rotation matrix rmat_loc
2768 DO i = 1, ns_me + ns_partner
2769 DO j = i + 1, ns_me + ns_partner
2770 ri = rmat_loc(i, j)
2771 rmat_loc(i, j) = rmat_loc(j, i)
2772 rmat_loc(j, i) = ri
2773 END DO
2774 END DO
2775
2776 ! prepare for distribution
2777 DO i = 1, n1
2778 rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1)
2779 END DO
2780 ik = nblock_max
2781 DO i = 1, n2
2782 rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1)
2783 END DO
2784 ELSE
2785 rmat_send = 0.0_dp
2786 END IF
2787
2788 ! collect data from all tasks (this takes some significant time)
2789 CALL para_env%allgather(rmat_send, rmat_recv_all)
2790
2791 ! update blocks everywhere
2792 DO ip = 0, para_env%num_pe - 1
2793
2794 ip_recv_from = mod(para_env%mepos - ip + para_env%num_pe, para_env%num_pe)
2795 rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from)
2796
2797 ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1
2798
2799 IF (ns_me /= 0) THEN
2800 IF (ns_recv_from /= 0) THEN
2801 !look for the partner of ip_recv_from
2802 ip_recv_partner = -1
2803 ns_recv_partner = 0
2804 DO ipair = 1, npair
2805 IF (list_pair(1, ipair) == ip_recv_from) THEN
2806 ip_recv_partner = list_pair(2, ipair)
2807 EXIT
2808 ELSE IF (list_pair(2, ipair) == ip_recv_from) THEN
2809 ip_recv_partner = list_pair(1, ipair)
2810 EXIT
2811 END IF
2812 END DO
2813
2814 IF (ip_recv_partner >= 0) THEN
2815 ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1
2816 END IF
2817 IF (ns_recv_partner > 0) THEN
2818 il1 = ns_bound(para_env%mepos, 1)
2819 il_recv = ns_bound(ip_recv_from, 1)
2820 il_recv_partner = ns_bound(ip_recv_partner, 1)
2821 ik = nblock_max
2822
2823 DO idim = 1, dim2
2824 DO i = 1, ns_recv_from
2825 ii = il_recv + i - 1
2826 DO j = 1, ns_me
2827 jj = j
2828 DO k = 1, ns_recv_from
2829 kk = il_recv + k - 1
2830 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2831 rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2832 END DO
2833 END DO
2834 END DO
2835 DO i = 1, ns_recv_from
2836 ii = il_recv + i - 1
2837 DO j = 1, ns_me
2838 jj = j
2839 DO k = 1, ns_recv_partner
2840 kk = il_recv_partner + k - 1
2841 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2842 rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2843 END DO
2844 END DO
2845 END DO
2846 END DO ! idim
2847 ELSE
2848 il1 = ns_bound(para_env%mepos, 1)
2849 il_recv = ns_bound(ip_recv_from, 1)
2850 DO idim = 1, dim2
2851 DO j = 1, ns_me
2852 jj = j
2853 DO i = 1, ns_recv_from
2854 ii = il_recv + i - 1
2855 cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j)
2856 END DO
2857 END DO
2858 END DO ! idim
2859 END IF
2860 END IF
2861 END IF ! ns_me
2862 END DO ! ip
2863
2864 IF (ns_partner*ns_me /= 0) THEN
2865 DEALLOCATE (rmat_loc)
2866 DO idim = 1, dim2
2867 DEALLOCATE (xyz_mix(idim)%c_array)
2868 END DO
2869 END IF
2870
2871 END DO ! iperm
2872
2873 ! calculate the max gradient
2874 DO idim = 1, dim2
2875 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2876 ii = i - ns_bound(para_env%mepos, 1) + 1
2877 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2878 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2879 END DO
2880 rcount(:) = SIZE(zdiag_me(idim)%c_array)
2881 rdispl(1) = 0
2882 DO ip = 2, para_env%num_pe
2883 rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
2884 END DO
2885 ! collect all the diagonal elements in a replicated 1d array
2886 CALL para_env%allgatherv(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl)
2887 END DO
2888
2889 gmax = 0.0_dp
2890 DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2891 k = j - ns_bound(para_env%mepos, 1) + 1
2892 DO i = 1, j - 1
2893 ! find the location of the diagonal element (i,i)
2894 DO ip = 0, para_env%num_pe - 1
2895 IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2)) THEN
2896 ip_has_i = ip
2897 EXIT
2898 END IF
2899 END DO
2900 ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1
2901 ! mepos has the diagonal element (j,j), as well as the off diagonal (i,j)
2902 jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1
2903 grad = 0.0_dp
2904 DO idim = 1, dim2
2905 zi = zdiag_all(idim)%c_array(ii)
2906 zj = zdiag_all(idim)%c_array(jj)
2907 grad = grad + weights(idim)*real(4.0_dp*conjg(cz_ij_loc(idim)%c_array(i, k))*(zj - zi), dp)
2908 END DO
2909 gmax = max(gmax, abs(grad))
2910 END DO
2911 END DO
2912
2913 CALL para_env%max(gmax)
2914 tolerance = gmax
2915 IF (PRESENT(tol_out)) tol_out = tolerance
2916
2917 func = 0.0_dp
2918 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2919 k = i - ns_bound(para_env%mepos, 1) + 1
2920 DO idim = 1, dim2
2921 zr = real(cz_ij_loc(idim)%c_array(i, k), dp)
2922 zc = aimag(cz_ij_loc(idim)%c_array(i, k))
2923 func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/twopi/twopi
2924 END DO
2925 END DO
2926 CALL para_env%sum(func)
2927 t2 = m_walltime()
2928
2929 IF (output_unit > 0 .AND. modulo(sweeps, out_each) == 0) THEN
2930 WRITE (output_unit, '(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1
2931 CALL m_flush(output_unit)
2932 END IF
2933 IF (PRESENT(eps_localization)) THEN
2934 IF (tolerance < eps_localization) EXIT
2935 END IF
2936 IF (PRESENT(target_time) .AND. PRESENT(start_time)) THEN
2937 CALL external_control(should_stop, "LOC", target_time=target_time, start_time=start_time)
2938 IF (should_stop) EXIT
2939 END IF
2940
2941 END DO ! lsweep
2942
2943 ! buffer for message passing
2944 DEALLOCATE (rmat_recv_all)
2945
2946 DEALLOCATE (rmat_recv)
2947 DEALLOCATE (rmat_send)
2948 IF (ns_me > 0) THEN
2949 DEALLOCATE (c_array_me)
2950 END IF
2951 DO idim = 1, dim2
2952 DEALLOCATE (zdiag_me(idim)%c_array)
2953 DEALLOCATE (zdiag_all(idim)%c_array)
2954 END DO
2955 DEALLOCATE (zdiag_me)
2956 DEALLOCATE (zdiag_all)
2957 DEALLOCATE (xyz_mix)
2958 DO idim = 1, dim2
2959 IF (ns_me /= 0) THEN
2960 DEALLOCATE (xyz_mix_ns(idim)%c_array)
2961 END IF
2962 END DO
2963 DEALLOCATE (xyz_mix_ns)
2964 IF (ns_me /= 0) THEN
2965 DEALLOCATE (gmat)
2966 END IF
2967 DEALLOCATE (mii)
2968 DEALLOCATE (mij)
2969 DEALLOCATE (mjj)
2970 DEALLOCATE (list_pair)
2971
2972 END SUBROUTINE jacobi_rot_para_core
2973
2974! **************************************************************************************************
2975!> \brief ...
2976!> \param iperm ...
2977!> \param para_env ...
2978!> \param list_pair ...
2979! **************************************************************************************************
2980 SUBROUTINE eberlein(iperm, para_env, list_pair)
2981 INTEGER, INTENT(IN) :: iperm
2982 TYPE(mp_para_env_type), POINTER :: para_env
2983 INTEGER, DIMENSION(:, :) :: list_pair
2984
2985 INTEGER :: i, ii, jj, npair
2986
2987 npair = (para_env%num_pe + 1)/2
2988 IF (iperm == 1) THEN
2989!..set up initial ordering
2990 DO i = 0, para_env%num_pe - 1
2991 ii = ((i + 1) + 1)/2
2992 jj = mod((i + 1) + 1, 2) + 1
2993 list_pair(jj, ii) = i
2994 END DO
2995 IF (mod(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1
2996 ELSEIF (mod(iperm, 2) == 0) THEN
2997!..a type shift
2998 jj = list_pair(1, npair)
2999 DO i = npair, 3, -1
3000 list_pair(1, i) = list_pair(1, i - 1)
3001 END DO
3002 list_pair(1, 2) = list_pair(2, 1)
3003 list_pair(2, 1) = jj
3004 ELSE
3005!..b type shift
3006 jj = list_pair(2, 1)
3007 DO i = 1, npair - 1
3008 list_pair(2, i) = list_pair(2, i + 1)
3009 END DO
3010 list_pair(2, npair) = jj
3011 END IF
3012
3013 END SUBROUTINE eberlein
3014
3015! **************************************************************************************************
3016!> \brief ...
3017!> \param vectors ...
3018!> \param op_sm_set ...
3019!> \param zij_fm_set ...
3020! **************************************************************************************************
3021 SUBROUTINE zij_matrix(vectors, op_sm_set, zij_fm_set)
3022
3023 TYPE(cp_fm_type), INTENT(IN) :: vectors
3024 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: op_sm_set
3025 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: zij_fm_set
3026
3027 CHARACTER(len=*), PARAMETER :: routinen = 'zij_matrix'
3028
3029 INTEGER :: handle, i, j, nao, nmoloc
3030 TYPE(cp_fm_type) :: opvec
3031
3032 CALL timeset(routinen, handle)
3033
3034 ! get rows and cols of the input
3035 CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc)
3036 ! replicate the input kind of matrix
3037 CALL cp_fm_create(opvec, vectors%matrix_struct)
3038
3039 ! Compute zij here
3040 DO i = 1, SIZE(zij_fm_set, 2)
3041 DO j = 1, SIZE(zij_fm_set, 1)
3042 CALL cp_fm_set_all(zij_fm_set(j, i), 0.0_dp)
3043 CALL cp_dbcsr_sm_fm_multiply(op_sm_set(j, i)%matrix, vectors, opvec, ncol=nmoloc)
3044 CALL parallel_gemm("T", "N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, &
3045 zij_fm_set(j, i))
3046 END DO
3047 END DO
3048
3049 CALL cp_fm_release(opvec)
3050 CALL timestop(handle)
3051
3052 END SUBROUTINE zij_matrix
3053
3054! **************************************************************************************************
3055!> \brief ...
3056!> \param vectors ...
3057! **************************************************************************************************
3058 SUBROUTINE scdm_qrfact(vectors)
3059
3060 TYPE(cp_fm_type), INTENT(IN) :: vectors
3061
3062 CHARACTER(len=*), PARAMETER :: routinen = 'scdm_qrfact'
3063
3064 INTEGER :: handle, ncolt, nrowt
3065 REAL(kind=dp), DIMENSION(:), POINTER :: tau
3066 TYPE(cp_fm_struct_type), POINTER :: cstruct
3067 TYPE(cp_fm_type) :: ctp, qf, tmp
3068
3069 CALL timeset(routinen, handle)
3070
3071 ! Create Transpose of Coefficient Matrix vectors
3072 nrowt = vectors%matrix_struct%ncol_global
3073 ncolt = vectors%matrix_struct%nrow_global
3074
3075 CALL cp_fm_struct_create(cstruct, template_fmstruct=vectors%matrix_struct, &
3076 nrow_global=nrowt, ncol_global=ncolt)
3077 CALL cp_fm_create(ctp, cstruct)
3078 CALL cp_fm_struct_release(cstruct)
3079
3080 ALLOCATE (tau(nrowt))
3081
3082 CALL cp_fm_transpose(vectors, ctp)
3083
3084 ! Get QR decomposition of CTs
3085 CALL cp_fm_pdgeqpf(ctp, tau, nrowt, ncolt, 1, 1)
3086
3087 ! Construction of Q from the scalapack output
3088 CALL cp_fm_struct_create(cstruct, para_env=ctp%matrix_struct%para_env, &
3089 context=ctp%matrix_struct%context, nrow_global=ctp%matrix_struct%nrow_global, &
3090 ncol_global=ctp%matrix_struct%nrow_global)
3091 CALL cp_fm_create(qf, cstruct)
3092 CALL cp_fm_struct_release(cstruct)
3093 CALL cp_fm_to_fm_submat(ctp, qf, nrowt, nrowt, 1, 1, 1, 1)
3094
3095 ! Get Q
3096 CALL cp_fm_pdorgqr(qf, tau, nrowt, 1, 1)
3097
3098 ! Transform original coefficient matrix vectors
3099 CALL cp_fm_create(tmp, vectors%matrix_struct)
3100 CALL cp_fm_set_all(tmp, 0.0_dp, 1.0_dp)
3101 CALL cp_fm_to_fm(vectors, tmp)
3102 CALL parallel_gemm('N', 'N', ncolt, nrowt, nrowt, 1.0_dp, tmp, qf, 0.0_dp, vectors)
3103
3104 ! Cleanup
3105 CALL cp_fm_release(ctp)
3106 CALL cp_fm_release(tmp)
3107 CALL cp_fm_release(qf)
3108 DEALLOCATE (tau)
3109
3110 CALL timestop(handle)
3111
3112 END SUBROUTINE scdm_qrfact
3113
3114! **************************************************************************************************
3115!> \brief Achieves minimisation of the spread functional by simultaneous diagonalisation with Jacobi
3116!> rotations as presented in Cardoso & Souloumiac, SIAM J. Matrix Anal. Appl., 17(1), 161.
3117!> Generalizes the Jacobi algorithm to complex matrices.
3118!> \param weights array of weights for calculating the total spread
3119!> \param zij spread operator matrices
3120!> \param max_iter maximum number iterations
3121!> \param eps_localization numerical tolerance
3122!> \param sweeps counts number of sweeps required
3123!> \param out_each how often to print info
3124!> \param vectors complex vectors to be localized
3125!> \par History
3126!> 2020-04 created [LS]
3127!> \author Lukas Schreder
3128! **************************************************************************************************
3129 SUBROUTINE cardoso_souloumiac(weights, zij, max_iter, eps_localization, sweeps, &
3130 out_each, vectors)
3131
3132 REAL(kind=dp), INTENT(IN) :: weights(:)
3133 TYPE(cp_cfm_type), INTENT(INOUT) :: zij(:, :)
3134 INTEGER, INTENT(IN) :: max_iter
3135 REAL(kind=dp), INTENT(IN) :: eps_localization
3136 INTEGER :: sweeps
3137 INTEGER, INTENT(IN) :: out_each
3138 TYPE(cp_cfm_type), POINTER :: vectors
3139
3140 CHARACTER(len=*), PARAMETER :: routinen = 'cardoso_souloumiac'
3141
3142 COMPLEX(KIND=dp) :: s
3143 COMPLEX(KIND=dp), ALLOCATABLE :: mii(:), mij(:), mji(:), mjj(:)
3144 INTEGER :: dim1, dim2, handle, idim, istate, jdim, &
3145 jstate, nstate, unit_nr
3146 REAL(kind=dp) :: c, old_spread, spread, t1, t2, tolerance
3147 TYPE(cp_cfm_type), POINTER :: c_rmat, c_zij(:)
3148
3149 CALL timeset(routinen, handle)
3150
3151 dim1 = SIZE(zij, 1)
3152 dim2 = SIZE(zij, 2)
3153
3154 NULLIFY (c_rmat, c_zij)
3155 ALLOCATE (c_rmat, c_zij(dim1*dim2), mii(dim1*dim2), mij(dim1*dim2), mji(dim1*dim2), mjj(dim1*dim2))
3156 CALL cp_cfm_create(c_rmat, zij(1, 1)%matrix_struct)
3157 CALL cp_cfm_set_all(c_rmat, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp)) ! start with the identity transformation
3158 DO idim = 1, dim2
3159 DO jdim = 1, dim1
3160 CALL cp_cfm_create(c_zij((idim - 1)*dim1 + jdim), zij(jdim, idim)%matrix_struct)
3161 CALL cp_cfm_to_cfm(zij(jdim, idim), c_zij((idim - 1)*dim1 + jdim))
3162 END DO
3163 END DO
3164
3165 CALL cp_cfm_get_info(c_rmat, nrow_global=nstate)
3166 tolerance = 1.0e10_dp
3167 old_spread = 1.0e10_dp
3168 sweeps = 0
3169 unit_nr = -1
3170 IF (c_rmat%matrix_struct%para_env%is_source()) THEN
3172 WRITE (unit_nr, "(T4,A )") " Localization by iterative Jacobi rotation using "// &
3173 "Cardoso-Souloumiac angles"
3174
3175 END IF
3176
3177 ! Jacobi sweeps until converged
3178 DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
3179 sweeps = sweeps + 1
3180 t1 = m_walltime()
3181 DO jstate = 1, nstate
3182 DO istate = jstate + 1, nstate
3183 DO idim = 1, dim1*dim2
3184 CALL cp_cfm_get_element(c_zij(idim), istate, istate, mii(idim))
3185 CALL cp_cfm_get_element(c_zij(idim), istate, jstate, mij(idim))
3186 CALL cp_cfm_get_element(c_zij(idim), jstate, istate, mji(idim))
3187 CALL cp_cfm_get_element(c_zij(idim), jstate, jstate, mjj(idim))
3188 END DO
3189 CALL get_cardoso_angles(mii, mij, mji, mjj, c, s, vectors%matrix_struct)
3190 DO idim = 1, dim1*dim2
3191 CALL cp_cfm_rot_cols(c_zij(idim), istate, jstate, c, real(s))
3192 CALL cp_cfm_rot_rows(c_zij(idim), istate, jstate, c, real(s))
3193 END DO
3194 CALL cp_cfm_rot_cols(c_rmat, istate, jstate, c, real(s))
3195 END DO
3196 END DO
3197 CALL check_tolerance(c_zij, weights, spread)
3198 CALL check_tolerance(c_zij, weights, tolerance)
3199 tolerance = abs(spread - old_spread)
3200 old_spread = spread
3201
3202 t2 = m_walltime()
3203 IF (unit_nr > 0 .AND. modulo(sweeps, out_each) == 0) THEN
3204 WRITE (unit_nr, "(T4,A,I7,A,E12.4,A,E12.4,A,F8.3)") &
3205 "Iteration:", sweeps, "Functional", spread, "Tolerance:", tolerance, "Time:", t2 - t1
3206 CALL m_flush(unit_nr)
3207 END IF
3208 END DO
3209
3210 DO idim = 1, dim2
3211 ! back to an interlaced matrix
3212 DO jdim = 1, dim1
3213 CALL cp_cfm_to_cfm(c_zij((idim - 1)*dim1 + jdim), zij(jdim, idim))
3214 CALL cp_cfm_release(c_zij((idim - 1)*dim1 + jdim))
3215 END DO
3216 END DO
3217
3218 CALL rotate_orbitals_cfm(c_rmat, vectors)
3219
3220 DEALLOCATE (c_zij, mii, mij, mji, mjj)
3221 CALL cp_cfm_release(c_rmat)
3222 DEALLOCATE (c_rmat)
3223
3224 CALL timestop(handle)
3225
3226 END SUBROUTINE cardoso_souloumiac
3227
3228! **************************************************************************************************
3229!> \brief Pipek-Mezey version of the Cardoso-Souloumiac PADE algorithm for complex-valued matrices.
3230!> \param zij spread operator matrices
3231!> \param vec complex vectors to be localised
3232!> \param sweeps counts number of sweeps required
3233!> \param max_iter maximum number iterations
3234!> \param eps numerical tolerance
3235!> \param out_each how often to print info
3236!> \par History
3237!> 2020-04 created [LS]
3238!> \author Lukas Schreder
3239! **************************************************************************************************
3240 SUBROUTINE cardoso_souloumiac_pipek(zij, vec, sweeps, max_iter, eps, out_each)
3241
3242 TYPE(cp_cfm_type), POINTER :: zij(:, :), vec
3243 INTEGER :: sweeps, max_iter
3244 REAL(dp) :: eps
3245 INTEGER :: out_each
3246
3247 CHARACTER(*), PARAMETER :: routinen = 'cardoso_souloumiac_pipek'
3248
3249 COMPLEX(dp) :: c_spread, s
3250 COMPLEX(dp), POINTER :: qii(:), qij(:), qji(:), qjj(:)
3251 INTEGER :: handle, i, j, k, n_dim, n_states, &
3252 output_unit
3253 REAL(dp) :: c, old_spread, spread, t1, t2, tol
3254 TYPE(cp_cfm_type), POINTER :: c_zij(:), rmat
3255
3256 CALL timeset(routinen, handle)
3257
3258 CALL cite_reference(schreder2024_2)
3259
3260 n_dim = SIZE(zij)
3261
3262 ALLOCATE (rmat)
3263 CALL cp_cfm_create(rmat, zij(1, 1)%matrix_struct)
3264
3265 CALL cp_cfm_set_all(rmat, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
3266 ALLOCATE (c_zij(n_dim), qii(n_dim), qij(n_dim), qji(n_dim), qjj(n_dim))
3267
3268 ! build Qij matrix
3269 DO k = 1, n_dim
3270 c_zij(k) = zij(k, 1)
3271 END DO
3272
3273 CALL cp_cfm_get_info(c_zij(1), ncol_global=n_states)
3274
3275 tol = 1.0e10_dp
3276 c_spread = (0.0_dp, 0.0_dp)
3277 DO k = 1, n_dim
3278 DO i = 1, n_states
3279 CALL cp_cfm_get_element(c_zij(k), i, i, s)
3280 c_spread = c_spread + s*s
3281 END DO
3282 END DO
3283 spread = real(c_spread)
3284 old_spread = spread
3285
3286 sweeps = 0
3287 output_unit = cp_logger_get_default_unit_nr()
3288 WRITE (output_unit, "(T4,A )") " Localization by iterative Jacobi rotation using "// &
3289 "Cardoso-Souloumiac angles"
3290 WRITE (output_unit, "(T4,A )") " and Pipek-Mezey spread functional"
3291
3292 IF (output_unit > 0 .AND. modulo(sweeps, out_each) == 0) THEN
3293 WRITE (output_unit, "(T4,A,I7,A,E12.4,A,E12.4,A,F8.3)") &
3294 "Iteration:", sweeps, " Functional", spread, " Tolerance:", tol, " Time:", 0.0_dp
3295 END IF
3296
3297 DO WHILE (tol >= eps .AND. sweeps < max_iter)
3298 t1 = m_walltime()
3299 sweeps = sweeps + 1
3300
3301 DO i = 1, n_states
3302 DO j = i + 1, n_states
3303 DO k = 1, n_dim
3304 CALL cp_cfm_get_element(c_zij(k), i, i, qii(k))
3305 CALL cp_cfm_get_element(c_zij(k), i, j, qij(k))
3306 CALL cp_cfm_get_element(c_zij(k), j, i, qji(k))
3307 CALL cp_cfm_get_element(c_zij(k), j, j, qjj(k))
3308 END DO
3309 CALL get_cardoso_angles(qii, qij, qji, qjj, c, s, vec%matrix_struct)
3310 DO k = 1, n_dim
3311 CALL cp_cfm_rot_cols(c_zij(k), i, j, c, real(s))
3312 CALL cp_cfm_rot_rows(c_zij(k), i, j, c, real(s))
3313 END DO
3314 CALL cp_cfm_rot_cols(rmat, i, j, c, real(s))
3315 END DO
3316 END DO
3317
3318 c_spread = (0.0_dp, 0.0_dp)
3319 DO i = 1, n_states
3320 DO k = 1, n_dim
3321 CALL cp_cfm_get_element(c_zij(k), i, i, s)
3322 c_spread = c_spread + s*s
3323 END DO
3324 END DO
3325 spread = real(c_spread)
3326
3327 tol = abs(spread - old_spread)
3328 old_spread = spread
3329 t2 = m_walltime()
3330 IF (output_unit > 0 .AND. modulo(sweeps, out_each) == 0) THEN
3331 WRITE (output_unit, "(T4,A,I7,A,E12.4,A,E12.4,A,F8.3)") &
3332 "Iteration:", sweeps, " Functional", spread, " Tolerance:", tol, " Time:", t2 - t1
3333 END IF
3334 END DO
3335
3336 CALL rotate_orbitals_cfm(rmat, vec)
3337
3338 ! c_zij(k) was assigned (shallow copy of zij(k,1)); do not release contents
3339 DEALLOCATE (c_zij, qii, qij, qji, qjj)
3340 CALL cp_cfm_release(rmat)
3341 DEALLOCATE (rmat)
3342
3343 CALL timestop(handle)
3344
3345 END SUBROUTINE cardoso_souloumiac_pipek
3346
3347! **************************************************************************************************
3348!> \brief calculates the Jacobi angles needed in serial Cardoso-Souloumiac diagonalisation
3349!> \param mii ...
3350!> \param mij ...
3351!> \param mji ...
3352!> \param mjj ...
3353!> \param c ...
3354!> \param s ...
3355!> \param tmp_fm_struct ...
3356!> \par History
3357!> 2020-04 created [LS]
3358!> \author Lukas Schreder
3359! **************************************************************************************************
3360 SUBROUTINE get_cardoso_angles(mii, mij, mji, mjj, c, s, tmp_fm_struct)
3361
3362 COMPLEX(KIND=dp), DIMENSION(:) :: mii, mij, mji, mjj
3363 REAL(kind=dp), INTENT(out) :: c
3364 COMPLEX(KIND=dp), INTENT(out) :: s
3365 TYPE(cp_fm_struct_type), POINTER :: tmp_fm_struct
3366
3367 INTEGER :: dim_m, i, i_max
3368 REAL(kind=dp) :: r, x, y, z
3369 REAL(kind=dp), DIMENSION(3) :: evals
3370 TYPE(cp_cfm_type), POINTER :: c_gmat, hmat
3371 TYPE(cp_fm_struct_type), POINTER :: g_fm_struct, h_fm_struct
3372 TYPE(cp_fm_type), POINTER :: evects, gmat
3373
3374 dim_m = SIZE(mii)
3375
3376 CALL cp_fm_struct_create(h_fm_struct, nrow_global=1, ncol_global=3, &
3377 template_fmstruct=tmp_fm_struct)
3378 CALL cp_fm_struct_create(g_fm_struct, nrow_global=3, ncol_global=3, &
3379 template_fmstruct=tmp_fm_struct)
3380 ALLOCATE (hmat, c_gmat, gmat, evects)
3381 CALL cp_cfm_create(hmat, h_fm_struct)
3382 CALL cp_cfm_create(c_gmat, g_fm_struct)
3383 CALL cp_cfm_set_all(c_gmat, (0.0_dp, 0.0_dp), (0.0_dp, 0.0_dp))
3384 CALL cp_fm_create(gmat, g_fm_struct)
3385 CALL cp_fm_create(evects, g_fm_struct)
3386
3387 DO i = 1, dim_m
3388 CALL cp_cfm_set_element(hmat, 1, 1, (mii(i) - mjj(i)))
3389 CALL cp_cfm_set_element(hmat, 1, 2, (mij(i) + mji(i)))
3390 CALL cp_cfm_set_element(hmat, 1, 3, (0.0_dp, 1.0_dp)*(mji(i) - mij(i)))
3391 CALL cp_cfm_gemm("C", "N", 3, 3, 1, (1.0_dp, 0.0_dp), hmat, hmat, (1.0_dp, 0.0_dp), c_gmat)
3392 END DO
3393 CALL cp_cfm_to_fm(c_gmat, gmat)
3394
3395 ! find eigenvector with highest eigenvalue
3396 CALL choose_eigv_solver(gmat, evects, evals)
3397 i_max = maxloc(evals, 1)
3398 CALL cp_fm_get_element(evects, 1, i_max, x)
3399 CALL cp_fm_get_element(evects, 2, i_max, y)
3400 CALL cp_fm_get_element(evects, 3, i_max, z)
3401 IF (x < 0) THEN
3402 x = -x
3403 y = -y
3404 z = -z
3405 END IF
3406
3407 ! calculate the angles
3408 r = sqrt(x**2 + y**2 + z**2) ! always 1
3409 c = sqrt((x + r)/(2*r)) ! always real
3410 s = (y - gaussi*z)/sqrt(2*r*(x + r)) ! always complex
3411
3412 CALL cp_fm_struct_release(h_fm_struct)
3413 CALL cp_fm_struct_release(g_fm_struct)
3414 CALL cp_cfm_release(hmat)
3415 CALL cp_cfm_release(c_gmat)
3416 CALL cp_fm_release(gmat)
3417 CALL cp_fm_release(evects)
3418 DEALLOCATE (hmat, c_gmat, gmat, evects)
3419
3420 END SUBROUTINE get_cardoso_angles
3421
3422END MODULE qs_localization_methods
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
collects all references to literature in CP2K as new algorithms / method are included from literature...
integer, save, public schreder2024_2
Handles all functions related to the CELL.
Definition cell_types.F:15
methods related to the blacs parallel environment
Basic linear algebra operations for complex full matrices.
subroutine, public cp_cfm_scale_and_add(alpha, matrix_a, beta, matrix_b)
Scale and add two BLACS matrices (a = alpha*a + beta*b).
subroutine, public cp_cfm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
Performs one of the matrix-matrix operations: matrix_c = alpha * op1( matrix_a ) * op2( matrix_b ) + ...
subroutine, public cp_cfm_rot_rows(matrix, irow, jrow, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
subroutine, public cp_cfm_schur_product(matrix_a, matrix_b, matrix_c)
Computes the element-wise (Schur) product of two matrices: C = A \circ B .
subroutine, public cp_cfm_column_scale(matrix_a, scaling)
Scales columns of the full matrix by corresponding factors.
subroutine, public cp_cfm_rot_cols(matrix, icol, jcol, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
subroutine, public cp_cfm_trace(matrix_a, matrix_b, trace)
Returns the trace of matrix_a^T matrix_b, i.e sum_{i,j}(matrix_a(i,j)*matrix_b(i,j)) .
used for collecting diagonalization schemes available for cp_cfm_type
Definition cp_cfm_diag.F:14
subroutine, public cp_cfm_heevd(matrix, eigenvectors, eigenvalues)
Perform a diagonalisation of a complex matrix.
Definition cp_cfm_diag.F:74
Represents a complex full matrix distributed on many processors.
subroutine, public cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
Extract a sub-matrix from the full matrix: op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n...
subroutine, public cp_cfm_get_element(matrix, irow_global, icol_global, alpha)
Get the matrix element by its global index.
subroutine, public cp_cfm_release(matrix)
Releases a full matrix.
subroutine, public cp_cfm_set_element(matrix, irow_global, icol_global, alpha)
Set the matrix element (irow_global,icol_global) of the full matrix to alpha.
subroutine, public cp_fm_to_cfm(msourcer, msourcei, mtarget)
Construct a complex full matrix by taking its real and imaginary parts from two separate real-value f...
subroutine, public cp_cfm_create(matrix, matrix_struct, name, nrow, ncol, set_zero)
Creates a new full matrix with the given structure.
subroutine, public cp_cfm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, matrix_struct, para_env)
Returns information about a full matrix.
subroutine, public cp_cfm_set_submatrix(matrix, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
Set a sub-matrix of the full matrix: matrix(start_row:start_row+n_rows,start_col:start_col+n_cols) = ...
subroutine, public cp_cfm_set_all(matrix, alpha, beta)
Set all elements of the full matrix to alpha. Besides, set all diagonal matrix elements to beta (if g...
subroutine, public cp_cfm_to_fm(msource, mtargetr, mtargeti)
Copy real and imaginary parts of a complex full matrix into separate real-value full matrices.
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_sm_fm_multiply(matrix, fm_in, fm_out, ncol, alpha, beta)
multiply a dbcsr with a fm matrix
Routines to handle the external control of CP2K.
subroutine, public external_control(should_stop, flag, globenv, target_time, start_time, force_check)
External manipulations during a run : when the <PROJECT_NAME>.EXIT_$runtype command is sent the progr...
Basic linear algebra operations for full matrices.
subroutine, public cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
compute a QR factorization with column pivoting of a M-by-N distributed matrix sub( A ) = A(IA:IA+M-1...
real(kind=dp) function, public cp_fm_frobenius_norm(matrix_a)
computes the Frobenius norm of matrix_a
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_scale(alpha, matrix_a)
scales a matrix matrix_a = alpha * matrix_b
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
subroutine, public cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1) with orthonormal column...
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_decompose(matrix, n, info_out)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
Definition cp_fm_diag.F:17
subroutine, public cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
Computes all eigenvalues and vectors of a real symmetric matrix significantly faster than syevx,...
Definition cp_fm_diag.F:573
subroutine, public choose_eigv_solver(matrix, eigenvectors, eigenvalues, info)
Choose the Eigensolver depending on which library is available ELPA seems to be unstable for small sy...
Definition cp_fm_diag.F:245
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp, nrow, ncol, set_zero)
creates a new full matrix with the given structure
subroutine, public cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
returns an element of a fm this value is valid on every cpu using this call is expensive
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_maxabsrownorm(matrix, a_max)
find the maximum over the rows of the sum of the absolute values of the elements of a given row = || ...
subroutine, public cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
copy just a part ot the matrix
subroutine, public cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
find the maximum absolute value of the matrix element maxval(abs(matrix))
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_init_random(matrix, ncol, start_col)
fills a matrix with random numbers
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Definition kahan_sum.F:29
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition machine.F:136
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition machine.F:153
Definition of mathematical constants and functions.
real(kind=dp), parameter, public pi
complex(kind=dp), parameter, public z_one
complex(kind=dp), parameter, public gaussi
real(kind=dp), parameter, public twopi
complex(kind=dp), parameter, public z_zero
Routines for calculating a complex matrix exponential.
Definition matrix_exp.F:13
subroutine, public exp_pade_real(exp_h, matrix, nsquare, npade)
exponential of a real matrix, calculated using pade approximation together with scaling and squaring
Definition matrix_exp.F:570
subroutine, public get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
optimization function for pade/taylor order and number of squaring steps
Definition matrix_exp.F:244
Interface to the message passing library MPI.
basic linear algebra operations for full matrixes
Localization methods such as 2x2 Jacobi rotations Steepest Decents Conjugate Gradient.
subroutine, public approx_l1_norm_sd(c, iterations, eps, converged, sweeps)
...
subroutine, public rotate_orbitals(rmat, vectors)
...
subroutine, public cardoso_souloumiac(weights, zij, max_iter, eps_localization, sweeps, out_each, vectors)
Achieves minimisation of the spread functional by simultaneous diagonalisation with Jacobi rotations ...
subroutine, public jacobi_rotations(weights, zij, vectors, para_env, max_iter, eps_localization, sweeps, out_each, target_time, start_time, restricted)
wrapper for the jacobi routines, should be removed if jacobi_rot_para can deal with serial para_envs.
subroutine, public zij_matrix(vectors, op_sm_set, zij_fm_set)
...
subroutine, public direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
use the exponential parametrization as described in to perform a direct mini Gerd Berghold et al....
subroutine, public initialize_weights(cell, weights)
...
subroutine, public crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, eps_localization, iterations, converged)
yet another crazy try, computes the angles needed to rotate the orbitals first and rotates them all a...
subroutine, public scdm_qrfact(vectors)
...
subroutine, public cardoso_souloumiac_pipek(zij, vec, sweeps, max_iter, eps, out_each)
Pipek-Mezey version of the Cardoso-Souloumiac PADE algorithm for complex-valued matrices.
subroutine, public jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
combine jacobi rotations (serial) and conjugate gradient with golden section line search for partiall...
Exchange and Correlation functional calculations.
Definition xc.F:17
Type defining parameters related to the simulation cell.
Definition cell_types.F:60
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
Represent a complex full matrix.
keeps the information about the structure of a full matrix
represent a full matrix
stores all the informations relevant to an mpi environment