(git:ed6f26b)
Loading...
Searching...
No Matches
qs_localization_methods.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Localization methods such as 2x2 Jacobi rotations
10!> Steepest Decents
11!> Conjugate Gradient
12!> \par History
13!> Initial parallellization of jacobi (JVDV 07.2003)
14!> direct minimization using exponential parametrization (JVDV 09.2003)
15!> crazy rotations go fast (JVDV 10.2003)
16!> \author CJM (04.2003)
17! **************************************************************************************************
19 USE cell_types, ONLY: cell_type
28 USE cp_cfm_diag, ONLY: cp_cfm_heevd
29 USE cp_cfm_types, ONLY: &
33 USE cp_dbcsr_api, ONLY: dbcsr_p_type
45 USE cp_fm_diag, ONLY: cp_fm_syevd
50 USE cp_fm_types, ONLY: &
56 USE kahan_sum, ONLY: accurate_sum
57 USE kinds, ONLY: dp
58 USE machine, ONLY: m_flush,&
60 USE mathconstants, ONLY: pi,&
61 twopi
62 USE matrix_exp, ONLY: exp_pade_real,&
66#include "./base/base_uses.f90"
67
68 IMPLICIT NONE
72
73 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_localization_methods'
74
75 PRIVATE
76
77 TYPE set_c_1d_type
78 COMPLEX(KIND=dp), POINTER, DIMENSION(:) :: c_array => null()
79 END TYPE
80
81 TYPE set_c_2d_type
82 COMPLEX(KIND=dp), POINTER, DIMENSION(:, :) :: c_array => null()
83 END TYPE
84
85CONTAINS
86! **************************************************************************************************
87!> \brief ...
88!> \param C ...
89!> \param iterations ...
90!> \param eps ...
91!> \param converged ...
92!> \param sweeps ...
93! **************************************************************************************************
94 SUBROUTINE approx_l1_norm_sd(C, iterations, eps, converged, sweeps)
95 TYPE(cp_fm_type), INTENT(IN) :: c
96 INTEGER, INTENT(IN) :: iterations
97 REAL(kind=dp), INTENT(IN) :: eps
98 LOGICAL, INTENT(INOUT) :: converged
99 INTEGER, INTENT(INOUT) :: sweeps
100
101 CHARACTER(len=*), PARAMETER :: routinen = 'approx_l1_norm_sd'
102 INTEGER, PARAMETER :: taylor_order = 100
103 REAL(kind=dp), PARAMETER :: alpha = 0.1_dp, f2_eps = 0.01_dp
104
105 INTEGER :: handle, i, istep, k, n, ncol_local, &
106 nrow_local, output_unit, p
107 REAL(kind=dp) :: expfactor, f2, f2old, gnorm, tnorm
108 TYPE(cp_blacs_env_type), POINTER :: context
109 TYPE(cp_fm_struct_type), POINTER :: fm_struct_k_k
110 TYPE(cp_fm_type) :: ctmp, g, gp1, gp2, u
111 TYPE(mp_para_env_type), POINTER :: para_env
112
113 CALL timeset(routinen, handle)
114
115 NULLIFY (context, para_env, fm_struct_k_k)
116
117 output_unit = cp_logger_get_default_io_unit()
118
119 CALL cp_fm_struct_get(c%matrix_struct, nrow_global=n, ncol_global=k, &
120 nrow_local=nrow_local, ncol_local=ncol_local, &
121 para_env=para_env, context=context)
122 CALL cp_fm_struct_create(fm_struct_k_k, para_env=para_env, context=context, &
123 nrow_global=k, ncol_global=k)
124 CALL cp_fm_create(ctmp, c%matrix_struct)
125 CALL cp_fm_create(u, fm_struct_k_k)
126 CALL cp_fm_create(g, fm_struct_k_k)
127 CALL cp_fm_create(gp1, fm_struct_k_k)
128 CALL cp_fm_create(gp2, fm_struct_k_k)
129 !
130 ! printing
131 IF (output_unit > 0) THEN
132 WRITE (output_unit, '(1X)')
133 WRITE (output_unit, '(2X,A)') '-----------------------------------------------------------------------------'
134 WRITE (output_unit, '(A,I5)') ' Nbr iterations =', iterations
135 WRITE (output_unit, '(A,E10.2)') ' eps convergence =', eps
136 WRITE (output_unit, '(A,I5)') ' Max Taylor order =', taylor_order
137 WRITE (output_unit, '(A,E10.2)') ' f2 eps =', f2_eps
138 WRITE (output_unit, '(A,E10.2)') ' alpha =', alpha
139 WRITE (output_unit, '(A)') ' iteration approx_l1_norm g_norm rel_err'
140 END IF
141 !
142 f2old = 0.0_dp
143 converged = .false.
144 !
145 ! Start the steepest descent
146 DO istep = 1, iterations
147 !
148 !-------------------------------------------------------------------
149 ! compute f_2
150 ! f_2(x)=(x^2+eps)^1/2
151 f2 = 0.0_dp
152 DO p = 1, ncol_local ! p
153 DO i = 1, nrow_local ! i
154 f2 = f2 + sqrt(c%local_data(i, p)**2 + f2_eps)
155 END DO
156 END DO
157 CALL c%matrix_struct%para_env%sum(f2)
158 !write(*,*) 'qs_localize: f_2=',f2
159 !-------------------------------------------------------------------
160 ! compute the derivative of f_2
161 ! f_2(x)=(x^2+eps)^1/2
162 DO p = 1, ncol_local ! p
163 DO i = 1, nrow_local ! i
164 ctmp%local_data(i, p) = c%local_data(i, p)/sqrt(c%local_data(i, p)**2 + f2_eps)
165 END DO
166 END DO
167 CALL parallel_gemm('T', 'N', k, k, n, 1.0_dp, ctmp, c, 0.0_dp, g)
168 ! antisymmetrize
169 CALL cp_fm_transpose(g, u)
170 CALL cp_fm_scale_and_add(-0.5_dp, g, 0.5_dp, u)
171 !
172 !-------------------------------------------------------------------
173 !
174 gnorm = cp_fm_frobenius_norm(g)
175 !write(*,*) 'qs_localize: norm(G)=',gnorm
176 !
177 ! rescale for steepest descent
178 CALL cp_fm_scale(-alpha, g)
179 !
180 ! compute unitary transform
181 ! zeroth order
182 CALL cp_fm_set_all(u, 0.0_dp, 1.0_dp)
183 ! first order
184 expfactor = 1.0_dp
185 CALL cp_fm_scale_and_add(1.0_dp, u, expfactor, g)
186 tnorm = cp_fm_frobenius_norm(g)
187 !write(*,*) 'Taylor expansion i=',1,' norm(X^i)/i!=',tnorm
188 IF (tnorm .GT. 1.0e-10_dp) THEN
189 ! other orders
190 CALL cp_fm_to_fm(g, gp1)
191 DO i = 2, taylor_order
192 ! new power of G
193 CALL parallel_gemm('N', 'N', k, k, k, 1.0_dp, g, gp1, 0.0_dp, gp2)
194 CALL cp_fm_to_fm(gp2, gp1)
195 ! add to the taylor expansion so far
196 expfactor = expfactor/real(i, kind=dp)
197 CALL cp_fm_scale_and_add(1.0_dp, u, expfactor, gp1)
198 tnorm = cp_fm_frobenius_norm(gp1)
199 !write(*,*) 'Taylor expansion i=',i,' norm(X^i)/i!=',tnorm*expfactor
200 IF (tnorm*expfactor .LT. 1.0e-10_dp) EXIT
201 END DO
202 END IF
203 !
204 ! incrementaly rotate the MOs
205 CALL parallel_gemm('N', 'N', n, k, k, 1.0_dp, c, u, 0.0_dp, ctmp)
206 CALL cp_fm_to_fm(ctmp, c)
207 !
208 ! printing
209 IF (output_unit .GT. 0) THEN
210 WRITE (output_unit, '(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, abs((f2 - f2old)/f2)
211 END IF
212 !
213 ! Are we done?
214 sweeps = istep
215 !IF(gnorm.LE.grad_thresh.AND.ABS((f2-f2old)/f2).LE.f2_thresh.AND.istep.GT.1) THEN
216 IF (abs((f2 - f2old)/f2) .LE. eps .AND. istep .GT. 1) THEN
217 converged = .true.
218 EXIT
219 END IF
220 f2old = f2
221 END DO
222 !
223 ! here we should do one refine step to enforce C'*S*C=1 for any case
224 !
225 ! Print the final result
226 IF (output_unit .GT. 0) WRITE (output_unit, '(A,E16.10)') ' sparseness function f2 = ', f2
227 !
228 ! sparsity
229 !DO i=1,size(thresh,1)
230 ! gnorm = 0.0_dp
231 ! DO o=1,ncol_local
232 ! DO p=1,nrow_local
233 ! IF(ABS(C%local_data(p,o)).GT.thresh(i)) THEN
234 ! gnorm = gnorm + 1.0_dp
235 ! ENDIF
236 ! ENDDO
237 ! ENDDO
238 ! CALL C%matrix_struct%para_env%sum(gnorm)
239 ! IF(output_unit.GT.0) THEN
240 ! WRITE(output_unit,*) 'qs_localize: ratio2=',gnorm / ( REAL(k,KIND=dp)*REAL(n,KIND=dp) ),thresh(i)
241 ! ENDIF
242 !ENDDO
243 !
244 ! deallocate
245 CALL cp_fm_struct_release(fm_struct_k_k)
246 CALL cp_fm_release(ctmp)
247 CALL cp_fm_release(u)
248 CALL cp_fm_release(g)
249 CALL cp_fm_release(gp1)
250 CALL cp_fm_release(gp2)
251
252 CALL timestop(handle)
253
254 END SUBROUTINE approx_l1_norm_sd
255! **************************************************************************************************
256!> \brief ...
257!> \param cell ...
258!> \param weights ...
259! **************************************************************************************************
260 SUBROUTINE initialize_weights(cell, weights)
261
262 TYPE(cell_type), POINTER :: cell
263 REAL(kind=dp), DIMENSION(:) :: weights
264
265 REAL(kind=dp), DIMENSION(3, 3) :: metric
266
267 cpassert(ASSOCIATED(cell))
268
269 metric = 0.0_dp
270 CALL dgemm('T', 'N', 3, 3, 3, 1._dp, cell%hmat(:, :), 3, cell%hmat(:, :), 3, 0.0_dp, metric(:, :), 3)
271
272 weights(1) = metric(1, 1) - metric(1, 2) - metric(1, 3)
273 weights(2) = metric(2, 2) - metric(1, 2) - metric(2, 3)
274 weights(3) = metric(3, 3) - metric(1, 3) - metric(2, 3)
275 weights(4) = metric(1, 2)
276 weights(5) = metric(1, 3)
277 weights(6) = metric(2, 3)
278
279 END SUBROUTINE initialize_weights
280
281! **************************************************************************************************
282!> \brief wrapper for the jacobi routines, should be removed if jacobi_rot_para
283!> can deal with serial para_envs.
284!> \param weights ...
285!> \param zij ...
286!> \param vectors ...
287!> \param para_env ...
288!> \param max_iter ...
289!> \param eps_localization ...
290!> \param sweeps ...
291!> \param out_each ...
292!> \param target_time ...
293!> \param start_time ...
294!> \param restricted ...
295!> \par History
296!> \author Joost VandeVondele (02.2010)
297! **************************************************************************************************
298 SUBROUTINE jacobi_rotations(weights, zij, vectors, para_env, max_iter, &
299 eps_localization, sweeps, out_each, target_time, start_time, restricted)
300
301 REAL(kind=dp), INTENT(IN) :: weights(:)
302 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
303 TYPE(mp_para_env_type), POINTER :: para_env
304 INTEGER, INTENT(IN) :: max_iter
305 REAL(kind=dp), INTENT(IN) :: eps_localization
306 INTEGER :: sweeps
307 INTEGER, INTENT(IN) :: out_each
308 REAL(dp) :: target_time, start_time
309 INTEGER :: restricted
310
311 IF (para_env%num_pe == 1) THEN
312 CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, &
313 sweeps, out_each, restricted=restricted)
314 ELSE
315 CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
316 sweeps, out_each, target_time, start_time, restricted=restricted)
317 END IF
318
319 END SUBROUTINE jacobi_rotations
320
321! **************************************************************************************************
322!> \brief this routine, private to the module is a serial backup, till we have jacobi_rot_para to work in serial
323!> while the routine below works in parallel, it is too slow to be useful
324!> \param weights ...
325!> \param zij ...
326!> \param vectors ...
327!> \param max_iter ...
328!> \param eps_localization ...
329!> \param sweeps ...
330!> \param out_each ...
331!> \param restricted ...
332! **************************************************************************************************
333 SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, &
334 out_each, restricted)
335 REAL(kind=dp), INTENT(IN) :: weights(:)
336 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
337 INTEGER, INTENT(IN) :: max_iter
338 REAL(kind=dp), INTENT(IN) :: eps_localization
339 INTEGER :: sweeps
340 INTEGER, INTENT(IN) :: out_each
341 INTEGER :: restricted
342
343 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rotations_serial'
344
345 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
346 INTEGER :: dim2, handle, idim, istate, jstate, &
347 nstate, unit_nr
348 REAL(kind=dp) :: ct, st, t1, t2, theta, tolerance
349 TYPE(cp_cfm_type) :: c_rmat
350 TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
351 TYPE(cp_fm_type) :: rmat
352
353 CALL timeset(routinen, handle)
354
355 dim2 = SIZE(zij, 2)
356 ALLOCATE (c_zij(dim2))
357 NULLIFY (mii, mij, mjj)
358 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
359
360 CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
361 CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
362
363 CALL cp_cfm_create(c_rmat, zij(1, 1)%matrix_struct)
364 CALL cp_cfm_set_all(c_rmat, (0._dp, 0._dp), (1._dp, 0._dp))
365 DO idim = 1, dim2
366 CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
367 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
368 zij(2, idim)%local_data, dp)
369 END DO
370
371 CALL cp_fm_get_info(rmat, nrow_global=nstate)
372 tolerance = 1.0e10_dp
373
374 sweeps = 0
375 unit_nr = -1
376 IF (rmat%matrix_struct%para_env%is_source()) THEN
378 WRITE (unit_nr, '(T4,A )') " Localization by iterative Jacobi rotation"
379 END IF
380
381 IF (restricted > 0) THEN
383 WRITE (unit_nr, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
384 nstate = nstate - restricted
385 END IF
386
387 ! do jacobi sweeps until converged
388 DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
389 sweeps = sweeps + 1
390 t1 = m_walltime()
391
392 DO istate = 1, nstate
393 DO jstate = istate + 1, nstate
394 DO idim = 1, dim2
395 CALL cp_cfm_get_element(c_zij(idim), istate, istate, mii(idim))
396 CALL cp_cfm_get_element(c_zij(idim), istate, jstate, mij(idim))
397 CALL cp_cfm_get_element(c_zij(idim), jstate, jstate, mjj(idim))
398 END DO
399 CALL get_angle(mii, mjj, mij, weights, theta)
400 st = sin(theta)
401 ct = cos(theta)
402 CALL rotate_zij(istate, jstate, st, ct, c_zij)
403
404 CALL rotate_rmat(istate, jstate, st, ct, c_rmat)
405 END DO
406 END DO
407
408 CALL check_tolerance(c_zij, weights, tolerance)
409
410 t2 = m_walltime()
411 IF (unit_nr > 0 .AND. modulo(sweeps, out_each) == 0) THEN
412 WRITE (unit_nr, '(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') &
413 "Iteration:", sweeps, "Tolerance:", tolerance, "Time:", t2 - t1
414 CALL m_flush(unit_nr)
415 END IF
416
417 END DO
418
419 DO idim = 1, dim2
420 zij(1, idim)%local_data = real(c_zij(idim)%local_data, dp)
421 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
422 CALL cp_cfm_release(c_zij(idim))
423 END DO
424 DEALLOCATE (c_zij)
425 DEALLOCATE (mii, mij, mjj)
426 rmat%local_data = real(c_rmat%local_data, dp)
427 CALL cp_cfm_release(c_rmat)
428 CALL rotate_orbitals(rmat, vectors)
429 CALL cp_fm_release(rmat)
430
431 CALL timestop(handle)
432
433 END SUBROUTINE jacobi_rotations_serial
434! **************************************************************************************************
435!> \brief very similar to jacobi_rotations_serial with some extra output options
436!> \param weights ...
437!> \param c_zij ...
438!> \param max_iter ...
439!> \param c_rmat ...
440!> \param eps_localization ...
441!> \param tol_out ...
442!> \param jsweeps ...
443!> \param out_each ...
444!> \param c_zij_out ...
445!> \param grad_final ...
446! **************************************************************************************************
447 SUBROUTINE jacobi_rotations_serial_1(weights, c_zij, max_iter, c_rmat, eps_localization, &
448 tol_out, jsweeps, out_each, c_zij_out, grad_final)
449 REAL(kind=dp), INTENT(IN) :: weights(:)
450 TYPE(cp_cfm_type), INTENT(IN) :: c_zij(:)
451 INTEGER, INTENT(IN) :: max_iter
452 TYPE(cp_cfm_type), INTENT(IN) :: c_rmat
453 REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_localization
454 REAL(kind=dp), INTENT(OUT), OPTIONAL :: tol_out
455 INTEGER, INTENT(OUT), OPTIONAL :: jsweeps
456 INTEGER, INTENT(IN), OPTIONAL :: out_each
457 TYPE(cp_cfm_type), INTENT(IN), OPTIONAL :: c_zij_out(:)
458 TYPE(cp_fm_type), INTENT(OUT), OPTIONAL, POINTER :: grad_final
459
460 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rotations_serial_1'
461
462 COMPLEX(KIND=dp) :: mzii
463 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
464 INTEGER :: dim2, handle, idim, istate, jstate, &
465 nstate, sweeps, unit_nr
466 REAL(kind=dp) :: alpha, avg_spread_ii, ct, spread_ii, st, &
467 sum_spread_ii, t1, t2, theta, tolerance
468 TYPE(cp_cfm_type) :: c_rmat_local
469 TYPE(cp_cfm_type), ALLOCATABLE :: c_zij_local(:)
470
471 CALL timeset(routinen, handle)
472
473 dim2 = SIZE(c_zij)
474 NULLIFY (mii, mij, mjj)
475 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
476
477 ALLOCATE (c_zij_local(dim2))
478 CALL cp_cfm_create(c_rmat_local, c_rmat%matrix_struct)
479 CALL cp_cfm_set_all(c_rmat_local, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
480 DO idim = 1, dim2
481 CALL cp_cfm_create(c_zij_local(idim), c_zij(idim)%matrix_struct)
482 c_zij_local(idim)%local_data = c_zij(idim)%local_data
483 END DO
484
485 CALL cp_cfm_get_info(c_rmat_local, nrow_global=nstate)
486 tolerance = 1.0e10_dp
487
488 IF (PRESENT(grad_final)) CALL cp_fm_set_all(grad_final, 0.0_dp)
489
490 sweeps = 0
491 IF (PRESENT(out_each)) THEN
492 unit_nr = -1
493 IF (c_rmat_local%matrix_struct%para_env%is_source()) THEN
495 END IF
496 alpha = 0.0_dp
497 DO idim = 1, dim2
498 alpha = alpha + weights(idim)
499 END DO
500 END IF
501
502 ! do jacobi sweeps until converged
503 DO WHILE (sweeps < max_iter)
504 sweeps = sweeps + 1
505 IF (PRESENT(eps_localization)) THEN
506 IF (tolerance < eps_localization) EXIT
507 END IF
508 IF (PRESENT(out_each)) t1 = m_walltime()
509
510 DO istate = 1, nstate
511 DO jstate = istate + 1, nstate
512 DO idim = 1, dim2
513 CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mii(idim))
514 CALL cp_cfm_get_element(c_zij_local(idim), istate, jstate, mij(idim))
515 CALL cp_cfm_get_element(c_zij_local(idim), jstate, jstate, mjj(idim))
516 END DO
517 CALL get_angle(mii, mjj, mij, weights, theta)
518 st = sin(theta)
519 ct = cos(theta)
520 CALL rotate_zij(istate, jstate, st, ct, c_zij_local)
521
522 CALL rotate_rmat(istate, jstate, st, ct, c_rmat_local)
523 END DO
524 END DO
525
526 IF (PRESENT(grad_final)) THEN
527 CALL check_tolerance(c_zij_local, weights, tolerance, grad=grad_final)
528 ELSE
529 CALL check_tolerance(c_zij_local, weights, tolerance)
530 END IF
531 IF (PRESENT(tol_out)) tol_out = tolerance
532
533 IF (PRESENT(out_each)) THEN
534 t2 = m_walltime()
535 IF (unit_nr > 0 .AND. modulo(sweeps, out_each) == 0) THEN
536 sum_spread_ii = 0.0_dp
537 DO istate = 1, nstate
538 spread_ii = 0.0_dp
539 DO idim = 1, dim2
540 CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mzii)
541 spread_ii = spread_ii + weights(idim)* &
542 abs(mzii)**2/twopi/twopi
543 END DO
544 sum_spread_ii = sum_spread_ii + spread_ii
545 END DO
546 sum_spread_ii = alpha*nstate/twopi/twopi - sum_spread_ii
547 avg_spread_ii = sum_spread_ii/nstate
548 WRITE (unit_nr, '(T4,A,T26,A,T48,A,T64,A)') &
549 "Iteration", "Avg. Spread_ii", "Tolerance", "Time"
550 WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4,T60,F8.3)') &
551 sweeps, avg_spread_ii, tolerance, t2 - t1
552 CALL m_flush(unit_nr)
553 END IF
554 IF (PRESENT(jsweeps)) jsweeps = sweeps
555 END IF
556
557 END DO
558
559 IF (PRESENT(c_zij_out)) THEN
560 DO idim = 1, dim2
561 CALL cp_cfm_to_cfm(c_zij_local(idim), c_zij_out(idim))
562 END DO
563 END IF
564 CALL cp_cfm_to_cfm(c_rmat_local, c_rmat)
565
566 DEALLOCATE (mii, mij, mjj)
567 DO idim = 1, dim2
568 CALL cp_cfm_release(c_zij_local(idim))
569 END DO
570 DEALLOCATE (c_zij_local)
571 CALL cp_cfm_release(c_rmat_local)
572
573 CALL timestop(handle)
574
575 END SUBROUTINE jacobi_rotations_serial_1
576! **************************************************************************************************
577!> \brief combine jacobi rotations (serial) and conjugate gradient with golden section line search
578!> for partially occupied wannier functions
579!> \param para_env ...
580!> \param weights ...
581!> \param zij ...
582!> \param vectors ...
583!> \param max_iter ...
584!> \param eps_localization ...
585!> \param iter ...
586!> \param out_each ...
587!> \param nextra ...
588!> \param do_cg ...
589!> \param nmo ...
590!> \param vectors_2 ...
591!> \param mos_guess ...
592! **************************************************************************************************
593 SUBROUTINE jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, &
594 iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
595 TYPE(mp_para_env_type), POINTER :: para_env
596 REAL(kind=dp), INTENT(IN) :: weights(:)
597 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
598 INTEGER, INTENT(IN) :: max_iter
599 REAL(kind=dp), INTENT(IN) :: eps_localization
600 INTEGER :: iter
601 INTEGER, INTENT(IN) :: out_each, nextra
602 LOGICAL, INTENT(IN) :: do_cg
603 INTEGER, INTENT(IN), OPTIONAL :: nmo
604 TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: vectors_2, mos_guess
605
606 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_cg_edf_ls'
607 COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
608 czero = (0.0_dp, 0.0_dp)
609 REAL(kind=dp), PARAMETER :: gold_sec = 0.3819_dp
610
611 COMPLEX(KIND=dp) :: cnorm2_gct, cnorm2_gct_cross, mzii
612 COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_cmat
613 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: arr_zii
614 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: matrix_zii
615 INTEGER :: dim2, handle, icinit, idim, istate, line_search_count, line_searches, lsl, lsm, &
616 lsr, miniter, nao, ndummy, nocc, norextra, northo, nstate, unit_nr
617 INTEGER, DIMENSION(1) :: iloc
618 LOGICAL :: do_cinit_mo, do_cinit_random, &
619 do_u_guess_mo, new_direction
620 REAL(kind=dp) :: alpha, avg_spread_ii, beta, beta_pr, ds, ds_min, mintol, norm, norm2_gct, &
621 norm2_gct_cross, norm2_old, spread_ii, spread_sum, sum_spread_ii, t1, tol, tolc, weight
622 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: sum_spread
623 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_mat, tmp_mat_1
624 REAL(kind=dp), DIMENSION(50) :: energy, pos
625 REAL(kind=dp), DIMENSION(:), POINTER :: tmp_arr
626 TYPE(cp_blacs_env_type), POINTER :: context
627 TYPE(cp_cfm_type) :: c_tilde, ctrans_lambda, gct_old, &
628 grad_ctilde, skc, tmp_cfm, tmp_cfm_1, &
629 tmp_cfm_2, u, ul, v, vl, zdiag
630 TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij, zij_0
631 TYPE(cp_fm_struct_type), POINTER :: tmp_fm_struct
632 TYPE(cp_fm_type) :: id_nextra, matrix_u, matrix_v, &
633 matrix_v_all, rmat, tmp_fm, vectors_all
634
635 CALL timeset(routinen, handle)
636
637 dim2 = SIZE(zij, 2)
638 NULLIFY (context)
639 NULLIFY (matrix_zii, arr_zii)
640 NULLIFY (tmp_fm_struct)
641 NULLIFY (tmp_arr)
642
643 ALLOCATE (c_zij(dim2))
644
645 CALL cp_fm_get_info(zij(1, 1), nrow_global=nstate)
646
647 ALLOCATE (sum_spread(nstate))
648 ALLOCATE (matrix_zii(nstate, dim2))
649 matrix_zii = czero
650 sum_spread = 0.0_dp
651
652 alpha = 0.0_dp
653 DO idim = 1, dim2
654 alpha = alpha + weights(idim)
655 CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
656 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
657 zij(2, idim)%local_data, dp)
658 END DO
659
660 ALLOCATE (zij_0(dim2))
661
662 CALL cp_cfm_create(u, zij(1, 1)%matrix_struct)
663 CALL cp_fm_create(matrix_u, zij(1, 1)%matrix_struct)
664
665 CALL cp_cfm_set_all(u, czero, cone)
666 CALL cp_fm_set_all(matrix_u, 0.0_dp, 1.0_dp)
667
668 CALL cp_fm_get_info(vectors, nrow_global=nao)
669 IF (nextra > 0) THEN
670 IF (PRESENT(mos_guess)) THEN
671 do_cinit_random = .false.
672 do_cinit_mo = .true.
673 CALL cp_fm_get_info(mos_guess, ncol_global=ndummy)
674 ELSE
675 do_cinit_random = .true.
676 do_cinit_mo = .false.
677 ndummy = nstate
678 END IF
679
680 IF (do_cinit_random) THEN
681 icinit = 1
682 do_u_guess_mo = .false.
683 ELSEIF (do_cinit_mo) THEN
684 icinit = 2
685 do_u_guess_mo = .true.
686 END IF
687
688 nocc = nstate - nextra
689 northo = nmo - nocc
690 norextra = nmo - nstate
691 CALL cp_fm_struct_get(zij(1, 1)%matrix_struct, context=context)
692
693 ALLOCATE (tmp_cmat(nstate, nstate))
694 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nmo, &
695 para_env=para_env, context=context)
696 DO idim = 1, dim2
697 CALL cp_cfm_create(zij_0(idim), tmp_fm_struct)
698 CALL cp_cfm_set_all(zij_0(idim), czero, cone)
699 CALL cp_cfm_get_submatrix(c_zij(idim), tmp_cmat)
700 CALL cp_cfm_set_submatrix(zij_0(idim), tmp_cmat)
701 END DO
702 CALL cp_fm_struct_release(tmp_fm_struct)
703 DEALLOCATE (tmp_cmat)
704
705 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nstate, &
706 para_env=para_env, context=context)
707 CALL cp_cfm_create(v, tmp_fm_struct)
708 CALL cp_fm_create(matrix_v, tmp_fm_struct)
709 CALL cp_cfm_create(zdiag, tmp_fm_struct)
710 CALL cp_fm_create(rmat, tmp_fm_struct)
711 CALL cp_fm_struct_release(tmp_fm_struct)
712 CALL cp_cfm_set_all(v, czero, cone)
713 CALL cp_fm_set_all(matrix_v, 0.0_dp, 1.0_dp)
714
715 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=ndummy, &
716 para_env=para_env, context=context)
717 CALL cp_fm_create(matrix_v_all, tmp_fm_struct)
718 CALL cp_fm_struct_release(tmp_fm_struct)
719 CALL cp_fm_set_all(matrix_v_all, 0._dp, 1._dp)
720
721 ALLOCATE (arr_zii(nstate))
722
723 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
724 para_env=para_env, context=context)
725 CALL cp_cfm_create(c_tilde, tmp_fm_struct)
726 CALL cp_cfm_create(grad_ctilde, tmp_fm_struct)
727 CALL cp_cfm_create(gct_old, tmp_fm_struct)
728 CALL cp_cfm_create(skc, tmp_fm_struct)
729 CALL cp_fm_struct_release(tmp_fm_struct)
730 CALL cp_cfm_set_all(c_tilde, czero)
731 CALL cp_cfm_set_all(gct_old, czero)
732 CALL cp_cfm_set_all(skc, czero)
733
734 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nstate, &
735 para_env=para_env, context=context)
736 CALL cp_cfm_create(vl, tmp_fm_struct)
737 CALL cp_cfm_set_all(vl, czero)
738 CALL cp_fm_struct_release(tmp_fm_struct)
739
740 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nextra, &
741 para_env=para_env, context=context)
742 CALL cp_fm_create(id_nextra, tmp_fm_struct)
743 CALL cp_cfm_create(ctrans_lambda, tmp_fm_struct)
744 CALL cp_fm_struct_release(tmp_fm_struct)
745 CALL cp_cfm_set_all(ctrans_lambda, czero)
746 CALL cp_fm_set_all(id_nextra, 0.0_dp, 1.0_dp)
747
748 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nstate, &
749 para_env=para_env, context=context)
750 CALL cp_cfm_create(ul, tmp_fm_struct)
751 CALL cp_fm_struct_release(tmp_fm_struct)
752 CALL cp_cfm_set_all(ul, czero)
753
754 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nao, ncol_global=nmo, &
755 para_env=para_env, context=context)
756 CALL cp_fm_create(vectors_all, tmp_fm_struct)
757 CALL cp_fm_struct_release(tmp_fm_struct)
758 ALLOCATE (tmp_mat(nao, nstate))
759 CALL cp_fm_get_submatrix(vectors, tmp_mat)
760 CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, 1, nao, nstate)
761 DEALLOCATE (tmp_mat)
762 ALLOCATE (tmp_mat(nao, norextra))
763 CALL cp_fm_get_submatrix(vectors_2, tmp_mat)
764 CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, nstate + 1, nao, norextra)
765 DEALLOCATE (tmp_mat)
766
767 ! initialize c_tilde
768 SELECT CASE (icinit)
769 CASE (1) ! random coefficients
770 !WRITE (*, *) "RANDOM INITIAL GUESS FOR C"
771 CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
772 CALL cp_fm_init_random(tmp_fm, nextra)
773 CALL ortho_vectors(tmp_fm)
774 c_tilde%local_data = tmp_fm%local_data
775 CALL cp_fm_release(tmp_fm)
776 ALLOCATE (tmp_cmat(northo, nextra))
777 CALL cp_cfm_get_submatrix(c_tilde, tmp_cmat)
778 CALL cp_cfm_set_submatrix(v, tmp_cmat, nocc + 1, nocc + 1, northo, nextra)
779 DEALLOCATE (tmp_cmat)
780 CASE (2) ! MO based coeffs
781 CALL parallel_gemm("T", "N", nmo, ndummy, nao, 1.0_dp, vectors_all, mos_guess, 0.0_dp, matrix_v_all)
782 ALLOCATE (tmp_arr(nmo))
783 ALLOCATE (tmp_mat(nmo, ndummy))
784 ALLOCATE (tmp_mat_1(nmo, nstate))
785 ! normalize matrix_V_all
786 CALL cp_fm_get_submatrix(matrix_v_all, tmp_mat)
787 DO istate = 1, ndummy
788 tmp_arr(:) = tmp_mat(:, istate)
789 norm = norm2(tmp_arr)
790 tmp_arr(:) = tmp_arr(:)/norm
791 tmp_mat(:, istate) = tmp_arr(:)
792 END DO
793 CALL cp_fm_set_submatrix(matrix_v_all, tmp_mat)
794 CALL cp_fm_get_submatrix(matrix_v_all, tmp_mat_1, 1, 1, nmo, nstate)
795 CALL cp_fm_set_submatrix(matrix_v, tmp_mat_1)
796 DEALLOCATE (tmp_arr, tmp_mat, tmp_mat_1)
797 CALL cp_fm_to_cfm(msourcer=matrix_v, mtarget=v)
798 ALLOCATE (tmp_mat(northo, ndummy))
799 ALLOCATE (tmp_mat_1(northo, nextra))
800 CALL cp_fm_get_submatrix(matrix_v_all, tmp_mat, nocc + 1, 1, northo, ndummy)
801 ALLOCATE (tmp_arr(ndummy))
802 tmp_arr = 0.0_dp
803 DO istate = 1, ndummy
804 tmp_arr(istate) = norm2(tmp_mat(:, istate))
805 END DO
806 ! find edfs
807 DO istate = 1, nextra
808 iloc = maxloc(tmp_arr)
809 tmp_mat_1(:, istate) = tmp_mat(:, iloc(1))
810 tmp_arr(iloc(1)) = 0.0_dp
811 END DO
812
813 DEALLOCATE (tmp_arr, tmp_mat)
814
815 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
816 para_env=para_env, context=context)
817 CALL cp_fm_create(tmp_fm, tmp_fm_struct)
818 CALL cp_fm_struct_release(tmp_fm_struct)
819 CALL cp_fm_set_submatrix(tmp_fm, tmp_mat_1)
820 DEALLOCATE (tmp_mat_1)
821 CALL ortho_vectors(tmp_fm)
822 CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
823 CALL cp_fm_release(tmp_fm)
824 ! initialize U
825 IF (do_u_guess_mo) THEN
826 ALLOCATE (tmp_cmat(nocc, nstate))
827 CALL cp_cfm_get_submatrix(v, tmp_cmat, 1, 1, nocc, nstate)
828 CALL cp_cfm_set_submatrix(u, tmp_cmat, 1, 1, nocc, nstate)
829 DEALLOCATE (tmp_cmat)
830 ALLOCATE (tmp_cmat(northo, nstate))
831 CALL cp_cfm_get_submatrix(v, tmp_cmat, nocc + 1, 1, northo, nstate)
832 CALL cp_cfm_set_submatrix(vl, tmp_cmat, 1, 1, northo, nstate)
833 DEALLOCATE (tmp_cmat)
834 CALL parallel_gemm("C", "N", nextra, nstate, northo, cone, c_tilde, vl, czero, ul)
835 ALLOCATE (tmp_cmat(nextra, nstate))
836 CALL cp_cfm_get_submatrix(ul, tmp_cmat, 1, 1, nextra, nstate)
837 CALL cp_cfm_set_submatrix(u, tmp_cmat, nocc + 1, 1, nextra, nstate)
838 DEALLOCATE (tmp_cmat)
839 CALL cp_fm_create(tmp_fm, u%matrix_struct)
840 tmp_fm%local_data = real(u%local_data, kind=dp)
841 CALL ortho_vectors(tmp_fm)
842 CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=u)
843 CALL cp_fm_release(tmp_fm)
844 CALL cp_cfm_to_fm(u, matrix_u)
845 END IF
846 ! reevaluate V
847 ALLOCATE (tmp_cmat(nocc, nstate))
848 CALL cp_cfm_get_submatrix(u, tmp_cmat, 1, 1, nocc, nstate)
849 CALL cp_cfm_set_submatrix(v, tmp_cmat, 1, 1, nocc, nstate)
850 DEALLOCATE (tmp_cmat)
851 ALLOCATE (tmp_cmat(nextra, nstate))
852 CALL cp_cfm_get_submatrix(u, tmp_cmat, nocc + 1, 1, nextra, nstate)
853 CALL cp_cfm_set_submatrix(ul, tmp_cmat, 1, 1, nextra, nstate)
854 DEALLOCATE (tmp_cmat)
855 CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
856 ALLOCATE (tmp_cmat(northo, nstate))
857 CALL cp_cfm_get_submatrix(vl, tmp_cmat)
858 CALL cp_cfm_set_submatrix(v, tmp_cmat, nocc + 1, 1, northo, nstate)
859 DEALLOCATE (tmp_cmat)
860 END SELECT
861 ELSE
862 DO idim = 1, dim2
863 CALL cp_cfm_create(zij_0(idim), zij(1, 1)%matrix_struct)
864 CALL cp_cfm_to_cfm(c_zij(idim), zij_0(idim))
865 END DO
866 CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
867 CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
868 END IF
869
870 unit_nr = -1
871 IF (rmat%matrix_struct%para_env%is_source()) THEN
873 WRITE (unit_nr, '(T4,A )') " Localization by combined Jacobi rotations and Non-Linear Conjugate Gradient"
874 END IF
875
876 norm2_old = 1.0e30_dp
877 ds_min = 1.0_dp
878 new_direction = .true.
879 iter = 0
880 line_searches = 0
881 line_search_count = 0
882 tol = 1.0e+20_dp
883 mintol = 1.0e+10_dp
884 miniter = 0
885
886 !IF (nextra > 0) WRITE(*,*) 'random_guess, MO_guess, U_guess, conjugate_gradient: ', &
887 ! do_cinit_random, do_cinit_mo, do_U_guess_mo, do_cg
888
889 ! do conjugate gradient until converged
890 DO WHILE (iter < max_iter)
891 iter = iter + 1
892 !WRITE(*,*) 'iter = ', iter
893 t1 = m_walltime()
894
895 IF (iter > 1) THEN
896 ! comput U
897 CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
898 CALL cp_cfm_create(tmp_cfm_2, zij(1, 1)%matrix_struct)
899 IF (para_env%num_pe == 1) THEN
900 CALL jacobi_rotations_serial_1(weights, c_zij, 1, tmp_cfm_2, tol_out=tol)
901 ELSE
902 CALL jacobi_rot_para_1(weights, c_zij, para_env, 1, tmp_cfm_2, tol_out=tol)
903 END IF
904 CALL parallel_gemm('N', 'N', nstate, nstate, nstate, cone, u, tmp_cfm_2, czero, tmp_cfm)
905 CALL cp_cfm_to_cfm(tmp_cfm, u)
906 CALL cp_cfm_release(tmp_cfm)
907 CALL cp_cfm_release(tmp_cfm_2)
908 END IF
909
910 IF (nextra > 0) THEN
911 ALLOCATE (tmp_cmat(nextra, nstate))
912 CALL cp_cfm_get_submatrix(u, tmp_cmat, nocc + 1, 1, nextra, nstate)
913 CALL cp_cfm_set_submatrix(ul, tmp_cmat)
914 DEALLOCATE (tmp_cmat)
915 IF (iter > 1) THEN
916 ! orthonormalize c_tilde
917 CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
918 tmp_fm%local_data = real(c_tilde%local_data, kind=dp)
919 CALL ortho_vectors(tmp_fm)
920 CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
921 CALL cp_fm_release(tmp_fm)
922
923 ALLOCATE (tmp_cmat(nocc, nstate))
924 CALL cp_cfm_get_submatrix(u, tmp_cmat, 1, 1, nocc, nstate)
925 CALL cp_cfm_set_submatrix(v, tmp_cmat, 1, 1, nocc, nstate)
926 DEALLOCATE (tmp_cmat)
927 CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
928 ALLOCATE (tmp_cmat(northo, nstate))
929 CALL cp_cfm_get_submatrix(vl, tmp_cmat)
930 CALL cp_cfm_set_submatrix(v, tmp_cmat, nocc + 1, 1, northo, nstate)
931 DEALLOCATE (tmp_cmat)
932 END IF
933
934 ! reset if new_direction
935 IF (new_direction .AND. mod(line_searches, 20) .EQ. 5) THEN
936 CALL cp_cfm_set_all(skc, czero)
937 CALL cp_cfm_set_all(gct_old, czero)
938 norm2_old = 1.0e30_dp
939 END IF
940
941 CALL cp_cfm_create(tmp_cfm, v%matrix_struct)
942 CALL cp_cfm_to_cfm(v, tmp_cfm)
943 CALL cp_cfm_create(tmp_cfm_1, v%matrix_struct)
944 ndummy = nmo
945 ELSE
946 CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
947 CALL cp_cfm_to_cfm(u, tmp_cfm)
948 CALL cp_cfm_create(tmp_cfm_1, zij(1, 1)%matrix_struct)
949 ndummy = nstate
950 END IF
951 ! update z_ij
952 DO idim = 1, dim2
953 ! 'tmp_cfm_1 = zij_0*tmp_cfm'
954 CALL parallel_gemm("N", "N", ndummy, nstate, ndummy, cone, zij_0(idim), &
955 tmp_cfm, czero, tmp_cfm_1)
956 ! 'c_zij = tmp_cfm_dagg*tmp_cfm_1'
957 CALL parallel_gemm("C", "N", nstate, nstate, ndummy, cone, tmp_cfm, tmp_cfm_1, &
958 czero, c_zij(idim))
959 END DO
960 CALL cp_cfm_release(tmp_cfm)
961 CALL cp_cfm_release(tmp_cfm_1)
962 ! compute spread
963 DO istate = 1, nstate
964 spread_ii = 0.0_dp
965 DO idim = 1, dim2
966 CALL cp_cfm_get_element(c_zij(idim), istate, istate, mzii)
967 spread_ii = spread_ii + weights(idim)* &
968 abs(mzii)**2/twopi/twopi
969 matrix_zii(istate, idim) = mzii
970 END DO
971 !WRITE(*,*) 'spread_ii', spread_ii
972 sum_spread(istate) = spread_ii
973 END DO
974 CALL c_zij(1)%matrix_struct%para_env%sum(spread_ii)
975 spread_sum = accurate_sum(sum_spread)
976
977 IF (nextra > 0) THEN
978 ! update c_tilde
979 CALL cp_cfm_set_all(zdiag, czero)
980 CALL cp_cfm_set_all(grad_ctilde, czero)
981 CALL cp_cfm_create(tmp_cfm, v%matrix_struct)
982 CALL cp_cfm_set_all(tmp_cfm, czero)
983 CALL cp_cfm_create(tmp_cfm_1, v%matrix_struct)
984 CALL cp_cfm_set_all(tmp_cfm_1, czero)
985 ALLOCATE (tmp_cmat(northo, nstate))
986 DO idim = 1, dim2
987 weight = weights(idim)
988 arr_zii = matrix_zii(:, idim)
989 ! tmp_cfm = zij_0*V
990 CALL parallel_gemm("N", "N", nmo, nstate, nmo, cone, &
991 zij_0(idim), v, czero, tmp_cfm)
992 ! tmp_cfm = tmp_cfm*diag_zij_dagg
993 CALL cp_cfm_column_scale(tmp_cfm, conjg(arr_zii))
994 ! tmp_cfm_1 = tmp_cfm*U_dagg
995 CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
996 u, czero, tmp_cfm_1)
997 CALL cp_cfm_scale(weight, tmp_cfm_1)
998 ! zdiag = zdiag + tmp_cfm_1'
999 CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
1000
1001 ! tmp_cfm = zij_0_dagg*V
1002 CALL parallel_gemm("C", "N", nmo, nstate, nmo, cone, &
1003 zij_0(idim), v, czero, tmp_cfm)
1004
1005 ! tmp_cfm = tmp_cfm*diag_zij
1006 CALL cp_cfm_column_scale(tmp_cfm, arr_zii)
1007 ! tmp_cfm_1 = tmp_cfm*U_dagg
1008 CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
1009 u, czero, tmp_cfm_1)
1010 CALL cp_cfm_scale(weight, tmp_cfm_1)
1011 ! zdiag = zdiag + tmp_cfm_1'
1012 CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
1013 END DO ! idim
1014 CALL cp_cfm_release(tmp_cfm)
1015 CALL cp_cfm_release(tmp_cfm_1)
1016 DEALLOCATE (tmp_cmat)
1017 ALLOCATE (tmp_cmat(northo, nextra))
1018 CALL cp_cfm_get_submatrix(zdiag, tmp_cmat, nocc + 1, nocc + 1, &
1019 northo, nextra, .false.)
1020 ! 'grad_ctilde'
1021 CALL cp_cfm_set_submatrix(grad_ctilde, tmp_cmat)
1022 DEALLOCATE (tmp_cmat)
1023 ! ctrans_lambda = c_tilde_dagg*grad_ctilde
1024 CALL parallel_gemm("C", "N", nextra, nextra, northo, cone, c_tilde, grad_ctilde, czero, ctrans_lambda)
1025 !WRITE(*,*) "norm(ctrans_lambda) = ", cp_cfm_norm(ctrans_lambda, "F")
1026 ! 'grad_ctilde = - c_tilde*ctrans_lambda + grad_ctilde'
1027 CALL parallel_gemm("N", "N", northo, nextra, nextra, -cone, c_tilde, ctrans_lambda, cone, grad_ctilde)
1028 END IF ! nextra > 0
1029
1030 ! tolerance
1031 IF (nextra > 0) THEN
1032 tolc = 0.0_dp
1033 CALL cp_fm_create(tmp_fm, grad_ctilde%matrix_struct)
1034 CALL cp_cfm_to_fm(grad_ctilde, tmp_fm)
1035 CALL cp_fm_maxabsval(tmp_fm, tolc)
1036 CALL cp_fm_release(tmp_fm)
1037 !WRITE(*,*) 'tolc = ', tolc
1038 tol = tol + tolc
1039 END IF
1040 !WRITE(*,*) 'tol = ', tol
1041
1042 IF (nextra > 0) THEN
1043 !WRITE(*,*) 'new_direction: ', new_direction
1044 IF (new_direction) THEN
1045 line_searches = line_searches + 1
1046 IF (mintol > tol) THEN
1047 mintol = tol
1048 miniter = iter
1049 END IF
1050
1051 IF (unit_nr > 0 .AND. modulo(iter, out_each) == 0) THEN
1052 sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1053 avg_spread_ii = sum_spread_ii/nstate
1054 WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1055 "Iteration", "Avg. Spread_ii", "Tolerance"
1056 WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1057 iter, avg_spread_ii, tol
1058 CALL m_flush(unit_nr)
1059 END IF
1060 IF (tol < eps_localization) EXIT
1061
1062 IF (do_cg) THEN
1063 cnorm2_gct = czero
1064 cnorm2_gct_cross = czero
1065 CALL cp_cfm_trace(grad_ctilde, gct_old, cnorm2_gct_cross)
1066 norm2_gct_cross = real(cnorm2_gct_cross, kind=dp)
1067 gct_old%local_data = grad_ctilde%local_data
1068 CALL cp_cfm_trace(grad_ctilde, gct_old, cnorm2_gct)
1069 norm2_gct = real(cnorm2_gct, kind=dp)
1070 ! compute beta_pr
1071 beta_pr = (norm2_gct - norm2_gct_cross)/norm2_old
1072 norm2_old = norm2_gct
1073 beta = max(0.0_dp, beta_pr)
1074 !WRITE(*,*) 'beta = ', beta
1075 ! compute skc / ska = beta * skc / ska + grad_ctilde / G
1076 CALL cp_cfm_scale(beta, skc)
1077 CALL cp_cfm_scale_and_add(cone, skc, cone, gct_old)
1078 CALL cp_cfm_trace(skc, gct_old, cnorm2_gct_cross)
1079 norm2_gct_cross = real(cnorm2_gct_cross, kind=dp)
1080 IF (norm2_gct_cross .LE. 0.0_dp) THEN ! back to steepest ascent
1081 CALL cp_cfm_scale_and_add(czero, skc, cone, gct_old)
1082 END IF
1083 ELSE
1084 CALL cp_cfm_scale_and_add(czero, skc, cone, grad_ctilde)
1085 END IF
1086 line_search_count = 0
1087 END IF
1088
1089 line_search_count = line_search_count + 1
1090 !WRITE(*,*) 'line_search_count = ', line_search_count
1091 energy(line_search_count) = spread_sum
1092
1093 ! gold line search
1094 new_direction = .false.
1095 IF (line_search_count .EQ. 1) THEN
1096 lsl = 1
1097 lsr = 0
1098 lsm = 1
1099 pos(1) = 0.0_dp
1100 pos(2) = ds_min/gold_sec
1101 ds = pos(2)
1102 ELSE
1103 IF (line_search_count .EQ. 50) THEN
1104 IF (abs(energy(line_search_count) - energy(line_search_count - 1)) < 1.0e-4_dp) THEN
1105 cpwarn("Line search failed to converge properly")
1106 ds_min = 0.1_dp
1107 new_direction = .true.
1108 ds = pos(line_search_count)
1109 line_search_count = 0
1110 ELSE
1111 cpabort("No. of line searches exceeds 50")
1112 END IF
1113 ELSE
1114 IF (lsr .EQ. 0) THEN
1115 IF (energy(line_search_count - 1) .GT. energy(line_search_count)) THEN
1116 lsr = line_search_count
1117 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
1118 ELSE
1119 lsl = lsm
1120 lsm = line_search_count
1121 pos(line_search_count + 1) = pos(line_search_count)/gold_sec
1122 END IF
1123 ELSE
1124 IF (pos(line_search_count) .LT. pos(lsm)) THEN
1125 IF (energy(line_search_count) .GT. energy(lsm)) THEN
1126 lsr = lsm
1127 lsm = line_search_count
1128 ELSE
1129 lsl = line_search_count
1130 END IF
1131 ELSE
1132 IF (energy(line_search_count) .GT. energy(lsm)) THEN
1133 lsl = lsm
1134 lsm = line_search_count
1135 ELSE
1136 lsr = line_search_count
1137 END IF
1138 END IF
1139 IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl)) THEN
1140 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
1141 ELSE
1142 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
1143 END IF
1144 IF ((pos(lsr) - pos(lsl)) .LT. 1.0e-3_dp*pos(lsr)) THEN
1145 new_direction = .true.
1146 END IF
1147 END IF ! lsr .eq. 0
1148 END IF ! line_search_count .eq. 50
1149 ! now go to the suggested point
1150 ds = pos(line_search_count + 1) - pos(line_search_count)
1151 !WRITE(*,*) 'lsl, lsr, lsm, ds = ', lsl, lsr, lsm, ds
1152 IF ((abs(ds) < 1.0e-10_dp) .AND. (lsl == 1)) THEN
1153 new_direction = .true.
1154 ds_min = 0.5_dp/alpha
1155 ELSEIF (abs(ds) > 10.0_dp) THEN
1156 new_direction = .true.
1157 ds_min = 0.5_dp/alpha
1158 ELSE
1159 ds_min = pos(line_search_count + 1)
1160 END IF
1161 END IF ! first step
1162 ! 'c_tilde = c_tilde + d*skc'
1163 CALL cp_cfm_scale(ds, skc)
1164 CALL cp_cfm_scale_and_add(cone, c_tilde, cone, skc)
1165 ELSE
1166 IF (mintol > tol) THEN
1167 mintol = tol
1168 miniter = iter
1169 END IF
1170 IF (unit_nr > 0 .AND. modulo(iter, out_each) == 0) THEN
1171 sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1172 avg_spread_ii = sum_spread_ii/nstate
1173 WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1174 "Iteration", "Avg. Spread_ii", "Tolerance"
1175 WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1176 iter, avg_spread_ii, tol
1177 CALL m_flush(unit_nr)
1178 END IF
1179 IF (tol < eps_localization) EXIT
1180 END IF ! nextra > 0
1181
1182 END DO ! iteration
1183
1184 IF ((unit_nr > 0) .AND. (iter == max_iter)) THEN
1185 WRITE (unit_nr, '(T4,A,T4,A)') "Min. Itr.", "Min. Tol."
1186 WRITE (unit_nr, '(T4,I7,T4,E12.4)') miniter, mintol
1187 CALL m_flush(unit_nr)
1188 END IF
1189
1190 CALL cp_cfm_to_fm(u, matrix_u)
1191
1192 IF (nextra > 0) THEN
1193 rmat%local_data = real(v%local_data, kind=dp)
1194 CALL rotate_orbitals_edf(rmat, vectors_all, vectors)
1195
1196 CALL cp_cfm_release(c_tilde)
1197 CALL cp_cfm_release(grad_ctilde)
1198 CALL cp_cfm_release(gct_old)
1199 CALL cp_cfm_release(skc)
1200 CALL cp_cfm_release(ul)
1201 CALL cp_cfm_release(zdiag)
1202 CALL cp_cfm_release(ctrans_lambda)
1203 CALL cp_fm_release(id_nextra)
1204 CALL cp_fm_release(vectors_all)
1205 CALL cp_cfm_release(v)
1206 CALL cp_fm_release(matrix_v)
1207 CALL cp_fm_release(matrix_v_all)
1208 CALL cp_cfm_release(vl)
1209 DEALLOCATE (arr_zii)
1210 ELSE
1211 rmat%local_data = matrix_u%local_data
1212 CALL rotate_orbitals(rmat, vectors)
1213 END IF
1214 DO idim = 1, dim2
1215 CALL cp_cfm_release(zij_0(idim))
1216 END DO
1217 DEALLOCATE (zij_0)
1218
1219 DO idim = 1, dim2
1220 zij(1, idim)%local_data = real(c_zij(idim)%local_data, dp)
1221 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
1222 CALL cp_cfm_release(c_zij(idim))
1223 END DO
1224 DEALLOCATE (c_zij)
1225 CALL cp_fm_release(rmat)
1226 CALL cp_cfm_release(u)
1227 CALL cp_fm_release(matrix_u)
1228 DEALLOCATE (matrix_zii, sum_spread)
1229
1230 CALL timestop(handle)
1231
1232 END SUBROUTINE jacobi_cg_edf_ls
1233
1234! **************************************************************************************************
1235!> \brief ...
1236!> \param vmatrix ...
1237! **************************************************************************************************
1238 SUBROUTINE ortho_vectors(vmatrix)
1239
1240 TYPE(cp_fm_type), INTENT(IN) :: vmatrix
1241
1242 CHARACTER(LEN=*), PARAMETER :: routinen = 'ortho_vectors'
1243
1244 INTEGER :: handle, n, ncol
1245 TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
1246 TYPE(cp_fm_type) :: overlap_vv
1247
1248 CALL timeset(routinen, handle)
1249
1250 NULLIFY (fm_struct_tmp)
1251
1252 CALL cp_fm_get_info(matrix=vmatrix, nrow_global=n, ncol_global=ncol)
1253
1254 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ncol, ncol_global=ncol, &
1255 para_env=vmatrix%matrix_struct%para_env, &
1256 context=vmatrix%matrix_struct%context)
1257 CALL cp_fm_create(overlap_vv, fm_struct_tmp, "overlap_vv")
1258 CALL cp_fm_struct_release(fm_struct_tmp)
1259
1260 CALL parallel_gemm('T', 'N', ncol, ncol, n, 1.0_dp, vmatrix, vmatrix, 0.0_dp, overlap_vv)
1261 CALL cp_fm_cholesky_decompose(overlap_vv)
1262 CALL cp_fm_triangular_multiply(overlap_vv, vmatrix, n_cols=ncol, side='R', invert_tr=.true.)
1263
1264 CALL cp_fm_release(overlap_vv)
1265
1266 CALL timestop(handle)
1267
1268 END SUBROUTINE ortho_vectors
1269
1270! **************************************************************************************************
1271!> \brief ...
1272!> \param istate ...
1273!> \param jstate ...
1274!> \param st ...
1275!> \param ct ...
1276!> \param zij ...
1277! **************************************************************************************************
1278 SUBROUTINE rotate_zij(istate, jstate, st, ct, zij)
1279 INTEGER, INTENT(IN) :: istate, jstate
1280 REAL(kind=dp), INTENT(IN) :: st, ct
1281 TYPE(cp_cfm_type) :: zij(:)
1282
1283 INTEGER :: id
1284
1285! Locals
1286
1287 DO id = 1, SIZE(zij, 1)
1288 CALL cp_cfm_rot_cols(zij(id), istate, jstate, ct, st)
1289 CALL cp_cfm_rot_rows(zij(id), istate, jstate, ct, st)
1290 END DO
1291
1292 END SUBROUTINE rotate_zij
1293! **************************************************************************************************
1294!> \brief ...
1295!> \param istate ...
1296!> \param jstate ...
1297!> \param st ...
1298!> \param ct ...
1299!> \param rmat ...
1300! **************************************************************************************************
1301 SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat)
1302 INTEGER, INTENT(IN) :: istate, jstate
1303 REAL(kind=dp), INTENT(IN) :: st, ct
1304 TYPE(cp_cfm_type), INTENT(IN) :: rmat
1305
1306 CALL cp_cfm_rot_cols(rmat, istate, jstate, ct, st)
1307
1308 END SUBROUTINE rotate_rmat
1309! **************************************************************************************************
1310!> \brief ...
1311!> \param mii ...
1312!> \param mjj ...
1313!> \param mij ...
1314!> \param weights ...
1315!> \param theta ...
1316!> \param grad_ij ...
1317!> \param step ...
1318! **************************************************************************************************
1319 SUBROUTINE get_angle(mii, mjj, mij, weights, theta, grad_ij, step)
1320 COMPLEX(KIND=dp), POINTER :: mii(:), mjj(:), mij(:)
1321 REAL(kind=dp), INTENT(IN) :: weights(:)
1322 REAL(kind=dp), INTENT(OUT) :: theta
1323 REAL(kind=dp), INTENT(IN), OPTIONAL :: grad_ij, step
1324
1325 COMPLEX(KIND=dp) :: z11, z12, z22
1326 INTEGER :: dim_m, idim
1327 REAL(kind=dp) :: a12, b12, d2, ratio
1328
1329 a12 = 0.0_dp
1330 b12 = 0.0_dp
1331 dim_m = SIZE(mii)
1332 DO idim = 1, dim_m
1333 z11 = mii(idim)
1334 z22 = mjj(idim)
1335 z12 = mij(idim)
1336 a12 = a12 + weights(idim)*real(conjg(z12)*(z11 - z22), kind=dp)
1337 b12 = b12 + weights(idim)*real((z12*conjg(z12) - &
1338 0.25_dp*(z11 - z22)*(conjg(z11) - conjg(z22))), kind=dp)
1339 END DO
1340 IF (abs(b12) > 1.e-10_dp) THEN
1341 ratio = -a12/b12
1342 theta = 0.25_dp*atan(ratio)
1343 ELSEIF (abs(b12) < 1.e-10_dp) THEN
1344 b12 = 0.0_dp
1345 theta = 0.0_dp
1346 ELSE
1347 theta = 0.25_dp*pi
1348 END IF
1349 IF (PRESENT(grad_ij)) theta = theta + step*grad_ij
1350! Check second derivative info
1351 d2 = a12*sin(4._dp*theta) - b12*cos(4._dp*theta)
1352 IF (d2 <= 0._dp) THEN ! go to the maximum, not the minimum
1353 IF (theta > 0.0_dp) THEN ! make theta as small as possible
1354 theta = theta - 0.25_dp*pi
1355 ELSE
1356 theta = theta + 0.25_dp*pi
1357 END IF
1358 END IF
1359 END SUBROUTINE get_angle
1360! **************************************************************************************************
1361!> \brief ...
1362!> \param zij ...
1363!> \param weights ...
1364!> \param tolerance ...
1365!> \param grad ...
1366! **************************************************************************************************
1367 SUBROUTINE check_tolerance(zij, weights, tolerance, grad)
1368 TYPE(cp_cfm_type) :: zij(:)
1369 REAL(kind=dp), INTENT(IN) :: weights(:)
1370 REAL(kind=dp), INTENT(OUT) :: tolerance
1371 TYPE(cp_fm_type), INTENT(OUT), OPTIONAL :: grad
1372
1373 CHARACTER(len=*), PARAMETER :: routinen = 'check_tolerance'
1374
1375 INTEGER :: handle
1376 TYPE(cp_fm_type) :: force
1377
1378 CALL timeset(routinen, handle)
1379
1380! compute gradient at t=0
1381
1382 CALL cp_fm_create(force, zij(1)%matrix_struct)
1383 CALL cp_fm_set_all(force, 0._dp)
1384 CALL grad_at_0(zij, weights, force)
1385 CALL cp_fm_maxabsval(force, tolerance)
1386 IF (PRESENT(grad)) CALL cp_fm_to_fm(force, grad)
1387 CALL cp_fm_release(force)
1388
1389 CALL timestop(handle)
1390
1391 END SUBROUTINE check_tolerance
1392! **************************************************************************************************
1393!> \brief ...
1394!> \param rmat ...
1395!> \param vectors ...
1396! **************************************************************************************************
1397 SUBROUTINE rotate_orbitals(rmat, vectors)
1398 TYPE(cp_fm_type), INTENT(IN) :: rmat, vectors
1399
1400 INTEGER :: k, n
1401 TYPE(cp_fm_type) :: wf
1402
1403 CALL cp_fm_create(wf, vectors%matrix_struct)
1404 CALL cp_fm_get_info(vectors, nrow_global=n, ncol_global=k)
1405 CALL parallel_gemm("N", "N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf)
1406 CALL cp_fm_to_fm(wf, vectors)
1407 CALL cp_fm_release(wf)
1408 END SUBROUTINE rotate_orbitals
1409! **************************************************************************************************
1410!> \brief ...
1411!> \param rmat ...
1412!> \param vec_all ...
1413!> \param vectors ...
1414! **************************************************************************************************
1415 SUBROUTINE rotate_orbitals_edf(rmat, vec_all, vectors)
1416 TYPE(cp_fm_type), INTENT(IN) :: rmat, vec_all, vectors
1417
1418 INTEGER :: k, l, n
1419 TYPE(cp_fm_type) :: wf
1420
1421 CALL cp_fm_create(wf, vectors%matrix_struct)
1422 CALL cp_fm_get_info(vec_all, nrow_global=n, ncol_global=k)
1423 CALL cp_fm_get_info(rmat, ncol_global=l)
1424
1425 CALL parallel_gemm("N", "N", n, l, k, 1.0_dp, vec_all, rmat, 0.0_dp, wf)
1426 CALL cp_fm_to_fm(wf, vectors)
1427 CALL cp_fm_release(wf)
1428 END SUBROUTINE rotate_orbitals_edf
1429! **************************************************************************************************
1430!> \brief ...
1431!> \param diag ...
1432!> \param weights ...
1433!> \param matrix ...
1434!> \param ndim ...
1435! **************************************************************************************************
1436 SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim)
1437 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1438 REAL(kind=dp), INTENT(IN) :: weights(:)
1439 TYPE(cp_fm_type), INTENT(IN) :: matrix
1440 INTEGER, INTENT(IN) :: ndim
1441
1442 COMPLEX(KIND=dp) :: zii, zjj
1443 INTEGER :: idim, istate, jstate, ncol_local, &
1444 nrow_global, nrow_local
1445 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1446 REAL(kind=dp) :: gradsq_ij
1447
1448 CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1449 ncol_local=ncol_local, nrow_global=nrow_global, &
1450 row_indices=row_indices, col_indices=col_indices)
1451
1452 DO istate = 1, nrow_local
1453 DO jstate = 1, ncol_local
1454! get real and imaginary parts
1455 gradsq_ij = 0.0_dp
1456 DO idim = 1, ndim
1457 zii = diag(row_indices(istate), idim)
1458 zjj = diag(col_indices(jstate), idim)
1459 gradsq_ij = gradsq_ij + weights(idim)* &
1460 4.0_dp*real((conjg(zii)*zii + conjg(zjj)*zjj), kind=dp)
1461 END DO
1462 matrix%local_data(istate, jstate) = gradsq_ij
1463 END DO
1464 END DO
1465 END SUBROUTINE gradsq_at_0
1466! **************************************************************************************************
1467!> \brief ...
1468!> \param matrix_p ...
1469!> \param weights ...
1470!> \param matrix ...
1471! **************************************************************************************************
1472 SUBROUTINE grad_at_0(matrix_p, weights, matrix)
1473 TYPE(cp_cfm_type) :: matrix_p(:)
1474 REAL(kind=dp), INTENT(IN) :: weights(:)
1475 TYPE(cp_fm_type), INTENT(IN) :: matrix
1476
1477 COMPLEX(KIND=dp) :: zii, zij, zjj
1478 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1479 INTEGER :: dim_m, idim, istate, jstate, ncol_local, &
1480 nrow_global, nrow_local
1481 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1482 REAL(kind=dp) :: grad_ij
1483
1484 NULLIFY (diag)
1485 CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1486 ncol_local=ncol_local, nrow_global=nrow_global, &
1487 row_indices=row_indices, col_indices=col_indices)
1488 dim_m = SIZE(matrix_p, 1)
1489 ALLOCATE (diag(nrow_global, dim_m))
1490
1491 DO idim = 1, dim_m
1492 DO istate = 1, nrow_global
1493 CALL cp_cfm_get_element(matrix_p(idim), istate, istate, diag(istate, idim))
1494 END DO
1495 END DO
1496
1497 DO istate = 1, nrow_local
1498 DO jstate = 1, ncol_local
1499! get real and imaginary parts
1500 grad_ij = 0.0_dp
1501 DO idim = 1, dim_m
1502 zii = diag(row_indices(istate), idim)
1503 zjj = diag(col_indices(jstate), idim)
1504 zij = matrix_p(idim)%local_data(istate, jstate)
1505 grad_ij = grad_ij + weights(idim)* &
1506 REAL(4.0_dp*conjg(zij)*(zjj - zii), dp)
1507 END DO
1508 matrix%local_data(istate, jstate) = grad_ij
1509 END DO
1510 END DO
1511 DEALLOCATE (diag)
1512 END SUBROUTINE grad_at_0
1513
1514! return energy and maximum gradient in the current point
1515! **************************************************************************************************
1516!> \brief ...
1517!> \param weights ...
1518!> \param zij ...
1519!> \param tolerance ...
1520!> \param value ...
1521! **************************************************************************************************
1522 SUBROUTINE check_tolerance_new(weights, zij, tolerance, value)
1523 REAL(kind=dp), INTENT(IN) :: weights(:)
1524 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :)
1525 REAL(kind=dp) :: tolerance, value
1526
1527 COMPLEX(KIND=dp) :: kii, kij, kjj
1528 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1529 INTEGER :: idim, istate, jstate, ncol_local, &
1530 nrow_global, nrow_local
1531 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1532 REAL(kind=dp) :: grad_ij, ra, rb
1533
1534 NULLIFY (diag)
1535 CALL cp_fm_get_info(zij(1, 1), nrow_local=nrow_local, &
1536 ncol_local=ncol_local, nrow_global=nrow_global, &
1537 row_indices=row_indices, col_indices=col_indices)
1538 ALLOCATE (diag(nrow_global, SIZE(zij, 2)))
1539 value = 0.0_dp
1540 DO idim = 1, SIZE(zij, 2)
1541 DO istate = 1, nrow_global
1542 CALL cp_fm_get_element(zij(1, idim), istate, istate, ra)
1543 CALL cp_fm_get_element(zij(2, idim), istate, istate, rb)
1544 diag(istate, idim) = cmplx(ra, rb, dp)
1545 value = value + weights(idim) - weights(idim)*abs(diag(istate, idim))**2
1546 END DO
1547 END DO
1548 tolerance = 0.0_dp
1549 DO istate = 1, nrow_local
1550 DO jstate = 1, ncol_local
1551 grad_ij = 0.0_dp
1552 DO idim = 1, SIZE(zij, 2)
1553 kii = diag(row_indices(istate), idim)
1554 kjj = diag(col_indices(jstate), idim)
1555 ra = zij(1, idim)%local_data(istate, jstate)
1556 rb = zij(2, idim)%local_data(istate, jstate)
1557 kij = cmplx(ra, rb, dp)
1558 grad_ij = grad_ij + weights(idim)* &
1559 REAL(4.0_dp*conjg(kij)*(kjj - kii), dp)
1560 END DO
1561 tolerance = max(abs(grad_ij), tolerance)
1562 END DO
1563 END DO
1564 CALL zij(1, 1)%matrix_struct%para_env%max(tolerance)
1565
1566 DEALLOCATE (diag)
1567
1568 END SUBROUTINE check_tolerance_new
1569
1570! **************************************************************************************************
1571!> \brief yet another crazy try, computes the angles needed to rotate the orbitals first
1572!> and rotates them all at the same time (hoping for the best of course)
1573!> \param weights ...
1574!> \param zij ...
1575!> \param vectors ...
1576!> \param max_iter ...
1577!> \param max_crazy_angle ...
1578!> \param crazy_scale ...
1579!> \param crazy_use_diag ...
1580!> \param eps_localization ...
1581!> \param iterations ...
1582!> \param converged ...
1583! **************************************************************************************************
1584 SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, &
1585 eps_localization, iterations, converged)
1586 REAL(kind=dp), INTENT(IN) :: weights(:)
1587 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1588 INTEGER, INTENT(IN) :: max_iter
1589 REAL(kind=dp), INTENT(IN) :: max_crazy_angle
1590 REAL(kind=dp) :: crazy_scale
1591 LOGICAL :: crazy_use_diag
1592 REAL(kind=dp), INTENT(IN) :: eps_localization
1593 INTEGER :: iterations
1594 LOGICAL, INTENT(out), OPTIONAL :: converged
1595
1596 CHARACTER(len=*), PARAMETER :: routinen = 'crazy_rotations'
1597 COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1598 czero = (0.0_dp, 0.0_dp)
1599
1600 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1601 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1602 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
1603 INTEGER :: dim2, handle, i, icol, idim, irow, &
1604 method, ncol_global, ncol_local, &
1605 norder, nrow_global, nrow_local, &
1606 nsquare, unit_nr
1607 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1608 LOGICAL :: do_emd
1609 REAL(kind=dp) :: eps_exp, limit_crazy_angle, maxeval, &
1610 norm, ra, rb, theta, tolerance, value
1611 REAL(kind=dp), DIMENSION(:), POINTER :: evals
1612 TYPE(cp_cfm_type) :: cmat_a, cmat_r, cmat_t1
1613 TYPE(cp_fm_type) :: mat_r, mat_t, mat_theta, mat_u
1614
1615 CALL timeset(routinen, handle)
1616 NULLIFY (row_indices, col_indices)
1617 CALL cp_fm_get_info(zij(1, 1), nrow_global=nrow_global, &
1618 ncol_global=ncol_global, &
1619 row_indices=row_indices, col_indices=col_indices, &
1620 nrow_local=nrow_local, ncol_local=ncol_local)
1621
1622 limit_crazy_angle = max_crazy_angle
1623
1624 NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj)
1625 dim2 = SIZE(zij, 2)
1626 ALLOCATE (diag_z(nrow_global, dim2))
1627 ALLOCATE (evals(nrow_global))
1628 ALLOCATE (evals_exp(nrow_global))
1629
1630 CALL cp_cfm_create(cmat_a, zij(1, 1)%matrix_struct)
1631 CALL cp_cfm_create(cmat_r, zij(1, 1)%matrix_struct)
1632 CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1633
1634 CALL cp_fm_create(mat_u, zij(1, 1)%matrix_struct)
1635 CALL cp_fm_create(mat_t, zij(1, 1)%matrix_struct)
1636 CALL cp_fm_create(mat_r, zij(1, 1)%matrix_struct)
1637
1638 CALL cp_fm_create(mat_theta, zij(1, 1)%matrix_struct)
1639
1640 CALL cp_fm_set_all(mat_r, 0.0_dp, 1.0_dp)
1641 CALL cp_fm_set_all(mat_t, 0.0_dp)
1642 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
1643 DO idim = 1, dim2
1644 CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(1, idim))
1645 CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(2, idim))
1646 END DO
1647 CALL cp_fm_syevd(mat_t, mat_u, evals)
1648 DO idim = 1, dim2
1649 ! rotate z's
1650 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1651 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1652 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1653 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1654 END DO
1655 ! collect rotation matrix
1656 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1657 CALL cp_fm_to_fm(mat_t, mat_r)
1658
1659 unit_nr = -1
1660 IF (cmat_a%matrix_struct%para_env%is_source()) THEN
1662 WRITE (unit_nr, '(T2,A7,A6,1X,A20,A12,A12,A12)') &
1663 "CRAZY| ", "Iter", "value ", "gradient", "Max. eval", "limit"
1664 END IF
1665
1666 iterations = 0
1667 tolerance = 1.0_dp
1668
1669 DO
1670 iterations = iterations + 1
1671 DO idim = 1, dim2
1672 DO i = 1, nrow_global
1673 CALL cp_fm_get_element(zij(1, idim), i, i, ra)
1674 CALL cp_fm_get_element(zij(2, idim), i, i, rb)
1675 diag_z(i, idim) = cmplx(ra, rb, dp)
1676 END DO
1677 END DO
1678 DO irow = 1, nrow_local
1679 DO icol = 1, ncol_local
1680 DO idim = 1, dim2
1681 ra = zij(1, idim)%local_data(irow, icol)
1682 rb = zij(2, idim)%local_data(irow, icol)
1683 mij(idim) = cmplx(ra, rb, dp)
1684 mii(idim) = diag_z(row_indices(irow), idim)
1685 mjj(idim) = diag_z(col_indices(icol), idim)
1686 END DO
1687 IF (row_indices(irow) .NE. col_indices(icol)) THEN
1688 CALL get_angle(mii, mjj, mij, weights, theta)
1689 theta = crazy_scale*theta
1690 IF (theta .GT. limit_crazy_angle) theta = limit_crazy_angle
1691 IF (theta .LT. -limit_crazy_angle) theta = -limit_crazy_angle
1692 IF (crazy_use_diag) THEN
1693 cmat_a%local_data(irow, icol) = -cmplx(0.0_dp, theta, dp)
1694 ELSE
1695 mat_theta%local_data(irow, icol) = -theta
1696 END IF
1697 ELSE
1698 IF (crazy_use_diag) THEN
1699 cmat_a%local_data(irow, icol) = czero
1700 ELSE
1701 mat_theta%local_data(irow, icol) = 0.0_dp
1702 END IF
1703 END IF
1704 END DO
1705 END DO
1706
1707 ! construct rotation matrix U based on A using diagonalization
1708 ! alternatively, exp based on repeated squaring could be faster
1709 IF (crazy_use_diag) THEN
1710 CALL cp_cfm_heevd(cmat_a, cmat_r, evals)
1711 maxeval = maxval(abs(evals))
1712 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1713 CALL cp_cfm_to_cfm(cmat_r, cmat_t1)
1714 CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1715 CALL parallel_gemm('N', 'C', nrow_global, nrow_global, nrow_global, cone, &
1716 cmat_t1, cmat_r, czero, cmat_a)
1717 mat_u%local_data = real(cmat_a%local_data, kind=dp) ! U is a real matrix
1718 ELSE
1719 do_emd = .false.
1720 method = 2
1721 eps_exp = 1.0_dp*epsilon(eps_exp)
1722 CALL cp_fm_maxabsrownorm(mat_theta, norm)
1723 maxeval = norm ! an upper bound
1724 CALL get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
1725 CALL exp_pade_real(mat_u, mat_theta, nsquare, norder)
1726 END IF
1727
1728 DO idim = 1, dim2
1729 ! rotate z's
1730 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1731 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1732 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1733 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1734 END DO
1735 ! collect rotation matrix
1736 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1737 CALL cp_fm_to_fm(mat_t, mat_r)
1738
1739 CALL check_tolerance_new(weights, zij, tolerance, value)
1740
1741 IF (unit_nr > 0) THEN
1742 WRITE (unit_nr, '(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') &
1743 "CRAZY| ", iterations, value, tolerance, maxeval, limit_crazy_angle
1744 CALL m_flush(unit_nr)
1745 END IF
1746 IF (tolerance .LT. eps_localization .OR. iterations .GE. max_iter) EXIT
1747 END DO
1748
1749 IF (PRESENT(converged)) converged = (tolerance .LT. eps_localization)
1750
1751 CALL cp_cfm_release(cmat_a)
1752 CALL cp_cfm_release(cmat_r)
1753 CALL cp_cfm_release(cmat_t1)
1754
1755 CALL cp_fm_release(mat_u)
1756 CALL cp_fm_release(mat_t)
1757 CALL cp_fm_release(mat_theta)
1758
1759 CALL rotate_orbitals(mat_r, vectors)
1760
1761 CALL cp_fm_release(mat_r)
1762 DEALLOCATE (evals_exp, evals, diag_z)
1763 DEALLOCATE (mii, mij, mjj)
1764
1765 CALL timestop(handle)
1766
1767 END SUBROUTINE crazy_rotations
1768
1769! **************************************************************************************************
1770!> \brief use the exponential parametrization as described in to perform a direct mini
1771!> Gerd Berghold et al. PRB 61 (15), pag. 10040 (2000)
1772!> none of the input is modified for the time being, just finds the rotations
1773!> that minimizes, and throws it away afterwards :-)
1774!> apart from being expensive and not cleaned, this works fine
1775!> useful to try different spread functionals
1776!> \param weights ...
1777!> \param zij ...
1778!> \param vectors ...
1779!> \param max_iter ...
1780!> \param eps_localization ...
1781!> \param iterations ...
1782! **************************************************************************************************
1783 SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
1784 REAL(kind=dp), INTENT(IN) :: weights(:)
1785 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1786 INTEGER, INTENT(IN) :: max_iter
1787 REAL(kind=dp), INTENT(IN) :: eps_localization
1788 INTEGER :: iterations
1789
1790 CHARACTER(len=*), PARAMETER :: routinen = 'direct_mini'
1791 COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1792 czero = (0.0_dp, 0.0_dp)
1793 REAL(kind=dp), PARAMETER :: gold_sec = 0.3819_dp
1794
1795 COMPLEX(KIND=dp) :: lk, ll, tmp
1796 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1797 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1798 INTEGER :: handle, i, icol, idim, irow, &
1799 line_search_count, line_searches, lsl, &
1800 lsm, lsr, n, ncol_local, ndim, &
1801 nrow_local, output_unit
1802 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1803 LOGICAL :: new_direction
1804 REAL(kind=dp) :: a, b, beta_pr, c, denom, ds, ds_min, fa, &
1805 fb, fc, nom, normg, normg_cross, &
1806 normg_old, npos, omega, tol, val, x0, &
1807 x1, xa, xb, xc
1808 REAL(kind=dp), DIMENSION(150) :: energy, grad, pos
1809 REAL(kind=dp), DIMENSION(:), POINTER :: evals, fval, fvald
1810 TYPE(cp_cfm_type) :: cmat_a, cmat_b, cmat_m, cmat_r, cmat_t1, &
1811 cmat_t2, cmat_u
1812 TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
1813 TYPE(cp_fm_type) :: matrix_a, matrix_g, matrix_g_old, &
1814 matrix_g_search, matrix_h, matrix_r, &
1815 matrix_t
1816
1817 NULLIFY (evals, evals_exp, diag_z, fval, fvald)
1818
1819 CALL timeset(routinen, handle)
1820 output_unit = cp_logger_get_default_io_unit()
1821
1822 n = zij(1, 1)%matrix_struct%nrow_global
1823 ndim = (SIZE(zij, 2))
1824
1825 IF (output_unit > 0) THEN
1826 WRITE (output_unit, '(T4,A )') "Localization by direct minimization of the functional; "
1827 WRITE (output_unit, '(T5,2A13,A20,A20,A10 )') " Line search ", " Iteration ", " Functional ", " Tolerance ", " ds Min "
1828 END IF
1829
1830 ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n))
1831 ALLOCATE (c_zij(ndim))
1832
1833 ! create the three complex matrices Z
1834 DO idim = 1, ndim
1835 CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
1836 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
1837 zij(2, idim)%local_data, dp)
1838 END DO
1839
1840 CALL cp_fm_create(matrix_a, zij(1, 1)%matrix_struct)
1841 CALL cp_fm_create(matrix_g, zij(1, 1)%matrix_struct)
1842 CALL cp_fm_create(matrix_t, zij(1, 1)%matrix_struct)
1843 CALL cp_fm_create(matrix_h, zij(1, 1)%matrix_struct)
1844 CALL cp_fm_create(matrix_g_search, zij(1, 1)%matrix_struct)
1845 CALL cp_fm_create(matrix_g_old, zij(1, 1)%matrix_struct)
1846 CALL cp_fm_create(matrix_r, zij(1, 1)%matrix_struct)
1847 CALL cp_fm_set_all(matrix_r, 0.0_dp, 1.0_dp)
1848
1849 CALL cp_fm_set_all(matrix_a, 0.0_dp)
1850! CALL cp_fm_init_random ( matrix_A )
1851
1852 CALL cp_cfm_create(cmat_a, zij(1, 1)%matrix_struct)
1853 CALL cp_cfm_create(cmat_u, zij(1, 1)%matrix_struct)
1854 CALL cp_cfm_create(cmat_r, zij(1, 1)%matrix_struct)
1855 CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1856 CALL cp_cfm_create(cmat_t2, zij(1, 1)%matrix_struct)
1857 CALL cp_cfm_create(cmat_b, zij(1, 1)%matrix_struct)
1858 CALL cp_cfm_create(cmat_m, zij(1, 1)%matrix_struct)
1859
1860 CALL cp_cfm_get_info(cmat_b, nrow_local=nrow_local, ncol_local=ncol_local, &
1861 row_indices=row_indices, col_indices=col_indices)
1862
1863 CALL cp_fm_set_all(matrix_g_old, 0.0_dp)
1864 CALL cp_fm_set_all(matrix_g_search, 0.0_dp)
1865 normg_old = 1.0e30_dp
1866 ds_min = 1.0_dp
1867 new_direction = .true.
1868 iterations = 0
1869 line_searches = 0
1870 line_search_count = 0
1871 DO
1872 iterations = iterations + 1
1873 ! compute U,R,evals given A
1874 cmat_a%local_data = cmplx(0.0_dp, matrix_a%local_data, dp) ! cmat_A is hermitian, evals are reals
1875 CALL cp_cfm_heevd(cmat_a, cmat_r, evals)
1876 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1877 CALL cp_cfm_to_cfm(cmat_r, cmat_t1)
1878 CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1879 CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_u)
1880 cmat_u%local_data = real(cmat_u%local_data, kind=dp) ! enforce numerics, U is a real matrix
1881
1882 IF (new_direction .AND. mod(line_searches, 20) .EQ. 5) THEN ! reset with A .eq. 0
1883 DO idim = 1, ndim
1884 CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1)
1885 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_u, cmat_t1, czero, c_zij(idim))
1886 END DO
1887 ! collect rotation matrix
1888 matrix_h%local_data = real(cmat_u%local_data, kind=dp)
1889 CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
1890 CALL cp_fm_to_fm(matrix_t, matrix_r)
1891
1892 CALL cp_cfm_set_all(cmat_u, czero, cone)
1893 CALL cp_cfm_set_all(cmat_r, czero, cone)
1894 CALL cp_cfm_set_all(cmat_a, czero)
1895 CALL cp_fm_set_all(matrix_a, 0.0_dp)
1896 evals(:) = 0.0_dp
1897 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1898 CALL cp_fm_set_all(matrix_g_old, 0.0_dp)
1899 CALL cp_fm_set_all(matrix_g_search, 0.0_dp)
1900 normg_old = 1.0e30_dp
1901 END IF
1902
1903 ! compute Omega and M
1904 CALL cp_cfm_set_all(cmat_m, czero)
1905 omega = 0.0_dp
1906 DO idim = 1, ndim
1907 CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1) ! t1=ZU
1908 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_u, cmat_t1, czero, cmat_t2) ! t2=(U^T)ZU
1909 DO i = 1, n
1910 CALL cp_cfm_get_element(cmat_t2, i, i, diag_z(i, idim))
1911 SELECT CASE (2) ! allows for selection of different spread functionals
1912 CASE (1)
1913 fval(i) = -weights(idim)*log(abs(diag_z(i, idim))**2)
1914 fvald(i) = -weights(idim)/(abs(diag_z(i, idim))**2)
1915 CASE (2) ! corresponds to the jacobi setup
1916 fval(i) = weights(idim) - weights(idim)*abs(diag_z(i, idim))**2
1917 fvald(i) = -weights(idim)
1918 END SELECT
1919 omega = omega + fval(i)
1920 END DO
1921 DO icol = 1, ncol_local
1922 DO irow = 1, nrow_local
1923 tmp = cmat_t1%local_data(irow, icol)*conjg(diag_z(col_indices(icol), idim))
1924 cmat_m%local_data(irow, icol) = cmat_m%local_data(irow, icol) &
1925 + 4.0_dp*fvald(col_indices(icol))*real(tmp, kind=dp)
1926 END DO
1927 END DO
1928 END DO
1929
1930 ! compute Hessian diagonal approximation for the preconditioner
1931 IF (.true.) THEN
1932 CALL gradsq_at_0(diag_z, weights, matrix_h, ndim)
1933 ELSE
1934 CALL cp_fm_set_all(matrix_h, 1.0_dp)
1935 END IF
1936
1937 ! compute B
1938 DO icol = 1, ncol_local
1939 DO irow = 1, nrow_local
1940 ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow))
1941 lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol))
1942 IF (abs(ll - lk) .LT. 0.5_dp) THEN ! use a series expansion to avoid loss of precision
1943 tmp = 1.0_dp
1944 cmat_b%local_data(irow, icol) = 0.0_dp
1945 DO i = 1, 16
1946 cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol) + tmp
1947 tmp = tmp*(ll - lk)/(i + 1)
1948 END DO
1949 cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol)*exp(lk)
1950 ELSE
1951 cmat_b%local_data(irow, icol) = (exp(lk) - exp(ll))/(lk - ll)
1952 END IF
1953 END DO
1954 END DO
1955 ! compute gradient matrix_G
1956
1957 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_m, cmat_r, czero, cmat_t1) ! t1=(M^T)(R^T)
1958 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_r, cmat_t1, czero, cmat_t2) ! t2=(R)t1
1959 CALL cp_cfm_schur_product(cmat_t2, cmat_b, cmat_t1)
1960 CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_t2)
1961 CALL parallel_gemm('N', 'N', n, n, n, cone, cmat_r, cmat_t2, czero, cmat_t1)
1962 matrix_g%local_data = real(cmat_t1%local_data, kind=dp)
1963 CALL cp_fm_transpose(matrix_g, matrix_t)
1964 CALL cp_fm_scale_and_add(-1.0_dp, matrix_g, 1.0_dp, matrix_t)
1965 CALL cp_fm_maxabsval(matrix_g, tol)
1966
1967 ! from here on, minimizing technology
1968 IF (new_direction) THEN
1969 ! energy converged up to machine precision ?
1970 line_searches = line_searches + 1
1971 ! DO i=1,line_search_count
1972 ! write(15,*) pos(i),energy(i)
1973 ! ENDDO
1974 ! write(15,*) ""
1975 ! CALL m_flush(15)
1976 !write(16,*) evals(:)
1977 !write(17,*) matrix_A%local_data(:,:)
1978 !write(18,*) matrix_G%local_data(:,:)
1979 IF (output_unit > 0) THEN
1980 WRITE (output_unit, '(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, iterations, omega, tol, ds_min
1981 CALL m_flush(output_unit)
1982 END IF
1983 IF (tol < eps_localization .OR. iterations > max_iter) EXIT
1984
1985 IF (.true.) THEN ! do conjugate gradient CG
1986 CALL cp_fm_trace(matrix_g, matrix_g_old, normg_cross)
1987 normg_cross = normg_cross*0.5_dp ! takes into account the fact that A is antisymmetric
1988 ! apply the preconditioner
1989 DO icol = 1, ncol_local
1990 DO irow = 1, nrow_local
1991 matrix_g_old%local_data(irow, icol) = matrix_g%local_data(irow, icol)/matrix_h%local_data(irow, icol)
1992 END DO
1993 END DO
1994 CALL cp_fm_trace(matrix_g, matrix_g_old, normg)
1995 normg = normg*0.5_dp
1996 beta_pr = (normg - normg_cross)/normg_old
1997 normg_old = normg
1998 beta_pr = max(beta_pr, 0.0_dp)
1999 CALL cp_fm_scale_and_add(beta_pr, matrix_g_search, -1.0_dp, matrix_g_old)
2000 CALL cp_fm_trace(matrix_g_search, matrix_g_old, normg_cross)
2001 IF (normg_cross .GE. 0) THEN ! back to SD
2002 IF (matrix_a%matrix_struct%para_env%is_source()) THEN
2003 WRITE (cp_logger_get_default_unit_nr(), *) "!"
2004 END IF
2005 beta_pr = 0.0_dp
2006 CALL cp_fm_scale_and_add(beta_pr, matrix_g_search, -1.0_dp, matrix_g_old)
2007 END IF
2008 ELSE ! SD
2009 CALL cp_fm_scale_and_add(0.0_dp, matrix_g_search, -1.0_dp, matrix_g)
2010 END IF
2011 ! ds_min=1.0E-4_dp
2012 line_search_count = 0
2013 END IF
2014 line_search_count = line_search_count + 1
2015 energy(line_search_count) = omega
2016
2017 ! line search section
2018 SELECT CASE (3)
2019 CASE (1) ! two point line search
2020 SELECT CASE (line_search_count)
2021 CASE (1)
2022 pos(1) = 0.0_dp
2023 pos(2) = ds_min
2024 CALL cp_fm_trace(matrix_g, matrix_g_search, grad(1))
2025 grad(1) = grad(1)/2.0_dp
2026 new_direction = .false.
2027 CASE (2)
2028 new_direction = .true.
2029 x0 = pos(1) ! 0.0_dp
2030 c = energy(1)
2031 b = grad(1)
2032 x1 = pos(2)
2033 a = (energy(2) - b*x1 - c)/(x1**2)
2034 IF (a .LE. 0.0_dp) a = 1.0e-15_dp
2035 npos = -b/(2.0_dp*a)
2036 val = a*npos**2 + b*npos + c
2037 IF (val .LT. energy(1) .AND. val .LE. energy(2)) THEN
2038 ! we go to a minimum, but ...
2039 ! we take a guard against too large steps
2040 pos(3) = min(npos, maxval(pos(1:2))*4.0_dp)
2041 ELSE ! just take an extended step
2042 pos(3) = maxval(pos(1:2))*2.0_dp
2043 END IF
2044 END SELECT
2045 CASE (2) ! 3 point line search
2046 SELECT CASE (line_search_count)
2047 CASE (1)
2048 new_direction = .false.
2049 pos(1) = 0.0_dp
2050 pos(2) = ds_min*0.8_dp
2051 CASE (2)
2052 new_direction = .false.
2053 IF (energy(2) .GT. energy(1)) THEN
2054 pos(3) = ds_min*0.7_dp
2055 ELSE
2056 pos(3) = ds_min*1.4_dp
2057 END IF
2058 CASE (3)
2059 new_direction = .true.
2060 xa = pos(1)
2061 xb = pos(2)
2062 xc = pos(3)
2063 fa = energy(1)
2064 fb = energy(2)
2065 fc = energy(3)
2066 nom = (xb - xa)**2*(fb - fc) - (xb - xc)**2*(fb - fa)
2067 denom = (xb - xa)*(fb - fc) - (xb - xc)*(fb - fa)
2068 IF (abs(denom) .LE. 1.0e-18_dp*max(abs(fb - fc), abs(fb - fa))) THEN
2069 npos = xb
2070 ELSE
2071 npos = xb - 0.5_dp*nom/denom ! position of the stationary point
2072 END IF
2073 val = (npos - xa)*(npos - xb)*fc/((xc - xa)*(xc - xb)) + &
2074 (npos - xb)*(npos - xc)*fa/((xa - xb)*(xa - xc)) + &
2075 (npos - xc)*(npos - xa)*fb/((xb - xc)*(xb - xa))
2076 IF (val .LT. fa .AND. val .LE. fb .AND. val .LE. fc) THEN ! OK, we go to a minimum
2077 ! we take a guard against too large steps
2078 pos(4) = max(maxval(pos(1:3))*0.01_dp, &
2079 min(npos, maxval(pos(1:3))*4.0_dp))
2080 ELSE ! just take an extended step
2081 pos(4) = maxval(pos(1:3))*2.0_dp
2082 END IF
2083 END SELECT
2084 CASE (3) ! golden section hunt
2085 new_direction = .false.
2086 IF (line_search_count .EQ. 1) THEN
2087 lsl = 1
2088 lsr = 0
2089 lsm = 1
2090 pos(1) = 0.0_dp
2091 pos(2) = ds_min/gold_sec
2092 ELSE
2093 IF (line_search_count .EQ. 150) cpabort("Too many")
2094 IF (lsr .EQ. 0) THEN
2095 IF (energy(line_search_count - 1) .LT. energy(line_search_count)) THEN
2096 lsr = line_search_count
2097 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
2098 ELSE
2099 lsl = lsm
2100 lsm = line_search_count
2101 pos(line_search_count + 1) = pos(line_search_count)/gold_sec
2102 END IF
2103 ELSE
2104 IF (pos(line_search_count) .LT. pos(lsm)) THEN
2105 IF (energy(line_search_count) .LT. energy(lsm)) THEN
2106 lsr = lsm
2107 lsm = line_search_count
2108 ELSE
2109 lsl = line_search_count
2110 END IF
2111 ELSE
2112 IF (energy(line_search_count) .LT. energy(lsm)) THEN
2113 lsl = lsm
2114 lsm = line_search_count
2115 ELSE
2116 lsr = line_search_count
2117 END IF
2118 END IF
2119 IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl)) THEN
2120 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
2121 ELSE
2122 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
2123 END IF
2124 IF ((pos(lsr) - pos(lsl)) .LT. 1.0e-3_dp*pos(lsr)) THEN
2125 new_direction = .true.
2126 END IF
2127 END IF ! lsr .eq. 0
2128 END IF ! first step
2129 END SELECT
2130 ! now go to the suggested point
2131 ds_min = pos(line_search_count + 1)
2132 ds = pos(line_search_count + 1) - pos(line_search_count)
2133 CALL cp_fm_scale_and_add(1.0_dp, matrix_a, ds, matrix_g_search)
2134 END DO
2135
2136 ! collect rotation matrix
2137 matrix_h%local_data = real(cmat_u%local_data, kind=dp)
2138 CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
2139 CALL cp_fm_to_fm(matrix_t, matrix_r)
2140 CALL rotate_orbitals(matrix_r, vectors)
2141 CALL cp_fm_release(matrix_r)
2142
2143 CALL cp_fm_release(matrix_a)
2144 CALL cp_fm_release(matrix_g)
2145 CALL cp_fm_release(matrix_h)
2146 CALL cp_fm_release(matrix_t)
2147 CALL cp_fm_release(matrix_g_search)
2148 CALL cp_fm_release(matrix_g_old)
2149 CALL cp_cfm_release(cmat_a)
2150 CALL cp_cfm_release(cmat_u)
2151 CALL cp_cfm_release(cmat_r)
2152 CALL cp_cfm_release(cmat_t1)
2153 CALL cp_cfm_release(cmat_t2)
2154 CALL cp_cfm_release(cmat_b)
2155 CALL cp_cfm_release(cmat_m)
2156
2157 DEALLOCATE (evals, evals_exp, fval, fvald)
2158
2159 DO idim = 1, SIZE(c_zij)
2160 zij(1, idim)%local_data = real(c_zij(idim)%local_data, dp)
2161 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
2162 CALL cp_cfm_release(c_zij(idim))
2163 END DO
2164 DEALLOCATE (c_zij)
2165 DEALLOCATE (diag_z)
2166
2167 CALL timestop(handle)
2168
2169 END SUBROUTINE
2170
2171! **************************************************************************************************
2172!> \brief Parallel algorithm for jacobi rotations
2173!> \param weights ...
2174!> \param zij ...
2175!> \param vectors ...
2176!> \param para_env ...
2177!> \param max_iter ...
2178!> \param eps_localization ...
2179!> \param sweeps ...
2180!> \param out_each ...
2181!> \param target_time ...
2182!> \param start_time ...
2183!> \param restricted ...
2184!> \par History
2185!> use allgather for improved performance
2186!> \author MI (11.2009)
2187! **************************************************************************************************
2188 SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
2189 sweeps, out_each, target_time, start_time, restricted)
2190
2191 REAL(kind=dp), INTENT(IN) :: weights(:)
2192 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
2193 TYPE(mp_para_env_type), POINTER :: para_env
2194 INTEGER, INTENT(IN) :: max_iter
2195 REAL(kind=dp), INTENT(IN) :: eps_localization
2196 INTEGER :: sweeps
2197 INTEGER, INTENT(IN) :: out_each
2198 REAL(dp) :: target_time, start_time
2199 INTEGER :: restricted
2200
2201 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rot_para'
2202
2203 INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2204 nblock, nblock_max, ns_me, nstate, &
2205 output_unit
2206 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2207 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_im, z_ij_loc_re
2208 REAL(kind=dp) :: xstate
2209 TYPE(cp_fm_type) :: rmat
2210 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2211
2212 CALL timeset(routinen, handle)
2213
2214 output_unit = cp_logger_get_default_io_unit()
2215
2216 NULLIFY (cz_ij_loc)
2217
2218 dim2 = SIZE(zij, 2)
2219
2220 CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
2221 CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
2222
2223 CALL cp_fm_get_info(rmat, nrow_global=nstate)
2224
2225 IF (restricted > 0) THEN
2226 IF (output_unit > 0) THEN
2227 WRITE (output_unit, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
2228 END IF
2229 nstate = nstate - restricted
2230 END IF
2231
2232 ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2233 xstate = real(nstate, dp)/real(para_env%num_pe, dp)
2234 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2235 DO ip = 1, para_env%num_pe
2236 ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2237 ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2238 END DO
2239 nblock_max = 0
2240 DO ip = 0, para_env%num_pe - 1
2241 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2242 nblock_max = max(nblock_max, nblock)
2243 END DO
2244
2245 ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2246 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2247 ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2248 ALLOCATE (cz_ij_loc(dim2))
2249 DO idim = 1, dim2
2250 DO ip = 0, para_env%num_pe - 1
2251 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2252 CALL cp_fm_get_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2253 CALL cp_fm_get_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2254 IF (para_env%mepos == ip) THEN
2255 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, nblock))
2256 DO i = 1, nblock
2257 DO j = 1, nstate
2258 cz_ij_loc(idim)%c_array(j, i) = cmplx(z_ij_loc_re(j, i), z_ij_loc_im(j, i), dp)
2259 END DO
2260 END DO
2261 END IF
2262 END DO ! ip
2263 END DO
2264 DEALLOCATE (z_ij_loc_re)
2265 DEALLOCATE (z_ij_loc_im)
2266
2267 ALLOCATE (rotmat(nstate, 2*nblock_max))
2268
2269 CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, ns_bound, &
2270 cz_ij_loc, rotmat, output_unit, eps_localization=eps_localization, &
2271 target_time=target_time, start_time=start_time)
2272
2273 ilow1 = ns_bound(para_env%mepos, 1)
2274 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2275 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2276 ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2277 DO idim = 1, dim2
2278 DO ip = 0, para_env%num_pe - 1
2279 z_ij_loc_re = 0.0_dp
2280 z_ij_loc_im = 0.0_dp
2281 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2282 IF (ip == para_env%mepos) THEN
2283 ns_me = nblock
2284 DO i = 1, ns_me
2285 ii = ilow1 + i - 1
2286 DO j = 1, nstate
2287 z_ij_loc_re(j, i) = real(cz_ij_loc(idim)%c_array(j, i), dp)
2288 z_ij_loc_im(j, i) = aimag(cz_ij_loc(idim)%c_array(j, i))
2289 END DO
2290 END DO
2291 END IF
2292 CALL para_env%bcast(z_ij_loc_re, ip)
2293 CALL para_env%bcast(z_ij_loc_im, ip)
2294 CALL cp_fm_set_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2295 CALL cp_fm_set_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2296 END DO ! ip
2297 END DO
2298
2299 DO ip = 0, para_env%num_pe - 1
2300 z_ij_loc_re = 0.0_dp
2301 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2302 IF (ip == para_env%mepos) THEN
2303 ns_me = nblock
2304 DO i = 1, ns_me
2305 ii = ilow1 + i - 1
2306 DO j = 1, nstate
2307 z_ij_loc_re(j, i) = rotmat(j, i)
2308 END DO
2309 END DO
2310 END IF
2311 CALL para_env%bcast(z_ij_loc_re, ip)
2312 CALL cp_fm_set_submatrix(rmat, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2313 END DO
2314
2315 DEALLOCATE (z_ij_loc_re)
2316 DEALLOCATE (z_ij_loc_im)
2317 DO idim = 1, dim2
2318 DEALLOCATE (cz_ij_loc(idim)%c_array)
2319 END DO
2320 DEALLOCATE (cz_ij_loc)
2321
2322 CALL para_env%sync()
2323 CALL rotate_orbitals(rmat, vectors)
2324 CALL cp_fm_release(rmat)
2325
2326 DEALLOCATE (rotmat)
2327 DEALLOCATE (ns_bound)
2328
2329 CALL timestop(handle)
2330
2331 END SUBROUTINE jacobi_rot_para
2332
2333! **************************************************************************************************
2334!> \brief almost identical to 'jacobi_rot_para' but with different inout variables
2335!> \param weights ...
2336!> \param czij ...
2337!> \param para_env ...
2338!> \param max_iter ...
2339!> \param rmat ...
2340!> \param tol_out ...
2341!> \author Soumya Ghosh (08/21)
2342! **************************************************************************************************
2343 SUBROUTINE jacobi_rot_para_1(weights, czij, para_env, max_iter, rmat, tol_out)
2344
2345 REAL(kind=dp), INTENT(IN) :: weights(:)
2346 TYPE(cp_cfm_type), INTENT(IN) :: czij(:)
2347 TYPE(mp_para_env_type), POINTER :: para_env
2348 INTEGER, INTENT(IN) :: max_iter
2349 TYPE(cp_cfm_type), INTENT(IN) :: rmat
2350 REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2351
2352 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rot_para_1'
2353
2354 COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: czij_array
2355 INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2356 nblock, nblock_max, ns_me, nstate, &
2357 sweeps
2358 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2359 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_re
2360 REAL(kind=dp) :: xstate
2361 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2362
2363 CALL timeset(routinen, handle)
2364
2365 dim2 = SIZE(czij)
2366
2367 CALL cp_cfm_set_all(rmat, cmplx(0._dp, 0._dp, dp), cmplx(1._dp, 0._dp, dp))
2368
2369 CALL cp_cfm_get_info(rmat, nrow_global=nstate)
2370
2371 ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2372 xstate = real(nstate, dp)/real(para_env%num_pe, dp)
2373 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2374 DO ip = 1, para_env%num_pe
2375 ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2376 ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2377 END DO
2378 nblock_max = 0
2379 DO ip = 0, para_env%num_pe - 1
2380 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2381 nblock_max = max(nblock_max, nblock)
2382 END DO
2383
2384 ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2385 ALLOCATE (czij_array(nstate, nblock_max))
2386 ALLOCATE (cz_ij_loc(dim2))
2387 DO idim = 1, dim2
2388 DO ip = 0, para_env%num_pe - 1
2389 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2390 ! cfm --> allocatable
2391 CALL cp_cfm_get_submatrix(czij(idim), czij_array, 1, ns_bound(ip, 1), nstate, nblock)
2392 IF (para_env%mepos == ip) THEN
2393 ns_me = nblock
2394 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me))
2395 DO i = 1, ns_me
2396 DO j = 1, nstate
2397 cz_ij_loc(idim)%c_array(j, i) = czij_array(j, i)
2398 END DO
2399 END DO
2400 END IF
2401 END DO ! ip
2402 END DO
2403 DEALLOCATE (czij_array)
2404
2405 ALLOCATE (rotmat(nstate, 2*nblock_max))
2406
2407 CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, 1, dim2, nstate, nblock_max, ns_bound, &
2408 cz_ij_loc, rotmat, 0, tol_out=tol_out)
2409
2410 ilow1 = ns_bound(para_env%mepos, 1)
2411 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2412 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2413
2414 DO ip = 0, para_env%num_pe - 1
2415 z_ij_loc_re = 0.0_dp
2416 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2417 IF (ip == para_env%mepos) THEN
2418 ns_me = nblock
2419 DO i = 1, ns_me
2420 ii = ilow1 + i - 1
2421 DO j = 1, nstate
2422 z_ij_loc_re(j, i) = rotmat(j, i)
2423 END DO
2424 END DO
2425 END IF
2426 CALL para_env%bcast(z_ij_loc_re, ip)
2427 CALL cp_cfm_set_submatrix(rmat, cmplx(z_ij_loc_re, 0.0_dp, dp), 1, ns_bound(ip, 1), nstate, nblock)
2428 END DO
2429
2430 DEALLOCATE (z_ij_loc_re)
2431 DO idim = 1, dim2
2432 DEALLOCATE (cz_ij_loc(idim)%c_array)
2433 END DO
2434 DEALLOCATE (cz_ij_loc)
2435
2436 CALL para_env%sync()
2437
2438 DEALLOCATE (rotmat)
2439 DEALLOCATE (ns_bound)
2440
2441 CALL timestop(handle)
2442
2443 END SUBROUTINE jacobi_rot_para_1
2444
2445! **************************************************************************************************
2446!> \brief Parallel algorithm for jacobi rotations
2447!> \param weights ...
2448!> \param para_env ...
2449!> \param max_iter ...
2450!> \param sweeps ...
2451!> \param out_each ...
2452!> \param dim2 ...
2453!> \param nstate ...
2454!> \param nblock_max ...
2455!> \param ns_bound ...
2456!> \param cz_ij_loc ...
2457!> \param rotmat ...
2458!> \param output_unit ...
2459!> \param tol_out ...
2460!> \param eps_localization ...
2461!> \param target_time ...
2462!> \param start_time ...
2463!> \par History
2464!> split out to reuse with different input types
2465!> \author HF (05.2022)
2466! **************************************************************************************************
2467 SUBROUTINE jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, &
2468 ns_bound, cz_ij_loc, rotmat, output_unit, tol_out, eps_localization, target_time, start_time)
2469
2470 REAL(kind=dp), INTENT(IN) :: weights(:)
2471 TYPE(mp_para_env_type), POINTER :: para_env
2472 INTEGER, INTENT(IN) :: max_iter
2473 INTEGER, INTENT(OUT) :: sweeps
2474 INTEGER, INTENT(IN) :: out_each, dim2, nstate, nblock_max
2475 INTEGER, DIMENSION(0:, :), INTENT(IN) :: ns_bound
2476 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2477 REAL(dp), DIMENSION(:, :), INTENT(OUT) :: rotmat
2478 INTEGER, INTENT(IN) :: output_unit
2479 REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2480 REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_localization
2481 REAL(dp), OPTIONAL :: target_time, start_time
2482
2483 COMPLEX(KIND=dp) :: zi, zj
2484 COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: c_array_me, c_array_partner
2485 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
2486 INTEGER :: i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, ip, ip_has_i, &
2487 ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, iu1, iu2, iup1, &
2488 iup2, j, jj, jstate, k, kk, lsweep, n1, n2, npair, nperm, ns_me, ns_partner, &
2489 ns_recv_from, ns_recv_partner
2490 INTEGER, ALLOCATABLE, DIMENSION(:) :: rcount, rdispl
2491 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: list_pair
2492 LOGICAL :: should_stop
2493 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: gmat, rmat_loc, rmat_recv, rmat_send
2494 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: rmat_recv_all
2495 REAL(kind=dp) :: ct, func, gmax, grad, ri, rj, st, t1, &
2496 t2, theta, tolerance, zc, zr
2497 TYPE(set_c_1d_type), DIMENSION(:), POINTER :: zdiag_all, zdiag_me
2498 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: xyz_mix, xyz_mix_ns
2499
2500 NULLIFY (zdiag_all, zdiag_me)
2501 NULLIFY (xyz_mix, xyz_mix_ns)
2502 NULLIFY (mii, mij, mjj)
2503
2504 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
2505
2506 ALLOCATE (rcount(para_env%num_pe), stat=istat)
2507 ALLOCATE (rdispl(para_env%num_pe), stat=istat)
2508
2509 tolerance = 1.0e10_dp
2510 sweeps = 0
2511
2512 ! number of processor pairs and number of permutations
2513 npair = (para_env%num_pe + 1)/2
2514 nperm = para_env%num_pe - mod(para_env%num_pe + 1, 2)
2515 ALLOCATE (list_pair(2, npair))
2516
2517 ! initialize rotation matrix
2518 rotmat = 0.0_dp
2519 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2520 ii = i - ns_bound(para_env%mepos, 1) + 1
2521 rotmat(i, ii) = 1.0_dp
2522 END DO
2523
2524 ALLOCATE (xyz_mix(dim2))
2525 ALLOCATE (xyz_mix_ns(dim2))
2526 ALLOCATE (zdiag_me(dim2))
2527 ALLOCATE (zdiag_all(dim2))
2528
2529 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2530 IF (ns_me /= 0) THEN
2531 ALLOCATE (c_array_me(nstate, ns_me, dim2))
2532 DO idim = 1, dim2
2533 ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me))
2534 END DO
2535 ALLOCATE (gmat(nstate, ns_me))
2536 END IF
2537
2538 DO idim = 1, dim2
2539 ALLOCATE (zdiag_me(idim)%c_array(nblock_max))
2540 zdiag_me(idim)%c_array = cmplx(0.0_dp, 0.0_dp, dp)
2541 ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max))
2542 zdiag_all(idim)%c_array = cmplx(0.0_dp, 0.0_dp, dp)
2543 END DO
2544 ALLOCATE (rmat_recv(nblock_max*2, nblock_max))
2545 ALLOCATE (rmat_send(nblock_max*2, nblock_max))
2546
2547 ! buffer for message passing
2548 ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1))
2549
2550 IF (output_unit > 0) THEN
2551 WRITE (output_unit, '(T4,A )') " Localization by iterative distributed Jacobi rotation"
2552 WRITE (output_unit, '(T20,A12,T32, A22,T60, A12,A8 )') "Iteration", "Functional", "Tolerance", " Time "
2553 END IF
2554
2555 DO lsweep = 1, max_iter + 1
2556 sweeps = lsweep
2557 IF (sweeps == max_iter + 1) THEN
2558 IF (output_unit > 0) THEN
2559 WRITE (output_unit, *) ' LOCALIZATION! loop did not converge within the maximum number of iterations.'
2560 WRITE (output_unit, *) ' Present Max. gradient = ', tolerance
2561 END IF
2562 EXIT
2563 END IF
2564 t1 = m_walltime()
2565
2566 DO iperm = 1, nperm
2567
2568 ! fix partners for this permutation, and get the number of states
2569 CALL eberlein(iperm, para_env, list_pair)
2570 ip_partner = -1
2571 ns_partner = 0
2572 DO ipair = 1, npair
2573 IF (list_pair(1, ipair) == para_env%mepos) THEN
2574 ip_partner = list_pair(2, ipair)
2575 EXIT
2576 ELSE IF (list_pair(2, ipair) == para_env%mepos) THEN
2577 ip_partner = list_pair(1, ipair)
2578 EXIT
2579 END IF
2580 END DO
2581 IF (ip_partner >= 0) THEN
2582 ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1
2583 ELSE
2584 ns_partner = 0
2585 END IF
2586
2587 ! if there is a non-zero block connecting two partners, jacobi-sweep it.
2588 IF (ns_partner*ns_me /= 0) THEN
2589
2590 ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner))
2591 rmat_loc = 0.0_dp
2592 DO i = 1, ns_me + ns_partner
2593 rmat_loc(i, i) = 1.0_dp
2594 END DO
2595
2596 ALLOCATE (c_array_partner(nstate, ns_partner, dim2))
2597
2598 DO idim = 1, dim2
2599 ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner))
2600 DO i = 1, ns_me
2601 c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i)
2602 END DO
2603 END DO
2604
2605 CALL para_env%sendrecv(msgin=c_array_me, dest=ip_partner, &
2606 msgout=c_array_partner, source=ip_partner)
2607
2608 n1 = ns_me
2609 n2 = ns_partner
2610 ilow1 = ns_bound(para_env%mepos, 1)
2611 iup1 = ns_bound(para_env%mepos, 1) + n1 - 1
2612 ilow2 = ns_bound(ip_partner, 1)
2613 iup2 = ns_bound(ip_partner, 1) + n2 - 1
2614 IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1)) THEN
2615 il1 = 1
2616 iu1 = n1
2617 iu1 = n1
2618 il2 = 1 + n1
2619 iu2 = n1 + n2
2620 ELSE
2621 il1 = 1 + n2
2622 iu1 = n1 + n2
2623 iu1 = n1 + n2
2624 il2 = 1
2625 iu2 = n2
2626 END IF
2627
2628 DO idim = 1, dim2
2629 DO i = 1, n1
2630 xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim)
2631 xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim)
2632 END DO
2633 DO i = 1, n2
2634 xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim)
2635 xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim)
2636 END DO
2637 END DO
2638
2639 DO istate = 1, n1 + n2
2640 DO jstate = istate + 1, n1 + n2
2641 DO idim = 1, dim2
2642 mii(idim) = xyz_mix(idim)%c_array(istate, istate)
2643 mij(idim) = xyz_mix(idim)%c_array(istate, jstate)
2644 mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate)
2645 END DO
2646 CALL get_angle(mii, mjj, mij, weights, theta)
2647 st = sin(theta)
2648 ct = cos(theta)
2649 DO idim = 1, dim2
2650 DO i = 1, n1 + n2
2651 zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate)
2652 zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate)
2653 xyz_mix(idim)%c_array(i, istate) = zi
2654 xyz_mix(idim)%c_array(i, jstate) = zj
2655 END DO
2656 DO i = 1, n1 + n2
2657 zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i)
2658 zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i)
2659 xyz_mix(idim)%c_array(istate, i) = zi
2660 xyz_mix(idim)%c_array(jstate, i) = zj
2661 END DO
2662 END DO
2663
2664 DO i = 1, n1 + n2
2665 ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate)
2666 rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate)
2667 rmat_loc(i, istate) = ri
2668 rmat_loc(i, jstate) = rj
2669 END DO
2670 END DO
2671 END DO
2672
2673 k = nblock_max + 1
2674 CALL para_env%sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, &
2675 rotmat(1:nstate, k:k + n2 - 1), ip_partner)
2676
2677 IF (ilow1 < ilow2) THEN
2678 ! no longer compiles in official sdgb:
2679 !CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1, k), nstate, rmat_loc(1 + n1, 1), n1 + n2, 0.0_dp, gmat, nstate)
2680 ! probably inefficient:
2681 CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, rmat_loc(1 + n1:, 1:n1), &
2682 n2, 0.0_dp, gmat(:, :), nstate)
2683 CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(1:, 1:), &
2684 n1 + n2, 1.0_dp, gmat(:, :), nstate)
2685 ELSE
2686 CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, &
2687 rmat_loc(1:, n2 + 1:), n1 + n2, 0.0_dp, gmat(:, :), nstate)
2688 ! no longer compiles in official sdgb:
2689 !CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1, 1), nstate, rmat_loc(n2 + 1, n2 + 1), n1 + n2, 1.0_dp, gmat, nstate)
2690 ! probably inefficient:
2691 CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(n2 + 1:, n2 + 1:), &
2692 n1, 1.0_dp, gmat(:, :), nstate)
2693 END IF
2694
2695 CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1)
2696
2697 DO idim = 1, dim2
2698 DO i = 1, n1
2699 xyz_mix_ns(idim)%c_array(1:nstate, i) = cmplx(0.0_dp, 0.0_dp, dp)
2700 END DO
2701
2702 DO istate = 1, n1
2703 DO jstate = 1, nstate
2704 DO i = 1, n2
2705 xyz_mix_ns(idim)%c_array(jstate, istate) = &
2706 xyz_mix_ns(idim)%c_array(jstate, istate) + &
2707 c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1)
2708 END DO
2709 END DO
2710 END DO
2711 DO istate = 1, n1
2712 DO jstate = 1, nstate
2713 DO i = 1, n1
2714 xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + &
2715 c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1)
2716 END DO
2717 END DO
2718 END DO
2719 END DO ! idim
2720
2721 DEALLOCATE (c_array_partner)
2722
2723 ELSE ! save my data
2724 DO idim = 1, dim2
2725 DO i = 1, ns_me
2726 xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i)
2727 END DO
2728 END DO
2729 END IF
2730
2731 DO idim = 1, dim2
2732 DO i = 1, ns_me
2733 cz_ij_loc(idim)%c_array(1:nstate, i) = cmplx(0.0_dp, 0.0_dp, dp)
2734 END DO
2735 END DO
2736
2737 IF (ns_partner*ns_me /= 0) THEN
2738 ! transpose rotation matrix rmat_loc
2739 DO i = 1, ns_me + ns_partner
2740 DO j = i + 1, ns_me + ns_partner
2741 ri = rmat_loc(i, j)
2742 rmat_loc(i, j) = rmat_loc(j, i)
2743 rmat_loc(j, i) = ri
2744 END DO
2745 END DO
2746
2747 ! prepare for distribution
2748 DO i = 1, n1
2749 rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1)
2750 END DO
2751 ik = nblock_max
2752 DO i = 1, n2
2753 rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1)
2754 END DO
2755 ELSE
2756 rmat_send = 0.0_dp
2757 END IF
2758
2759 ! collect data from all tasks (this takes some significant time)
2760 CALL para_env%allgather(rmat_send, rmat_recv_all)
2761
2762 ! update blocks everywhere
2763 DO ip = 0, para_env%num_pe - 1
2764
2765 ip_recv_from = mod(para_env%mepos - ip + para_env%num_pe, para_env%num_pe)
2766 rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from)
2767
2768 ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1
2769
2770 IF (ns_me /= 0) THEN
2771 IF (ns_recv_from /= 0) THEN
2772 !look for the partner of ip_recv_from
2773 ip_recv_partner = -1
2774 ns_recv_partner = 0
2775 DO ipair = 1, npair
2776 IF (list_pair(1, ipair) == ip_recv_from) THEN
2777 ip_recv_partner = list_pair(2, ipair)
2778 EXIT
2779 ELSE IF (list_pair(2, ipair) == ip_recv_from) THEN
2780 ip_recv_partner = list_pair(1, ipair)
2781 EXIT
2782 END IF
2783 END DO
2784
2785 IF (ip_recv_partner >= 0) THEN
2786 ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1
2787 END IF
2788 IF (ns_recv_partner > 0) THEN
2789 il1 = ns_bound(para_env%mepos, 1)
2790 il_recv = ns_bound(ip_recv_from, 1)
2791 il_recv_partner = ns_bound(ip_recv_partner, 1)
2792 ik = nblock_max
2793
2794 DO idim = 1, dim2
2795 DO i = 1, ns_recv_from
2796 ii = il_recv + i - 1
2797 DO j = 1, ns_me
2798 jj = j
2799 DO k = 1, ns_recv_from
2800 kk = il_recv + k - 1
2801 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2802 rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2803 END DO
2804 END DO
2805 END DO
2806 DO i = 1, ns_recv_from
2807 ii = il_recv + i - 1
2808 DO j = 1, ns_me
2809 jj = j
2810 DO k = 1, ns_recv_partner
2811 kk = il_recv_partner + k - 1
2812 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2813 rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2814 END DO
2815 END DO
2816 END DO
2817 END DO ! idim
2818 ELSE
2819 il1 = ns_bound(para_env%mepos, 1)
2820 il_recv = ns_bound(ip_recv_from, 1)
2821 DO idim = 1, dim2
2822 DO j = 1, ns_me
2823 jj = j
2824 DO i = 1, ns_recv_from
2825 ii = il_recv + i - 1
2826 cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j)
2827 END DO
2828 END DO
2829 END DO ! idim
2830 END IF
2831 END IF
2832 END IF ! ns_me
2833 END DO ! ip
2834
2835 IF (ns_partner*ns_me /= 0) THEN
2836 DEALLOCATE (rmat_loc)
2837 DO idim = 1, dim2
2838 DEALLOCATE (xyz_mix(idim)%c_array)
2839 END DO
2840 END IF
2841
2842 END DO ! iperm
2843
2844 ! calculate the max gradient
2845 DO idim = 1, dim2
2846 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2847 ii = i - ns_bound(para_env%mepos, 1) + 1
2848 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2849 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2850 END DO
2851 rcount(:) = SIZE(zdiag_me(idim)%c_array)
2852 rdispl(1) = 0
2853 DO ip = 2, para_env%num_pe
2854 rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
2855 END DO
2856 ! collect all the diagonal elements in a replicated 1d array
2857 CALL para_env%allgatherv(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl)
2858 END DO
2859
2860 gmax = 0.0_dp
2861 DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2862 k = j - ns_bound(para_env%mepos, 1) + 1
2863 DO i = 1, j - 1
2864 ! find the location of the diagonal element (i,i)
2865 DO ip = 0, para_env%num_pe - 1
2866 IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2)) THEN
2867 ip_has_i = ip
2868 EXIT
2869 END IF
2870 END DO
2871 ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1
2872 ! mepos has the diagonal element (j,j), as well as the off diagonal (i,j)
2873 jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1
2874 grad = 0.0_dp
2875 DO idim = 1, dim2
2876 zi = zdiag_all(idim)%c_array(ii)
2877 zj = zdiag_all(idim)%c_array(jj)
2878 grad = grad + weights(idim)*real(4.0_dp*conjg(cz_ij_loc(idim)%c_array(i, k))*(zj - zi), dp)
2879 END DO
2880 gmax = max(gmax, abs(grad))
2881 END DO
2882 END DO
2883
2884 CALL para_env%max(gmax)
2885 tolerance = gmax
2886 IF (PRESENT(tol_out)) tol_out = tolerance
2887
2888 func = 0.0_dp
2889 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2890 k = i - ns_bound(para_env%mepos, 1) + 1
2891 DO idim = 1, dim2
2892 zr = real(cz_ij_loc(idim)%c_array(i, k), dp)
2893 zc = aimag(cz_ij_loc(idim)%c_array(i, k))
2894 func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/twopi/twopi
2895 END DO
2896 END DO
2897 CALL para_env%sum(func)
2898 t2 = m_walltime()
2899
2900 IF (output_unit > 0 .AND. modulo(sweeps, out_each) == 0) THEN
2901 WRITE (output_unit, '(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1
2902 CALL m_flush(output_unit)
2903 END IF
2904 IF (PRESENT(eps_localization)) THEN
2905 IF (tolerance < eps_localization) EXIT
2906 END IF
2907 IF (PRESENT(target_time) .AND. PRESENT(start_time)) THEN
2908 CALL external_control(should_stop, "LOC", target_time=target_time, start_time=start_time)
2909 IF (should_stop) EXIT
2910 END IF
2911
2912 END DO ! lsweep
2913
2914 ! buffer for message passing
2915 DEALLOCATE (rmat_recv_all)
2916
2917 DEALLOCATE (rmat_recv)
2918 DEALLOCATE (rmat_send)
2919 IF (ns_me > 0) THEN
2920 DEALLOCATE (c_array_me)
2921 END IF
2922 DO idim = 1, dim2
2923 DEALLOCATE (zdiag_me(idim)%c_array)
2924 DEALLOCATE (zdiag_all(idim)%c_array)
2925 END DO
2926 DEALLOCATE (zdiag_me)
2927 DEALLOCATE (zdiag_all)
2928 DEALLOCATE (xyz_mix)
2929 DO idim = 1, dim2
2930 IF (ns_me /= 0) THEN
2931 DEALLOCATE (xyz_mix_ns(idim)%c_array)
2932 END IF
2933 END DO
2934 DEALLOCATE (xyz_mix_ns)
2935 IF (ns_me /= 0) THEN
2936 DEALLOCATE (gmat)
2937 END IF
2938 DEALLOCATE (mii)
2939 DEALLOCATE (mij)
2940 DEALLOCATE (mjj)
2941 DEALLOCATE (list_pair)
2942
2943 END SUBROUTINE jacobi_rot_para_core
2944
2945! **************************************************************************************************
2946!> \brief ...
2947!> \param iperm ...
2948!> \param para_env ...
2949!> \param list_pair ...
2950! **************************************************************************************************
2951 SUBROUTINE eberlein(iperm, para_env, list_pair)
2952 INTEGER, INTENT(IN) :: iperm
2953 TYPE(mp_para_env_type), POINTER :: para_env
2954 INTEGER, DIMENSION(:, :) :: list_pair
2955
2956 INTEGER :: i, ii, jj, npair
2957
2958 npair = (para_env%num_pe + 1)/2
2959 IF (iperm == 1) THEN
2960!..set up initial ordering
2961 DO i = 0, para_env%num_pe - 1
2962 ii = ((i + 1) + 1)/2
2963 jj = mod((i + 1) + 1, 2) + 1
2964 list_pair(jj, ii) = i
2965 END DO
2966 IF (mod(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1
2967 ELSEIF (mod(iperm, 2) == 0) THEN
2968!..a type shift
2969 jj = list_pair(1, npair)
2970 DO i = npair, 3, -1
2971 list_pair(1, i) = list_pair(1, i - 1)
2972 END DO
2973 list_pair(1, 2) = list_pair(2, 1)
2974 list_pair(2, 1) = jj
2975 ELSE
2976!..b type shift
2977 jj = list_pair(2, 1)
2978 DO i = 1, npair - 1
2979 list_pair(2, i) = list_pair(2, i + 1)
2980 END DO
2981 list_pair(2, npair) = jj
2982 END IF
2983
2984 END SUBROUTINE eberlein
2985
2986! **************************************************************************************************
2987!> \brief ...
2988!> \param vectors ...
2989!> \param op_sm_set ...
2990!> \param zij_fm_set ...
2991! **************************************************************************************************
2992 SUBROUTINE zij_matrix(vectors, op_sm_set, zij_fm_set)
2993
2994 TYPE(cp_fm_type), INTENT(IN) :: vectors
2995 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: op_sm_set
2996 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: zij_fm_set
2997
2998 CHARACTER(len=*), PARAMETER :: routinen = 'zij_matrix'
2999
3000 INTEGER :: handle, i, j, nao, nmoloc
3001 TYPE(cp_fm_type) :: opvec
3002
3003 CALL timeset(routinen, handle)
3004
3005 ! get rows and cols of the input
3006 CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc)
3007 ! replicate the input kind of matrix
3008 CALL cp_fm_create(opvec, vectors%matrix_struct)
3009
3010 ! Compute zij here
3011 DO i = 1, SIZE(zij_fm_set, 2)
3012 DO j = 1, SIZE(zij_fm_set, 1)
3013 CALL cp_fm_set_all(zij_fm_set(j, i), 0.0_dp)
3014 CALL cp_dbcsr_sm_fm_multiply(op_sm_set(j, i)%matrix, vectors, opvec, ncol=nmoloc)
3015 CALL parallel_gemm("T", "N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, &
3016 zij_fm_set(j, i))
3017 END DO
3018 END DO
3019
3020 CALL cp_fm_release(opvec)
3021 CALL timestop(handle)
3022
3023 END SUBROUTINE zij_matrix
3024
3025! **************************************************************************************************
3026!> \brief ...
3027!> \param vectors ...
3028! **************************************************************************************************
3029 SUBROUTINE scdm_qrfact(vectors)
3030
3031 TYPE(cp_fm_type), INTENT(IN) :: vectors
3032
3033 CHARACTER(len=*), PARAMETER :: routinen = 'scdm_qrfact'
3034
3035 INTEGER :: handle, ncolt, nrowt
3036 REAL(kind=dp), DIMENSION(:), POINTER :: tau
3037 TYPE(cp_fm_struct_type), POINTER :: cstruct
3038 TYPE(cp_fm_type) :: ctp, qf, tmp
3039
3040 CALL timeset(routinen, handle)
3041
3042 ! Create Transpose of Coefficient Matrix vectors
3043 nrowt = vectors%matrix_struct%ncol_global
3044 ncolt = vectors%matrix_struct%nrow_global
3045
3046 CALL cp_fm_struct_create(cstruct, template_fmstruct=vectors%matrix_struct, &
3047 nrow_global=nrowt, ncol_global=ncolt)
3048 CALL cp_fm_create(ctp, cstruct)
3049 CALL cp_fm_struct_release(cstruct)
3050
3051 ALLOCATE (tau(nrowt))
3052
3053 CALL cp_fm_transpose(vectors, ctp)
3054
3055 ! Get QR decomposition of CTs
3056 CALL cp_fm_pdgeqpf(ctp, tau, nrowt, ncolt, 1, 1)
3057
3058 ! Construction of Q from the scalapack output
3059 CALL cp_fm_struct_create(cstruct, para_env=ctp%matrix_struct%para_env, &
3060 context=ctp%matrix_struct%context, nrow_global=ctp%matrix_struct%nrow_global, &
3061 ncol_global=ctp%matrix_struct%nrow_global)
3062 CALL cp_fm_create(qf, cstruct)
3063 CALL cp_fm_struct_release(cstruct)
3064 CALL cp_fm_to_fm_submat(ctp, qf, nrowt, nrowt, 1, 1, 1, 1)
3065
3066 ! Get Q
3067 CALL cp_fm_pdorgqr(qf, tau, nrowt, 1, 1)
3068
3069 ! Transform original coefficient matrix vectors
3070 CALL cp_fm_create(tmp, vectors%matrix_struct)
3071 CALL cp_fm_set_all(tmp, 0.0_dp, 1.0_dp)
3072 CALL cp_fm_to_fm(vectors, tmp)
3073 CALL parallel_gemm('N', 'N', ncolt, nrowt, nrowt, 1.0_dp, tmp, qf, 0.0_dp, vectors)
3074
3075 ! Cleanup
3076 CALL cp_fm_release(ctp)
3077 CALL cp_fm_release(tmp)
3078 CALL cp_fm_release(qf)
3079 DEALLOCATE (tau)
3080
3081 CALL timestop(handle)
3082
3083 END SUBROUTINE scdm_qrfact
3084
3085END MODULE qs_localization_methods
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
Handles all functions related to the CELL.
Definition cell_types.F:15
methods related to the blacs parallel environment
Basic linear algebra operations for complex full matrices.
subroutine, public cp_cfm_scale_and_add(alpha, matrix_a, beta, matrix_b)
Scale and add two BLACS matrices (a = alpha*a + beta*b).
subroutine, public cp_cfm_rot_rows(matrix, irow, jrow, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
subroutine, public cp_cfm_schur_product(matrix_a, matrix_b, matrix_c)
Computes the element-wise (Schur) product of two matrices: C = A \circ B .
subroutine, public cp_cfm_column_scale(matrix_a, scaling)
Scales columns of the full matrix by corresponding factors.
subroutine, public cp_cfm_rot_cols(matrix, icol, jcol, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
subroutine, public cp_cfm_trace(matrix_a, matrix_b, trace)
Returns the trace of matrix_a^T matrix_b, i.e sum_{i,j}(matrix_a(i,j)*matrix_b(i,j)) .
used for collecting diagonalization schemes available for cp_cfm_type
Definition cp_cfm_diag.F:14
subroutine, public cp_cfm_heevd(matrix, eigenvectors, eigenvalues)
Perform a diagonalisation of a complex matrix.
Definition cp_cfm_diag.F:58
Represents a complex full matrix distributed on many processors.
subroutine, public cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
Extract a sub-matrix from the full matrix: op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n...
subroutine, public cp_cfm_get_element(matrix, irow_global, icol_global, alpha)
Get the matrix element by its global index.
subroutine, public cp_cfm_create(matrix, matrix_struct, name)
Creates a new full matrix with the given structure.
subroutine, public cp_cfm_release(matrix)
Releases a full matrix.
subroutine, public cp_fm_to_cfm(msourcer, msourcei, mtarget)
Construct a complex full matrix by taking its real and imaginary parts from two separate real-value f...
subroutine, public cp_cfm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, matrix_struct, para_env)
Returns information about a full matrix.
subroutine, public cp_cfm_set_submatrix(matrix, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
Set a sub-matrix of the full matrix: matrix(start_row:start_row+n_rows,start_col:start_col+n_cols) = ...
subroutine, public cp_cfm_set_all(matrix, alpha, beta)
Set all elements of the full matrix to alpha. Besides, set all diagonal matrix elements to beta (if g...
subroutine, public cp_cfm_to_fm(msource, mtargetr, mtargeti)
Copy real and imaginary parts of a complex full matrix into separate real-value full matrices.
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_sm_fm_multiply(matrix, fm_in, fm_out, ncol, alpha, beta)
multiply a dbcsr with a fm matrix
Routines to handle the external control of CP2K.
subroutine, public external_control(should_stop, flag, globenv, target_time, start_time, force_check)
External manipulations during a run : when the <PROJECT_NAME>.EXIT_$runtype command is sent the progr...
basic linear algebra operations for full matrices
subroutine, public cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
compute a QR factorization with column pivoting of a M-by-N distributed matrix sub( A ) = A(IA:IA+M-1...
real(kind=dp) function, public cp_fm_frobenius_norm(matrix_a)
computes the Frobenius norm of matrix_a
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_scale(alpha, matrix_a)
scales a matrix matrix_a = alpha * matrix_b
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
subroutine, public cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1) with orthonormal column...
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_decompose(matrix, n, info_out)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
Definition cp_fm_diag.F:17
subroutine, public cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
Computes all eigenvalues and vectors of a real symmetric matrix significantly faster than syevx,...
Definition cp_fm_diag.F:434
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
returns an element of a fm this value is valid on every cpu using this call is expensive
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_maxabsrownorm(matrix, a_max)
find the maximum over the rows of the sum of the absolute values of the elements of a given row = || ...
subroutine, public cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
copy just a part ot the matrix
subroutine, public cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
find the maximum absolute value of the matrix element maxval(abs(matrix))
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_init_random(matrix, ncol, start_col)
fills a matrix with random numbers
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Definition kahan_sum.F:29
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition machine.F:130
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition machine.F:147
Definition of mathematical constants and functions.
real(kind=dp), parameter, public pi
real(kind=dp), parameter, public twopi
Routines for calculating a complex matrix exponential.
Definition matrix_exp.F:13
subroutine, public exp_pade_real(exp_h, matrix, nsquare, npade)
exponential of a real matrix, calculated using pade approximation together with scaling and squaring
Definition matrix_exp.F:570
subroutine, public get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
optimization function for pade/taylor order and number of squaring steps
Definition matrix_exp.F:244
Interface to the message passing library MPI.
basic linear algebra operations for full matrixes
Localization methods such as 2x2 Jacobi rotations Steepest Decents Conjugate Gradient.
subroutine, public approx_l1_norm_sd(c, iterations, eps, converged, sweeps)
...
subroutine, public rotate_orbitals(rmat, vectors)
...
subroutine, public jacobi_rotations(weights, zij, vectors, para_env, max_iter, eps_localization, sweeps, out_each, target_time, start_time, restricted)
wrapper for the jacobi routines, should be removed if jacobi_rot_para can deal with serial para_envs.
subroutine, public zij_matrix(vectors, op_sm_set, zij_fm_set)
...
subroutine, public direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
use the exponential parametrization as described in to perform a direct mini Gerd Berghold et al....
subroutine, public initialize_weights(cell, weights)
...
subroutine, public crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, eps_localization, iterations, converged)
yet another crazy try, computes the angles needed to rotate the orbitals first and rotates them all a...
subroutine, public scdm_qrfact(vectors)
...
subroutine, public jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
combine jacobi rotations (serial) and conjugate gradient with golden section line search for partiall...
Exchange and Correlation functional calculations.
Definition xc.F:17
Type defining parameters related to the simulation cell.
Definition cell_types.F:55
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
Represent a complex full matrix.
keeps the information about the structure of a full matrix
represent a full matrix
stores all the informations relevant to an mpi environment