(git:5f3bc36)
Loading...
Searching...
No Matches
qs_localization_methods.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Localization methods such as 2x2 Jacobi rotations
10!> Steepest Decents
11!> Conjugate Gradient
12!> \par History
13!> Initial parallellization of jacobi (JVDV 07.2003)
14!> direct minimization using exponential parametrization (JVDV 09.2003)
15!> crazy rotations go fast (JVDV 10.2003)
16!> \author CJM (04.2003)
17! **************************************************************************************************
19 USE cell_types, ONLY: cell_type
28 USE cp_cfm_diag, ONLY: cp_cfm_heevd
29 USE cp_cfm_types, ONLY: &
33 USE cp_dbcsr_api, ONLY: dbcsr_p_type
45 USE cp_fm_diag, ONLY: cp_fm_syevd
50 USE cp_fm_types, ONLY: &
56 USE kahan_sum, ONLY: accurate_sum
57 USE kinds, ONLY: dp
58 USE machine, ONLY: m_flush,&
60 USE mathconstants, ONLY: pi,&
61 twopi,&
62 z_zero
63 USE matrix_exp, ONLY: exp_pade_real,&
67#include "./base/base_uses.f90"
68
69 IMPLICIT NONE
73
74 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_localization_methods'
75
76 PRIVATE
77
78 TYPE set_c_1d_type
79 COMPLEX(KIND=dp), POINTER, DIMENSION(:) :: c_array => null()
80 END TYPE
81
82 TYPE set_c_2d_type
83 COMPLEX(KIND=dp), POINTER, DIMENSION(:, :) :: c_array => null()
84 END TYPE
85
86CONTAINS
87! **************************************************************************************************
88!> \brief ...
89!> \param C ...
90!> \param iterations ...
91!> \param eps ...
92!> \param converged ...
93!> \param sweeps ...
94! **************************************************************************************************
95 SUBROUTINE approx_l1_norm_sd(C, iterations, eps, converged, sweeps)
96 TYPE(cp_fm_type), INTENT(IN) :: c
97 INTEGER, INTENT(IN) :: iterations
98 REAL(kind=dp), INTENT(IN) :: eps
99 LOGICAL, INTENT(INOUT) :: converged
100 INTEGER, INTENT(INOUT) :: sweeps
101
102 CHARACTER(len=*), PARAMETER :: routinen = 'approx_l1_norm_sd'
103 INTEGER, PARAMETER :: taylor_order = 100
104 REAL(kind=dp), PARAMETER :: alpha = 0.1_dp, f2_eps = 0.01_dp
105
106 INTEGER :: handle, i, istep, k, n, ncol_local, &
107 nrow_local, output_unit, p
108 REAL(kind=dp) :: expfactor, f2, f2old, gnorm, tnorm
109 TYPE(cp_blacs_env_type), POINTER :: context
110 TYPE(cp_fm_struct_type), POINTER :: fm_struct_k_k
111 TYPE(cp_fm_type) :: ctmp, g, gp1, gp2, u
112 TYPE(mp_para_env_type), POINTER :: para_env
113
114 CALL timeset(routinen, handle)
115
116 NULLIFY (context, para_env, fm_struct_k_k)
117
118 output_unit = cp_logger_get_default_io_unit()
119
120 CALL cp_fm_struct_get(c%matrix_struct, nrow_global=n, ncol_global=k, &
121 nrow_local=nrow_local, ncol_local=ncol_local, &
122 para_env=para_env, context=context)
123 CALL cp_fm_struct_create(fm_struct_k_k, para_env=para_env, context=context, &
124 nrow_global=k, ncol_global=k)
125 CALL cp_fm_create(ctmp, c%matrix_struct)
126 CALL cp_fm_create(u, fm_struct_k_k)
127 CALL cp_fm_create(g, fm_struct_k_k)
128 CALL cp_fm_create(gp1, fm_struct_k_k)
129 CALL cp_fm_create(gp2, fm_struct_k_k)
130 !
131 ! printing
132 IF (output_unit > 0) THEN
133 WRITE (output_unit, '(1X)')
134 WRITE (output_unit, '(2X,A)') '-----------------------------------------------------------------------------'
135 WRITE (output_unit, '(A,I5)') ' Nbr iterations =', iterations
136 WRITE (output_unit, '(A,E10.2)') ' eps convergence =', eps
137 WRITE (output_unit, '(A,I5)') ' Max Taylor order =', taylor_order
138 WRITE (output_unit, '(A,E10.2)') ' f2 eps =', f2_eps
139 WRITE (output_unit, '(A,E10.2)') ' alpha =', alpha
140 WRITE (output_unit, '(A)') ' iteration approx_l1_norm g_norm rel_err'
141 END IF
142 !
143 f2old = 0.0_dp
144 converged = .false.
145 !
146 ! Start the steepest descent
147 DO istep = 1, iterations
148 !
149 !-------------------------------------------------------------------
150 ! compute f_2
151 ! f_2(x)=(x^2+eps)^1/2
152 f2 = 0.0_dp
153 DO p = 1, ncol_local ! p
154 DO i = 1, nrow_local ! i
155 f2 = f2 + sqrt(c%local_data(i, p)**2 + f2_eps)
156 END DO
157 END DO
158 CALL c%matrix_struct%para_env%sum(f2)
159 !write(*,*) 'qs_localize: f_2=',f2
160 !-------------------------------------------------------------------
161 ! compute the derivative of f_2
162 ! f_2(x)=(x^2+eps)^1/2
163 DO p = 1, ncol_local ! p
164 DO i = 1, nrow_local ! i
165 ctmp%local_data(i, p) = c%local_data(i, p)/sqrt(c%local_data(i, p)**2 + f2_eps)
166 END DO
167 END DO
168 CALL parallel_gemm('T', 'N', k, k, n, 1.0_dp, ctmp, c, 0.0_dp, g)
169 ! antisymmetrize
170 CALL cp_fm_transpose(g, u)
171 CALL cp_fm_scale_and_add(-0.5_dp, g, 0.5_dp, u)
172 !
173 !-------------------------------------------------------------------
174 !
175 gnorm = cp_fm_frobenius_norm(g)
176 !write(*,*) 'qs_localize: norm(G)=',gnorm
177 !
178 ! rescale for steepest descent
179 CALL cp_fm_scale(-alpha, g)
180 !
181 ! compute unitary transform
182 ! zeroth order
183 CALL cp_fm_set_all(u, 0.0_dp, 1.0_dp)
184 ! first order
185 expfactor = 1.0_dp
186 CALL cp_fm_scale_and_add(1.0_dp, u, expfactor, g)
187 tnorm = cp_fm_frobenius_norm(g)
188 !write(*,*) 'Taylor expansion i=',1,' norm(X^i)/i!=',tnorm
189 IF (tnorm .GT. 1.0e-10_dp) THEN
190 ! other orders
191 CALL cp_fm_to_fm(g, gp1)
192 DO i = 2, taylor_order
193 ! new power of G
194 CALL parallel_gemm('N', 'N', k, k, k, 1.0_dp, g, gp1, 0.0_dp, gp2)
195 CALL cp_fm_to_fm(gp2, gp1)
196 ! add to the taylor expansion so far
197 expfactor = expfactor/real(i, kind=dp)
198 CALL cp_fm_scale_and_add(1.0_dp, u, expfactor, gp1)
199 tnorm = cp_fm_frobenius_norm(gp1)
200 !write(*,*) 'Taylor expansion i=',i,' norm(X^i)/i!=',tnorm*expfactor
201 IF (tnorm*expfactor .LT. 1.0e-10_dp) EXIT
202 END DO
203 END IF
204 !
205 ! incrementaly rotate the MOs
206 CALL parallel_gemm('N', 'N', n, k, k, 1.0_dp, c, u, 0.0_dp, ctmp)
207 CALL cp_fm_to_fm(ctmp, c)
208 !
209 ! printing
210 IF (output_unit .GT. 0) THEN
211 WRITE (output_unit, '(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, abs((f2 - f2old)/f2)
212 END IF
213 !
214 ! Are we done?
215 sweeps = istep
216 !IF(gnorm.LE.grad_thresh.AND.ABS((f2-f2old)/f2).LE.f2_thresh.AND.istep.GT.1) THEN
217 IF (abs((f2 - f2old)/f2) .LE. eps .AND. istep .GT. 1) THEN
218 converged = .true.
219 EXIT
220 END IF
221 f2old = f2
222 END DO
223 !
224 ! here we should do one refine step to enforce C'*S*C=1 for any case
225 !
226 ! Print the final result
227 IF (output_unit .GT. 0) WRITE (output_unit, '(A,E16.10)') ' sparseness function f2 = ', f2
228 !
229 ! sparsity
230 !DO i=1,size(thresh,1)
231 ! gnorm = 0.0_dp
232 ! DO o=1,ncol_local
233 ! DO p=1,nrow_local
234 ! IF(ABS(C%local_data(p,o)).GT.thresh(i)) THEN
235 ! gnorm = gnorm + 1.0_dp
236 ! ENDIF
237 ! ENDDO
238 ! ENDDO
239 ! CALL C%matrix_struct%para_env%sum(gnorm)
240 ! IF(output_unit.GT.0) THEN
241 ! WRITE(output_unit,*) 'qs_localize: ratio2=',gnorm / ( REAL(k,KIND=dp)*REAL(n,KIND=dp) ),thresh(i)
242 ! ENDIF
243 !ENDDO
244 !
245 ! deallocate
246 CALL cp_fm_struct_release(fm_struct_k_k)
247 CALL cp_fm_release(ctmp)
248 CALL cp_fm_release(u)
249 CALL cp_fm_release(g)
250 CALL cp_fm_release(gp1)
251 CALL cp_fm_release(gp2)
252
253 CALL timestop(handle)
254
255 END SUBROUTINE approx_l1_norm_sd
256! **************************************************************************************************
257!> \brief ...
258!> \param cell ...
259!> \param weights ...
260! **************************************************************************************************
261 SUBROUTINE initialize_weights(cell, weights)
262
263 TYPE(cell_type), POINTER :: cell
264 REAL(kind=dp), DIMENSION(:) :: weights
265
266 REAL(kind=dp), DIMENSION(3, 3) :: metric
267
268 cpassert(ASSOCIATED(cell))
269
270 metric = 0.0_dp
271 CALL dgemm('T', 'N', 3, 3, 3, 1._dp, cell%hmat(:, :), 3, cell%hmat(:, :), 3, 0.0_dp, metric(:, :), 3)
272
273 weights(1) = metric(1, 1) - metric(1, 2) - metric(1, 3)
274 weights(2) = metric(2, 2) - metric(1, 2) - metric(2, 3)
275 weights(3) = metric(3, 3) - metric(1, 3) - metric(2, 3)
276 weights(4) = metric(1, 2)
277 weights(5) = metric(1, 3)
278 weights(6) = metric(2, 3)
279
280 END SUBROUTINE initialize_weights
281
282! **************************************************************************************************
283!> \brief wrapper for the jacobi routines, should be removed if jacobi_rot_para
284!> can deal with serial para_envs.
285!> \param weights ...
286!> \param zij ...
287!> \param vectors ...
288!> \param para_env ...
289!> \param max_iter ...
290!> \param eps_localization ...
291!> \param sweeps ...
292!> \param out_each ...
293!> \param target_time ...
294!> \param start_time ...
295!> \param restricted ...
296!> \par History
297!> \author Joost VandeVondele (02.2010)
298! **************************************************************************************************
299 SUBROUTINE jacobi_rotations(weights, zij, vectors, para_env, max_iter, &
300 eps_localization, sweeps, out_each, target_time, start_time, restricted)
301
302 REAL(kind=dp), INTENT(IN) :: weights(:)
303 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
304 TYPE(mp_para_env_type), POINTER :: para_env
305 INTEGER, INTENT(IN) :: max_iter
306 REAL(kind=dp), INTENT(IN) :: eps_localization
307 INTEGER :: sweeps
308 INTEGER, INTENT(IN) :: out_each
309 REAL(dp) :: target_time, start_time
310 INTEGER :: restricted
311
312 IF (para_env%num_pe == 1) THEN
313 CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, &
314 sweeps, out_each, restricted=restricted)
315 ELSE
316 CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
317 sweeps, out_each, target_time, start_time, restricted=restricted)
318 END IF
319
320 END SUBROUTINE jacobi_rotations
321
322! **************************************************************************************************
323!> \brief this routine, private to the module is a serial backup, till we have jacobi_rot_para to work in serial
324!> while the routine below works in parallel, it is too slow to be useful
325!> \param weights ...
326!> \param zij ...
327!> \param vectors ...
328!> \param max_iter ...
329!> \param eps_localization ...
330!> \param sweeps ...
331!> \param out_each ...
332!> \param restricted ...
333! **************************************************************************************************
334 SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, &
335 out_each, restricted)
336 REAL(kind=dp), INTENT(IN) :: weights(:)
337 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
338 INTEGER, INTENT(IN) :: max_iter
339 REAL(kind=dp), INTENT(IN) :: eps_localization
340 INTEGER :: sweeps
341 INTEGER, INTENT(IN) :: out_each
342 INTEGER :: restricted
343
344 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rotations_serial'
345
346 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
347 INTEGER :: dim2, handle, idim, istate, jstate, &
348 nstate, unit_nr
349 REAL(kind=dp) :: ct, st, t1, t2, theta, tolerance
350 TYPE(cp_cfm_type) :: c_rmat
351 TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
352 TYPE(cp_fm_type) :: rmat
353
354 CALL timeset(routinen, handle)
355
356 dim2 = SIZE(zij, 2)
357 ALLOCATE (c_zij(dim2))
358 NULLIFY (mii, mij, mjj)
359 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
360
361 CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
362 CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
363
364 CALL cp_cfm_create(c_rmat, zij(1, 1)%matrix_struct)
365 CALL cp_cfm_set_all(c_rmat, (0._dp, 0._dp), (1._dp, 0._dp))
366 DO idim = 1, dim2
367 CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
368 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
369 zij(2, idim)%local_data, dp)
370 END DO
371
372 CALL cp_fm_get_info(rmat, nrow_global=nstate)
373 tolerance = 1.0e10_dp
374
375 sweeps = 0
376 unit_nr = -1
377 IF (rmat%matrix_struct%para_env%is_source()) THEN
379 WRITE (unit_nr, '(T4,A )') " Localization by iterative Jacobi rotation"
380 END IF
381
382 IF (restricted > 0) THEN
384 WRITE (unit_nr, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
385 nstate = nstate - restricted
386 END IF
387
388 ! do jacobi sweeps until converged
389 DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
390 sweeps = sweeps + 1
391 t1 = m_walltime()
392
393 DO istate = 1, nstate
394 DO jstate = istate + 1, nstate
395 DO idim = 1, dim2
396 CALL cp_cfm_get_element(c_zij(idim), istate, istate, mii(idim))
397 CALL cp_cfm_get_element(c_zij(idim), istate, jstate, mij(idim))
398 CALL cp_cfm_get_element(c_zij(idim), jstate, jstate, mjj(idim))
399 END DO
400 CALL get_angle(mii, mjj, mij, weights, theta)
401 st = sin(theta)
402 ct = cos(theta)
403 CALL rotate_zij(istate, jstate, st, ct, c_zij)
404
405 CALL rotate_rmat(istate, jstate, st, ct, c_rmat)
406 END DO
407 END DO
408
409 CALL check_tolerance(c_zij, weights, tolerance)
410
411 t2 = m_walltime()
412 IF (unit_nr > 0 .AND. modulo(sweeps, out_each) == 0) THEN
413 WRITE (unit_nr, '(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') &
414 "Iteration:", sweeps, "Tolerance:", tolerance, "Time:", t2 - t1
415 CALL m_flush(unit_nr)
416 END IF
417
418 END DO
419
420 DO idim = 1, dim2
421 zij(1, idim)%local_data = real(c_zij(idim)%local_data, dp)
422 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
423 CALL cp_cfm_release(c_zij(idim))
424 END DO
425 DEALLOCATE (c_zij)
426 DEALLOCATE (mii, mij, mjj)
427 rmat%local_data = real(c_rmat%local_data, dp)
428 CALL cp_cfm_release(c_rmat)
429 CALL rotate_orbitals(rmat, vectors)
430 CALL cp_fm_release(rmat)
431
432 CALL timestop(handle)
433
434 END SUBROUTINE jacobi_rotations_serial
435! **************************************************************************************************
436!> \brief very similar to jacobi_rotations_serial with some extra output options
437!> \param weights ...
438!> \param c_zij ...
439!> \param max_iter ...
440!> \param c_rmat ...
441!> \param eps_localization ...
442!> \param tol_out ...
443!> \param jsweeps ...
444!> \param out_each ...
445!> \param c_zij_out ...
446!> \param grad_final ...
447! **************************************************************************************************
448 SUBROUTINE jacobi_rotations_serial_1(weights, c_zij, max_iter, c_rmat, eps_localization, &
449 tol_out, jsweeps, out_each, c_zij_out, grad_final)
450 REAL(kind=dp), INTENT(IN) :: weights(:)
451 TYPE(cp_cfm_type), INTENT(IN) :: c_zij(:)
452 INTEGER, INTENT(IN) :: max_iter
453 TYPE(cp_cfm_type), INTENT(IN) :: c_rmat
454 REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_localization
455 REAL(kind=dp), INTENT(OUT), OPTIONAL :: tol_out
456 INTEGER, INTENT(OUT), OPTIONAL :: jsweeps
457 INTEGER, INTENT(IN), OPTIONAL :: out_each
458 TYPE(cp_cfm_type), INTENT(IN), OPTIONAL :: c_zij_out(:)
459 TYPE(cp_fm_type), INTENT(OUT), OPTIONAL, POINTER :: grad_final
460
461 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rotations_serial_1'
462
463 COMPLEX(KIND=dp) :: mzii
464 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
465 INTEGER :: dim2, handle, idim, istate, jstate, &
466 nstate, sweeps, unit_nr
467 REAL(kind=dp) :: alpha, avg_spread_ii, ct, spread_ii, st, &
468 sum_spread_ii, t1, t2, theta, tolerance
469 TYPE(cp_cfm_type) :: c_rmat_local
470 TYPE(cp_cfm_type), ALLOCATABLE :: c_zij_local(:)
471
472 CALL timeset(routinen, handle)
473
474 dim2 = SIZE(c_zij)
475 NULLIFY (mii, mij, mjj)
476 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
477
478 ALLOCATE (c_zij_local(dim2))
479 CALL cp_cfm_create(c_rmat_local, c_rmat%matrix_struct)
480 CALL cp_cfm_set_all(c_rmat_local, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
481 DO idim = 1, dim2
482 CALL cp_cfm_create(c_zij_local(idim), c_zij(idim)%matrix_struct)
483 c_zij_local(idim)%local_data = c_zij(idim)%local_data
484 END DO
485
486 CALL cp_cfm_get_info(c_rmat_local, nrow_global=nstate)
487 tolerance = 1.0e10_dp
488
489 IF (PRESENT(grad_final)) CALL cp_fm_set_all(grad_final, 0.0_dp)
490
491 sweeps = 0
492 IF (PRESENT(out_each)) THEN
493 unit_nr = -1
494 IF (c_rmat_local%matrix_struct%para_env%is_source()) THEN
496 END IF
497 alpha = 0.0_dp
498 DO idim = 1, dim2
499 alpha = alpha + weights(idim)
500 END DO
501 END IF
502
503 ! do jacobi sweeps until converged
504 DO WHILE (sweeps < max_iter)
505 sweeps = sweeps + 1
506 IF (PRESENT(eps_localization)) THEN
507 IF (tolerance < eps_localization) EXIT
508 END IF
509 IF (PRESENT(out_each)) t1 = m_walltime()
510
511 DO istate = 1, nstate
512 DO jstate = istate + 1, nstate
513 DO idim = 1, dim2
514 CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mii(idim))
515 CALL cp_cfm_get_element(c_zij_local(idim), istate, jstate, mij(idim))
516 CALL cp_cfm_get_element(c_zij_local(idim), jstate, jstate, mjj(idim))
517 END DO
518 CALL get_angle(mii, mjj, mij, weights, theta)
519 st = sin(theta)
520 ct = cos(theta)
521 CALL rotate_zij(istate, jstate, st, ct, c_zij_local)
522
523 CALL rotate_rmat(istate, jstate, st, ct, c_rmat_local)
524 END DO
525 END DO
526
527 IF (PRESENT(grad_final)) THEN
528 CALL check_tolerance(c_zij_local, weights, tolerance, grad=grad_final)
529 ELSE
530 CALL check_tolerance(c_zij_local, weights, tolerance)
531 END IF
532 IF (PRESENT(tol_out)) tol_out = tolerance
533
534 IF (PRESENT(out_each)) THEN
535 t2 = m_walltime()
536 IF (unit_nr > 0 .AND. modulo(sweeps, out_each) == 0) THEN
537 sum_spread_ii = 0.0_dp
538 DO istate = 1, nstate
539 spread_ii = 0.0_dp
540 DO idim = 1, dim2
541 CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mzii)
542 spread_ii = spread_ii + weights(idim)* &
543 abs(mzii)**2/twopi/twopi
544 END DO
545 sum_spread_ii = sum_spread_ii + spread_ii
546 END DO
547 sum_spread_ii = alpha*nstate/twopi/twopi - sum_spread_ii
548 avg_spread_ii = sum_spread_ii/nstate
549 WRITE (unit_nr, '(T4,A,T26,A,T48,A,T64,A)') &
550 "Iteration", "Avg. Spread_ii", "Tolerance", "Time"
551 WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4,T60,F8.3)') &
552 sweeps, avg_spread_ii, tolerance, t2 - t1
553 CALL m_flush(unit_nr)
554 END IF
555 IF (PRESENT(jsweeps)) jsweeps = sweeps
556 END IF
557
558 END DO
559
560 IF (PRESENT(c_zij_out)) THEN
561 DO idim = 1, dim2
562 CALL cp_cfm_to_cfm(c_zij_local(idim), c_zij_out(idim))
563 END DO
564 END IF
565 CALL cp_cfm_to_cfm(c_rmat_local, c_rmat)
566
567 DEALLOCATE (mii, mij, mjj)
568 DO idim = 1, dim2
569 CALL cp_cfm_release(c_zij_local(idim))
570 END DO
571 DEALLOCATE (c_zij_local)
572 CALL cp_cfm_release(c_rmat_local)
573
574 CALL timestop(handle)
575
576 END SUBROUTINE jacobi_rotations_serial_1
577! **************************************************************************************************
578!> \brief combine jacobi rotations (serial) and conjugate gradient with golden section line search
579!> for partially occupied wannier functions
580!> \param para_env ...
581!> \param weights ...
582!> \param zij ...
583!> \param vectors ...
584!> \param max_iter ...
585!> \param eps_localization ...
586!> \param iter ...
587!> \param out_each ...
588!> \param nextra ...
589!> \param do_cg ...
590!> \param nmo ...
591!> \param vectors_2 ...
592!> \param mos_guess ...
593! **************************************************************************************************
594 SUBROUTINE jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, &
595 iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
596 TYPE(mp_para_env_type), POINTER :: para_env
597 REAL(kind=dp), INTENT(IN) :: weights(:)
598 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
599 INTEGER, INTENT(IN) :: max_iter
600 REAL(kind=dp), INTENT(IN) :: eps_localization
601 INTEGER :: iter
602 INTEGER, INTENT(IN) :: out_each, nextra
603 LOGICAL, INTENT(IN) :: do_cg
604 INTEGER, INTENT(IN), OPTIONAL :: nmo
605 TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: vectors_2, mos_guess
606
607 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_cg_edf_ls'
608 COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
609 czero = (0.0_dp, 0.0_dp)
610 REAL(kind=dp), PARAMETER :: gold_sec = 0.3819_dp
611
612 COMPLEX(KIND=dp) :: cnorm2_gct, cnorm2_gct_cross, mzii
613 COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_cmat
614 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: arr_zii
615 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: matrix_zii
616 INTEGER :: dim2, handle, icinit, idim, istate, line_search_count, line_searches, lsl, lsm, &
617 lsr, miniter, nao, ndummy, nocc, norextra, northo, nstate, unit_nr
618 INTEGER, DIMENSION(1) :: iloc
619 LOGICAL :: do_cinit_mo, do_cinit_random, &
620 do_u_guess_mo, new_direction
621 REAL(kind=dp) :: alpha, avg_spread_ii, beta, beta_pr, ds, ds_min, mintol, norm, norm2_gct, &
622 norm2_gct_cross, norm2_old, spread_ii, spread_sum, sum_spread_ii, t1, tol, tolc, weight
623 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: sum_spread
624 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_mat, tmp_mat_1
625 REAL(kind=dp), DIMENSION(50) :: energy, pos
626 REAL(kind=dp), DIMENSION(:), POINTER :: tmp_arr
627 TYPE(cp_blacs_env_type), POINTER :: context
628 TYPE(cp_cfm_type) :: c_tilde, ctrans_lambda, gct_old, &
629 grad_ctilde, skc, tmp_cfm, tmp_cfm_1, &
630 tmp_cfm_2, u, ul, v, vl, zdiag
631 TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij, zij_0
632 TYPE(cp_fm_struct_type), POINTER :: tmp_fm_struct
633 TYPE(cp_fm_type) :: id_nextra, matrix_u, matrix_v, &
634 matrix_v_all, rmat, tmp_fm, vectors_all
635
636 CALL timeset(routinen, handle)
637
638 dim2 = SIZE(zij, 2)
639 NULLIFY (context)
640 NULLIFY (matrix_zii, arr_zii)
641 NULLIFY (tmp_fm_struct)
642 NULLIFY (tmp_arr)
643
644 ALLOCATE (c_zij(dim2))
645
646 CALL cp_fm_get_info(zij(1, 1), nrow_global=nstate)
647
648 ALLOCATE (sum_spread(nstate))
649 ALLOCATE (matrix_zii(nstate, dim2))
650 matrix_zii = czero
651 sum_spread = 0.0_dp
652
653 alpha = 0.0_dp
654 DO idim = 1, dim2
655 alpha = alpha + weights(idim)
656 CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
657 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
658 zij(2, idim)%local_data, dp)
659 END DO
660
661 ALLOCATE (zij_0(dim2))
662
663 CALL cp_cfm_create(u, zij(1, 1)%matrix_struct)
664 CALL cp_fm_create(matrix_u, zij(1, 1)%matrix_struct)
665
666 CALL cp_cfm_set_all(u, czero, cone)
667 CALL cp_fm_set_all(matrix_u, 0.0_dp, 1.0_dp)
668
669 CALL cp_fm_get_info(vectors, nrow_global=nao)
670 IF (nextra > 0) THEN
671 IF (PRESENT(mos_guess)) THEN
672 do_cinit_random = .false.
673 do_cinit_mo = .true.
674 CALL cp_fm_get_info(mos_guess, ncol_global=ndummy)
675 ELSE
676 do_cinit_random = .true.
677 do_cinit_mo = .false.
678 ndummy = nstate
679 END IF
680
681 IF (do_cinit_random) THEN
682 icinit = 1
683 do_u_guess_mo = .false.
684 ELSEIF (do_cinit_mo) THEN
685 icinit = 2
686 do_u_guess_mo = .true.
687 END IF
688
689 nocc = nstate - nextra
690 northo = nmo - nocc
691 norextra = nmo - nstate
692 CALL cp_fm_struct_get(zij(1, 1)%matrix_struct, context=context)
693
694 ALLOCATE (tmp_cmat(nstate, nstate))
695 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nmo, &
696 para_env=para_env, context=context)
697 DO idim = 1, dim2
698 CALL cp_cfm_create(zij_0(idim), tmp_fm_struct)
699 CALL cp_cfm_set_all(zij_0(idim), czero, cone)
700 CALL cp_cfm_get_submatrix(c_zij(idim), tmp_cmat)
701 CALL cp_cfm_set_submatrix(zij_0(idim), tmp_cmat)
702 END DO
703 CALL cp_fm_struct_release(tmp_fm_struct)
704 DEALLOCATE (tmp_cmat)
705
706 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nstate, &
707 para_env=para_env, context=context)
708 CALL cp_cfm_create(v, tmp_fm_struct)
709 CALL cp_fm_create(matrix_v, tmp_fm_struct)
710 CALL cp_cfm_create(zdiag, tmp_fm_struct)
711 CALL cp_fm_create(rmat, tmp_fm_struct)
712 CALL cp_fm_struct_release(tmp_fm_struct)
713 CALL cp_cfm_set_all(v, czero, cone)
714 CALL cp_fm_set_all(matrix_v, 0.0_dp, 1.0_dp)
715
716 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=ndummy, &
717 para_env=para_env, context=context)
718 CALL cp_fm_create(matrix_v_all, tmp_fm_struct)
719 CALL cp_fm_struct_release(tmp_fm_struct)
720 CALL cp_fm_set_all(matrix_v_all, 0._dp, 1._dp)
721
722 ALLOCATE (arr_zii(nstate))
723
724 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
725 para_env=para_env, context=context)
726 CALL cp_cfm_create(c_tilde, tmp_fm_struct)
727 CALL cp_cfm_create(grad_ctilde, tmp_fm_struct)
728 CALL cp_cfm_create(gct_old, tmp_fm_struct)
729 CALL cp_cfm_create(skc, tmp_fm_struct)
730 CALL cp_fm_struct_release(tmp_fm_struct)
731 CALL cp_cfm_set_all(c_tilde, czero)
732 CALL cp_cfm_set_all(gct_old, czero)
733 CALL cp_cfm_set_all(skc, czero)
734
735 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nstate, &
736 para_env=para_env, context=context)
737 CALL cp_cfm_create(vl, tmp_fm_struct)
738 CALL cp_cfm_set_all(vl, czero)
739 CALL cp_fm_struct_release(tmp_fm_struct)
740
741 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nextra, &
742 para_env=para_env, context=context)
743 CALL cp_fm_create(id_nextra, tmp_fm_struct)
744 CALL cp_cfm_create(ctrans_lambda, tmp_fm_struct)
745 CALL cp_fm_struct_release(tmp_fm_struct)
746 CALL cp_cfm_set_all(ctrans_lambda, czero)
747 CALL cp_fm_set_all(id_nextra, 0.0_dp, 1.0_dp)
748
749 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nstate, &
750 para_env=para_env, context=context)
751 CALL cp_cfm_create(ul, tmp_fm_struct)
752 CALL cp_fm_struct_release(tmp_fm_struct)
753 CALL cp_cfm_set_all(ul, czero)
754
755 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nao, ncol_global=nmo, &
756 para_env=para_env, context=context)
757 CALL cp_fm_create(vectors_all, tmp_fm_struct)
758 CALL cp_fm_struct_release(tmp_fm_struct)
759 ALLOCATE (tmp_mat(nao, nstate))
760 CALL cp_fm_get_submatrix(vectors, tmp_mat)
761 CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, 1, nao, nstate)
762 DEALLOCATE (tmp_mat)
763 ALLOCATE (tmp_mat(nao, norextra))
764 CALL cp_fm_get_submatrix(vectors_2, tmp_mat)
765 CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, nstate + 1, nao, norextra)
766 DEALLOCATE (tmp_mat)
767
768 ! initialize c_tilde
769 SELECT CASE (icinit)
770 CASE (1) ! random coefficients
771 !WRITE (*, *) "RANDOM INITIAL GUESS FOR C"
772 CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
773 CALL cp_fm_init_random(tmp_fm, nextra)
774 CALL ortho_vectors(tmp_fm)
775 c_tilde%local_data = tmp_fm%local_data
776 CALL cp_fm_release(tmp_fm)
777 ALLOCATE (tmp_cmat(northo, nextra))
778 CALL cp_cfm_get_submatrix(c_tilde, tmp_cmat)
779 CALL cp_cfm_set_submatrix(v, tmp_cmat, nocc + 1, nocc + 1, northo, nextra)
780 DEALLOCATE (tmp_cmat)
781 CASE (2) ! MO based coeffs
782 CALL parallel_gemm("T", "N", nmo, ndummy, nao, 1.0_dp, vectors_all, mos_guess, 0.0_dp, matrix_v_all)
783 ALLOCATE (tmp_arr(nmo))
784 ALLOCATE (tmp_mat(nmo, ndummy))
785 ALLOCATE (tmp_mat_1(nmo, nstate))
786 ! normalize matrix_V_all
787 CALL cp_fm_get_submatrix(matrix_v_all, tmp_mat)
788 DO istate = 1, ndummy
789 tmp_arr(:) = tmp_mat(:, istate)
790 norm = norm2(tmp_arr)
791 tmp_arr(:) = tmp_arr(:)/norm
792 tmp_mat(:, istate) = tmp_arr(:)
793 END DO
794 CALL cp_fm_set_submatrix(matrix_v_all, tmp_mat)
795 CALL cp_fm_get_submatrix(matrix_v_all, tmp_mat_1, 1, 1, nmo, nstate)
796 CALL cp_fm_set_submatrix(matrix_v, tmp_mat_1)
797 DEALLOCATE (tmp_arr, tmp_mat, tmp_mat_1)
798 CALL cp_fm_to_cfm(msourcer=matrix_v, mtarget=v)
799 ALLOCATE (tmp_mat(northo, ndummy))
800 ALLOCATE (tmp_mat_1(northo, nextra))
801 CALL cp_fm_get_submatrix(matrix_v_all, tmp_mat, nocc + 1, 1, northo, ndummy)
802 ALLOCATE (tmp_arr(ndummy))
803 tmp_arr = 0.0_dp
804 DO istate = 1, ndummy
805 tmp_arr(istate) = norm2(tmp_mat(:, istate))
806 END DO
807 ! find edfs
808 DO istate = 1, nextra
809 iloc = maxloc(tmp_arr)
810 tmp_mat_1(:, istate) = tmp_mat(:, iloc(1))
811 tmp_arr(iloc(1)) = 0.0_dp
812 END DO
813
814 DEALLOCATE (tmp_arr, tmp_mat)
815
816 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
817 para_env=para_env, context=context)
818 CALL cp_fm_create(tmp_fm, tmp_fm_struct)
819 CALL cp_fm_struct_release(tmp_fm_struct)
820 CALL cp_fm_set_submatrix(tmp_fm, tmp_mat_1)
821 DEALLOCATE (tmp_mat_1)
822 CALL ortho_vectors(tmp_fm)
823 CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
824 CALL cp_fm_release(tmp_fm)
825 ! initialize U
826 IF (do_u_guess_mo) THEN
827 ALLOCATE (tmp_cmat(nocc, nstate))
828 CALL cp_cfm_get_submatrix(v, tmp_cmat, 1, 1, nocc, nstate)
829 CALL cp_cfm_set_submatrix(u, tmp_cmat, 1, 1, nocc, nstate)
830 DEALLOCATE (tmp_cmat)
831 ALLOCATE (tmp_cmat(northo, nstate))
832 CALL cp_cfm_get_submatrix(v, tmp_cmat, nocc + 1, 1, northo, nstate)
833 CALL cp_cfm_set_submatrix(vl, tmp_cmat, 1, 1, northo, nstate)
834 DEALLOCATE (tmp_cmat)
835 CALL parallel_gemm("C", "N", nextra, nstate, northo, cone, c_tilde, vl, czero, ul)
836 ALLOCATE (tmp_cmat(nextra, nstate))
837 CALL cp_cfm_get_submatrix(ul, tmp_cmat, 1, 1, nextra, nstate)
838 CALL cp_cfm_set_submatrix(u, tmp_cmat, nocc + 1, 1, nextra, nstate)
839 DEALLOCATE (tmp_cmat)
840 CALL cp_fm_create(tmp_fm, u%matrix_struct)
841 tmp_fm%local_data = real(u%local_data, kind=dp)
842 CALL ortho_vectors(tmp_fm)
843 CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=u)
844 CALL cp_fm_release(tmp_fm)
845 CALL cp_cfm_to_fm(u, matrix_u)
846 END IF
847 ! reevaluate V
848 ALLOCATE (tmp_cmat(nocc, nstate))
849 CALL cp_cfm_get_submatrix(u, tmp_cmat, 1, 1, nocc, nstate)
850 CALL cp_cfm_set_submatrix(v, tmp_cmat, 1, 1, nocc, nstate)
851 DEALLOCATE (tmp_cmat)
852 ALLOCATE (tmp_cmat(nextra, nstate))
853 CALL cp_cfm_get_submatrix(u, tmp_cmat, nocc + 1, 1, nextra, nstate)
854 CALL cp_cfm_set_submatrix(ul, tmp_cmat, 1, 1, nextra, nstate)
855 DEALLOCATE (tmp_cmat)
856 CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
857 ALLOCATE (tmp_cmat(northo, nstate))
858 CALL cp_cfm_get_submatrix(vl, tmp_cmat)
859 CALL cp_cfm_set_submatrix(v, tmp_cmat, nocc + 1, 1, northo, nstate)
860 DEALLOCATE (tmp_cmat)
861 END SELECT
862 ELSE
863 DO idim = 1, dim2
864 CALL cp_cfm_create(zij_0(idim), zij(1, 1)%matrix_struct)
865 CALL cp_cfm_to_cfm(c_zij(idim), zij_0(idim))
866 END DO
867 CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
868 CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
869 END IF
870
871 unit_nr = -1
872 IF (rmat%matrix_struct%para_env%is_source()) THEN
874 WRITE (unit_nr, '(T4,A )') " Localization by combined Jacobi rotations and Non-Linear Conjugate Gradient"
875 END IF
876
877 norm2_old = 1.0e30_dp
878 ds_min = 1.0_dp
879 new_direction = .true.
880 iter = 0
881 line_searches = 0
882 line_search_count = 0
883 tol = 1.0e+20_dp
884 mintol = 1.0e+10_dp
885 miniter = 0
886
887 !IF (nextra > 0) WRITE(*,*) 'random_guess, MO_guess, U_guess, conjugate_gradient: ', &
888 ! do_cinit_random, do_cinit_mo, do_U_guess_mo, do_cg
889
890 ! do conjugate gradient until converged
891 DO WHILE (iter < max_iter)
892 iter = iter + 1
893 !WRITE(*,*) 'iter = ', iter
894 t1 = m_walltime()
895
896 IF (iter > 1) THEN
897 ! comput U
898 CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
899 CALL cp_cfm_create(tmp_cfm_2, zij(1, 1)%matrix_struct)
900 IF (para_env%num_pe == 1) THEN
901 CALL jacobi_rotations_serial_1(weights, c_zij, 1, tmp_cfm_2, tol_out=tol)
902 ELSE
903 CALL jacobi_rot_para_1(weights, c_zij, para_env, 1, tmp_cfm_2, tol_out=tol)
904 END IF
905 CALL parallel_gemm('N', 'N', nstate, nstate, nstate, cone, u, tmp_cfm_2, czero, tmp_cfm)
906 CALL cp_cfm_to_cfm(tmp_cfm, u)
907 CALL cp_cfm_release(tmp_cfm)
908 CALL cp_cfm_release(tmp_cfm_2)
909 END IF
910
911 IF (nextra > 0) THEN
912 ALLOCATE (tmp_cmat(nextra, nstate))
913 CALL cp_cfm_get_submatrix(u, tmp_cmat, nocc + 1, 1, nextra, nstate)
914 CALL cp_cfm_set_submatrix(ul, tmp_cmat)
915 DEALLOCATE (tmp_cmat)
916 IF (iter > 1) THEN
917 ! orthonormalize c_tilde
918 CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
919 tmp_fm%local_data = real(c_tilde%local_data, kind=dp)
920 CALL ortho_vectors(tmp_fm)
921 CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
922 CALL cp_fm_release(tmp_fm)
923
924 ALLOCATE (tmp_cmat(nocc, nstate))
925 CALL cp_cfm_get_submatrix(u, tmp_cmat, 1, 1, nocc, nstate)
926 CALL cp_cfm_set_submatrix(v, tmp_cmat, 1, 1, nocc, nstate)
927 DEALLOCATE (tmp_cmat)
928 CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
929 ALLOCATE (tmp_cmat(northo, nstate))
930 CALL cp_cfm_get_submatrix(vl, tmp_cmat)
931 CALL cp_cfm_set_submatrix(v, tmp_cmat, nocc + 1, 1, northo, nstate)
932 DEALLOCATE (tmp_cmat)
933 END IF
934
935 ! reset if new_direction
936 IF (new_direction .AND. mod(line_searches, 20) .EQ. 5) THEN
937 CALL cp_cfm_set_all(skc, czero)
938 CALL cp_cfm_set_all(gct_old, czero)
939 norm2_old = 1.0e30_dp
940 END IF
941
942 CALL cp_cfm_create(tmp_cfm, v%matrix_struct)
943 CALL cp_cfm_to_cfm(v, tmp_cfm)
944 CALL cp_cfm_create(tmp_cfm_1, v%matrix_struct)
945 ndummy = nmo
946 ELSE
947 CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
948 CALL cp_cfm_to_cfm(u, tmp_cfm)
949 CALL cp_cfm_create(tmp_cfm_1, zij(1, 1)%matrix_struct)
950 ndummy = nstate
951 END IF
952 ! update z_ij
953 DO idim = 1, dim2
954 ! 'tmp_cfm_1 = zij_0*tmp_cfm'
955 CALL parallel_gemm("N", "N", ndummy, nstate, ndummy, cone, zij_0(idim), &
956 tmp_cfm, czero, tmp_cfm_1)
957 ! 'c_zij = tmp_cfm_dagg*tmp_cfm_1'
958 CALL parallel_gemm("C", "N", nstate, nstate, ndummy, cone, tmp_cfm, tmp_cfm_1, &
959 czero, c_zij(idim))
960 END DO
961 CALL cp_cfm_release(tmp_cfm)
962 CALL cp_cfm_release(tmp_cfm_1)
963 ! compute spread
964 DO istate = 1, nstate
965 spread_ii = 0.0_dp
966 DO idim = 1, dim2
967 CALL cp_cfm_get_element(c_zij(idim), istate, istate, mzii)
968 spread_ii = spread_ii + weights(idim)* &
969 abs(mzii)**2/twopi/twopi
970 matrix_zii(istate, idim) = mzii
971 END DO
972 !WRITE(*,*) 'spread_ii', spread_ii
973 sum_spread(istate) = spread_ii
974 END DO
975 CALL c_zij(1)%matrix_struct%para_env%sum(spread_ii)
976 spread_sum = accurate_sum(sum_spread)
977
978 IF (nextra > 0) THEN
979 ! update c_tilde
980 CALL cp_cfm_set_all(zdiag, czero)
981 CALL cp_cfm_set_all(grad_ctilde, czero)
982 CALL cp_cfm_create(tmp_cfm, v%matrix_struct)
983 CALL cp_cfm_set_all(tmp_cfm, czero)
984 CALL cp_cfm_create(tmp_cfm_1, v%matrix_struct)
985 CALL cp_cfm_set_all(tmp_cfm_1, czero)
986 ALLOCATE (tmp_cmat(northo, nstate))
987 DO idim = 1, dim2
988 weight = weights(idim)
989 arr_zii = matrix_zii(:, idim)
990 ! tmp_cfm = zij_0*V
991 CALL parallel_gemm("N", "N", nmo, nstate, nmo, cone, &
992 zij_0(idim), v, czero, tmp_cfm)
993 ! tmp_cfm = tmp_cfm*diag_zij_dagg
994 CALL cp_cfm_column_scale(tmp_cfm, conjg(arr_zii))
995 ! tmp_cfm_1 = tmp_cfm*U_dagg
996 CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
997 u, czero, tmp_cfm_1)
998 CALL cp_cfm_scale(weight, tmp_cfm_1)
999 ! zdiag = zdiag + tmp_cfm_1'
1000 CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
1001
1002 ! tmp_cfm = zij_0_dagg*V
1003 CALL parallel_gemm("C", "N", nmo, nstate, nmo, cone, &
1004 zij_0(idim), v, czero, tmp_cfm)
1005
1006 ! tmp_cfm = tmp_cfm*diag_zij
1007 CALL cp_cfm_column_scale(tmp_cfm, arr_zii)
1008 ! tmp_cfm_1 = tmp_cfm*U_dagg
1009 CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
1010 u, czero, tmp_cfm_1)
1011 CALL cp_cfm_scale(weight, tmp_cfm_1)
1012 ! zdiag = zdiag + tmp_cfm_1'
1013 CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
1014 END DO ! idim
1015 CALL cp_cfm_release(tmp_cfm)
1016 CALL cp_cfm_release(tmp_cfm_1)
1017 DEALLOCATE (tmp_cmat)
1018 ALLOCATE (tmp_cmat(northo, nextra))
1019 CALL cp_cfm_get_submatrix(zdiag, tmp_cmat, nocc + 1, nocc + 1, &
1020 northo, nextra, .false.)
1021 ! 'grad_ctilde'
1022 CALL cp_cfm_set_submatrix(grad_ctilde, tmp_cmat)
1023 DEALLOCATE (tmp_cmat)
1024 ! ctrans_lambda = c_tilde_dagg*grad_ctilde
1025 CALL parallel_gemm("C", "N", nextra, nextra, northo, cone, c_tilde, grad_ctilde, czero, ctrans_lambda)
1026 !WRITE(*,*) "norm(ctrans_lambda) = ", cp_cfm_norm(ctrans_lambda, "F")
1027 ! 'grad_ctilde = - c_tilde*ctrans_lambda + grad_ctilde'
1028 CALL parallel_gemm("N", "N", northo, nextra, nextra, -cone, c_tilde, ctrans_lambda, cone, grad_ctilde)
1029 END IF ! nextra > 0
1030
1031 ! tolerance
1032 IF (nextra > 0) THEN
1033 tolc = 0.0_dp
1034 CALL cp_fm_create(tmp_fm, grad_ctilde%matrix_struct)
1035 CALL cp_cfm_to_fm(grad_ctilde, tmp_fm)
1036 CALL cp_fm_maxabsval(tmp_fm, tolc)
1037 CALL cp_fm_release(tmp_fm)
1038 !WRITE(*,*) 'tolc = ', tolc
1039 tol = tol + tolc
1040 END IF
1041 !WRITE(*,*) 'tol = ', tol
1042
1043 IF (nextra > 0) THEN
1044 !WRITE(*,*) 'new_direction: ', new_direction
1045 IF (new_direction) THEN
1046 line_searches = line_searches + 1
1047 IF (mintol > tol) THEN
1048 mintol = tol
1049 miniter = iter
1050 END IF
1051
1052 IF (unit_nr > 0 .AND. modulo(iter, out_each) == 0) THEN
1053 sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1054 avg_spread_ii = sum_spread_ii/nstate
1055 WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1056 "Iteration", "Avg. Spread_ii", "Tolerance"
1057 WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1058 iter, avg_spread_ii, tol
1059 CALL m_flush(unit_nr)
1060 END IF
1061 IF (tol < eps_localization) EXIT
1062
1063 IF (do_cg) THEN
1064 cnorm2_gct = czero
1065 cnorm2_gct_cross = czero
1066 CALL cp_cfm_trace(grad_ctilde, gct_old, cnorm2_gct_cross)
1067 norm2_gct_cross = real(cnorm2_gct_cross, kind=dp)
1068 gct_old%local_data = grad_ctilde%local_data
1069 CALL cp_cfm_trace(grad_ctilde, gct_old, cnorm2_gct)
1070 norm2_gct = real(cnorm2_gct, kind=dp)
1071 ! compute beta_pr
1072 beta_pr = (norm2_gct - norm2_gct_cross)/norm2_old
1073 norm2_old = norm2_gct
1074 beta = max(0.0_dp, beta_pr)
1075 !WRITE(*,*) 'beta = ', beta
1076 ! compute skc / ska = beta * skc / ska + grad_ctilde / G
1077 CALL cp_cfm_scale(beta, skc)
1078 CALL cp_cfm_scale_and_add(cone, skc, cone, gct_old)
1079 CALL cp_cfm_trace(skc, gct_old, cnorm2_gct_cross)
1080 norm2_gct_cross = real(cnorm2_gct_cross, kind=dp)
1081 IF (norm2_gct_cross .LE. 0.0_dp) THEN ! back to steepest ascent
1082 CALL cp_cfm_scale_and_add(czero, skc, cone, gct_old)
1083 END IF
1084 ELSE
1085 CALL cp_cfm_scale_and_add(czero, skc, cone, grad_ctilde)
1086 END IF
1087 line_search_count = 0
1088 END IF
1089
1090 line_search_count = line_search_count + 1
1091 !WRITE(*,*) 'line_search_count = ', line_search_count
1092 energy(line_search_count) = spread_sum
1093
1094 ! gold line search
1095 new_direction = .false.
1096 IF (line_search_count .EQ. 1) THEN
1097 lsl = 1
1098 lsr = 0
1099 lsm = 1
1100 pos(1) = 0.0_dp
1101 pos(2) = ds_min/gold_sec
1102 ds = pos(2)
1103 ELSE
1104 IF (line_search_count .EQ. 50) THEN
1105 IF (abs(energy(line_search_count) - energy(line_search_count - 1)) < 1.0e-4_dp) THEN
1106 cpwarn("Line search failed to converge properly")
1107 ds_min = 0.1_dp
1108 new_direction = .true.
1109 ds = pos(line_search_count)
1110 line_search_count = 0
1111 ELSE
1112 cpabort("No. of line searches exceeds 50")
1113 END IF
1114 ELSE
1115 IF (lsr .EQ. 0) THEN
1116 IF (energy(line_search_count - 1) .GT. energy(line_search_count)) THEN
1117 lsr = line_search_count
1118 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
1119 ELSE
1120 lsl = lsm
1121 lsm = line_search_count
1122 pos(line_search_count + 1) = pos(line_search_count)/gold_sec
1123 END IF
1124 ELSE
1125 IF (pos(line_search_count) .LT. pos(lsm)) THEN
1126 IF (energy(line_search_count) .GT. energy(lsm)) THEN
1127 lsr = lsm
1128 lsm = line_search_count
1129 ELSE
1130 lsl = line_search_count
1131 END IF
1132 ELSE
1133 IF (energy(line_search_count) .GT. energy(lsm)) THEN
1134 lsl = lsm
1135 lsm = line_search_count
1136 ELSE
1137 lsr = line_search_count
1138 END IF
1139 END IF
1140 IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl)) THEN
1141 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
1142 ELSE
1143 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
1144 END IF
1145 IF ((pos(lsr) - pos(lsl)) .LT. 1.0e-3_dp*pos(lsr)) THEN
1146 new_direction = .true.
1147 END IF
1148 END IF ! lsr .eq. 0
1149 END IF ! line_search_count .eq. 50
1150 ! now go to the suggested point
1151 ds = pos(line_search_count + 1) - pos(line_search_count)
1152 !WRITE(*,*) 'lsl, lsr, lsm, ds = ', lsl, lsr, lsm, ds
1153 IF ((abs(ds) < 1.0e-10_dp) .AND. (lsl == 1)) THEN
1154 new_direction = .true.
1155 ds_min = 0.5_dp/alpha
1156 ELSEIF (abs(ds) > 10.0_dp) THEN
1157 new_direction = .true.
1158 ds_min = 0.5_dp/alpha
1159 ELSE
1160 ds_min = pos(line_search_count + 1)
1161 END IF
1162 END IF ! first step
1163 ! 'c_tilde = c_tilde + d*skc'
1164 CALL cp_cfm_scale(ds, skc)
1165 CALL cp_cfm_scale_and_add(cone, c_tilde, cone, skc)
1166 ELSE
1167 IF (mintol > tol) THEN
1168 mintol = tol
1169 miniter = iter
1170 END IF
1171 IF (unit_nr > 0 .AND. modulo(iter, out_each) == 0) THEN
1172 sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1173 avg_spread_ii = sum_spread_ii/nstate
1174 WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1175 "Iteration", "Avg. Spread_ii", "Tolerance"
1176 WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1177 iter, avg_spread_ii, tol
1178 CALL m_flush(unit_nr)
1179 END IF
1180 IF (tol < eps_localization) EXIT
1181 END IF ! nextra > 0
1182
1183 END DO ! iteration
1184
1185 IF ((unit_nr > 0) .AND. (iter == max_iter)) THEN
1186 WRITE (unit_nr, '(T4,A,T4,A)') "Min. Itr.", "Min. Tol."
1187 WRITE (unit_nr, '(T4,I7,T4,E12.4)') miniter, mintol
1188 CALL m_flush(unit_nr)
1189 END IF
1190
1191 CALL cp_cfm_to_fm(u, matrix_u)
1192
1193 IF (nextra > 0) THEN
1194 rmat%local_data = real(v%local_data, kind=dp)
1195 CALL rotate_orbitals_edf(rmat, vectors_all, vectors)
1196
1197 CALL cp_cfm_release(c_tilde)
1198 CALL cp_cfm_release(grad_ctilde)
1199 CALL cp_cfm_release(gct_old)
1200 CALL cp_cfm_release(skc)
1201 CALL cp_cfm_release(ul)
1202 CALL cp_cfm_release(zdiag)
1203 CALL cp_cfm_release(ctrans_lambda)
1204 CALL cp_fm_release(id_nextra)
1205 CALL cp_fm_release(vectors_all)
1206 CALL cp_cfm_release(v)
1207 CALL cp_fm_release(matrix_v)
1208 CALL cp_fm_release(matrix_v_all)
1209 CALL cp_cfm_release(vl)
1210 DEALLOCATE (arr_zii)
1211 ELSE
1212 rmat%local_data = matrix_u%local_data
1213 CALL rotate_orbitals(rmat, vectors)
1214 END IF
1215 DO idim = 1, dim2
1216 CALL cp_cfm_release(zij_0(idim))
1217 END DO
1218 DEALLOCATE (zij_0)
1219
1220 DO idim = 1, dim2
1221 zij(1, idim)%local_data = real(c_zij(idim)%local_data, dp)
1222 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
1223 CALL cp_cfm_release(c_zij(idim))
1224 END DO
1225 DEALLOCATE (c_zij)
1226 CALL cp_fm_release(rmat)
1227 CALL cp_cfm_release(u)
1228 CALL cp_fm_release(matrix_u)
1229 DEALLOCATE (matrix_zii, sum_spread)
1230
1231 CALL timestop(handle)
1232
1233 END SUBROUTINE jacobi_cg_edf_ls
1234
1235! **************************************************************************************************
1236!> \brief ...
1237!> \param vmatrix ...
1238! **************************************************************************************************
1239 SUBROUTINE ortho_vectors(vmatrix)
1240
1241 TYPE(cp_fm_type), INTENT(IN) :: vmatrix
1242
1243 CHARACTER(LEN=*), PARAMETER :: routinen = 'ortho_vectors'
1244
1245 INTEGER :: handle, n, ncol
1246 TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
1247 TYPE(cp_fm_type) :: overlap_vv
1248
1249 CALL timeset(routinen, handle)
1250
1251 NULLIFY (fm_struct_tmp)
1252
1253 CALL cp_fm_get_info(matrix=vmatrix, nrow_global=n, ncol_global=ncol)
1254
1255 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ncol, ncol_global=ncol, &
1256 para_env=vmatrix%matrix_struct%para_env, &
1257 context=vmatrix%matrix_struct%context)
1258 CALL cp_fm_create(overlap_vv, fm_struct_tmp, "overlap_vv")
1259 CALL cp_fm_struct_release(fm_struct_tmp)
1260
1261 CALL parallel_gemm('T', 'N', ncol, ncol, n, 1.0_dp, vmatrix, vmatrix, 0.0_dp, overlap_vv)
1262 CALL cp_fm_cholesky_decompose(overlap_vv)
1263 CALL cp_fm_triangular_multiply(overlap_vv, vmatrix, n_cols=ncol, side='R', invert_tr=.true.)
1264
1265 CALL cp_fm_release(overlap_vv)
1266
1267 CALL timestop(handle)
1268
1269 END SUBROUTINE ortho_vectors
1270
1271! **************************************************************************************************
1272!> \brief ...
1273!> \param istate ...
1274!> \param jstate ...
1275!> \param st ...
1276!> \param ct ...
1277!> \param zij ...
1278! **************************************************************************************************
1279 SUBROUTINE rotate_zij(istate, jstate, st, ct, zij)
1280 INTEGER, INTENT(IN) :: istate, jstate
1281 REAL(kind=dp), INTENT(IN) :: st, ct
1282 TYPE(cp_cfm_type) :: zij(:)
1283
1284 INTEGER :: id
1285
1286! Locals
1287
1288 DO id = 1, SIZE(zij, 1)
1289 CALL cp_cfm_rot_cols(zij(id), istate, jstate, ct, st)
1290 CALL cp_cfm_rot_rows(zij(id), istate, jstate, ct, st)
1291 END DO
1292
1293 END SUBROUTINE rotate_zij
1294! **************************************************************************************************
1295!> \brief ...
1296!> \param istate ...
1297!> \param jstate ...
1298!> \param st ...
1299!> \param ct ...
1300!> \param rmat ...
1301! **************************************************************************************************
1302 SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat)
1303 INTEGER, INTENT(IN) :: istate, jstate
1304 REAL(kind=dp), INTENT(IN) :: st, ct
1305 TYPE(cp_cfm_type), INTENT(IN) :: rmat
1306
1307 CALL cp_cfm_rot_cols(rmat, istate, jstate, ct, st)
1308
1309 END SUBROUTINE rotate_rmat
1310! **************************************************************************************************
1311!> \brief ...
1312!> \param mii ...
1313!> \param mjj ...
1314!> \param mij ...
1315!> \param weights ...
1316!> \param theta ...
1317!> \param grad_ij ...
1318!> \param step ...
1319! **************************************************************************************************
1320 SUBROUTINE get_angle(mii, mjj, mij, weights, theta, grad_ij, step)
1321 COMPLEX(KIND=dp), POINTER :: mii(:), mjj(:), mij(:)
1322 REAL(kind=dp), INTENT(IN) :: weights(:)
1323 REAL(kind=dp), INTENT(OUT) :: theta
1324 REAL(kind=dp), INTENT(IN), OPTIONAL :: grad_ij, step
1325
1326 COMPLEX(KIND=dp) :: z11, z12, z22
1327 INTEGER :: dim_m, idim
1328 REAL(kind=dp) :: a12, b12, d2, ratio
1329
1330 a12 = 0.0_dp
1331 b12 = 0.0_dp
1332 dim_m = SIZE(mii)
1333 DO idim = 1, dim_m
1334 z11 = mii(idim)
1335 z22 = mjj(idim)
1336 z12 = mij(idim)
1337 a12 = a12 + weights(idim)*real(conjg(z12)*(z11 - z22), kind=dp)
1338 b12 = b12 + weights(idim)*real((z12*conjg(z12) - &
1339 0.25_dp*(z11 - z22)*(conjg(z11) - conjg(z22))), kind=dp)
1340 END DO
1341 IF (abs(b12) > 1.e-10_dp) THEN
1342 ratio = -a12/b12
1343 theta = 0.25_dp*atan(ratio)
1344 ELSEIF (abs(b12) < 1.e-10_dp) THEN
1345 b12 = 0.0_dp
1346 theta = 0.0_dp
1347 ELSE
1348 theta = 0.25_dp*pi
1349 END IF
1350 IF (PRESENT(grad_ij)) theta = theta + step*grad_ij
1351! Check second derivative info
1352 d2 = a12*sin(4._dp*theta) - b12*cos(4._dp*theta)
1353 IF (d2 <= 0._dp) THEN ! go to the maximum, not the minimum
1354 IF (theta > 0.0_dp) THEN ! make theta as small as possible
1355 theta = theta - 0.25_dp*pi
1356 ELSE
1357 theta = theta + 0.25_dp*pi
1358 END IF
1359 END IF
1360 END SUBROUTINE get_angle
1361! **************************************************************************************************
1362!> \brief ...
1363!> \param zij ...
1364!> \param weights ...
1365!> \param tolerance ...
1366!> \param grad ...
1367! **************************************************************************************************
1368 SUBROUTINE check_tolerance(zij, weights, tolerance, grad)
1369 TYPE(cp_cfm_type) :: zij(:)
1370 REAL(kind=dp), INTENT(IN) :: weights(:)
1371 REAL(kind=dp), INTENT(OUT) :: tolerance
1372 TYPE(cp_fm_type), INTENT(OUT), OPTIONAL :: grad
1373
1374 CHARACTER(len=*), PARAMETER :: routinen = 'check_tolerance'
1375
1376 INTEGER :: handle
1377 TYPE(cp_fm_type) :: force
1378
1379 CALL timeset(routinen, handle)
1380
1381! compute gradient at t=0
1382
1383 CALL cp_fm_create(force, zij(1)%matrix_struct)
1384 CALL cp_fm_set_all(force, 0._dp)
1385 CALL grad_at_0(zij, weights, force)
1386 CALL cp_fm_maxabsval(force, tolerance)
1387 IF (PRESENT(grad)) CALL cp_fm_to_fm(force, grad)
1388 CALL cp_fm_release(force)
1389
1390 CALL timestop(handle)
1391
1392 END SUBROUTINE check_tolerance
1393! **************************************************************************************************
1394!> \brief ...
1395!> \param rmat ...
1396!> \param vectors ...
1397! **************************************************************************************************
1398 SUBROUTINE rotate_orbitals(rmat, vectors)
1399 TYPE(cp_fm_type), INTENT(IN) :: rmat, vectors
1400
1401 INTEGER :: k, n
1402 TYPE(cp_fm_type) :: wf
1403
1404 CALL cp_fm_create(wf, vectors%matrix_struct)
1405 CALL cp_fm_get_info(vectors, nrow_global=n, ncol_global=k)
1406 CALL parallel_gemm("N", "N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf)
1407 CALL cp_fm_to_fm(wf, vectors)
1408 CALL cp_fm_release(wf)
1409 END SUBROUTINE rotate_orbitals
1410! **************************************************************************************************
1411!> \brief ...
1412!> \param rmat ...
1413!> \param vec_all ...
1414!> \param vectors ...
1415! **************************************************************************************************
1416 SUBROUTINE rotate_orbitals_edf(rmat, vec_all, vectors)
1417 TYPE(cp_fm_type), INTENT(IN) :: rmat, vec_all, vectors
1418
1419 INTEGER :: k, l, n
1420 TYPE(cp_fm_type) :: wf
1421
1422 CALL cp_fm_create(wf, vectors%matrix_struct)
1423 CALL cp_fm_get_info(vec_all, nrow_global=n, ncol_global=k)
1424 CALL cp_fm_get_info(rmat, ncol_global=l)
1425
1426 CALL parallel_gemm("N", "N", n, l, k, 1.0_dp, vec_all, rmat, 0.0_dp, wf)
1427 CALL cp_fm_to_fm(wf, vectors)
1428 CALL cp_fm_release(wf)
1429 END SUBROUTINE rotate_orbitals_edf
1430! **************************************************************************************************
1431!> \brief ...
1432!> \param diag ...
1433!> \param weights ...
1434!> \param matrix ...
1435!> \param ndim ...
1436! **************************************************************************************************
1437 SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim)
1438 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1439 REAL(kind=dp), INTENT(IN) :: weights(:)
1440 TYPE(cp_fm_type), INTENT(IN) :: matrix
1441 INTEGER, INTENT(IN) :: ndim
1442
1443 COMPLEX(KIND=dp) :: zii, zjj
1444 INTEGER :: idim, istate, jstate, ncol_local, &
1445 nrow_global, nrow_local
1446 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1447 REAL(kind=dp) :: gradsq_ij
1448
1449 CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1450 ncol_local=ncol_local, nrow_global=nrow_global, &
1451 row_indices=row_indices, col_indices=col_indices)
1452
1453 DO istate = 1, nrow_local
1454 DO jstate = 1, ncol_local
1455! get real and imaginary parts
1456 gradsq_ij = 0.0_dp
1457 DO idim = 1, ndim
1458 zii = diag(row_indices(istate), idim)
1459 zjj = diag(col_indices(jstate), idim)
1460 gradsq_ij = gradsq_ij + weights(idim)* &
1461 4.0_dp*real((conjg(zii)*zii + conjg(zjj)*zjj), kind=dp)
1462 END DO
1463 matrix%local_data(istate, jstate) = gradsq_ij
1464 END DO
1465 END DO
1466 END SUBROUTINE gradsq_at_0
1467! **************************************************************************************************
1468!> \brief ...
1469!> \param matrix_p ...
1470!> \param weights ...
1471!> \param matrix ...
1472! **************************************************************************************************
1473 SUBROUTINE grad_at_0(matrix_p, weights, matrix)
1474 TYPE(cp_cfm_type) :: matrix_p(:)
1475 REAL(kind=dp), INTENT(IN) :: weights(:)
1476 TYPE(cp_fm_type), INTENT(IN) :: matrix
1477
1478 COMPLEX(KIND=dp) :: zii, zij, zjj
1479 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1480 INTEGER :: dim_m, idim, istate, jstate, ncol_local, &
1481 nrow_global, nrow_local
1482 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1483 REAL(kind=dp) :: grad_ij
1484
1485 NULLIFY (diag)
1486 CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1487 ncol_local=ncol_local, nrow_global=nrow_global, &
1488 row_indices=row_indices, col_indices=col_indices)
1489 dim_m = SIZE(matrix_p, 1)
1490 ALLOCATE (diag(nrow_global, dim_m))
1491
1492 DO idim = 1, dim_m
1493 DO istate = 1, nrow_global
1494 CALL cp_cfm_get_element(matrix_p(idim), istate, istate, diag(istate, idim))
1495 END DO
1496 END DO
1497
1498 DO istate = 1, nrow_local
1499 DO jstate = 1, ncol_local
1500! get real and imaginary parts
1501 grad_ij = 0.0_dp
1502 DO idim = 1, dim_m
1503 zii = diag(row_indices(istate), idim)
1504 zjj = diag(col_indices(jstate), idim)
1505 zij = matrix_p(idim)%local_data(istate, jstate)
1506 grad_ij = grad_ij + weights(idim)* &
1507 REAL(4.0_dp*conjg(zij)*(zjj - zii), dp)
1508 END DO
1509 matrix%local_data(istate, jstate) = grad_ij
1510 END DO
1511 END DO
1512 DEALLOCATE (diag)
1513 END SUBROUTINE grad_at_0
1514
1515! return energy and maximum gradient in the current point
1516! **************************************************************************************************
1517!> \brief ...
1518!> \param weights ...
1519!> \param zij ...
1520!> \param tolerance ...
1521!> \param value ...
1522! **************************************************************************************************
1523 SUBROUTINE check_tolerance_new(weights, zij, tolerance, value)
1524 REAL(kind=dp), INTENT(IN) :: weights(:)
1525 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :)
1526 REAL(kind=dp) :: tolerance, value
1527
1528 COMPLEX(KIND=dp) :: kii, kij, kjj
1529 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1530 INTEGER :: idim, istate, jstate, ncol_local, &
1531 nrow_global, nrow_local
1532 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1533 REAL(kind=dp) :: grad_ij, ra, rb
1534
1535 NULLIFY (diag)
1536 CALL cp_fm_get_info(zij(1, 1), nrow_local=nrow_local, &
1537 ncol_local=ncol_local, nrow_global=nrow_global, &
1538 row_indices=row_indices, col_indices=col_indices)
1539 ALLOCATE (diag(nrow_global, SIZE(zij, 2)))
1540 value = 0.0_dp
1541 DO idim = 1, SIZE(zij, 2)
1542 DO istate = 1, nrow_global
1543 CALL cp_fm_get_element(zij(1, idim), istate, istate, ra)
1544 CALL cp_fm_get_element(zij(2, idim), istate, istate, rb)
1545 diag(istate, idim) = cmplx(ra, rb, dp)
1546 value = value + weights(idim) - weights(idim)*abs(diag(istate, idim))**2
1547 END DO
1548 END DO
1549 tolerance = 0.0_dp
1550 DO istate = 1, nrow_local
1551 DO jstate = 1, ncol_local
1552 grad_ij = 0.0_dp
1553 DO idim = 1, SIZE(zij, 2)
1554 kii = diag(row_indices(istate), idim)
1555 kjj = diag(col_indices(jstate), idim)
1556 ra = zij(1, idim)%local_data(istate, jstate)
1557 rb = zij(2, idim)%local_data(istate, jstate)
1558 kij = cmplx(ra, rb, dp)
1559 grad_ij = grad_ij + weights(idim)* &
1560 REAL(4.0_dp*conjg(kij)*(kjj - kii), dp)
1561 END DO
1562 tolerance = max(abs(grad_ij), tolerance)
1563 END DO
1564 END DO
1565 CALL zij(1, 1)%matrix_struct%para_env%max(tolerance)
1566
1567 DEALLOCATE (diag)
1568
1569 END SUBROUTINE check_tolerance_new
1570
1571! **************************************************************************************************
1572!> \brief yet another crazy try, computes the angles needed to rotate the orbitals first
1573!> and rotates them all at the same time (hoping for the best of course)
1574!> \param weights ...
1575!> \param zij ...
1576!> \param vectors ...
1577!> \param max_iter ...
1578!> \param max_crazy_angle ...
1579!> \param crazy_scale ...
1580!> \param crazy_use_diag ...
1581!> \param eps_localization ...
1582!> \param iterations ...
1583!> \param converged ...
1584! **************************************************************************************************
1585 SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, &
1586 eps_localization, iterations, converged)
1587 REAL(kind=dp), INTENT(IN) :: weights(:)
1588 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1589 INTEGER, INTENT(IN) :: max_iter
1590 REAL(kind=dp), INTENT(IN) :: max_crazy_angle
1591 REAL(kind=dp) :: crazy_scale
1592 LOGICAL :: crazy_use_diag
1593 REAL(kind=dp), INTENT(IN) :: eps_localization
1594 INTEGER :: iterations
1595 LOGICAL, INTENT(out), OPTIONAL :: converged
1596
1597 CHARACTER(len=*), PARAMETER :: routinen = 'crazy_rotations'
1598 COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1599 czero = (0.0_dp, 0.0_dp)
1600
1601 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1602 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1603 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
1604 INTEGER :: dim2, handle, i, icol, idim, irow, &
1605 method, ncol_global, ncol_local, &
1606 norder, nrow_global, nrow_local, &
1607 nsquare, unit_nr
1608 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1609 LOGICAL :: do_emd
1610 REAL(kind=dp) :: eps_exp, limit_crazy_angle, maxeval, &
1611 norm, ra, rb, theta, tolerance, value
1612 REAL(kind=dp), DIMENSION(:), POINTER :: evals
1613 TYPE(cp_cfm_type) :: cmat_a, cmat_r, cmat_t1
1614 TYPE(cp_fm_type) :: mat_r, mat_t, mat_theta, mat_u
1615
1616 CALL timeset(routinen, handle)
1617 NULLIFY (row_indices, col_indices)
1618 CALL cp_fm_get_info(zij(1, 1), nrow_global=nrow_global, &
1619 ncol_global=ncol_global, &
1620 row_indices=row_indices, col_indices=col_indices, &
1621 nrow_local=nrow_local, ncol_local=ncol_local)
1622
1623 limit_crazy_angle = max_crazy_angle
1624
1625 NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj)
1626 dim2 = SIZE(zij, 2)
1627 ALLOCATE (diag_z(nrow_global, dim2))
1628 ALLOCATE (evals(nrow_global))
1629 ALLOCATE (evals_exp(nrow_global))
1630
1631 CALL cp_cfm_create(cmat_a, zij(1, 1)%matrix_struct)
1632 CALL cp_cfm_create(cmat_r, zij(1, 1)%matrix_struct)
1633 CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1634
1635 CALL cp_fm_create(mat_u, zij(1, 1)%matrix_struct)
1636 CALL cp_fm_create(mat_t, zij(1, 1)%matrix_struct)
1637 CALL cp_fm_create(mat_r, zij(1, 1)%matrix_struct)
1638
1639 CALL cp_fm_create(mat_theta, zij(1, 1)%matrix_struct)
1640
1641 CALL cp_fm_set_all(mat_r, 0.0_dp, 1.0_dp)
1642 CALL cp_fm_set_all(mat_t, 0.0_dp)
1643 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
1644 DO idim = 1, dim2
1645 CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(1, idim))
1646 CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(2, idim))
1647 END DO
1648 CALL cp_fm_syevd(mat_t, mat_u, evals)
1649 DO idim = 1, dim2
1650 ! rotate z's
1651 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1652 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1653 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1654 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1655 END DO
1656 ! collect rotation matrix
1657 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1658 CALL cp_fm_to_fm(mat_t, mat_r)
1659
1660 unit_nr = -1
1661 IF (cmat_a%matrix_struct%para_env%is_source()) THEN
1663 WRITE (unit_nr, '(T2,A7,A6,1X,A20,A12,A12,A12)') &
1664 "CRAZY| ", "Iter", "value ", "gradient", "Max. eval", "limit"
1665 END IF
1666
1667 iterations = 0
1668 tolerance = 1.0_dp
1669
1670 DO
1671 iterations = iterations + 1
1672 DO idim = 1, dim2
1673 DO i = 1, nrow_global
1674 CALL cp_fm_get_element(zij(1, idim), i, i, ra)
1675 CALL cp_fm_get_element(zij(2, idim), i, i, rb)
1676 diag_z(i, idim) = cmplx(ra, rb, dp)
1677 END DO
1678 END DO
1679 DO irow = 1, nrow_local
1680 DO icol = 1, ncol_local
1681 DO idim = 1, dim2
1682 ra = zij(1, idim)%local_data(irow, icol)
1683 rb = zij(2, idim)%local_data(irow, icol)
1684 mij(idim) = cmplx(ra, rb, dp)
1685 mii(idim) = diag_z(row_indices(irow), idim)
1686 mjj(idim) = diag_z(col_indices(icol), idim)
1687 END DO
1688 IF (row_indices(irow) .NE. col_indices(icol)) THEN
1689 CALL get_angle(mii, mjj, mij, weights, theta)
1690 theta = crazy_scale*theta
1691 IF (theta .GT. limit_crazy_angle) theta = limit_crazy_angle
1692 IF (theta .LT. -limit_crazy_angle) theta = -limit_crazy_angle
1693 IF (crazy_use_diag) THEN
1694 cmat_a%local_data(irow, icol) = -cmplx(0.0_dp, theta, dp)
1695 ELSE
1696 mat_theta%local_data(irow, icol) = -theta
1697 END IF
1698 ELSE
1699 IF (crazy_use_diag) THEN
1700 cmat_a%local_data(irow, icol) = czero
1701 ELSE
1702 mat_theta%local_data(irow, icol) = 0.0_dp
1703 END IF
1704 END IF
1705 END DO
1706 END DO
1707
1708 ! construct rotation matrix U based on A using diagonalization
1709 ! alternatively, exp based on repeated squaring could be faster
1710 IF (crazy_use_diag) THEN
1711 CALL cp_cfm_heevd(cmat_a, cmat_r, evals)
1712 maxeval = maxval(abs(evals))
1713 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1714 CALL cp_cfm_to_cfm(cmat_r, cmat_t1)
1715 CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1716 CALL parallel_gemm('N', 'C', nrow_global, nrow_global, nrow_global, cone, &
1717 cmat_t1, cmat_r, czero, cmat_a)
1718 mat_u%local_data = real(cmat_a%local_data, kind=dp) ! U is a real matrix
1719 ELSE
1720 do_emd = .false.
1721 method = 2
1722 eps_exp = 1.0_dp*epsilon(eps_exp)
1723 CALL cp_fm_maxabsrownorm(mat_theta, norm)
1724 maxeval = norm ! an upper bound
1725 CALL get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
1726 CALL exp_pade_real(mat_u, mat_theta, nsquare, norder)
1727 END IF
1728
1729 DO idim = 1, dim2
1730 ! rotate z's
1731 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1732 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1733 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1734 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1735 END DO
1736 ! collect rotation matrix
1737 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1738 CALL cp_fm_to_fm(mat_t, mat_r)
1739
1740 CALL check_tolerance_new(weights, zij, tolerance, value)
1741
1742 IF (unit_nr > 0) THEN
1743 WRITE (unit_nr, '(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') &
1744 "CRAZY| ", iterations, value, tolerance, maxeval, limit_crazy_angle
1745 CALL m_flush(unit_nr)
1746 END IF
1747 IF (tolerance .LT. eps_localization .OR. iterations .GE. max_iter) EXIT
1748 END DO
1749
1750 IF (PRESENT(converged)) converged = (tolerance .LT. eps_localization)
1751
1752 CALL cp_cfm_release(cmat_a)
1753 CALL cp_cfm_release(cmat_r)
1754 CALL cp_cfm_release(cmat_t1)
1755
1756 CALL cp_fm_release(mat_u)
1757 CALL cp_fm_release(mat_t)
1758 CALL cp_fm_release(mat_theta)
1759
1760 CALL rotate_orbitals(mat_r, vectors)
1761
1762 CALL cp_fm_release(mat_r)
1763 DEALLOCATE (evals_exp, evals, diag_z)
1764 DEALLOCATE (mii, mij, mjj)
1765
1766 CALL timestop(handle)
1767
1768 END SUBROUTINE crazy_rotations
1769
1770! **************************************************************************************************
1771!> \brief use the exponential parametrization as described in to perform a direct mini
1772!> Gerd Berghold et al. PRB 61 (15), pag. 10040 (2000)
1773!> none of the input is modified for the time being, just finds the rotations
1774!> that minimizes, and throws it away afterwards :-)
1775!> apart from being expensive and not cleaned, this works fine
1776!> useful to try different spread functionals
1777!> \param weights ...
1778!> \param zij ...
1779!> \param vectors ...
1780!> \param max_iter ...
1781!> \param eps_localization ...
1782!> \param iterations ...
1783! **************************************************************************************************
1784 SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
1785 REAL(kind=dp), INTENT(IN) :: weights(:)
1786 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1787 INTEGER, INTENT(IN) :: max_iter
1788 REAL(kind=dp), INTENT(IN) :: eps_localization
1789 INTEGER :: iterations
1790
1791 CHARACTER(len=*), PARAMETER :: routinen = 'direct_mini'
1792 COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1793 czero = (0.0_dp, 0.0_dp)
1794 REAL(kind=dp), PARAMETER :: gold_sec = 0.3819_dp
1795
1796 COMPLEX(KIND=dp) :: lk, ll, tmp
1797 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1798 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1799 INTEGER :: handle, i, icol, idim, irow, &
1800 line_search_count, line_searches, lsl, &
1801 lsm, lsr, n, ncol_local, ndim, &
1802 nrow_local, output_unit
1803 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1804 LOGICAL :: new_direction
1805 REAL(kind=dp) :: a, b, beta_pr, c, denom, ds, ds_min, fa, &
1806 fb, fc, nom, normg, normg_cross, &
1807 normg_old, npos, omega, tol, val, x0, &
1808 x1, xa, xb, xc
1809 REAL(kind=dp), DIMENSION(150) :: energy, grad, pos
1810 REAL(kind=dp), DIMENSION(:), POINTER :: evals, fval, fvald
1811 TYPE(cp_cfm_type) :: cmat_a, cmat_b, cmat_m, cmat_r, cmat_t1, &
1812 cmat_t2, cmat_u
1813 TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
1814 TYPE(cp_fm_type) :: matrix_a, matrix_g, matrix_g_old, &
1815 matrix_g_search, matrix_h, matrix_r, &
1816 matrix_t
1817
1818 NULLIFY (evals, evals_exp, diag_z, fval, fvald)
1819
1820 CALL timeset(routinen, handle)
1821 output_unit = cp_logger_get_default_io_unit()
1822
1823 n = zij(1, 1)%matrix_struct%nrow_global
1824 ndim = (SIZE(zij, 2))
1825
1826 IF (output_unit > 0) THEN
1827 WRITE (output_unit, '(T4,A )') "Localization by direct minimization of the functional; "
1828 WRITE (output_unit, '(T5,2A13,A20,A20,A10 )') " Line search ", " Iteration ", " Functional ", " Tolerance ", " ds Min "
1829 END IF
1830
1831 ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n))
1832 ALLOCATE (c_zij(ndim))
1833
1834 ! create the three complex matrices Z
1835 DO idim = 1, ndim
1836 CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
1837 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
1838 zij(2, idim)%local_data, dp)
1839 END DO
1840
1841 CALL cp_fm_create(matrix_a, zij(1, 1)%matrix_struct)
1842 CALL cp_fm_create(matrix_g, zij(1, 1)%matrix_struct)
1843 CALL cp_fm_create(matrix_t, zij(1, 1)%matrix_struct)
1844 CALL cp_fm_create(matrix_h, zij(1, 1)%matrix_struct)
1845 CALL cp_fm_create(matrix_g_search, zij(1, 1)%matrix_struct)
1846 CALL cp_fm_create(matrix_g_old, zij(1, 1)%matrix_struct)
1847 CALL cp_fm_create(matrix_r, zij(1, 1)%matrix_struct)
1848 CALL cp_fm_set_all(matrix_r, 0.0_dp, 1.0_dp)
1849
1850 CALL cp_fm_set_all(matrix_a, 0.0_dp)
1851! CALL cp_fm_init_random ( matrix_A )
1852
1853 CALL cp_cfm_create(cmat_a, zij(1, 1)%matrix_struct)
1854 CALL cp_cfm_create(cmat_u, zij(1, 1)%matrix_struct)
1855 CALL cp_cfm_create(cmat_r, zij(1, 1)%matrix_struct)
1856 CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1857 CALL cp_cfm_create(cmat_t2, zij(1, 1)%matrix_struct)
1858 CALL cp_cfm_create(cmat_b, zij(1, 1)%matrix_struct)
1859 CALL cp_cfm_create(cmat_m, zij(1, 1)%matrix_struct)
1860
1861 CALL cp_cfm_get_info(cmat_b, nrow_local=nrow_local, ncol_local=ncol_local, &
1862 row_indices=row_indices, col_indices=col_indices)
1863
1864 CALL cp_fm_set_all(matrix_g_old, 0.0_dp)
1865 CALL cp_fm_set_all(matrix_g_search, 0.0_dp)
1866 normg_old = 1.0e30_dp
1867 ds_min = 1.0_dp
1868 new_direction = .true.
1869 iterations = 0
1870 line_searches = 0
1871 line_search_count = 0
1872 DO
1873 iterations = iterations + 1
1874 ! compute U,R,evals given A
1875 cmat_a%local_data = cmplx(0.0_dp, matrix_a%local_data, dp) ! cmat_A is hermitian, evals are reals
1876 CALL cp_cfm_heevd(cmat_a, cmat_r, evals)
1877 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1878 CALL cp_cfm_to_cfm(cmat_r, cmat_t1)
1879 CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1880 CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_u)
1881 cmat_u%local_data = real(cmat_u%local_data, kind=dp) ! enforce numerics, U is a real matrix
1882
1883 IF (new_direction .AND. mod(line_searches, 20) .EQ. 5) THEN ! reset with A .eq. 0
1884 DO idim = 1, ndim
1885 CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1)
1886 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_u, cmat_t1, czero, c_zij(idim))
1887 END DO
1888 ! collect rotation matrix
1889 matrix_h%local_data = real(cmat_u%local_data, kind=dp)
1890 CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
1891 CALL cp_fm_to_fm(matrix_t, matrix_r)
1892
1893 CALL cp_cfm_set_all(cmat_u, czero, cone)
1894 CALL cp_cfm_set_all(cmat_r, czero, cone)
1895 CALL cp_cfm_set_all(cmat_a, czero)
1896 CALL cp_fm_set_all(matrix_a, 0.0_dp)
1897 evals(:) = 0.0_dp
1898 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1899 CALL cp_fm_set_all(matrix_g_old, 0.0_dp)
1900 CALL cp_fm_set_all(matrix_g_search, 0.0_dp)
1901 normg_old = 1.0e30_dp
1902 END IF
1903
1904 ! compute Omega and M
1905 CALL cp_cfm_set_all(cmat_m, czero)
1906 omega = 0.0_dp
1907 DO idim = 1, ndim
1908 CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1) ! t1=ZU
1909 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_u, cmat_t1, czero, cmat_t2) ! t2=(U^T)ZU
1910 DO i = 1, n
1911 CALL cp_cfm_get_element(cmat_t2, i, i, diag_z(i, idim))
1912 SELECT CASE (2) ! allows for selection of different spread functionals
1913 CASE (1)
1914 fval(i) = -weights(idim)*log(abs(diag_z(i, idim))**2)
1915 fvald(i) = -weights(idim)/(abs(diag_z(i, idim))**2)
1916 CASE (2) ! corresponds to the jacobi setup
1917 fval(i) = weights(idim) - weights(idim)*abs(diag_z(i, idim))**2
1918 fvald(i) = -weights(idim)
1919 END SELECT
1920 omega = omega + fval(i)
1921 END DO
1922 DO icol = 1, ncol_local
1923 DO irow = 1, nrow_local
1924 tmp = cmat_t1%local_data(irow, icol)*conjg(diag_z(col_indices(icol), idim))
1925 cmat_m%local_data(irow, icol) = cmat_m%local_data(irow, icol) &
1926 + 4.0_dp*fvald(col_indices(icol))*real(tmp, kind=dp)
1927 END DO
1928 END DO
1929 END DO
1930
1931 ! compute Hessian diagonal approximation for the preconditioner
1932 IF (.true.) THEN
1933 CALL gradsq_at_0(diag_z, weights, matrix_h, ndim)
1934 ELSE
1935 CALL cp_fm_set_all(matrix_h, 1.0_dp)
1936 END IF
1937
1938 ! compute B
1939 DO icol = 1, ncol_local
1940 DO irow = 1, nrow_local
1941 ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow))
1942 lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol))
1943 IF (abs(ll - lk) .LT. 0.5_dp) THEN ! use a series expansion to avoid loss of precision
1944 tmp = 1.0_dp
1945 cmat_b%local_data(irow, icol) = 0.0_dp
1946 DO i = 1, 16
1947 cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol) + tmp
1948 tmp = tmp*(ll - lk)/(i + 1)
1949 END DO
1950 cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol)*exp(lk)
1951 ELSE
1952 cmat_b%local_data(irow, icol) = (exp(lk) - exp(ll))/(lk - ll)
1953 END IF
1954 END DO
1955 END DO
1956 ! compute gradient matrix_G
1957
1958 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_m, cmat_r, czero, cmat_t1) ! t1=(M^T)(R^T)
1959 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_r, cmat_t1, czero, cmat_t2) ! t2=(R)t1
1960 CALL cp_cfm_schur_product(cmat_t2, cmat_b, cmat_t1)
1961 CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_t2)
1962 CALL parallel_gemm('N', 'N', n, n, n, cone, cmat_r, cmat_t2, czero, cmat_t1)
1963 matrix_g%local_data = real(cmat_t1%local_data, kind=dp)
1964 CALL cp_fm_transpose(matrix_g, matrix_t)
1965 CALL cp_fm_scale_and_add(-1.0_dp, matrix_g, 1.0_dp, matrix_t)
1966 CALL cp_fm_maxabsval(matrix_g, tol)
1967
1968 ! from here on, minimizing technology
1969 IF (new_direction) THEN
1970 ! energy converged up to machine precision ?
1971 line_searches = line_searches + 1
1972 ! DO i=1,line_search_count
1973 ! write(15,*) pos(i),energy(i)
1974 ! ENDDO
1975 ! write(15,*) ""
1976 ! CALL m_flush(15)
1977 !write(16,*) evals(:)
1978 !write(17,*) matrix_A%local_data(:,:)
1979 !write(18,*) matrix_G%local_data(:,:)
1980 IF (output_unit > 0) THEN
1981 WRITE (output_unit, '(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, iterations, omega, tol, ds_min
1982 CALL m_flush(output_unit)
1983 END IF
1984 IF (tol < eps_localization .OR. iterations > max_iter) EXIT
1985
1986 IF (.true.) THEN ! do conjugate gradient CG
1987 CALL cp_fm_trace(matrix_g, matrix_g_old, normg_cross)
1988 normg_cross = normg_cross*0.5_dp ! takes into account the fact that A is antisymmetric
1989 ! apply the preconditioner
1990 DO icol = 1, ncol_local
1991 DO irow = 1, nrow_local
1992 matrix_g_old%local_data(irow, icol) = matrix_g%local_data(irow, icol)/matrix_h%local_data(irow, icol)
1993 END DO
1994 END DO
1995 CALL cp_fm_trace(matrix_g, matrix_g_old, normg)
1996 normg = normg*0.5_dp
1997 beta_pr = (normg - normg_cross)/normg_old
1998 normg_old = normg
1999 beta_pr = max(beta_pr, 0.0_dp)
2000 CALL cp_fm_scale_and_add(beta_pr, matrix_g_search, -1.0_dp, matrix_g_old)
2001 CALL cp_fm_trace(matrix_g_search, matrix_g_old, normg_cross)
2002 IF (normg_cross .GE. 0) THEN ! back to SD
2003 IF (matrix_a%matrix_struct%para_env%is_source()) THEN
2004 WRITE (cp_logger_get_default_unit_nr(), *) "!"
2005 END IF
2006 beta_pr = 0.0_dp
2007 CALL cp_fm_scale_and_add(beta_pr, matrix_g_search, -1.0_dp, matrix_g_old)
2008 END IF
2009 ELSE ! SD
2010 CALL cp_fm_scale_and_add(0.0_dp, matrix_g_search, -1.0_dp, matrix_g)
2011 END IF
2012 ! ds_min=1.0E-4_dp
2013 line_search_count = 0
2014 END IF
2015 line_search_count = line_search_count + 1
2016 energy(line_search_count) = omega
2017
2018 ! line search section
2019 SELECT CASE (3)
2020 CASE (1) ! two point line search
2021 SELECT CASE (line_search_count)
2022 CASE (1)
2023 pos(1) = 0.0_dp
2024 pos(2) = ds_min
2025 CALL cp_fm_trace(matrix_g, matrix_g_search, grad(1))
2026 grad(1) = grad(1)/2.0_dp
2027 new_direction = .false.
2028 CASE (2)
2029 new_direction = .true.
2030 x0 = pos(1) ! 0.0_dp
2031 c = energy(1)
2032 b = grad(1)
2033 x1 = pos(2)
2034 a = (energy(2) - b*x1 - c)/(x1**2)
2035 IF (a .LE. 0.0_dp) a = 1.0e-15_dp
2036 npos = -b/(2.0_dp*a)
2037 val = a*npos**2 + b*npos + c
2038 IF (val .LT. energy(1) .AND. val .LE. energy(2)) THEN
2039 ! we go to a minimum, but ...
2040 ! we take a guard against too large steps
2041 pos(3) = min(npos, maxval(pos(1:2))*4.0_dp)
2042 ELSE ! just take an extended step
2043 pos(3) = maxval(pos(1:2))*2.0_dp
2044 END IF
2045 END SELECT
2046 CASE (2) ! 3 point line search
2047 SELECT CASE (line_search_count)
2048 CASE (1)
2049 new_direction = .false.
2050 pos(1) = 0.0_dp
2051 pos(2) = ds_min*0.8_dp
2052 CASE (2)
2053 new_direction = .false.
2054 IF (energy(2) .GT. energy(1)) THEN
2055 pos(3) = ds_min*0.7_dp
2056 ELSE
2057 pos(3) = ds_min*1.4_dp
2058 END IF
2059 CASE (3)
2060 new_direction = .true.
2061 xa = pos(1)
2062 xb = pos(2)
2063 xc = pos(3)
2064 fa = energy(1)
2065 fb = energy(2)
2066 fc = energy(3)
2067 nom = (xb - xa)**2*(fb - fc) - (xb - xc)**2*(fb - fa)
2068 denom = (xb - xa)*(fb - fc) - (xb - xc)*(fb - fa)
2069 IF (abs(denom) .LE. 1.0e-18_dp*max(abs(fb - fc), abs(fb - fa))) THEN
2070 npos = xb
2071 ELSE
2072 npos = xb - 0.5_dp*nom/denom ! position of the stationary point
2073 END IF
2074 val = (npos - xa)*(npos - xb)*fc/((xc - xa)*(xc - xb)) + &
2075 (npos - xb)*(npos - xc)*fa/((xa - xb)*(xa - xc)) + &
2076 (npos - xc)*(npos - xa)*fb/((xb - xc)*(xb - xa))
2077 IF (val .LT. fa .AND. val .LE. fb .AND. val .LE. fc) THEN ! OK, we go to a minimum
2078 ! we take a guard against too large steps
2079 pos(4) = max(maxval(pos(1:3))*0.01_dp, &
2080 min(npos, maxval(pos(1:3))*4.0_dp))
2081 ELSE ! just take an extended step
2082 pos(4) = maxval(pos(1:3))*2.0_dp
2083 END IF
2084 END SELECT
2085 CASE (3) ! golden section hunt
2086 new_direction = .false.
2087 IF (line_search_count .EQ. 1) THEN
2088 lsl = 1
2089 lsr = 0
2090 lsm = 1
2091 pos(1) = 0.0_dp
2092 pos(2) = ds_min/gold_sec
2093 ELSE
2094 IF (line_search_count .EQ. 150) cpabort("Too many")
2095 IF (lsr .EQ. 0) THEN
2096 IF (energy(line_search_count - 1) .LT. energy(line_search_count)) THEN
2097 lsr = line_search_count
2098 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
2099 ELSE
2100 lsl = lsm
2101 lsm = line_search_count
2102 pos(line_search_count + 1) = pos(line_search_count)/gold_sec
2103 END IF
2104 ELSE
2105 IF (pos(line_search_count) .LT. pos(lsm)) THEN
2106 IF (energy(line_search_count) .LT. energy(lsm)) THEN
2107 lsr = lsm
2108 lsm = line_search_count
2109 ELSE
2110 lsl = line_search_count
2111 END IF
2112 ELSE
2113 IF (energy(line_search_count) .LT. energy(lsm)) THEN
2114 lsl = lsm
2115 lsm = line_search_count
2116 ELSE
2117 lsr = line_search_count
2118 END IF
2119 END IF
2120 IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl)) THEN
2121 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
2122 ELSE
2123 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
2124 END IF
2125 IF ((pos(lsr) - pos(lsl)) .LT. 1.0e-3_dp*pos(lsr)) THEN
2126 new_direction = .true.
2127 END IF
2128 END IF ! lsr .eq. 0
2129 END IF ! first step
2130 END SELECT
2131 ! now go to the suggested point
2132 ds_min = pos(line_search_count + 1)
2133 ds = pos(line_search_count + 1) - pos(line_search_count)
2134 CALL cp_fm_scale_and_add(1.0_dp, matrix_a, ds, matrix_g_search)
2135 END DO
2136
2137 ! collect rotation matrix
2138 matrix_h%local_data = real(cmat_u%local_data, kind=dp)
2139 CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
2140 CALL cp_fm_to_fm(matrix_t, matrix_r)
2141 CALL rotate_orbitals(matrix_r, vectors)
2142 CALL cp_fm_release(matrix_r)
2143
2144 CALL cp_fm_release(matrix_a)
2145 CALL cp_fm_release(matrix_g)
2146 CALL cp_fm_release(matrix_h)
2147 CALL cp_fm_release(matrix_t)
2148 CALL cp_fm_release(matrix_g_search)
2149 CALL cp_fm_release(matrix_g_old)
2150 CALL cp_cfm_release(cmat_a)
2151 CALL cp_cfm_release(cmat_u)
2152 CALL cp_cfm_release(cmat_r)
2153 CALL cp_cfm_release(cmat_t1)
2154 CALL cp_cfm_release(cmat_t2)
2155 CALL cp_cfm_release(cmat_b)
2156 CALL cp_cfm_release(cmat_m)
2157
2158 DEALLOCATE (evals, evals_exp, fval, fvald)
2159
2160 DO idim = 1, SIZE(c_zij)
2161 zij(1, idim)%local_data = real(c_zij(idim)%local_data, dp)
2162 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
2163 CALL cp_cfm_release(c_zij(idim))
2164 END DO
2165 DEALLOCATE (c_zij)
2166 DEALLOCATE (diag_z)
2167
2168 CALL timestop(handle)
2169
2170 END SUBROUTINE
2171
2172! **************************************************************************************************
2173!> \brief Parallel algorithm for jacobi rotations
2174!> \param weights ...
2175!> \param zij ...
2176!> \param vectors ...
2177!> \param para_env ...
2178!> \param max_iter ...
2179!> \param eps_localization ...
2180!> \param sweeps ...
2181!> \param out_each ...
2182!> \param target_time ...
2183!> \param start_time ...
2184!> \param restricted ...
2185!> \par History
2186!> use allgather for improved performance
2187!> \author MI (11.2009)
2188! **************************************************************************************************
2189 SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
2190 sweeps, out_each, target_time, start_time, restricted)
2191
2192 REAL(kind=dp), INTENT(IN) :: weights(:)
2193 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
2194 TYPE(mp_para_env_type), POINTER :: para_env
2195 INTEGER, INTENT(IN) :: max_iter
2196 REAL(kind=dp), INTENT(IN) :: eps_localization
2197 INTEGER :: sweeps
2198 INTEGER, INTENT(IN) :: out_each
2199 REAL(dp) :: target_time, start_time
2200 INTEGER :: restricted
2201
2202 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rot_para'
2203
2204 INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2205 nblock, nblock_max, ns_me, nstate, &
2206 output_unit
2207 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2208 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_im, z_ij_loc_re
2209 REAL(kind=dp) :: xstate
2210 TYPE(cp_fm_type) :: rmat
2211 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2212
2213 CALL timeset(routinen, handle)
2214
2215 output_unit = cp_logger_get_default_io_unit()
2216
2217 NULLIFY (cz_ij_loc)
2218
2219 dim2 = SIZE(zij, 2)
2220
2221 CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
2222 CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
2223
2224 CALL cp_fm_get_info(rmat, nrow_global=nstate)
2225
2226 IF (restricted > 0) THEN
2227 IF (output_unit > 0) THEN
2228 WRITE (output_unit, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
2229 END IF
2230 nstate = nstate - restricted
2231 END IF
2232
2233 ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2234 xstate = real(nstate, dp)/real(para_env%num_pe, dp)
2235 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2236 DO ip = 1, para_env%num_pe
2237 ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2238 ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2239 END DO
2240 nblock_max = 0
2241 DO ip = 0, para_env%num_pe - 1
2242 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2243 nblock_max = max(nblock_max, nblock)
2244 END DO
2245
2246 ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2247 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2248 ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2249 ALLOCATE (cz_ij_loc(dim2))
2250 DO idim = 1, dim2
2251 DO ip = 0, para_env%num_pe - 1
2252 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2253 CALL cp_fm_get_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2254 CALL cp_fm_get_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2255 IF (para_env%mepos == ip) THEN
2256 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, nblock))
2257 DO i = 1, nblock
2258 DO j = 1, nstate
2259 cz_ij_loc(idim)%c_array(j, i) = cmplx(z_ij_loc_re(j, i), z_ij_loc_im(j, i), dp)
2260 END DO
2261 END DO
2262 END IF
2263 END DO ! ip
2264 END DO
2265 DEALLOCATE (z_ij_loc_re)
2266 DEALLOCATE (z_ij_loc_im)
2267
2268 ALLOCATE (rotmat(nstate, 2*nblock_max))
2269
2270 CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, ns_bound, &
2271 cz_ij_loc, rotmat, output_unit, eps_localization=eps_localization, &
2272 target_time=target_time, start_time=start_time)
2273
2274 ilow1 = ns_bound(para_env%mepos, 1)
2275 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2276 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2277 ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2278 DO idim = 1, dim2
2279 DO ip = 0, para_env%num_pe - 1
2280 z_ij_loc_re = 0.0_dp
2281 z_ij_loc_im = 0.0_dp
2282 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2283 IF (ip == para_env%mepos) THEN
2284 ns_me = nblock
2285 DO i = 1, ns_me
2286 ii = ilow1 + i - 1
2287 DO j = 1, nstate
2288 z_ij_loc_re(j, i) = real(cz_ij_loc(idim)%c_array(j, i), dp)
2289 z_ij_loc_im(j, i) = aimag(cz_ij_loc(idim)%c_array(j, i))
2290 END DO
2291 END DO
2292 END IF
2293 CALL para_env%bcast(z_ij_loc_re, ip)
2294 CALL para_env%bcast(z_ij_loc_im, ip)
2295 CALL cp_fm_set_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2296 CALL cp_fm_set_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2297 END DO ! ip
2298 END DO
2299
2300 DO ip = 0, para_env%num_pe - 1
2301 z_ij_loc_re = 0.0_dp
2302 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2303 IF (ip == para_env%mepos) THEN
2304 ns_me = nblock
2305 DO i = 1, ns_me
2306 ii = ilow1 + i - 1
2307 DO j = 1, nstate
2308 z_ij_loc_re(j, i) = rotmat(j, i)
2309 END DO
2310 END DO
2311 END IF
2312 CALL para_env%bcast(z_ij_loc_re, ip)
2313 CALL cp_fm_set_submatrix(rmat, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2314 END DO
2315
2316 DEALLOCATE (z_ij_loc_re)
2317 DEALLOCATE (z_ij_loc_im)
2318 DO idim = 1, dim2
2319 DEALLOCATE (cz_ij_loc(idim)%c_array)
2320 END DO
2321 DEALLOCATE (cz_ij_loc)
2322
2323 CALL para_env%sync()
2324 CALL rotate_orbitals(rmat, vectors)
2325 CALL cp_fm_release(rmat)
2326
2327 DEALLOCATE (rotmat)
2328 DEALLOCATE (ns_bound)
2329
2330 CALL timestop(handle)
2331
2332 END SUBROUTINE jacobi_rot_para
2333
2334! **************************************************************************************************
2335!> \brief almost identical to 'jacobi_rot_para' but with different inout variables
2336!> \param weights ...
2337!> \param czij ...
2338!> \param para_env ...
2339!> \param max_iter ...
2340!> \param rmat ...
2341!> \param tol_out ...
2342!> \author Soumya Ghosh (08/21)
2343! **************************************************************************************************
2344 SUBROUTINE jacobi_rot_para_1(weights, czij, para_env, max_iter, rmat, tol_out)
2345
2346 REAL(kind=dp), INTENT(IN) :: weights(:)
2347 TYPE(cp_cfm_type), INTENT(IN) :: czij(:)
2348 TYPE(mp_para_env_type), POINTER :: para_env
2349 INTEGER, INTENT(IN) :: max_iter
2350 TYPE(cp_cfm_type), INTENT(IN) :: rmat
2351 REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2352
2353 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rot_para_1'
2354
2355 COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: czij_array
2356 INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2357 nblock, nblock_max, ns_me, nstate, &
2358 sweeps
2359 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2360 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_re
2361 REAL(kind=dp) :: xstate
2362 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2363
2364 CALL timeset(routinen, handle)
2365
2366 dim2 = SIZE(czij)
2367
2368 CALL cp_cfm_set_all(rmat, cmplx(0._dp, 0._dp, dp), cmplx(1._dp, 0._dp, dp))
2369
2370 CALL cp_cfm_get_info(rmat, nrow_global=nstate)
2371
2372 ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2373 xstate = real(nstate, dp)/real(para_env%num_pe, dp)
2374 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2375 DO ip = 1, para_env%num_pe
2376 ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2377 ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2378 END DO
2379 nblock_max = 0
2380 DO ip = 0, para_env%num_pe - 1
2381 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2382 nblock_max = max(nblock_max, nblock)
2383 END DO
2384
2385 ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2386 ALLOCATE (czij_array(nstate, nblock_max))
2387 ALLOCATE (cz_ij_loc(dim2))
2388 DO idim = 1, dim2
2389 DO ip = 0, para_env%num_pe - 1
2390 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2391 ! cfm --> allocatable
2392 CALL cp_cfm_get_submatrix(czij(idim), czij_array, 1, ns_bound(ip, 1), nstate, nblock)
2393 IF (para_env%mepos == ip) THEN
2394 ns_me = nblock
2395 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me))
2396 DO i = 1, ns_me
2397 DO j = 1, nstate
2398 cz_ij_loc(idim)%c_array(j, i) = czij_array(j, i)
2399 END DO
2400 END DO
2401 END IF
2402 END DO ! ip
2403 END DO
2404 DEALLOCATE (czij_array)
2405
2406 ALLOCATE (rotmat(nstate, 2*nblock_max))
2407
2408 CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, 1, dim2, nstate, nblock_max, ns_bound, &
2409 cz_ij_loc, rotmat, 0, tol_out=tol_out)
2410
2411 ilow1 = ns_bound(para_env%mepos, 1)
2412 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2413 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2414
2415 DO ip = 0, para_env%num_pe - 1
2416 z_ij_loc_re = 0.0_dp
2417 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2418 IF (ip == para_env%mepos) THEN
2419 ns_me = nblock
2420 DO i = 1, ns_me
2421 ii = ilow1 + i - 1
2422 DO j = 1, nstate
2423 z_ij_loc_re(j, i) = rotmat(j, i)
2424 END DO
2425 END DO
2426 END IF
2427 CALL para_env%bcast(z_ij_loc_re, ip)
2428 CALL cp_cfm_set_submatrix(rmat, cmplx(z_ij_loc_re, 0.0_dp, dp), 1, ns_bound(ip, 1), nstate, nblock)
2429 END DO
2430
2431 DEALLOCATE (z_ij_loc_re)
2432 DO idim = 1, dim2
2433 DEALLOCATE (cz_ij_loc(idim)%c_array)
2434 END DO
2435 DEALLOCATE (cz_ij_loc)
2436
2437 CALL para_env%sync()
2438
2439 DEALLOCATE (rotmat)
2440 DEALLOCATE (ns_bound)
2441
2442 CALL timestop(handle)
2443
2444 END SUBROUTINE jacobi_rot_para_1
2445
2446! **************************************************************************************************
2447!> \brief Parallel algorithm for jacobi rotations
2448!> \param weights ...
2449!> \param para_env ...
2450!> \param max_iter ...
2451!> \param sweeps ...
2452!> \param out_each ...
2453!> \param dim2 ...
2454!> \param nstate ...
2455!> \param nblock_max ...
2456!> \param ns_bound ...
2457!> \param cz_ij_loc ...
2458!> \param rotmat ...
2459!> \param output_unit ...
2460!> \param tol_out ...
2461!> \param eps_localization ...
2462!> \param target_time ...
2463!> \param start_time ...
2464!> \par History
2465!> split out to reuse with different input types
2466!> \author HF (05.2022)
2467! **************************************************************************************************
2468 SUBROUTINE jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, &
2469 ns_bound, cz_ij_loc, rotmat, output_unit, tol_out, eps_localization, target_time, start_time)
2470
2471 REAL(kind=dp), INTENT(IN) :: weights(:)
2472 TYPE(mp_para_env_type), POINTER :: para_env
2473 INTEGER, INTENT(IN) :: max_iter
2474 INTEGER, INTENT(OUT) :: sweeps
2475 INTEGER, INTENT(IN) :: out_each, dim2, nstate, nblock_max
2476 INTEGER, DIMENSION(0:, :), INTENT(IN) :: ns_bound
2477 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2478 REAL(dp), DIMENSION(:, :), INTENT(OUT) :: rotmat
2479 INTEGER, INTENT(IN) :: output_unit
2480 REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2481 REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_localization
2482 REAL(dp), OPTIONAL :: target_time, start_time
2483
2484 COMPLEX(KIND=dp) :: zi, zj
2485 COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: c_array_me, c_array_partner
2486 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
2487 INTEGER :: i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, ip, ip_has_i, &
2488 ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, iu1, iu2, iup1, &
2489 iup2, j, jj, jstate, k, kk, lsweep, n1, n2, npair, nperm, ns_me, ns_partner, &
2490 ns_recv_from, ns_recv_partner
2491 INTEGER, ALLOCATABLE, DIMENSION(:) :: rcount, rdispl
2492 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: list_pair
2493 LOGICAL :: should_stop
2494 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: gmat, rmat_loc, rmat_recv, rmat_send
2495 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: rmat_recv_all
2496 REAL(kind=dp) :: ct, func, gmax, grad, ri, rj, st, t1, &
2497 t2, theta, tolerance, zc, zr
2498 TYPE(set_c_1d_type), DIMENSION(:), POINTER :: zdiag_all, zdiag_me
2499 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: xyz_mix, xyz_mix_ns
2500
2501 NULLIFY (zdiag_all, zdiag_me)
2502 NULLIFY (xyz_mix, xyz_mix_ns)
2503 NULLIFY (mii, mij, mjj)
2504
2505 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
2506
2507 ALLOCATE (rcount(para_env%num_pe), stat=istat)
2508 ALLOCATE (rdispl(para_env%num_pe), stat=istat)
2509
2510 tolerance = 1.0e10_dp
2511 sweeps = 0
2512
2513 ! number of processor pairs and number of permutations
2514 npair = (para_env%num_pe + 1)/2
2515 nperm = para_env%num_pe - mod(para_env%num_pe + 1, 2)
2516 ALLOCATE (list_pair(2, npair))
2517
2518 ! initialize rotation matrix
2519 rotmat = 0.0_dp
2520 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2521 ii = i - ns_bound(para_env%mepos, 1) + 1
2522 rotmat(i, ii) = 1.0_dp
2523 END DO
2524
2525 ALLOCATE (xyz_mix(dim2))
2526 ALLOCATE (xyz_mix_ns(dim2))
2527 ALLOCATE (zdiag_me(dim2))
2528 ALLOCATE (zdiag_all(dim2))
2529
2530 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2531 IF (ns_me /= 0) THEN
2532 ALLOCATE (c_array_me(nstate, ns_me, dim2))
2533 DO idim = 1, dim2
2534 ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me))
2535 END DO
2536 ALLOCATE (gmat(nstate, ns_me))
2537 END IF
2538
2539 DO idim = 1, dim2
2540 ALLOCATE (zdiag_me(idim)%c_array(nblock_max))
2541 zdiag_me(idim)%c_array = z_zero
2542 ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max))
2543 zdiag_all(idim)%c_array = z_zero
2544 END DO
2545 ALLOCATE (rmat_recv(nblock_max*2, nblock_max))
2546 ALLOCATE (rmat_send(nblock_max*2, nblock_max))
2547
2548 ! buffer for message passing
2549 ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1))
2550
2551 IF (output_unit > 0) THEN
2552 WRITE (output_unit, '(T4,A )') " Localization by iterative distributed Jacobi rotation"
2553 WRITE (output_unit, '(T20,A12,T32, A22,T60, A12,A8 )') "Iteration", "Functional", "Tolerance", " Time "
2554 END IF
2555
2556 DO lsweep = 1, max_iter + 1
2557 sweeps = lsweep
2558 IF (sweeps == max_iter + 1) THEN
2559 IF (output_unit > 0) THEN
2560 WRITE (output_unit, *) ' LOCALIZATION! loop did not converge within the maximum number of iterations.'
2561 WRITE (output_unit, *) ' Present Max. gradient = ', tolerance
2562 END IF
2563 EXIT
2564 END IF
2565 t1 = m_walltime()
2566
2567 DO iperm = 1, nperm
2568
2569 ! fix partners for this permutation, and get the number of states
2570 CALL eberlein(iperm, para_env, list_pair)
2571 ip_partner = -1
2572 ns_partner = 0
2573 DO ipair = 1, npair
2574 IF (list_pair(1, ipair) == para_env%mepos) THEN
2575 ip_partner = list_pair(2, ipair)
2576 EXIT
2577 ELSE IF (list_pair(2, ipair) == para_env%mepos) THEN
2578 ip_partner = list_pair(1, ipair)
2579 EXIT
2580 END IF
2581 END DO
2582 IF (ip_partner >= 0) THEN
2583 ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1
2584 ELSE
2585 ns_partner = 0
2586 END IF
2587
2588 ! if there is a non-zero block connecting two partners, jacobi-sweep it.
2589 IF (ns_partner*ns_me /= 0) THEN
2590
2591 ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner))
2592 rmat_loc = 0.0_dp
2593 DO i = 1, ns_me + ns_partner
2594 rmat_loc(i, i) = 1.0_dp
2595 END DO
2596
2597 ALLOCATE (c_array_partner(nstate, ns_partner, dim2))
2598
2599 DO idim = 1, dim2
2600 ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner))
2601 DO i = 1, ns_me
2602 c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i)
2603 END DO
2604 END DO
2605
2606 CALL para_env%sendrecv(msgin=c_array_me, dest=ip_partner, &
2607 msgout=c_array_partner, source=ip_partner)
2608
2609 n1 = ns_me
2610 n2 = ns_partner
2611 ilow1 = ns_bound(para_env%mepos, 1)
2612 iup1 = ns_bound(para_env%mepos, 1) + n1 - 1
2613 ilow2 = ns_bound(ip_partner, 1)
2614 iup2 = ns_bound(ip_partner, 1) + n2 - 1
2615 IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1)) THEN
2616 il1 = 1
2617 iu1 = n1
2618 iu1 = n1
2619 il2 = 1 + n1
2620 iu2 = n1 + n2
2621 ELSE
2622 il1 = 1 + n2
2623 iu1 = n1 + n2
2624 iu1 = n1 + n2
2625 il2 = 1
2626 iu2 = n2
2627 END IF
2628
2629 DO idim = 1, dim2
2630 DO i = 1, n1
2631 xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim)
2632 xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim)
2633 END DO
2634 DO i = 1, n2
2635 xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim)
2636 xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim)
2637 END DO
2638 END DO
2639
2640 DO istate = 1, n1 + n2
2641 DO jstate = istate + 1, n1 + n2
2642 DO idim = 1, dim2
2643 mii(idim) = xyz_mix(idim)%c_array(istate, istate)
2644 mij(idim) = xyz_mix(idim)%c_array(istate, jstate)
2645 mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate)
2646 END DO
2647 CALL get_angle(mii, mjj, mij, weights, theta)
2648 st = sin(theta)
2649 ct = cos(theta)
2650 DO idim = 1, dim2
2651 DO i = 1, n1 + n2
2652 zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate)
2653 zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate)
2654 xyz_mix(idim)%c_array(i, istate) = zi
2655 xyz_mix(idim)%c_array(i, jstate) = zj
2656 END DO
2657 DO i = 1, n1 + n2
2658 zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i)
2659 zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i)
2660 xyz_mix(idim)%c_array(istate, i) = zi
2661 xyz_mix(idim)%c_array(jstate, i) = zj
2662 END DO
2663 END DO
2664
2665 DO i = 1, n1 + n2
2666 ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate)
2667 rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate)
2668 rmat_loc(i, istate) = ri
2669 rmat_loc(i, jstate) = rj
2670 END DO
2671 END DO
2672 END DO
2673
2674 k = nblock_max + 1
2675 CALL para_env%sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, &
2676 rotmat(1:nstate, k:k + n2 - 1), ip_partner)
2677
2678 IF (ilow1 < ilow2) THEN
2679 ! no longer compiles in official sdgb:
2680 !CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1, k), nstate, rmat_loc(1 + n1, 1), n1 + n2, 0.0_dp, gmat, nstate)
2681 ! probably inefficient:
2682 CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, rmat_loc(1 + n1:, 1:n1), &
2683 n2, 0.0_dp, gmat(:, :), nstate)
2684 CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(1:, 1:), &
2685 n1 + n2, 1.0_dp, gmat(:, :), nstate)
2686 ELSE
2687 CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, &
2688 rmat_loc(1:, n2 + 1:), n1 + n2, 0.0_dp, gmat(:, :), nstate)
2689 ! no longer compiles in official sdgb:
2690 !CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1, 1), nstate, rmat_loc(n2 + 1, n2 + 1), n1 + n2, 1.0_dp, gmat, nstate)
2691 ! probably inefficient:
2692 CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(n2 + 1:, n2 + 1:), &
2693 n1, 1.0_dp, gmat(:, :), nstate)
2694 END IF
2695
2696 CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1)
2697
2698 DO idim = 1, dim2
2699 DO i = 1, n1
2700 xyz_mix_ns(idim)%c_array(1:nstate, i) = z_zero
2701 END DO
2702
2703 DO istate = 1, n1
2704 DO jstate = 1, nstate
2705 DO i = 1, n2
2706 xyz_mix_ns(idim)%c_array(jstate, istate) = &
2707 xyz_mix_ns(idim)%c_array(jstate, istate) + &
2708 c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1)
2709 END DO
2710 END DO
2711 END DO
2712 DO istate = 1, n1
2713 DO jstate = 1, nstate
2714 DO i = 1, n1
2715 xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + &
2716 c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1)
2717 END DO
2718 END DO
2719 END DO
2720 END DO ! idim
2721
2722 DEALLOCATE (c_array_partner)
2723
2724 ELSE ! save my data
2725 DO idim = 1, dim2
2726 DO i = 1, ns_me
2727 xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i)
2728 END DO
2729 END DO
2730 END IF
2731
2732 DO idim = 1, dim2
2733 DO i = 1, ns_me
2734 cz_ij_loc(idim)%c_array(1:nstate, i) = z_zero
2735 END DO
2736 END DO
2737
2738 IF (ns_partner*ns_me /= 0) THEN
2739 ! transpose rotation matrix rmat_loc
2740 DO i = 1, ns_me + ns_partner
2741 DO j = i + 1, ns_me + ns_partner
2742 ri = rmat_loc(i, j)
2743 rmat_loc(i, j) = rmat_loc(j, i)
2744 rmat_loc(j, i) = ri
2745 END DO
2746 END DO
2747
2748 ! prepare for distribution
2749 DO i = 1, n1
2750 rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1)
2751 END DO
2752 ik = nblock_max
2753 DO i = 1, n2
2754 rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1)
2755 END DO
2756 ELSE
2757 rmat_send = 0.0_dp
2758 END IF
2759
2760 ! collect data from all tasks (this takes some significant time)
2761 CALL para_env%allgather(rmat_send, rmat_recv_all)
2762
2763 ! update blocks everywhere
2764 DO ip = 0, para_env%num_pe - 1
2765
2766 ip_recv_from = mod(para_env%mepos - ip + para_env%num_pe, para_env%num_pe)
2767 rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from)
2768
2769 ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1
2770
2771 IF (ns_me /= 0) THEN
2772 IF (ns_recv_from /= 0) THEN
2773 !look for the partner of ip_recv_from
2774 ip_recv_partner = -1
2775 ns_recv_partner = 0
2776 DO ipair = 1, npair
2777 IF (list_pair(1, ipair) == ip_recv_from) THEN
2778 ip_recv_partner = list_pair(2, ipair)
2779 EXIT
2780 ELSE IF (list_pair(2, ipair) == ip_recv_from) THEN
2781 ip_recv_partner = list_pair(1, ipair)
2782 EXIT
2783 END IF
2784 END DO
2785
2786 IF (ip_recv_partner >= 0) THEN
2787 ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1
2788 END IF
2789 IF (ns_recv_partner > 0) THEN
2790 il1 = ns_bound(para_env%mepos, 1)
2791 il_recv = ns_bound(ip_recv_from, 1)
2792 il_recv_partner = ns_bound(ip_recv_partner, 1)
2793 ik = nblock_max
2794
2795 DO idim = 1, dim2
2796 DO i = 1, ns_recv_from
2797 ii = il_recv + i - 1
2798 DO j = 1, ns_me
2799 jj = j
2800 DO k = 1, ns_recv_from
2801 kk = il_recv + k - 1
2802 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2803 rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2804 END DO
2805 END DO
2806 END DO
2807 DO i = 1, ns_recv_from
2808 ii = il_recv + i - 1
2809 DO j = 1, ns_me
2810 jj = j
2811 DO k = 1, ns_recv_partner
2812 kk = il_recv_partner + k - 1
2813 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2814 rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2815 END DO
2816 END DO
2817 END DO
2818 END DO ! idim
2819 ELSE
2820 il1 = ns_bound(para_env%mepos, 1)
2821 il_recv = ns_bound(ip_recv_from, 1)
2822 DO idim = 1, dim2
2823 DO j = 1, ns_me
2824 jj = j
2825 DO i = 1, ns_recv_from
2826 ii = il_recv + i - 1
2827 cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j)
2828 END DO
2829 END DO
2830 END DO ! idim
2831 END IF
2832 END IF
2833 END IF ! ns_me
2834 END DO ! ip
2835
2836 IF (ns_partner*ns_me /= 0) THEN
2837 DEALLOCATE (rmat_loc)
2838 DO idim = 1, dim2
2839 DEALLOCATE (xyz_mix(idim)%c_array)
2840 END DO
2841 END IF
2842
2843 END DO ! iperm
2844
2845 ! calculate the max gradient
2846 DO idim = 1, dim2
2847 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2848 ii = i - ns_bound(para_env%mepos, 1) + 1
2849 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2850 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2851 END DO
2852 rcount(:) = SIZE(zdiag_me(idim)%c_array)
2853 rdispl(1) = 0
2854 DO ip = 2, para_env%num_pe
2855 rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
2856 END DO
2857 ! collect all the diagonal elements in a replicated 1d array
2858 CALL para_env%allgatherv(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl)
2859 END DO
2860
2861 gmax = 0.0_dp
2862 DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2863 k = j - ns_bound(para_env%mepos, 1) + 1
2864 DO i = 1, j - 1
2865 ! find the location of the diagonal element (i,i)
2866 DO ip = 0, para_env%num_pe - 1
2867 IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2)) THEN
2868 ip_has_i = ip
2869 EXIT
2870 END IF
2871 END DO
2872 ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1
2873 ! mepos has the diagonal element (j,j), as well as the off diagonal (i,j)
2874 jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1
2875 grad = 0.0_dp
2876 DO idim = 1, dim2
2877 zi = zdiag_all(idim)%c_array(ii)
2878 zj = zdiag_all(idim)%c_array(jj)
2879 grad = grad + weights(idim)*real(4.0_dp*conjg(cz_ij_loc(idim)%c_array(i, k))*(zj - zi), dp)
2880 END DO
2881 gmax = max(gmax, abs(grad))
2882 END DO
2883 END DO
2884
2885 CALL para_env%max(gmax)
2886 tolerance = gmax
2887 IF (PRESENT(tol_out)) tol_out = tolerance
2888
2889 func = 0.0_dp
2890 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2891 k = i - ns_bound(para_env%mepos, 1) + 1
2892 DO idim = 1, dim2
2893 zr = real(cz_ij_loc(idim)%c_array(i, k), dp)
2894 zc = aimag(cz_ij_loc(idim)%c_array(i, k))
2895 func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/twopi/twopi
2896 END DO
2897 END DO
2898 CALL para_env%sum(func)
2899 t2 = m_walltime()
2900
2901 IF (output_unit > 0 .AND. modulo(sweeps, out_each) == 0) THEN
2902 WRITE (output_unit, '(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1
2903 CALL m_flush(output_unit)
2904 END IF
2905 IF (PRESENT(eps_localization)) THEN
2906 IF (tolerance < eps_localization) EXIT
2907 END IF
2908 IF (PRESENT(target_time) .AND. PRESENT(start_time)) THEN
2909 CALL external_control(should_stop, "LOC", target_time=target_time, start_time=start_time)
2910 IF (should_stop) EXIT
2911 END IF
2912
2913 END DO ! lsweep
2914
2915 ! buffer for message passing
2916 DEALLOCATE (rmat_recv_all)
2917
2918 DEALLOCATE (rmat_recv)
2919 DEALLOCATE (rmat_send)
2920 IF (ns_me > 0) THEN
2921 DEALLOCATE (c_array_me)
2922 END IF
2923 DO idim = 1, dim2
2924 DEALLOCATE (zdiag_me(idim)%c_array)
2925 DEALLOCATE (zdiag_all(idim)%c_array)
2926 END DO
2927 DEALLOCATE (zdiag_me)
2928 DEALLOCATE (zdiag_all)
2929 DEALLOCATE (xyz_mix)
2930 DO idim = 1, dim2
2931 IF (ns_me /= 0) THEN
2932 DEALLOCATE (xyz_mix_ns(idim)%c_array)
2933 END IF
2934 END DO
2935 DEALLOCATE (xyz_mix_ns)
2936 IF (ns_me /= 0) THEN
2937 DEALLOCATE (gmat)
2938 END IF
2939 DEALLOCATE (mii)
2940 DEALLOCATE (mij)
2941 DEALLOCATE (mjj)
2942 DEALLOCATE (list_pair)
2943
2944 END SUBROUTINE jacobi_rot_para_core
2945
2946! **************************************************************************************************
2947!> \brief ...
2948!> \param iperm ...
2949!> \param para_env ...
2950!> \param list_pair ...
2951! **************************************************************************************************
2952 SUBROUTINE eberlein(iperm, para_env, list_pair)
2953 INTEGER, INTENT(IN) :: iperm
2954 TYPE(mp_para_env_type), POINTER :: para_env
2955 INTEGER, DIMENSION(:, :) :: list_pair
2956
2957 INTEGER :: i, ii, jj, npair
2958
2959 npair = (para_env%num_pe + 1)/2
2960 IF (iperm == 1) THEN
2961!..set up initial ordering
2962 DO i = 0, para_env%num_pe - 1
2963 ii = ((i + 1) + 1)/2
2964 jj = mod((i + 1) + 1, 2) + 1
2965 list_pair(jj, ii) = i
2966 END DO
2967 IF (mod(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1
2968 ELSEIF (mod(iperm, 2) == 0) THEN
2969!..a type shift
2970 jj = list_pair(1, npair)
2971 DO i = npair, 3, -1
2972 list_pair(1, i) = list_pair(1, i - 1)
2973 END DO
2974 list_pair(1, 2) = list_pair(2, 1)
2975 list_pair(2, 1) = jj
2976 ELSE
2977!..b type shift
2978 jj = list_pair(2, 1)
2979 DO i = 1, npair - 1
2980 list_pair(2, i) = list_pair(2, i + 1)
2981 END DO
2982 list_pair(2, npair) = jj
2983 END IF
2984
2985 END SUBROUTINE eberlein
2986
2987! **************************************************************************************************
2988!> \brief ...
2989!> \param vectors ...
2990!> \param op_sm_set ...
2991!> \param zij_fm_set ...
2992! **************************************************************************************************
2993 SUBROUTINE zij_matrix(vectors, op_sm_set, zij_fm_set)
2994
2995 TYPE(cp_fm_type), INTENT(IN) :: vectors
2996 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: op_sm_set
2997 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: zij_fm_set
2998
2999 CHARACTER(len=*), PARAMETER :: routinen = 'zij_matrix'
3000
3001 INTEGER :: handle, i, j, nao, nmoloc
3002 TYPE(cp_fm_type) :: opvec
3003
3004 CALL timeset(routinen, handle)
3005
3006 ! get rows and cols of the input
3007 CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc)
3008 ! replicate the input kind of matrix
3009 CALL cp_fm_create(opvec, vectors%matrix_struct)
3010
3011 ! Compute zij here
3012 DO i = 1, SIZE(zij_fm_set, 2)
3013 DO j = 1, SIZE(zij_fm_set, 1)
3014 CALL cp_fm_set_all(zij_fm_set(j, i), 0.0_dp)
3015 CALL cp_dbcsr_sm_fm_multiply(op_sm_set(j, i)%matrix, vectors, opvec, ncol=nmoloc)
3016 CALL parallel_gemm("T", "N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, &
3017 zij_fm_set(j, i))
3018 END DO
3019 END DO
3020
3021 CALL cp_fm_release(opvec)
3022 CALL timestop(handle)
3023
3024 END SUBROUTINE zij_matrix
3025
3026! **************************************************************************************************
3027!> \brief ...
3028!> \param vectors ...
3029! **************************************************************************************************
3030 SUBROUTINE scdm_qrfact(vectors)
3031
3032 TYPE(cp_fm_type), INTENT(IN) :: vectors
3033
3034 CHARACTER(len=*), PARAMETER :: routinen = 'scdm_qrfact'
3035
3036 INTEGER :: handle, ncolt, nrowt
3037 REAL(kind=dp), DIMENSION(:), POINTER :: tau
3038 TYPE(cp_fm_struct_type), POINTER :: cstruct
3039 TYPE(cp_fm_type) :: ctp, qf, tmp
3040
3041 CALL timeset(routinen, handle)
3042
3043 ! Create Transpose of Coefficient Matrix vectors
3044 nrowt = vectors%matrix_struct%ncol_global
3045 ncolt = vectors%matrix_struct%nrow_global
3046
3047 CALL cp_fm_struct_create(cstruct, template_fmstruct=vectors%matrix_struct, &
3048 nrow_global=nrowt, ncol_global=ncolt)
3049 CALL cp_fm_create(ctp, cstruct)
3050 CALL cp_fm_struct_release(cstruct)
3051
3052 ALLOCATE (tau(nrowt))
3053
3054 CALL cp_fm_transpose(vectors, ctp)
3055
3056 ! Get QR decomposition of CTs
3057 CALL cp_fm_pdgeqpf(ctp, tau, nrowt, ncolt, 1, 1)
3058
3059 ! Construction of Q from the scalapack output
3060 CALL cp_fm_struct_create(cstruct, para_env=ctp%matrix_struct%para_env, &
3061 context=ctp%matrix_struct%context, nrow_global=ctp%matrix_struct%nrow_global, &
3062 ncol_global=ctp%matrix_struct%nrow_global)
3063 CALL cp_fm_create(qf, cstruct)
3064 CALL cp_fm_struct_release(cstruct)
3065 CALL cp_fm_to_fm_submat(ctp, qf, nrowt, nrowt, 1, 1, 1, 1)
3066
3067 ! Get Q
3068 CALL cp_fm_pdorgqr(qf, tau, nrowt, 1, 1)
3069
3070 ! Transform original coefficient matrix vectors
3071 CALL cp_fm_create(tmp, vectors%matrix_struct)
3072 CALL cp_fm_set_all(tmp, 0.0_dp, 1.0_dp)
3073 CALL cp_fm_to_fm(vectors, tmp)
3074 CALL parallel_gemm('N', 'N', ncolt, nrowt, nrowt, 1.0_dp, tmp, qf, 0.0_dp, vectors)
3075
3076 ! Cleanup
3077 CALL cp_fm_release(ctp)
3078 CALL cp_fm_release(tmp)
3079 CALL cp_fm_release(qf)
3080 DEALLOCATE (tau)
3081
3082 CALL timestop(handle)
3083
3084 END SUBROUTINE scdm_qrfact
3085
3086END MODULE qs_localization_methods
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
Handles all functions related to the CELL.
Definition cell_types.F:15
methods related to the blacs parallel environment
Basic linear algebra operations for complex full matrices.
subroutine, public cp_cfm_scale_and_add(alpha, matrix_a, beta, matrix_b)
Scale and add two BLACS matrices (a = alpha*a + beta*b).
subroutine, public cp_cfm_rot_rows(matrix, irow, jrow, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
subroutine, public cp_cfm_schur_product(matrix_a, matrix_b, matrix_c)
Computes the element-wise (Schur) product of two matrices: C = A \circ B .
subroutine, public cp_cfm_column_scale(matrix_a, scaling)
Scales columns of the full matrix by corresponding factors.
subroutine, public cp_cfm_rot_cols(matrix, icol, jcol, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
subroutine, public cp_cfm_trace(matrix_a, matrix_b, trace)
Returns the trace of matrix_a^T matrix_b, i.e sum_{i,j}(matrix_a(i,j)*matrix_b(i,j)) .
used for collecting diagonalization schemes available for cp_cfm_type
Definition cp_cfm_diag.F:14
subroutine, public cp_cfm_heevd(matrix, eigenvectors, eigenvalues)
Perform a diagonalisation of a complex matrix.
Definition cp_cfm_diag.F:60
Represents a complex full matrix distributed on many processors.
subroutine, public cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
Extract a sub-matrix from the full matrix: op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n...
subroutine, public cp_cfm_get_element(matrix, irow_global, icol_global, alpha)
Get the matrix element by its global index.
subroutine, public cp_cfm_create(matrix, matrix_struct, name)
Creates a new full matrix with the given structure.
subroutine, public cp_cfm_release(matrix)
Releases a full matrix.
subroutine, public cp_fm_to_cfm(msourcer, msourcei, mtarget)
Construct a complex full matrix by taking its real and imaginary parts from two separate real-value f...
subroutine, public cp_cfm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, matrix_struct, para_env)
Returns information about a full matrix.
subroutine, public cp_cfm_set_submatrix(matrix, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
Set a sub-matrix of the full matrix: matrix(start_row:start_row+n_rows,start_col:start_col+n_cols) = ...
subroutine, public cp_cfm_set_all(matrix, alpha, beta)
Set all elements of the full matrix to alpha. Besides, set all diagonal matrix elements to beta (if g...
subroutine, public cp_cfm_to_fm(msource, mtargetr, mtargeti)
Copy real and imaginary parts of a complex full matrix into separate real-value full matrices.
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_sm_fm_multiply(matrix, fm_in, fm_out, ncol, alpha, beta)
multiply a dbcsr with a fm matrix
Routines to handle the external control of CP2K.
subroutine, public external_control(should_stop, flag, globenv, target_time, start_time, force_check)
External manipulations during a run : when the <PROJECT_NAME>.EXIT_$runtype command is sent the progr...
Basic linear algebra operations for full matrices.
subroutine, public cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
compute a QR factorization with column pivoting of a M-by-N distributed matrix sub( A ) = A(IA:IA+M-1...
real(kind=dp) function, public cp_fm_frobenius_norm(matrix_a)
computes the Frobenius norm of matrix_a
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_scale(alpha, matrix_a)
scales a matrix matrix_a = alpha * matrix_b
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
subroutine, public cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1) with orthonormal column...
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_decompose(matrix, n, info_out)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
Definition cp_fm_diag.F:17
subroutine, public cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
Computes all eigenvalues and vectors of a real symmetric matrix significantly faster than syevx,...
Definition cp_fm_diag.F:443
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
returns an element of a fm this value is valid on every cpu using this call is expensive
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_maxabsrownorm(matrix, a_max)
find the maximum over the rows of the sum of the absolute values of the elements of a given row = || ...
subroutine, public cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
copy just a part ot the matrix
subroutine, public cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
find the maximum absolute value of the matrix element maxval(abs(matrix))
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_init_random(matrix, ncol, start_col)
fills a matrix with random numbers
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Definition kahan_sum.F:29
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition machine.F:131
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition machine.F:148
Definition of mathematical constants and functions.
real(kind=dp), parameter, public pi
real(kind=dp), parameter, public twopi
complex(kind=dp), parameter, public z_zero
Routines for calculating a complex matrix exponential.
Definition matrix_exp.F:13
subroutine, public exp_pade_real(exp_h, matrix, nsquare, npade)
exponential of a real matrix, calculated using pade approximation together with scaling and squaring
Definition matrix_exp.F:570
subroutine, public get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
optimization function for pade/taylor order and number of squaring steps
Definition matrix_exp.F:244
Interface to the message passing library MPI.
basic linear algebra operations for full matrixes
Localization methods such as 2x2 Jacobi rotations Steepest Decents Conjugate Gradient.
subroutine, public approx_l1_norm_sd(c, iterations, eps, converged, sweeps)
...
subroutine, public rotate_orbitals(rmat, vectors)
...
subroutine, public jacobi_rotations(weights, zij, vectors, para_env, max_iter, eps_localization, sweeps, out_each, target_time, start_time, restricted)
wrapper for the jacobi routines, should be removed if jacobi_rot_para can deal with serial para_envs.
subroutine, public zij_matrix(vectors, op_sm_set, zij_fm_set)
...
subroutine, public direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
use the exponential parametrization as described in to perform a direct mini Gerd Berghold et al....
subroutine, public initialize_weights(cell, weights)
...
subroutine, public crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, eps_localization, iterations, converged)
yet another crazy try, computes the angles needed to rotate the orbitals first and rotates them all a...
subroutine, public scdm_qrfact(vectors)
...
subroutine, public jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
combine jacobi rotations (serial) and conjugate gradient with golden section line search for partiall...
Exchange and Correlation functional calculations.
Definition xc.F:17
Type defining parameters related to the simulation cell.
Definition cell_types.F:55
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
Represent a complex full matrix.
keeps the information about the structure of a full matrix
represent a full matrix
stores all the informations relevant to an mpi environment