(git:ab76537)
Loading...
Searching...
No Matches
qs_localization_methods.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Localization methods such as 2x2 Jacobi rotations
10!> Steepest Decents
11!> Conjugate Gradient
12!> \par History
13!> Initial parallellization of jacobi (JVDV 07.2003)
14!> direct minimization using exponential parametrization (JVDV 09.2003)
15!> crazy rotations go fast (JVDV 10.2003)
16!> \author CJM (04.2003)
17! **************************************************************************************************
19 USE cell_types, ONLY: cell_type
28 USE cp_cfm_diag, ONLY: cp_cfm_heevd
29 USE cp_cfm_types, ONLY: &
33 USE cp_dbcsr_api, ONLY: dbcsr_p_type
45 USE cp_fm_diag, ONLY: cp_fm_syevd
50 USE cp_fm_types, ONLY: &
56 USE kahan_sum, ONLY: accurate_sum
57 USE kinds, ONLY: dp
58 USE machine, ONLY: m_flush,&
60 USE mathconstants, ONLY: pi,&
61 twopi,&
62 z_zero
63 USE matrix_exp, ONLY: exp_pade_real,&
67#include "./base/base_uses.f90"
68
69 IMPLICIT NONE
73
74 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_localization_methods'
75
76 PRIVATE
77
78 TYPE set_c_1d_type
79 COMPLEX(KIND=dp), POINTER, DIMENSION(:) :: c_array => null()
80 END TYPE set_c_1d_type
81
82 TYPE set_c_2d_type
83 COMPLEX(KIND=dp), POINTER, DIMENSION(:, :) :: c_array => null()
84 END TYPE set_c_2d_type
85
86CONTAINS
87! **************************************************************************************************
88!> \brief ...
89!> \param C ...
90!> \param iterations ...
91!> \param eps ...
92!> \param converged ...
93!> \param sweeps ...
94! **************************************************************************************************
95 SUBROUTINE approx_l1_norm_sd(C, iterations, eps, converged, sweeps)
96 TYPE(cp_fm_type), INTENT(IN) :: c
97 INTEGER, INTENT(IN) :: iterations
98 REAL(kind=dp), INTENT(IN) :: eps
99 LOGICAL, INTENT(INOUT) :: converged
100 INTEGER, INTENT(INOUT) :: sweeps
101
102 CHARACTER(len=*), PARAMETER :: routinen = 'approx_l1_norm_sd'
103 INTEGER, PARAMETER :: taylor_order = 100
104 REAL(kind=dp), PARAMETER :: alpha = 0.1_dp, f2_eps = 0.01_dp
105
106 INTEGER :: handle, i, istep, k, n, ncol_local, &
107 nrow_local, output_unit, p
108 REAL(kind=dp) :: expfactor, f2, f2old, gnorm, tnorm
109 TYPE(cp_blacs_env_type), POINTER :: context
110 TYPE(cp_fm_struct_type), POINTER :: fm_struct_k_k
111 TYPE(cp_fm_type) :: ctmp, g, gp1, gp2, u
112 TYPE(mp_para_env_type), POINTER :: para_env
113
114 CALL timeset(routinen, handle)
115
116 NULLIFY (context, para_env, fm_struct_k_k)
117
118 output_unit = cp_logger_get_default_io_unit()
119
120 CALL cp_fm_struct_get(c%matrix_struct, nrow_global=n, ncol_global=k, &
121 nrow_local=nrow_local, ncol_local=ncol_local, &
122 para_env=para_env, context=context)
123 CALL cp_fm_struct_create(fm_struct_k_k, para_env=para_env, context=context, &
124 nrow_global=k, ncol_global=k)
125 CALL cp_fm_create(ctmp, c%matrix_struct)
126 CALL cp_fm_create(u, fm_struct_k_k)
127 CALL cp_fm_create(g, fm_struct_k_k)
128 CALL cp_fm_create(gp1, fm_struct_k_k)
129 CALL cp_fm_create(gp2, fm_struct_k_k)
130 !
131 ! printing
132 IF (output_unit > 0) THEN
133 WRITE (output_unit, '(1X)')
134 WRITE (output_unit, '(2X,A)') '-----------------------------------------------------------------------------'
135 WRITE (output_unit, '(A,I5)') ' Nbr iterations =', iterations
136 WRITE (output_unit, '(A,E10.2)') ' eps convergence =', eps
137 WRITE (output_unit, '(A,I5)') ' Max Taylor order =', taylor_order
138 WRITE (output_unit, '(A,E10.2)') ' f2 eps =', f2_eps
139 WRITE (output_unit, '(A,E10.2)') ' alpha =', alpha
140 WRITE (output_unit, '(A)') ' iteration approx_l1_norm g_norm rel_err'
141 END IF
142 !
143 f2old = 0.0_dp
144 converged = .false.
145 !
146 ! Start the steepest descent
147 DO istep = 1, iterations
148 !
149 !-------------------------------------------------------------------
150 ! compute f_2
151 ! f_2(x)=(x^2+eps)^1/2
152 f2 = 0.0_dp
153 DO p = 1, ncol_local ! p
154 DO i = 1, nrow_local ! i
155 f2 = f2 + sqrt(c%local_data(i, p)**2 + f2_eps)
156 END DO
157 END DO
158 CALL c%matrix_struct%para_env%sum(f2)
159 !write(*,*) 'qs_localize: f_2=',f2
160 !-------------------------------------------------------------------
161 ! compute the derivative of f_2
162 ! f_2(x)=(x^2+eps)^1/2
163 DO p = 1, ncol_local ! p
164 DO i = 1, nrow_local ! i
165 ctmp%local_data(i, p) = c%local_data(i, p)/sqrt(c%local_data(i, p)**2 + f2_eps)
166 END DO
167 END DO
168 CALL parallel_gemm('T', 'N', k, k, n, 1.0_dp, ctmp, c, 0.0_dp, g)
169 ! antisymmetrize
170 CALL cp_fm_transpose(g, u)
171 CALL cp_fm_scale_and_add(-0.5_dp, g, 0.5_dp, u)
172 !
173 !-------------------------------------------------------------------
174 !
175 gnorm = cp_fm_frobenius_norm(g)
176 !write(*,*) 'qs_localize: norm(G)=',gnorm
177 !
178 ! rescale for steepest descent
179 CALL cp_fm_scale(-alpha, g)
180 !
181 ! compute unitary transform
182 ! zeroth order
183 CALL cp_fm_set_all(u, 0.0_dp, 1.0_dp)
184 ! first order
185 expfactor = 1.0_dp
186 CALL cp_fm_scale_and_add(1.0_dp, u, expfactor, g)
187 tnorm = cp_fm_frobenius_norm(g)
188 !write(*,*) 'Taylor expansion i=',1,' norm(X^i)/i!=',tnorm
189 IF (tnorm > 1.0e-10_dp) THEN
190 ! other orders
191 CALL cp_fm_to_fm(g, gp1)
192 DO i = 2, taylor_order
193 ! new power of G
194 CALL parallel_gemm('N', 'N', k, k, k, 1.0_dp, g, gp1, 0.0_dp, gp2)
195 CALL cp_fm_to_fm(gp2, gp1)
196 ! add to the taylor expansion so far
197 expfactor = expfactor/real(i, kind=dp)
198 CALL cp_fm_scale_and_add(1.0_dp, u, expfactor, gp1)
199 tnorm = cp_fm_frobenius_norm(gp1)
200 !write(*,*) 'Taylor expansion i=',i,' norm(X^i)/i!=',tnorm*expfactor
201 IF (tnorm*expfactor < 1.0e-10_dp) EXIT
202 END DO
203 END IF
204 !
205 ! incrementaly rotate the MOs
206 CALL parallel_gemm('N', 'N', n, k, k, 1.0_dp, c, u, 0.0_dp, ctmp)
207 CALL cp_fm_to_fm(ctmp, c)
208 !
209 ! printing
210 IF (output_unit > 0) THEN
211 WRITE (output_unit, '(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, abs((f2 - f2old)/f2)
212 END IF
213 !
214 ! Are we done?
215 sweeps = istep
216 !IF(gnorm<=grad_thresh.AND.ABS((f2-f2old)/f2)<=f2_thresh.AND.istep>1) THEN
217 IF (abs((f2 - f2old)/f2) <= eps .AND. istep > 1) THEN
218 converged = .true.
219 EXIT
220 END IF
221 f2old = f2
222 END DO
223 !
224 ! here we should do one refine step to enforce C'*S*C=1 for any case
225 !
226 ! Print the final result
227 IF (output_unit > 0) WRITE (output_unit, '(A,E16.10)') ' sparseness function f2 = ', f2
228 !
229 ! sparsity
230 !DO i=1,size(thresh,1)
231 ! gnorm = 0.0_dp
232 ! DO o=1,ncol_local
233 ! DO p=1,nrow_local
234 ! IF(ABS(C%local_data(p,o))>thresh(i)) THEN
235 ! gnorm = gnorm + 1.0_dp
236 ! ENDIF
237 ! ENDDO
238 ! ENDDO
239 ! CALL C%matrix_struct%para_env%sum(gnorm)
240 ! IF(output_unit>0) THEN
241 ! WRITE(output_unit,*) 'qs_localize: ratio2=',gnorm / ( REAL(k,KIND=dp)*REAL(n,KIND=dp) ),thresh(i)
242 ! ENDIF
243 !ENDDO
244 !
245 ! deallocate
246 CALL cp_fm_struct_release(fm_struct_k_k)
247 CALL cp_fm_release(ctmp)
248 CALL cp_fm_release(u)
249 CALL cp_fm_release(g)
250 CALL cp_fm_release(gp1)
251 CALL cp_fm_release(gp2)
252
253 CALL timestop(handle)
254
255 END SUBROUTINE approx_l1_norm_sd
256! **************************************************************************************************
257!> \brief ...
258!> \param cell ...
259!> \param weights ...
260! **************************************************************************************************
261 SUBROUTINE initialize_weights(cell, weights)
262
263 TYPE(cell_type), POINTER :: cell
264 REAL(kind=dp), DIMENSION(:) :: weights
265
266 REAL(kind=dp), DIMENSION(3, 3) :: metric
267
268 cpassert(ASSOCIATED(cell))
269
270 metric = 0.0_dp
271 CALL dgemm('T', 'N', 3, 3, 3, 1._dp, cell%hmat(:, :), 3, cell%hmat(:, :), 3, 0.0_dp, metric(:, :), 3)
272
273 weights(1) = metric(1, 1) - metric(1, 2) - metric(1, 3)
274 weights(2) = metric(2, 2) - metric(1, 2) - metric(2, 3)
275 weights(3) = metric(3, 3) - metric(1, 3) - metric(2, 3)
276 weights(4) = metric(1, 2)
277 weights(5) = metric(1, 3)
278 weights(6) = metric(2, 3)
279
280 END SUBROUTINE initialize_weights
281
282! **************************************************************************************************
283!> \brief wrapper for the jacobi routines, should be removed if jacobi_rot_para
284!> can deal with serial para_envs.
285!> \param weights ...
286!> \param zij ...
287!> \param vectors ...
288!> \param para_env ...
289!> \param max_iter ...
290!> \param eps_localization ...
291!> \param sweeps ...
292!> \param out_each ...
293!> \param target_time ...
294!> \param start_time ...
295!> \param restricted ...
296!> \par History
297!> \author Joost VandeVondele (02.2010)
298! **************************************************************************************************
299 SUBROUTINE jacobi_rotations(weights, zij, vectors, para_env, max_iter, &
300 eps_localization, sweeps, out_each, target_time, start_time, restricted)
301
302 REAL(kind=dp), INTENT(IN) :: weights(:)
303 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
304 TYPE(mp_para_env_type), POINTER :: para_env
305 INTEGER, INTENT(IN) :: max_iter
306 REAL(kind=dp), INTENT(IN) :: eps_localization
307 INTEGER :: sweeps
308 INTEGER, INTENT(IN) :: out_each
309 REAL(dp) :: target_time, start_time
310 INTEGER :: restricted
311
312 IF (para_env%num_pe == 1) THEN
313 CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, &
314 sweeps, out_each, restricted=restricted)
315 ELSE
316 CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
317 sweeps, out_each, target_time, start_time, restricted=restricted)
318 END IF
319
320 END SUBROUTINE jacobi_rotations
321
322! **************************************************************************************************
323!> \brief this routine, private to the module is a serial backup, till we have jacobi_rot_para to work in serial
324!> while the routine below works in parallel, it is too slow to be useful
325!> \param weights ...
326!> \param zij ...
327!> \param vectors ...
328!> \param max_iter ...
329!> \param eps_localization ...
330!> \param sweeps ...
331!> \param out_each ...
332!> \param restricted ...
333! **************************************************************************************************
334 SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, &
335 out_each, restricted)
336 REAL(kind=dp), INTENT(IN) :: weights(:)
337 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
338 INTEGER, INTENT(IN) :: max_iter
339 REAL(kind=dp), INTENT(IN) :: eps_localization
340 INTEGER :: sweeps
341 INTEGER, INTENT(IN) :: out_each
342 INTEGER :: restricted
343
344 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rotations_serial'
345
346 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
347 INTEGER :: dim2, handle, idim, istate, jstate, &
348 nstate, unit_nr
349 REAL(kind=dp) :: ct, st, t1, t2, theta, tolerance
350 TYPE(cp_cfm_type) :: c_rmat
351 TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
352 TYPE(cp_fm_type) :: rmat
353
354 CALL timeset(routinen, handle)
355
356 dim2 = SIZE(zij, 2)
357 ALLOCATE (c_zij(dim2))
358 NULLIFY (mii, mij, mjj)
359 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
360
361 CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
362 CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
363
364 CALL cp_cfm_create(c_rmat, zij(1, 1)%matrix_struct)
365 CALL cp_cfm_set_all(c_rmat, (0._dp, 0._dp), (1._dp, 0._dp))
366 DO idim = 1, dim2
367 CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
368 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
369 zij(2, idim)%local_data, dp)
370 END DO
371
372 CALL cp_fm_get_info(rmat, nrow_global=nstate)
373 tolerance = 1.0e10_dp
374
375 sweeps = 0
376 unit_nr = -1
377 IF (rmat%matrix_struct%para_env%is_source()) THEN
379 WRITE (unit_nr, '(T4,A )') " Localization by iterative Jacobi rotation"
380 END IF
381
382 IF (restricted > 0) THEN
384 WRITE (unit_nr, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
385 nstate = nstate - restricted
386 END IF
387
388 ! do jacobi sweeps until converged
389 DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
390 sweeps = sweeps + 1
391 t1 = m_walltime()
392
393 DO istate = 1, nstate
394 DO jstate = istate + 1, nstate
395 DO idim = 1, dim2
396 CALL cp_cfm_get_element(c_zij(idim), istate, istate, mii(idim))
397 CALL cp_cfm_get_element(c_zij(idim), istate, jstate, mij(idim))
398 CALL cp_cfm_get_element(c_zij(idim), jstate, jstate, mjj(idim))
399 END DO
400 CALL get_angle(mii, mjj, mij, weights, theta)
401 st = sin(theta)
402 ct = cos(theta)
403 CALL rotate_zij(istate, jstate, st, ct, c_zij)
404
405 CALL rotate_rmat(istate, jstate, st, ct, c_rmat)
406 END DO
407 END DO
408
409 CALL check_tolerance(c_zij, weights, tolerance)
410
411 t2 = m_walltime()
412 IF (unit_nr > 0 .AND. modulo(sweeps, out_each) == 0) THEN
413 WRITE (unit_nr, '(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') &
414 "Iteration:", sweeps, "Tolerance:", tolerance, "Time:", t2 - t1
415 CALL m_flush(unit_nr)
416 END IF
417
418 END DO
419
420 DO idim = 1, dim2
421 zij(1, idim)%local_data = real(c_zij(idim)%local_data, dp)
422 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
423 CALL cp_cfm_release(c_zij(idim))
424 END DO
425 DEALLOCATE (c_zij)
426 DEALLOCATE (mii, mij, mjj)
427 rmat%local_data = real(c_rmat%local_data, dp)
428 CALL cp_cfm_release(c_rmat)
429 CALL rotate_orbitals(rmat, vectors)
430 CALL cp_fm_release(rmat)
431
432 CALL timestop(handle)
433
434 END SUBROUTINE jacobi_rotations_serial
435! **************************************************************************************************
436!> \brief very similar to jacobi_rotations_serial with some extra output options
437!> \param weights ...
438!> \param c_zij ...
439!> \param max_iter ...
440!> \param c_rmat ...
441!> \param eps_localization ...
442!> \param tol_out ...
443!> \param jsweeps ...
444!> \param out_each ...
445!> \param c_zij_out ...
446!> \param grad_final ...
447! **************************************************************************************************
448 SUBROUTINE jacobi_rotations_serial_1(weights, c_zij, max_iter, c_rmat, eps_localization, &
449 tol_out, jsweeps, out_each, c_zij_out, grad_final)
450 REAL(kind=dp), INTENT(IN) :: weights(:)
451 TYPE(cp_cfm_type), INTENT(IN) :: c_zij(:)
452 INTEGER, INTENT(IN) :: max_iter
453 TYPE(cp_cfm_type), INTENT(IN) :: c_rmat
454 REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_localization
455 REAL(kind=dp), INTENT(OUT), OPTIONAL :: tol_out
456 INTEGER, INTENT(OUT), OPTIONAL :: jsweeps
457 INTEGER, INTENT(IN), OPTIONAL :: out_each
458 TYPE(cp_cfm_type), INTENT(IN), OPTIONAL :: c_zij_out(:)
459 TYPE(cp_fm_type), INTENT(OUT), OPTIONAL, POINTER :: grad_final
460
461 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rotations_serial_1'
462
463 COMPLEX(KIND=dp) :: mzii
464 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
465 INTEGER :: dim2, handle, idim, istate, jstate, &
466 nstate, sweeps, unit_nr
467 REAL(kind=dp) :: alpha, avg_spread_ii, ct, spread_ii, st, &
468 sum_spread_ii, t1, t2, theta, tolerance
469 TYPE(cp_cfm_type) :: c_rmat_local
470 TYPE(cp_cfm_type), ALLOCATABLE :: c_zij_local(:)
471
472 CALL timeset(routinen, handle)
473
474 dim2 = SIZE(c_zij)
475 NULLIFY (mii, mij, mjj)
476 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
477
478 ALLOCATE (c_zij_local(dim2))
479 CALL cp_cfm_create(c_rmat_local, c_rmat%matrix_struct)
480 CALL cp_cfm_set_all(c_rmat_local, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
481 DO idim = 1, dim2
482 CALL cp_cfm_create(c_zij_local(idim), c_zij(idim)%matrix_struct)
483 c_zij_local(idim)%local_data = c_zij(idim)%local_data
484 END DO
485
486 CALL cp_cfm_get_info(c_rmat_local, nrow_global=nstate)
487 tolerance = 1.0e10_dp
488
489 IF (PRESENT(grad_final)) CALL cp_fm_set_all(grad_final, 0.0_dp)
490
491 sweeps = 0
492 IF (PRESENT(out_each)) THEN
493 unit_nr = -1
494 IF (c_rmat_local%matrix_struct%para_env%is_source()) THEN
496 END IF
497 alpha = 0.0_dp
498 DO idim = 1, dim2
499 alpha = alpha + weights(idim)
500 END DO
501 END IF
502
503 ! do jacobi sweeps until converged
504 DO WHILE (sweeps < max_iter)
505 sweeps = sweeps + 1
506 IF (PRESENT(eps_localization)) THEN
507 IF (tolerance < eps_localization) EXIT
508 END IF
509 IF (PRESENT(out_each)) t1 = m_walltime()
510
511 DO istate = 1, nstate
512 DO jstate = istate + 1, nstate
513 DO idim = 1, dim2
514 CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mii(idim))
515 CALL cp_cfm_get_element(c_zij_local(idim), istate, jstate, mij(idim))
516 CALL cp_cfm_get_element(c_zij_local(idim), jstate, jstate, mjj(idim))
517 END DO
518 CALL get_angle(mii, mjj, mij, weights, theta)
519 st = sin(theta)
520 ct = cos(theta)
521 CALL rotate_zij(istate, jstate, st, ct, c_zij_local)
522
523 CALL rotate_rmat(istate, jstate, st, ct, c_rmat_local)
524 END DO
525 END DO
526
527 IF (PRESENT(grad_final)) THEN
528 CALL check_tolerance(c_zij_local, weights, tolerance, grad=grad_final)
529 ELSE
530 CALL check_tolerance(c_zij_local, weights, tolerance)
531 END IF
532 IF (PRESENT(tol_out)) tol_out = tolerance
533
534 IF (PRESENT(out_each)) THEN
535 t2 = m_walltime()
536 IF (unit_nr > 0 .AND. modulo(sweeps, out_each) == 0) THEN
537 sum_spread_ii = 0.0_dp
538 DO istate = 1, nstate
539 spread_ii = 0.0_dp
540 DO idim = 1, dim2
541 CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mzii)
542 spread_ii = spread_ii + weights(idim)* &
543 abs(mzii)**2/twopi/twopi
544 END DO
545 sum_spread_ii = sum_spread_ii + spread_ii
546 END DO
547 sum_spread_ii = alpha*nstate/twopi/twopi - sum_spread_ii
548 avg_spread_ii = sum_spread_ii/nstate
549 WRITE (unit_nr, '(T4,A,T26,A,T48,A,T64,A)') &
550 "Iteration", "Avg. Spread_ii", "Tolerance", "Time"
551 WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4,T60,F8.3)') &
552 sweeps, avg_spread_ii, tolerance, t2 - t1
553 CALL m_flush(unit_nr)
554 END IF
555 IF (PRESENT(jsweeps)) jsweeps = sweeps
556 END IF
557
558 END DO
559
560 IF (PRESENT(c_zij_out)) THEN
561 DO idim = 1, dim2
562 CALL cp_cfm_to_cfm(c_zij_local(idim), c_zij_out(idim))
563 END DO
564 END IF
565 CALL cp_cfm_to_cfm(c_rmat_local, c_rmat)
566
567 DEALLOCATE (mii, mij, mjj)
568 DO idim = 1, dim2
569 CALL cp_cfm_release(c_zij_local(idim))
570 END DO
571 DEALLOCATE (c_zij_local)
572 CALL cp_cfm_release(c_rmat_local)
573
574 CALL timestop(handle)
575
576 END SUBROUTINE jacobi_rotations_serial_1
577! **************************************************************************************************
578!> \brief combine jacobi rotations (serial) and conjugate gradient with golden section line search
579!> for partially occupied wannier functions
580!> \param para_env ...
581!> \param weights ...
582!> \param zij ...
583!> \param vectors ...
584!> \param max_iter ...
585!> \param eps_localization ...
586!> \param iter ...
587!> \param out_each ...
588!> \param nextra ...
589!> \param do_cg ...
590!> \param nmo ...
591!> \param vectors_2 ...
592!> \param mos_guess ...
593! **************************************************************************************************
594 SUBROUTINE jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, &
595 iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
596 TYPE(mp_para_env_type), POINTER :: para_env
597 REAL(kind=dp), INTENT(IN) :: weights(:)
598 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
599 INTEGER, INTENT(IN) :: max_iter
600 REAL(kind=dp), INTENT(IN) :: eps_localization
601 INTEGER :: iter
602 INTEGER, INTENT(IN) :: out_each, nextra
603 LOGICAL, INTENT(IN) :: do_cg
604 INTEGER, INTENT(IN), OPTIONAL :: nmo
605 TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: vectors_2, mos_guess
606
607 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_cg_edf_ls'
608 COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
609 czero = (0.0_dp, 0.0_dp)
610 REAL(kind=dp), PARAMETER :: gold_sec = 0.3819_dp
611
612 COMPLEX(KIND=dp) :: cnorm2_gct, cnorm2_gct_cross, mzii
613 COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_cmat
614 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: arr_zii
615 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: matrix_zii
616 INTEGER :: dim2, handle, icinit, idim, istate, line_search_count, line_searches, lsl, lsm, &
617 lsr, miniter, nao, ndummy, nocc, norextra, northo, nstate, unit_nr
618 INTEGER, DIMENSION(1) :: iloc
619 LOGICAL :: do_cinit_mo, do_cinit_random, &
620 do_u_guess_mo, new_direction
621 REAL(kind=dp) :: alpha, avg_spread_ii, beta, beta_pr, ds, ds_min, mintol, norm, norm2_gct, &
622 norm2_gct_cross, norm2_old, spread_ii, spread_sum, sum_spread_ii, t1, tol, tolc, weight
623 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: sum_spread
624 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_mat, tmp_mat_1
625 REAL(kind=dp), DIMENSION(50) :: energy, pos
626 REAL(kind=dp), DIMENSION(:), POINTER :: tmp_arr
627 TYPE(cp_blacs_env_type), POINTER :: context
628 TYPE(cp_cfm_type) :: c_tilde, ctrans_lambda, gct_old, &
629 grad_ctilde, skc, tmp_cfm, tmp_cfm_1, &
630 tmp_cfm_2, u, ul, v, vl, zdiag
631 TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij, zij_0
632 TYPE(cp_fm_struct_type), POINTER :: tmp_fm_struct
633 TYPE(cp_fm_type) :: id_nextra, matrix_u, matrix_v, &
634 matrix_v_all, rmat, tmp_fm, vectors_all
635
636 CALL timeset(routinen, handle)
637
638 dim2 = SIZE(zij, 2)
639 NULLIFY (context)
640 NULLIFY (matrix_zii, arr_zii)
641 NULLIFY (tmp_fm_struct)
642 NULLIFY (tmp_arr)
643
644 ALLOCATE (c_zij(dim2))
645
646 CALL cp_fm_get_info(zij(1, 1), nrow_global=nstate)
647
648 ALLOCATE (sum_spread(nstate))
649 ALLOCATE (matrix_zii(nstate, dim2))
650 matrix_zii = czero
651 sum_spread = 0.0_dp
652
653 alpha = 0.0_dp
654 DO idim = 1, dim2
655 alpha = alpha + weights(idim)
656 CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
657 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
658 zij(2, idim)%local_data, dp)
659 END DO
660
661 ALLOCATE (zij_0(dim2))
662
663 CALL cp_cfm_create(u, zij(1, 1)%matrix_struct)
664 CALL cp_fm_create(matrix_u, zij(1, 1)%matrix_struct)
665
666 CALL cp_cfm_set_all(u, czero, cone)
667 CALL cp_fm_set_all(matrix_u, 0.0_dp, 1.0_dp)
668
669 CALL cp_fm_get_info(vectors, nrow_global=nao)
670 IF (nextra > 0) THEN
671 IF (PRESENT(mos_guess)) THEN
672 do_cinit_random = .false.
673 do_cinit_mo = .true.
674 CALL cp_fm_get_info(mos_guess, ncol_global=ndummy)
675 ELSE
676 do_cinit_random = .true.
677 do_cinit_mo = .false.
678 ndummy = nstate
679 END IF
680
681 IF (do_cinit_random) THEN
682 icinit = 1
683 do_u_guess_mo = .false.
684 ELSEIF (do_cinit_mo) THEN
685 icinit = 2
686 do_u_guess_mo = .true.
687 END IF
688
689 nocc = nstate - nextra
690 northo = nmo - nocc
691 norextra = nmo - nstate
692 CALL cp_fm_struct_get(zij(1, 1)%matrix_struct, context=context)
693
694 ALLOCATE (tmp_cmat(nstate, nstate))
695 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nmo, &
696 para_env=para_env, context=context)
697 DO idim = 1, dim2
698 CALL cp_cfm_create(zij_0(idim), tmp_fm_struct)
699 CALL cp_cfm_set_all(zij_0(idim), czero, cone)
700 CALL cp_cfm_get_submatrix(c_zij(idim), tmp_cmat)
701 CALL cp_cfm_set_submatrix(zij_0(idim), tmp_cmat)
702 END DO
703 CALL cp_fm_struct_release(tmp_fm_struct)
704 DEALLOCATE (tmp_cmat)
705
706 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nstate, &
707 para_env=para_env, context=context)
708 CALL cp_cfm_create(v, tmp_fm_struct)
709 CALL cp_fm_create(matrix_v, tmp_fm_struct)
710 CALL cp_cfm_create(zdiag, tmp_fm_struct)
711 CALL cp_fm_create(rmat, tmp_fm_struct)
712 CALL cp_fm_struct_release(tmp_fm_struct)
713 CALL cp_cfm_set_all(v, czero, cone)
714 CALL cp_fm_set_all(matrix_v, 0.0_dp, 1.0_dp)
715
716 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=ndummy, &
717 para_env=para_env, context=context)
718 CALL cp_fm_create(matrix_v_all, tmp_fm_struct)
719 CALL cp_fm_struct_release(tmp_fm_struct)
720 CALL cp_fm_set_all(matrix_v_all, 0._dp, 1._dp)
721
722 ALLOCATE (arr_zii(nstate))
723
724 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
725 para_env=para_env, context=context)
726 CALL cp_cfm_create(c_tilde, tmp_fm_struct)
727 CALL cp_cfm_create(grad_ctilde, tmp_fm_struct)
728 CALL cp_cfm_create(gct_old, tmp_fm_struct)
729 CALL cp_cfm_create(skc, tmp_fm_struct)
730 CALL cp_fm_struct_release(tmp_fm_struct)
731 CALL cp_cfm_set_all(c_tilde, czero)
732 CALL cp_cfm_set_all(gct_old, czero)
733 CALL cp_cfm_set_all(skc, czero)
734
735 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nstate, &
736 para_env=para_env, context=context)
737 CALL cp_cfm_create(vl, tmp_fm_struct)
738 CALL cp_cfm_set_all(vl, czero)
739 CALL cp_fm_struct_release(tmp_fm_struct)
740
741 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nextra, &
742 para_env=para_env, context=context)
743 CALL cp_fm_create(id_nextra, tmp_fm_struct)
744 CALL cp_cfm_create(ctrans_lambda, tmp_fm_struct)
745 CALL cp_fm_struct_release(tmp_fm_struct)
746 CALL cp_cfm_set_all(ctrans_lambda, czero)
747 CALL cp_fm_set_all(id_nextra, 0.0_dp, 1.0_dp)
748
749 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nstate, &
750 para_env=para_env, context=context)
751 CALL cp_cfm_create(ul, tmp_fm_struct)
752 CALL cp_fm_struct_release(tmp_fm_struct)
753 CALL cp_cfm_set_all(ul, czero)
754
755 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nao, ncol_global=nmo, &
756 para_env=para_env, context=context)
757 CALL cp_fm_create(vectors_all, tmp_fm_struct)
758 CALL cp_fm_struct_release(tmp_fm_struct)
759 ALLOCATE (tmp_mat(nao, nstate))
760 CALL cp_fm_get_submatrix(vectors, tmp_mat)
761 CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, 1, nao, nstate)
762 DEALLOCATE (tmp_mat)
763 ALLOCATE (tmp_mat(nao, norextra))
764 CALL cp_fm_get_submatrix(vectors_2, tmp_mat)
765 CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, nstate + 1, nao, norextra)
766 DEALLOCATE (tmp_mat)
767
768 ! initialize c_tilde
769 SELECT CASE (icinit)
770 CASE (1) ! random coefficients
771 !WRITE (*, *) "RANDOM INITIAL GUESS FOR C"
772 CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
773 CALL cp_fm_init_random(tmp_fm, nextra)
774 CALL ortho_vectors(tmp_fm)
775 c_tilde%local_data = tmp_fm%local_data
776 CALL cp_fm_release(tmp_fm)
777 ALLOCATE (tmp_cmat(northo, nextra))
778 CALL cp_cfm_get_submatrix(c_tilde, tmp_cmat)
779 CALL cp_cfm_set_submatrix(v, tmp_cmat, nocc + 1, nocc + 1, northo, nextra)
780 DEALLOCATE (tmp_cmat)
781 CASE (2) ! MO based coeffs
782 CALL parallel_gemm("T", "N", nmo, ndummy, nao, 1.0_dp, vectors_all, mos_guess, 0.0_dp, matrix_v_all)
783 ALLOCATE (tmp_arr(nmo))
784 ALLOCATE (tmp_mat(nmo, ndummy))
785 ALLOCATE (tmp_mat_1(nmo, nstate))
786 ! normalize matrix_V_all
787 CALL cp_fm_get_submatrix(matrix_v_all, tmp_mat)
788 DO istate = 1, ndummy
789 tmp_arr(:) = tmp_mat(:, istate)
790 norm = norm2(tmp_arr)
791 tmp_arr(:) = tmp_arr(:)/norm
792 tmp_mat(:, istate) = tmp_arr(:)
793 END DO
794 CALL cp_fm_set_submatrix(matrix_v_all, tmp_mat)
795 CALL cp_fm_get_submatrix(matrix_v_all, tmp_mat_1, 1, 1, nmo, nstate)
796 CALL cp_fm_set_submatrix(matrix_v, tmp_mat_1)
797 DEALLOCATE (tmp_arr, tmp_mat, tmp_mat_1)
798 CALL cp_fm_to_cfm(msourcer=matrix_v, mtarget=v)
799 ALLOCATE (tmp_mat(northo, ndummy))
800 ALLOCATE (tmp_mat_1(northo, nextra))
801 CALL cp_fm_get_submatrix(matrix_v_all, tmp_mat, nocc + 1, 1, northo, ndummy)
802 ALLOCATE (tmp_arr(ndummy))
803 tmp_arr = 0.0_dp
804 DO istate = 1, ndummy
805 tmp_arr(istate) = norm2(tmp_mat(:, istate))
806 END DO
807 ! find edfs
808 DO istate = 1, nextra
809 iloc = maxloc(tmp_arr)
810 tmp_mat_1(:, istate) = tmp_mat(:, iloc(1))
811 tmp_arr(iloc(1)) = 0.0_dp
812 END DO
813
814 DEALLOCATE (tmp_arr, tmp_mat)
815
816 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
817 para_env=para_env, context=context)
818 CALL cp_fm_create(tmp_fm, tmp_fm_struct)
819 CALL cp_fm_struct_release(tmp_fm_struct)
820 CALL cp_fm_set_submatrix(tmp_fm, tmp_mat_1)
821 DEALLOCATE (tmp_mat_1)
822 CALL ortho_vectors(tmp_fm)
823 CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
824 CALL cp_fm_release(tmp_fm)
825 ! initialize U
826 IF (do_u_guess_mo) THEN
827 ALLOCATE (tmp_cmat(nocc, nstate))
828 CALL cp_cfm_get_submatrix(v, tmp_cmat, 1, 1, nocc, nstate)
829 CALL cp_cfm_set_submatrix(u, tmp_cmat, 1, 1, nocc, nstate)
830 DEALLOCATE (tmp_cmat)
831 ALLOCATE (tmp_cmat(northo, nstate))
832 CALL cp_cfm_get_submatrix(v, tmp_cmat, nocc + 1, 1, northo, nstate)
833 CALL cp_cfm_set_submatrix(vl, tmp_cmat, 1, 1, northo, nstate)
834 DEALLOCATE (tmp_cmat)
835 CALL parallel_gemm("C", "N", nextra, nstate, northo, cone, c_tilde, vl, czero, ul)
836 ALLOCATE (tmp_cmat(nextra, nstate))
837 CALL cp_cfm_get_submatrix(ul, tmp_cmat, 1, 1, nextra, nstate)
838 CALL cp_cfm_set_submatrix(u, tmp_cmat, nocc + 1, 1, nextra, nstate)
839 DEALLOCATE (tmp_cmat)
840 CALL cp_fm_create(tmp_fm, u%matrix_struct)
841 tmp_fm%local_data = real(u%local_data, kind=dp)
842 CALL ortho_vectors(tmp_fm)
843 CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=u)
844 CALL cp_fm_release(tmp_fm)
845 CALL cp_cfm_to_fm(u, matrix_u)
846 END IF
847 ! reevaluate V
848 ALLOCATE (tmp_cmat(nocc, nstate))
849 CALL cp_cfm_get_submatrix(u, tmp_cmat, 1, 1, nocc, nstate)
850 CALL cp_cfm_set_submatrix(v, tmp_cmat, 1, 1, nocc, nstate)
851 DEALLOCATE (tmp_cmat)
852 ALLOCATE (tmp_cmat(nextra, nstate))
853 CALL cp_cfm_get_submatrix(u, tmp_cmat, nocc + 1, 1, nextra, nstate)
854 CALL cp_cfm_set_submatrix(ul, tmp_cmat, 1, 1, nextra, nstate)
855 DEALLOCATE (tmp_cmat)
856 CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
857 ALLOCATE (tmp_cmat(northo, nstate))
858 CALL cp_cfm_get_submatrix(vl, tmp_cmat)
859 CALL cp_cfm_set_submatrix(v, tmp_cmat, nocc + 1, 1, northo, nstate)
860 DEALLOCATE (tmp_cmat)
861 END SELECT
862 ELSE
863 DO idim = 1, dim2
864 CALL cp_cfm_create(zij_0(idim), zij(1, 1)%matrix_struct)
865 CALL cp_cfm_to_cfm(c_zij(idim), zij_0(idim))
866 END DO
867 CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
868 CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
869 END IF
870
871 unit_nr = -1
872 IF (rmat%matrix_struct%para_env%is_source()) THEN
874 WRITE (unit_nr, '(T4,A )') " Localization by combined Jacobi rotations and Non-Linear Conjugate Gradient"
875 END IF
876
877 norm2_old = 1.0e30_dp
878 ds_min = 1.0_dp
879 new_direction = .true.
880 iter = 0
881 line_searches = 0
882 line_search_count = 0
883 tol = 1.0e+20_dp
884 mintol = 1.0e+10_dp
885 miniter = 0
886
887 !IF (nextra > 0) WRITE(*,*) 'random_guess, MO_guess, U_guess, conjugate_gradient: ', &
888 ! do_cinit_random, do_cinit_mo, do_U_guess_mo, do_cg
889
890 ! do conjugate gradient until converged
891 DO WHILE (iter < max_iter)
892 iter = iter + 1
893 !WRITE(*,*) 'iter = ', iter
894 t1 = m_walltime()
895
896 IF (iter > 1) THEN
897 ! comput U
898 CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
899 CALL cp_cfm_create(tmp_cfm_2, zij(1, 1)%matrix_struct)
900 IF (para_env%num_pe == 1) THEN
901 CALL jacobi_rotations_serial_1(weights, c_zij, 1, tmp_cfm_2, tol_out=tol)
902 ELSE
903 CALL jacobi_rot_para_1(weights, c_zij, para_env, 1, tmp_cfm_2, tol_out=tol)
904 END IF
905 CALL parallel_gemm('N', 'N', nstate, nstate, nstate, cone, u, tmp_cfm_2, czero, tmp_cfm)
906 CALL cp_cfm_to_cfm(tmp_cfm, u)
907 CALL cp_cfm_release(tmp_cfm)
908 CALL cp_cfm_release(tmp_cfm_2)
909 END IF
910
911 IF (nextra > 0) THEN
912 ALLOCATE (tmp_cmat(nextra, nstate))
913 CALL cp_cfm_get_submatrix(u, tmp_cmat, nocc + 1, 1, nextra, nstate)
914 CALL cp_cfm_set_submatrix(ul, tmp_cmat)
915 DEALLOCATE (tmp_cmat)
916 IF (iter > 1) THEN
917 ! orthonormalize c_tilde
918 CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
919 tmp_fm%local_data = real(c_tilde%local_data, kind=dp)
920 CALL ortho_vectors(tmp_fm)
921 CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
922 CALL cp_fm_release(tmp_fm)
923
924 ALLOCATE (tmp_cmat(nocc, nstate))
925 CALL cp_cfm_get_submatrix(u, tmp_cmat, 1, 1, nocc, nstate)
926 CALL cp_cfm_set_submatrix(v, tmp_cmat, 1, 1, nocc, nstate)
927 DEALLOCATE (tmp_cmat)
928 CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
929 ALLOCATE (tmp_cmat(northo, nstate))
930 CALL cp_cfm_get_submatrix(vl, tmp_cmat)
931 CALL cp_cfm_set_submatrix(v, tmp_cmat, nocc + 1, 1, northo, nstate)
932 DEALLOCATE (tmp_cmat)
933 END IF
934
935 ! reset if new_direction
936 IF (new_direction .AND. mod(line_searches, 20) == 5) THEN
937 CALL cp_cfm_set_all(skc, czero)
938 CALL cp_cfm_set_all(gct_old, czero)
939 norm2_old = 1.0e30_dp
940 END IF
941
942 CALL cp_cfm_create(tmp_cfm, v%matrix_struct)
943 CALL cp_cfm_to_cfm(v, tmp_cfm)
944 CALL cp_cfm_create(tmp_cfm_1, v%matrix_struct)
945 ndummy = nmo
946 ELSE
947 CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
948 CALL cp_cfm_to_cfm(u, tmp_cfm)
949 CALL cp_cfm_create(tmp_cfm_1, zij(1, 1)%matrix_struct)
950 ndummy = nstate
951 END IF
952 ! update z_ij
953 DO idim = 1, dim2
954 ! 'tmp_cfm_1 = zij_0*tmp_cfm'
955 CALL parallel_gemm("N", "N", ndummy, nstate, ndummy, cone, zij_0(idim), &
956 tmp_cfm, czero, tmp_cfm_1)
957 ! 'c_zij = tmp_cfm_dagg*tmp_cfm_1'
958 CALL parallel_gemm("C", "N", nstate, nstate, ndummy, cone, tmp_cfm, tmp_cfm_1, &
959 czero, c_zij(idim))
960 END DO
961 CALL cp_cfm_release(tmp_cfm)
962 CALL cp_cfm_release(tmp_cfm_1)
963 ! compute spread
964 DO istate = 1, nstate
965 spread_ii = 0.0_dp
966 DO idim = 1, dim2
967 CALL cp_cfm_get_element(c_zij(idim), istate, istate, mzii)
968 spread_ii = spread_ii + weights(idim)* &
969 abs(mzii)**2/twopi/twopi
970 matrix_zii(istate, idim) = mzii
971 END DO
972 !WRITE(*,*) 'spread_ii', spread_ii
973 sum_spread(istate) = spread_ii
974 END DO
975 CALL c_zij(1)%matrix_struct%para_env%sum(spread_ii)
976 spread_sum = accurate_sum(sum_spread)
977
978 IF (nextra > 0) THEN
979 ! update c_tilde
980 CALL cp_cfm_set_all(zdiag, czero)
981 CALL cp_cfm_set_all(grad_ctilde, czero)
982 CALL cp_cfm_create(tmp_cfm, v%matrix_struct)
983 CALL cp_cfm_set_all(tmp_cfm, czero)
984 CALL cp_cfm_create(tmp_cfm_1, v%matrix_struct)
985 CALL cp_cfm_set_all(tmp_cfm_1, czero)
986 ALLOCATE (tmp_cmat(northo, nstate))
987 DO idim = 1, dim2
988 weight = weights(idim)
989 arr_zii = matrix_zii(:, idim)
990 ! tmp_cfm = zij_0*V
991 CALL parallel_gemm("N", "N", nmo, nstate, nmo, cone, &
992 zij_0(idim), v, czero, tmp_cfm)
993 ! tmp_cfm = tmp_cfm*diag_zij_dagg
994 CALL cp_cfm_column_scale(tmp_cfm, conjg(arr_zii))
995 ! tmp_cfm_1 = tmp_cfm*U_dagg
996 CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
997 u, czero, tmp_cfm_1)
998 CALL cp_cfm_scale(weight, tmp_cfm_1)
999 ! zdiag = zdiag + tmp_cfm_1'
1000 CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
1001
1002 ! tmp_cfm = zij_0_dagg*V
1003 CALL parallel_gemm("C", "N", nmo, nstate, nmo, cone, &
1004 zij_0(idim), v, czero, tmp_cfm)
1005
1006 ! tmp_cfm = tmp_cfm*diag_zij
1007 CALL cp_cfm_column_scale(tmp_cfm, arr_zii)
1008 ! tmp_cfm_1 = tmp_cfm*U_dagg
1009 CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
1010 u, czero, tmp_cfm_1)
1011 CALL cp_cfm_scale(weight, tmp_cfm_1)
1012 ! zdiag = zdiag + tmp_cfm_1'
1013 CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
1014 END DO ! idim
1015 CALL cp_cfm_release(tmp_cfm)
1016 CALL cp_cfm_release(tmp_cfm_1)
1017 DEALLOCATE (tmp_cmat)
1018 ALLOCATE (tmp_cmat(northo, nextra))
1019 CALL cp_cfm_get_submatrix(zdiag, tmp_cmat, nocc + 1, nocc + 1, &
1020 northo, nextra, .false.)
1021 ! 'grad_ctilde'
1022 CALL cp_cfm_set_submatrix(grad_ctilde, tmp_cmat)
1023 DEALLOCATE (tmp_cmat)
1024 ! ctrans_lambda = c_tilde_dagg*grad_ctilde
1025 CALL parallel_gemm("C", "N", nextra, nextra, northo, cone, c_tilde, grad_ctilde, czero, ctrans_lambda)
1026 !WRITE(*,*) "norm(ctrans_lambda) = ", cp_cfm_norm(ctrans_lambda, "F")
1027 ! 'grad_ctilde = - c_tilde*ctrans_lambda + grad_ctilde'
1028 CALL parallel_gemm("N", "N", northo, nextra, nextra, -cone, c_tilde, ctrans_lambda, cone, grad_ctilde)
1029 END IF ! nextra > 0
1030
1031 ! tolerance
1032 IF (nextra > 0) THEN
1033 tolc = 0.0_dp
1034 CALL cp_fm_create(tmp_fm, grad_ctilde%matrix_struct)
1035 CALL cp_cfm_to_fm(grad_ctilde, tmp_fm)
1036 CALL cp_fm_maxabsval(tmp_fm, tolc)
1037 CALL cp_fm_release(tmp_fm)
1038 !WRITE(*,*) 'tolc = ', tolc
1039 tol = tol + tolc
1040 END IF
1041 !WRITE(*,*) 'tol = ', tol
1042
1043 IF (nextra > 0) THEN
1044 !WRITE(*,*) 'new_direction: ', new_direction
1045 IF (new_direction) THEN
1046 line_searches = line_searches + 1
1047 IF (mintol > tol) THEN
1048 mintol = tol
1049 miniter = iter
1050 END IF
1051
1052 IF (unit_nr > 0 .AND. modulo(iter, out_each) == 0) THEN
1053 sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1054 avg_spread_ii = sum_spread_ii/nstate
1055 WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1056 "Iteration", "Avg. Spread_ii", "Tolerance"
1057 WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1058 iter, avg_spread_ii, tol
1059 CALL m_flush(unit_nr)
1060 END IF
1061 IF (tol < eps_localization) EXIT
1062
1063 IF (do_cg) THEN
1064 cnorm2_gct = czero
1065 cnorm2_gct_cross = czero
1066 CALL cp_cfm_trace(grad_ctilde, gct_old, cnorm2_gct_cross)
1067 norm2_gct_cross = real(cnorm2_gct_cross, kind=dp)
1068 gct_old%local_data = grad_ctilde%local_data
1069 CALL cp_cfm_trace(grad_ctilde, gct_old, cnorm2_gct)
1070 norm2_gct = real(cnorm2_gct, kind=dp)
1071 ! compute beta_pr
1072 beta_pr = (norm2_gct - norm2_gct_cross)/norm2_old
1073 norm2_old = norm2_gct
1074 beta = max(0.0_dp, beta_pr)
1075 !WRITE(*,*) 'beta = ', beta
1076 ! compute skc / ska = beta * skc / ska + grad_ctilde / G
1077 CALL cp_cfm_scale(beta, skc)
1078 CALL cp_cfm_scale_and_add(cone, skc, cone, gct_old)
1079 CALL cp_cfm_trace(skc, gct_old, cnorm2_gct_cross)
1080 norm2_gct_cross = real(cnorm2_gct_cross, kind=dp)
1081 IF (norm2_gct_cross <= 0.0_dp) THEN ! back to steepest ascent
1082 CALL cp_cfm_scale_and_add(czero, skc, cone, gct_old)
1083 END IF
1084 ELSE
1085 CALL cp_cfm_scale_and_add(czero, skc, cone, grad_ctilde)
1086 END IF
1087 line_search_count = 0
1088 END IF
1089
1090 line_search_count = line_search_count + 1
1091 !WRITE(*,*) 'line_search_count = ', line_search_count
1092 energy(line_search_count) = spread_sum
1093
1094 ! gold line search
1095 new_direction = .false.
1096 IF (line_search_count == 1) THEN
1097 lsl = 1
1098 lsr = 0
1099 lsm = 1
1100 pos(1) = 0.0_dp
1101 pos(2) = ds_min/gold_sec
1102 ds = pos(2)
1103 ELSE
1104 IF (line_search_count == 50) THEN
1105 IF (abs(energy(line_search_count) - energy(line_search_count - 1)) < 1.0e-4_dp) THEN
1106 cpwarn("Line search failed to converge properly")
1107 ds_min = 0.1_dp
1108 new_direction = .true.
1109 ds = pos(line_search_count)
1110 line_search_count = 0
1111 ELSE
1112 cpabort("No. of line searches exceeds 50")
1113 END IF
1114 ELSE
1115 IF (lsr == 0) THEN
1116 IF (energy(line_search_count - 1) > energy(line_search_count)) THEN
1117 lsr = line_search_count
1118 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
1119 ELSE
1120 lsl = lsm
1121 lsm = line_search_count
1122 pos(line_search_count + 1) = pos(line_search_count)/gold_sec
1123 END IF
1124 ELSE
1125 IF (pos(line_search_count) < pos(lsm)) THEN
1126 IF (energy(line_search_count) > energy(lsm)) THEN
1127 lsr = lsm
1128 lsm = line_search_count
1129 ELSE
1130 lsl = line_search_count
1131 END IF
1132 ELSE
1133 IF (energy(line_search_count) > energy(lsm)) THEN
1134 lsl = lsm
1135 lsm = line_search_count
1136 ELSE
1137 lsr = line_search_count
1138 END IF
1139 END IF
1140 IF (pos(lsr) - pos(lsm) > pos(lsm) - pos(lsl)) THEN
1141 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
1142 ELSE
1143 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
1144 END IF
1145 IF ((pos(lsr) - pos(lsl)) < 1.0e-3_dp*pos(lsr)) THEN
1146 new_direction = .true.
1147 END IF
1148 END IF ! lsr .eq. 0
1149 END IF ! line_search_count .eq. 50
1150 ! now go to the suggested point
1151 ds = pos(line_search_count + 1) - pos(line_search_count)
1152 !WRITE(*,*) 'lsl, lsr, lsm, ds = ', lsl, lsr, lsm, ds
1153 IF ((abs(ds) < 1.0e-10_dp) .AND. (lsl == 1)) THEN
1154 new_direction = .true.
1155 ds_min = 0.5_dp/alpha
1156 ELSEIF (abs(ds) > 10.0_dp) THEN
1157 new_direction = .true.
1158 ds_min = 0.5_dp/alpha
1159 ELSE
1160 ds_min = pos(line_search_count + 1)
1161 END IF
1162 END IF ! first step
1163 ! 'c_tilde = c_tilde + d*skc'
1164 CALL cp_cfm_scale(ds, skc)
1165 CALL cp_cfm_scale_and_add(cone, c_tilde, cone, skc)
1166 ELSE
1167 IF (mintol > tol) THEN
1168 mintol = tol
1169 miniter = iter
1170 END IF
1171 IF (unit_nr > 0 .AND. modulo(iter, out_each) == 0) THEN
1172 sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1173 avg_spread_ii = sum_spread_ii/nstate
1174 WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1175 "Iteration", "Avg. Spread_ii", "Tolerance"
1176 WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1177 iter, avg_spread_ii, tol
1178 CALL m_flush(unit_nr)
1179 END IF
1180 IF (tol < eps_localization) EXIT
1181 END IF ! nextra > 0
1182
1183 END DO ! iteration
1184
1185 IF ((unit_nr > 0) .AND. (iter == max_iter)) THEN
1186 WRITE (unit_nr, '(T4,A,T4,A)') "Min. Itr.", "Min. Tol."
1187 WRITE (unit_nr, '(T4,I7,T4,E12.4)') miniter, mintol
1188 CALL m_flush(unit_nr)
1189 END IF
1190
1191 CALL cp_cfm_to_fm(u, matrix_u)
1192
1193 IF (nextra > 0) THEN
1194 rmat%local_data = real(v%local_data, kind=dp)
1195 CALL rotate_orbitals_edf(rmat, vectors_all, vectors)
1196
1197 CALL cp_cfm_release(c_tilde)
1198 CALL cp_cfm_release(grad_ctilde)
1199 CALL cp_cfm_release(gct_old)
1200 CALL cp_cfm_release(skc)
1201 CALL cp_cfm_release(ul)
1202 CALL cp_cfm_release(zdiag)
1203 CALL cp_cfm_release(ctrans_lambda)
1204 CALL cp_fm_release(id_nextra)
1205 CALL cp_fm_release(vectors_all)
1206 CALL cp_cfm_release(v)
1207 CALL cp_fm_release(matrix_v)
1208 CALL cp_fm_release(matrix_v_all)
1209 CALL cp_cfm_release(vl)
1210 DEALLOCATE (arr_zii)
1211 ELSE
1212 rmat%local_data = matrix_u%local_data
1213 CALL rotate_orbitals(rmat, vectors)
1214 END IF
1215 DO idim = 1, dim2
1216 CALL cp_cfm_release(zij_0(idim))
1217 END DO
1218 DEALLOCATE (zij_0)
1219
1220 DO idim = 1, dim2
1221 zij(1, idim)%local_data = real(c_zij(idim)%local_data, dp)
1222 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
1223 CALL cp_cfm_release(c_zij(idim))
1224 END DO
1225 DEALLOCATE (c_zij)
1226 CALL cp_fm_release(rmat)
1227 CALL cp_cfm_release(u)
1228 CALL cp_fm_release(matrix_u)
1229 DEALLOCATE (matrix_zii, sum_spread)
1230
1231 CALL timestop(handle)
1232
1233 END SUBROUTINE jacobi_cg_edf_ls
1234
1235! **************************************************************************************************
1236!> \brief ...
1237!> \param vmatrix ...
1238! **************************************************************************************************
1239 SUBROUTINE ortho_vectors(vmatrix)
1240
1241 TYPE(cp_fm_type), INTENT(IN) :: vmatrix
1242
1243 CHARACTER(LEN=*), PARAMETER :: routinen = 'ortho_vectors'
1244
1245 INTEGER :: handle, n, ncol
1246 TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
1247 TYPE(cp_fm_type) :: overlap_vv
1248
1249 CALL timeset(routinen, handle)
1250
1251 NULLIFY (fm_struct_tmp)
1252
1253 CALL cp_fm_get_info(matrix=vmatrix, nrow_global=n, ncol_global=ncol)
1254
1255 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ncol, ncol_global=ncol, &
1256 para_env=vmatrix%matrix_struct%para_env, &
1257 context=vmatrix%matrix_struct%context)
1258 CALL cp_fm_create(overlap_vv, fm_struct_tmp, "overlap_vv")
1259 CALL cp_fm_struct_release(fm_struct_tmp)
1260
1261 CALL parallel_gemm('T', 'N', ncol, ncol, n, 1.0_dp, vmatrix, vmatrix, 0.0_dp, overlap_vv)
1262 CALL cp_fm_cholesky_decompose(overlap_vv)
1263 CALL cp_fm_triangular_multiply(overlap_vv, vmatrix, n_cols=ncol, side='R', invert_tr=.true.)
1264
1265 CALL cp_fm_release(overlap_vv)
1266
1267 CALL timestop(handle)
1268
1269 END SUBROUTINE ortho_vectors
1270
1271! **************************************************************************************************
1272!> \brief ...
1273!> \param istate ...
1274!> \param jstate ...
1275!> \param st ...
1276!> \param ct ...
1277!> \param zij ...
1278! **************************************************************************************************
1279 SUBROUTINE rotate_zij(istate, jstate, st, ct, zij)
1280 INTEGER, INTENT(IN) :: istate, jstate
1281 REAL(kind=dp), INTENT(IN) :: st, ct
1282 TYPE(cp_cfm_type) :: zij(:)
1283
1284 INTEGER :: id
1285
1286! Locals
1287
1288 DO id = 1, SIZE(zij, 1)
1289 CALL cp_cfm_rot_cols(zij(id), istate, jstate, ct, st)
1290 CALL cp_cfm_rot_rows(zij(id), istate, jstate, ct, st)
1291 END DO
1292
1293 END SUBROUTINE rotate_zij
1294! **************************************************************************************************
1295!> \brief ...
1296!> \param istate ...
1297!> \param jstate ...
1298!> \param st ...
1299!> \param ct ...
1300!> \param rmat ...
1301! **************************************************************************************************
1302 SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat)
1303 INTEGER, INTENT(IN) :: istate, jstate
1304 REAL(kind=dp), INTENT(IN) :: st, ct
1305 TYPE(cp_cfm_type), INTENT(IN) :: rmat
1306
1307 CALL cp_cfm_rot_cols(rmat, istate, jstate, ct, st)
1308
1309 END SUBROUTINE rotate_rmat
1310! **************************************************************************************************
1311!> \brief ...
1312!> \param mii ...
1313!> \param mjj ...
1314!> \param mij ...
1315!> \param weights ...
1316!> \param theta ...
1317!> \param grad_ij ...
1318!> \param step ...
1319! **************************************************************************************************
1320 SUBROUTINE get_angle(mii, mjj, mij, weights, theta, grad_ij, step)
1321 COMPLEX(KIND=dp), POINTER :: mii(:), mjj(:), mij(:)
1322 REAL(kind=dp), INTENT(IN) :: weights(:)
1323 REAL(kind=dp), INTENT(OUT) :: theta
1324 REAL(kind=dp), INTENT(IN), OPTIONAL :: grad_ij, step
1325
1326 COMPLEX(KIND=dp) :: z11, z12, z22
1327 INTEGER :: dim_m, idim
1328 REAL(kind=dp) :: a12, b12, d2, ratio
1329
1330 a12 = 0.0_dp
1331 b12 = 0.0_dp
1332 dim_m = SIZE(mii)
1333 DO idim = 1, dim_m
1334 z11 = mii(idim)
1335 z22 = mjj(idim)
1336 z12 = mij(idim)
1337 a12 = a12 + weights(idim)*real(conjg(z12)*(z11 - z22), kind=dp)
1338 b12 = b12 + weights(idim)*real((z12*conjg(z12) - &
1339 0.25_dp*(z11 - z22)*(conjg(z11) - conjg(z22))), kind=dp)
1340 END DO
1341 IF (abs(b12) > 1.e-10_dp) THEN
1342 ratio = -a12/b12
1343 theta = 0.25_dp*atan(ratio)
1344 ELSEIF (abs(b12) < 1.e-10_dp) THEN
1345 b12 = 0.0_dp
1346 theta = 0.0_dp
1347 ELSE
1348 theta = 0.25_dp*pi
1349 END IF
1350 IF (PRESENT(grad_ij)) theta = theta + step*grad_ij
1351! Check second derivative info
1352 d2 = a12*sin(4._dp*theta) - b12*cos(4._dp*theta)
1353 IF (d2 <= 0._dp) THEN ! go to the maximum, not the minimum
1354 IF (theta > 0.0_dp) THEN ! make theta as small as possible
1355 theta = theta - 0.25_dp*pi
1356 ELSE
1357 theta = theta + 0.25_dp*pi
1358 END IF
1359 END IF
1360 END SUBROUTINE get_angle
1361! **************************************************************************************************
1362!> \brief ...
1363!> \param zij ...
1364!> \param weights ...
1365!> \param tolerance ...
1366!> \param grad ...
1367! **************************************************************************************************
1368 SUBROUTINE check_tolerance(zij, weights, tolerance, grad)
1369 TYPE(cp_cfm_type) :: zij(:)
1370 REAL(kind=dp), INTENT(IN) :: weights(:)
1371 REAL(kind=dp), INTENT(OUT) :: tolerance
1372 TYPE(cp_fm_type), INTENT(OUT), OPTIONAL :: grad
1373
1374 CHARACTER(len=*), PARAMETER :: routinen = 'check_tolerance'
1375
1376 INTEGER :: handle
1377 TYPE(cp_fm_type) :: force
1378
1379 CALL timeset(routinen, handle)
1380
1381! compute gradient at t=0
1382
1383 CALL cp_fm_create(force, zij(1)%matrix_struct)
1384 CALL cp_fm_set_all(force, 0._dp)
1385 CALL grad_at_0(zij, weights, force)
1386 CALL cp_fm_maxabsval(force, tolerance)
1387 IF (PRESENT(grad)) CALL cp_fm_to_fm(force, grad)
1388 CALL cp_fm_release(force)
1389
1390 CALL timestop(handle)
1391
1392 END SUBROUTINE check_tolerance
1393! **************************************************************************************************
1394!> \brief ...
1395!> \param rmat ...
1396!> \param vectors ...
1397! **************************************************************************************************
1398 SUBROUTINE rotate_orbitals(rmat, vectors)
1399 TYPE(cp_fm_type), INTENT(IN) :: rmat, vectors
1400
1401 INTEGER :: k, n
1402 TYPE(cp_fm_type) :: wf
1403
1404 CALL cp_fm_create(wf, vectors%matrix_struct)
1405 CALL cp_fm_get_info(vectors, nrow_global=n, ncol_global=k)
1406 CALL parallel_gemm("N", "N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf)
1407 CALL cp_fm_to_fm(wf, vectors)
1408 CALL cp_fm_release(wf)
1409 END SUBROUTINE rotate_orbitals
1410! **************************************************************************************************
1411!> \brief ...
1412!> \param rmat ...
1413!> \param vec_all ...
1414!> \param vectors ...
1415! **************************************************************************************************
1416 SUBROUTINE rotate_orbitals_edf(rmat, vec_all, vectors)
1417 TYPE(cp_fm_type), INTENT(IN) :: rmat, vec_all, vectors
1418
1419 INTEGER :: k, l, n
1420 TYPE(cp_fm_type) :: wf
1421
1422 CALL cp_fm_create(wf, vectors%matrix_struct)
1423 CALL cp_fm_get_info(vec_all, nrow_global=n, ncol_global=k)
1424 CALL cp_fm_get_info(rmat, ncol_global=l)
1425
1426 CALL parallel_gemm("N", "N", n, l, k, 1.0_dp, vec_all, rmat, 0.0_dp, wf)
1427 CALL cp_fm_to_fm(wf, vectors)
1428 CALL cp_fm_release(wf)
1429 END SUBROUTINE rotate_orbitals_edf
1430! **************************************************************************************************
1431!> \brief ...
1432!> \param diag ...
1433!> \param weights ...
1434!> \param matrix ...
1435!> \param ndim ...
1436! **************************************************************************************************
1437 SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim)
1438 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1439 REAL(kind=dp), INTENT(IN) :: weights(:)
1440 TYPE(cp_fm_type), INTENT(IN) :: matrix
1441 INTEGER, INTENT(IN) :: ndim
1442
1443 COMPLEX(KIND=dp) :: zii, zjj
1444 INTEGER :: idim, istate, jstate, ncol_local, &
1445 nrow_global, nrow_local
1446 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1447 REAL(kind=dp) :: gradsq_ij
1448
1449 CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1450 ncol_local=ncol_local, nrow_global=nrow_global, &
1451 row_indices=row_indices, col_indices=col_indices)
1452
1453 DO istate = 1, nrow_local
1454 DO jstate = 1, ncol_local
1455! get real and imaginary parts
1456 gradsq_ij = 0.0_dp
1457 DO idim = 1, ndim
1458 zii = diag(row_indices(istate), idim)
1459 zjj = diag(col_indices(jstate), idim)
1460 gradsq_ij = gradsq_ij + weights(idim)* &
1461 4.0_dp*real((conjg(zii)*zii + conjg(zjj)*zjj), kind=dp)
1462 END DO
1463 matrix%local_data(istate, jstate) = gradsq_ij
1464 END DO
1465 END DO
1466 END SUBROUTINE gradsq_at_0
1467! **************************************************************************************************
1468!> \brief ...
1469!> \param matrix_p ...
1470!> \param weights ...
1471!> \param matrix ...
1472! **************************************************************************************************
1473 SUBROUTINE grad_at_0(matrix_p, weights, matrix)
1474 TYPE(cp_cfm_type) :: matrix_p(:)
1475 REAL(kind=dp), INTENT(IN) :: weights(:)
1476 TYPE(cp_fm_type), INTENT(IN) :: matrix
1477
1478 COMPLEX(KIND=dp) :: zii, zij, zjj
1479 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1480 INTEGER :: dim_m, idim, istate, jstate, ncol_local, &
1481 nrow_global, nrow_local
1482 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1483 REAL(kind=dp) :: grad_ij
1484
1485 NULLIFY (diag)
1486 CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1487 ncol_local=ncol_local, nrow_global=nrow_global, &
1488 row_indices=row_indices, col_indices=col_indices)
1489 dim_m = SIZE(matrix_p, 1)
1490 ALLOCATE (diag(nrow_global, dim_m))
1491
1492 DO idim = 1, dim_m
1493 DO istate = 1, nrow_global
1494 CALL cp_cfm_get_element(matrix_p(idim), istate, istate, diag(istate, idim))
1495 END DO
1496 END DO
1497
1498 DO istate = 1, nrow_local
1499 DO jstate = 1, ncol_local
1500! get real and imaginary parts
1501 grad_ij = 0.0_dp
1502 DO idim = 1, dim_m
1503 zii = diag(row_indices(istate), idim)
1504 zjj = diag(col_indices(jstate), idim)
1505 zij = matrix_p(idim)%local_data(istate, jstate)
1506 grad_ij = grad_ij + weights(idim)* &
1507 REAL(4.0_dp*conjg(zij)*(zjj - zii), dp)
1508 END DO
1509 matrix%local_data(istate, jstate) = grad_ij
1510 END DO
1511 END DO
1512 DEALLOCATE (diag)
1513 END SUBROUTINE grad_at_0
1514
1515! return energy and maximum gradient in the current point
1516! **************************************************************************************************
1517!> \brief ...
1518!> \param weights ...
1519!> \param zij ...
1520!> \param tolerance ...
1521!> \param value ...
1522! **************************************************************************************************
1523 SUBROUTINE check_tolerance_new(weights, zij, tolerance, value)
1524 REAL(kind=dp), INTENT(IN) :: weights(:)
1525 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :)
1526 REAL(kind=dp) :: tolerance, value
1527
1528 COMPLEX(KIND=dp) :: kii, kij, kjj
1529 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1530 INTEGER :: idim, istate, jstate, ncol_local, &
1531 nrow_global, nrow_local
1532 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1533 REAL(kind=dp) :: grad_ij, ra, rb
1534
1535 NULLIFY (diag)
1536 CALL cp_fm_get_info(zij(1, 1), nrow_local=nrow_local, &
1537 ncol_local=ncol_local, nrow_global=nrow_global, &
1538 row_indices=row_indices, col_indices=col_indices)
1539 ALLOCATE (diag(nrow_global, SIZE(zij, 2)))
1540 value = 0.0_dp
1541 DO idim = 1, SIZE(zij, 2)
1542 DO istate = 1, nrow_global
1543 CALL cp_fm_get_element(zij(1, idim), istate, istate, ra)
1544 CALL cp_fm_get_element(zij(2, idim), istate, istate, rb)
1545 diag(istate, idim) = cmplx(ra, rb, dp)
1546 value = value + weights(idim) - weights(idim)*abs(diag(istate, idim))**2
1547 END DO
1548 END DO
1549 tolerance = 0.0_dp
1550 DO istate = 1, nrow_local
1551 DO jstate = 1, ncol_local
1552 grad_ij = 0.0_dp
1553 DO idim = 1, SIZE(zij, 2)
1554 kii = diag(row_indices(istate), idim)
1555 kjj = diag(col_indices(jstate), idim)
1556 ra = zij(1, idim)%local_data(istate, jstate)
1557 rb = zij(2, idim)%local_data(istate, jstate)
1558 kij = cmplx(ra, rb, dp)
1559 grad_ij = grad_ij + weights(idim)* &
1560 REAL(4.0_dp*conjg(kij)*(kjj - kii), dp)
1561 END DO
1562 tolerance = max(abs(grad_ij), tolerance)
1563 END DO
1564 END DO
1565 CALL zij(1, 1)%matrix_struct%para_env%max(tolerance)
1566
1567 DEALLOCATE (diag)
1568
1569 END SUBROUTINE check_tolerance_new
1570
1571! **************************************************************************************************
1572!> \brief yet another crazy try, computes the angles needed to rotate the orbitals first
1573!> and rotates them all at the same time (hoping for the best of course)
1574!> \param weights ...
1575!> \param zij ...
1576!> \param vectors ...
1577!> \param max_iter ...
1578!> \param max_crazy_angle ...
1579!> \param crazy_scale ...
1580!> \param crazy_use_diag ...
1581!> \param eps_localization ...
1582!> \param iterations ...
1583!> \param converged ...
1584! **************************************************************************************************
1585 SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, &
1586 eps_localization, iterations, converged)
1587 REAL(kind=dp), INTENT(IN) :: weights(:)
1588 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1589 INTEGER, INTENT(IN) :: max_iter
1590 REAL(kind=dp), INTENT(IN) :: max_crazy_angle
1591 REAL(kind=dp) :: crazy_scale
1592 LOGICAL :: crazy_use_diag
1593 REAL(kind=dp), INTENT(IN) :: eps_localization
1594 INTEGER :: iterations
1595 LOGICAL, INTENT(out), OPTIONAL :: converged
1596
1597 CHARACTER(len=*), PARAMETER :: routinen = 'crazy_rotations'
1598 COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1599 czero = (0.0_dp, 0.0_dp)
1600
1601 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1602 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1603 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
1604 INTEGER :: dim2, handle, i, icol, idim, irow, &
1605 method, ncol_global, ncol_local, &
1606 norder, nrow_global, nrow_local, &
1607 nsquare, unit_nr
1608 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1609 LOGICAL :: do_emd
1610 REAL(kind=dp) :: eps_exp, limit_crazy_angle, maxeval, &
1611 norm, ra, rb, theta, tolerance, value
1612 REAL(kind=dp), DIMENSION(:), POINTER :: evals
1613 TYPE(cp_cfm_type) :: cmat_a, cmat_r, cmat_t1
1614 TYPE(cp_fm_type) :: mat_r, mat_t, mat_theta, mat_u
1615
1616 CALL timeset(routinen, handle)
1617 NULLIFY (row_indices, col_indices)
1618 CALL cp_fm_get_info(zij(1, 1), nrow_global=nrow_global, &
1619 ncol_global=ncol_global, &
1620 row_indices=row_indices, col_indices=col_indices, &
1621 nrow_local=nrow_local, ncol_local=ncol_local)
1622
1623 limit_crazy_angle = max_crazy_angle
1624
1625 NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj)
1626 dim2 = SIZE(zij, 2)
1627 ALLOCATE (diag_z(nrow_global, dim2))
1628 ALLOCATE (evals(nrow_global))
1629 ALLOCATE (evals_exp(nrow_global))
1630
1631 CALL cp_cfm_create(cmat_a, zij(1, 1)%matrix_struct)
1632 CALL cp_cfm_create(cmat_r, zij(1, 1)%matrix_struct)
1633 CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1634
1635 CALL cp_fm_create(mat_u, zij(1, 1)%matrix_struct)
1636 CALL cp_fm_create(mat_t, zij(1, 1)%matrix_struct)
1637 CALL cp_fm_create(mat_r, zij(1, 1)%matrix_struct)
1638
1639 CALL cp_fm_create(mat_theta, zij(1, 1)%matrix_struct)
1640
1641 CALL cp_fm_set_all(mat_r, 0.0_dp, 1.0_dp)
1642 CALL cp_fm_set_all(mat_t, 0.0_dp)
1643 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
1644 DO idim = 1, dim2
1645 CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(1, idim))
1646 CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(2, idim))
1647 END DO
1648 CALL cp_fm_syevd(mat_t, mat_u, evals)
1649 DO idim = 1, dim2
1650 ! rotate z's
1651 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1652 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1653 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1654 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1655 END DO
1656 ! collect rotation matrix
1657 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1658 CALL cp_fm_to_fm(mat_t, mat_r)
1659
1660 unit_nr = -1
1661 IF (cmat_a%matrix_struct%para_env%is_source()) THEN
1663 WRITE (unit_nr, '(T2,A7,A6,1X,A20,A12,A12,A12)') &
1664 "CRAZY| ", "Iter", "value ", "gradient", "Max. eval", "limit"
1665 END IF
1666
1667 iterations = 0
1668 tolerance = 1.0_dp
1669
1670 DO
1671 iterations = iterations + 1
1672 DO idim = 1, dim2
1673 DO i = 1, nrow_global
1674 CALL cp_fm_get_element(zij(1, idim), i, i, ra)
1675 CALL cp_fm_get_element(zij(2, idim), i, i, rb)
1676 diag_z(i, idim) = cmplx(ra, rb, dp)
1677 END DO
1678 END DO
1679 DO irow = 1, nrow_local
1680 DO icol = 1, ncol_local
1681 DO idim = 1, dim2
1682 ra = zij(1, idim)%local_data(irow, icol)
1683 rb = zij(2, idim)%local_data(irow, icol)
1684 mij(idim) = cmplx(ra, rb, dp)
1685 mii(idim) = diag_z(row_indices(irow), idim)
1686 mjj(idim) = diag_z(col_indices(icol), idim)
1687 END DO
1688 IF (row_indices(irow) /= col_indices(icol)) THEN
1689 CALL get_angle(mii, mjj, mij, weights, theta)
1690 theta = crazy_scale*theta
1691 IF (theta > limit_crazy_angle) theta = limit_crazy_angle
1692 IF (theta < -limit_crazy_angle) theta = -limit_crazy_angle
1693 IF (crazy_use_diag) THEN
1694 cmat_a%local_data(irow, icol) = -cmplx(0.0_dp, theta, dp)
1695 ELSE
1696 mat_theta%local_data(irow, icol) = -theta
1697 END IF
1698 ELSE
1699 IF (crazy_use_diag) THEN
1700 cmat_a%local_data(irow, icol) = czero
1701 ELSE
1702 mat_theta%local_data(irow, icol) = 0.0_dp
1703 END IF
1704 END IF
1705 END DO
1706 END DO
1707
1708 ! construct rotation matrix U based on A using diagonalization
1709 ! alternatively, exp based on repeated squaring could be faster
1710 IF (crazy_use_diag) THEN
1711 CALL cp_cfm_heevd(cmat_a, cmat_r, evals)
1712 maxeval = maxval(abs(evals))
1713 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1714 CALL cp_cfm_to_cfm(cmat_r, cmat_t1)
1715 CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1716 CALL parallel_gemm('N', 'C', nrow_global, nrow_global, nrow_global, cone, &
1717 cmat_t1, cmat_r, czero, cmat_a)
1718 mat_u%local_data = real(cmat_a%local_data, kind=dp) ! U is a real matrix
1719 ELSE
1720 do_emd = .false.
1721 method = 2
1722 eps_exp = 1.0_dp*epsilon(eps_exp)
1723 CALL cp_fm_maxabsrownorm(mat_theta, norm)
1724 maxeval = norm ! an upper bound
1725 CALL get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
1726 CALL exp_pade_real(mat_u, mat_theta, nsquare, norder)
1727 END IF
1728
1729 DO idim = 1, dim2
1730 ! rotate z's
1731 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1732 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1733 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1734 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1735 END DO
1736 ! collect rotation matrix
1737 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1738 CALL cp_fm_to_fm(mat_t, mat_r)
1739
1740 CALL check_tolerance_new(weights, zij, tolerance, value)
1741
1742 IF (unit_nr > 0) THEN
1743 WRITE (unit_nr, '(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') &
1744 "CRAZY| ", iterations, value, tolerance, maxeval, limit_crazy_angle
1745 CALL m_flush(unit_nr)
1746 END IF
1747 IF (tolerance < eps_localization .OR. iterations >= max_iter) EXIT
1748 END DO
1749
1750 IF (PRESENT(converged)) converged = (tolerance < eps_localization)
1751
1752 CALL cp_cfm_release(cmat_a)
1753 CALL cp_cfm_release(cmat_r)
1754 CALL cp_cfm_release(cmat_t1)
1755
1756 CALL cp_fm_release(mat_u)
1757 CALL cp_fm_release(mat_t)
1758 CALL cp_fm_release(mat_theta)
1759
1760 CALL rotate_orbitals(mat_r, vectors)
1761
1762 CALL cp_fm_release(mat_r)
1763 DEALLOCATE (evals_exp, evals, diag_z)
1764 DEALLOCATE (mii, mij, mjj)
1765
1766 CALL timestop(handle)
1767
1768 END SUBROUTINE crazy_rotations
1769
1770! **************************************************************************************************
1771!> \brief use the exponential parametrization as described in to perform a direct mini
1772!> Gerd Berghold et al. PRB 61 (15), pag. 10040 (2000)
1773!> none of the input is modified for the time being, just finds the rotations
1774!> that minimizes, and throws it away afterwards :-)
1775!> apart from being expensive and not cleaned, this works fine
1776!> useful to try different spread functionals
1777!> \param weights ...
1778!> \param zij ...
1779!> \param vectors ...
1780!> \param max_iter ...
1781!> \param eps_localization ...
1782!> \param iterations ...
1783! **************************************************************************************************
1784 SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
1785 REAL(kind=dp), INTENT(IN) :: weights(:)
1786 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1787 INTEGER, INTENT(IN) :: max_iter
1788 REAL(kind=dp), INTENT(IN) :: eps_localization
1789 INTEGER :: iterations
1790
1791 CHARACTER(len=*), PARAMETER :: routinen = 'direct_mini'
1792 COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1793 czero = (0.0_dp, 0.0_dp)
1794 REAL(kind=dp), PARAMETER :: gold_sec = 0.3819_dp
1795
1796 COMPLEX(KIND=dp) :: lk, ll, tmp
1797 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1798 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1799 INTEGER :: handle, i, icol, idim, irow, &
1800 line_search_count, line_searches, lsl, &
1801 lsm, lsr, n, ncol_local, ndim, &
1802 nrow_local, output_unit
1803 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1804 LOGICAL :: new_direction
1805 REAL(kind=dp) :: a, b, beta_pr, c, denom, ds, ds_min, fa, &
1806 fb, fc, nom, normg, normg_cross, &
1807 normg_old, npos, omega, tol, val, x0, &
1808 x1, xa, xb, xc
1809 REAL(kind=dp), DIMENSION(150) :: energy, grad, pos
1810 REAL(kind=dp), DIMENSION(:), POINTER :: evals, fval, fvald
1811 TYPE(cp_cfm_type) :: cmat_a, cmat_b, cmat_m, cmat_r, cmat_t1, &
1812 cmat_t2, cmat_u
1813 TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
1814 TYPE(cp_fm_type) :: matrix_a, matrix_g, matrix_g_old, &
1815 matrix_g_search, matrix_h, matrix_r, &
1816 matrix_t
1817
1818 NULLIFY (evals, evals_exp, diag_z, fval, fvald)
1819
1820 CALL timeset(routinen, handle)
1821 output_unit = cp_logger_get_default_io_unit()
1822
1823 n = zij(1, 1)%matrix_struct%nrow_global
1824 ndim = (SIZE(zij, 2))
1825
1826 IF (output_unit > 0) THEN
1827 WRITE (output_unit, '(T4,A )') "Localization by direct minimization of the functional; "
1828 WRITE (output_unit, '(T5,2A13,A20,A20,A10 )') " Line search ", " Iteration ", " Functional ", " Tolerance ", " ds Min "
1829 END IF
1830
1831 ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n))
1832 ALLOCATE (c_zij(ndim))
1833
1834 ! create the three complex matrices Z
1835 DO idim = 1, ndim
1836 CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
1837 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
1838 zij(2, idim)%local_data, dp)
1839 END DO
1840
1841 CALL cp_fm_create(matrix_a, zij(1, 1)%matrix_struct)
1842 CALL cp_fm_create(matrix_g, zij(1, 1)%matrix_struct)
1843 CALL cp_fm_create(matrix_t, zij(1, 1)%matrix_struct)
1844 CALL cp_fm_create(matrix_h, zij(1, 1)%matrix_struct)
1845 CALL cp_fm_create(matrix_g_search, zij(1, 1)%matrix_struct)
1846 CALL cp_fm_create(matrix_g_old, zij(1, 1)%matrix_struct)
1847 CALL cp_fm_create(matrix_r, zij(1, 1)%matrix_struct)
1848 CALL cp_fm_set_all(matrix_r, 0.0_dp, 1.0_dp)
1849
1850 CALL cp_fm_set_all(matrix_a, 0.0_dp)
1851! CALL cp_fm_init_random ( matrix_A )
1852
1853 CALL cp_cfm_create(cmat_a, zij(1, 1)%matrix_struct)
1854 CALL cp_cfm_create(cmat_u, zij(1, 1)%matrix_struct)
1855 CALL cp_cfm_create(cmat_r, zij(1, 1)%matrix_struct)
1856 CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1857 CALL cp_cfm_create(cmat_t2, zij(1, 1)%matrix_struct)
1858 CALL cp_cfm_create(cmat_b, zij(1, 1)%matrix_struct)
1859 CALL cp_cfm_create(cmat_m, zij(1, 1)%matrix_struct)
1860
1861 CALL cp_cfm_get_info(cmat_b, nrow_local=nrow_local, ncol_local=ncol_local, &
1862 row_indices=row_indices, col_indices=col_indices)
1863
1864 CALL cp_fm_set_all(matrix_g_old, 0.0_dp)
1865 CALL cp_fm_set_all(matrix_g_search, 0.0_dp)
1866 normg_old = 1.0e30_dp
1867 ds_min = 1.0_dp
1868 new_direction = .true.
1869 iterations = 0
1870 line_searches = 0
1871 line_search_count = 0
1872 DO
1873 iterations = iterations + 1
1874 ! compute U,R,evals given A
1875 cmat_a%local_data = cmplx(0.0_dp, matrix_a%local_data, dp) ! cmat_A is hermitian, evals are reals
1876 CALL cp_cfm_heevd(cmat_a, cmat_r, evals)
1877 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1878 CALL cp_cfm_to_cfm(cmat_r, cmat_t1)
1879 CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1880 CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_u)
1881 cmat_u%local_data = real(cmat_u%local_data, kind=dp) ! enforce numerics, U is a real matrix
1882
1883 IF (new_direction .AND. mod(line_searches, 20) == 5) THEN ! reset with A .eq. 0
1884 DO idim = 1, ndim
1885 CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1)
1886 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_u, cmat_t1, czero, c_zij(idim))
1887 END DO
1888 ! collect rotation matrix
1889 matrix_h%local_data = real(cmat_u%local_data, kind=dp)
1890 CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
1891 CALL cp_fm_to_fm(matrix_t, matrix_r)
1892
1893 CALL cp_cfm_set_all(cmat_u, czero, cone)
1894 CALL cp_cfm_set_all(cmat_r, czero, cone)
1895 CALL cp_cfm_set_all(cmat_a, czero)
1896 CALL cp_fm_set_all(matrix_a, 0.0_dp)
1897 evals(:) = 0.0_dp
1898 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1899 CALL cp_fm_set_all(matrix_g_old, 0.0_dp)
1900 CALL cp_fm_set_all(matrix_g_search, 0.0_dp)
1901 normg_old = 1.0e30_dp
1902 END IF
1903
1904 ! compute Omega and M
1905 CALL cp_cfm_set_all(cmat_m, czero)
1906 omega = 0.0_dp
1907 DO idim = 1, ndim
1908 CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1) ! t1=ZU
1909 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_u, cmat_t1, czero, cmat_t2) ! t2=(U^T)ZU
1910 DO i = 1, n
1911 CALL cp_cfm_get_element(cmat_t2, i, i, diag_z(i, idim))
1912 SELECT CASE (2) ! allows for selection of different spread functionals
1913 CASE (1)
1914 fval(i) = -weights(idim)*log(abs(diag_z(i, idim))**2)
1915 fvald(i) = -weights(idim)/(abs(diag_z(i, idim))**2)
1916 CASE (2) ! corresponds to the jacobi setup
1917 fval(i) = weights(idim) - weights(idim)*abs(diag_z(i, idim))**2
1918 fvald(i) = -weights(idim)
1919 END SELECT
1920 omega = omega + fval(i)
1921 END DO
1922 DO icol = 1, ncol_local
1923 DO irow = 1, nrow_local
1924 tmp = cmat_t1%local_data(irow, icol)*conjg(diag_z(col_indices(icol), idim))
1925 cmat_m%local_data(irow, icol) = cmat_m%local_data(irow, icol) &
1926 + 4.0_dp*fvald(col_indices(icol))*real(tmp, kind=dp)
1927 END DO
1928 END DO
1929 END DO
1930
1931 ! compute Hessian diagonal approximation for the preconditioner
1932 IF (.true.) THEN
1933 CALL gradsq_at_0(diag_z, weights, matrix_h, ndim)
1934 ELSE
1935 CALL cp_fm_set_all(matrix_h, 1.0_dp)
1936 END IF
1937
1938 ! compute B
1939 DO icol = 1, ncol_local
1940 DO irow = 1, nrow_local
1941 ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow))
1942 lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol))
1943 IF (abs(ll - lk) < 0.5_dp) THEN ! use a series expansion to avoid loss of precision
1944 tmp = 1.0_dp
1945 cmat_b%local_data(irow, icol) = 0.0_dp
1946 DO i = 1, 16
1947 cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol) + tmp
1948 tmp = tmp*(ll - lk)/(i + 1)
1949 END DO
1950 cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol)*exp(lk)
1951 ELSE
1952 cmat_b%local_data(irow, icol) = (exp(lk) - exp(ll))/(lk - ll)
1953 END IF
1954 END DO
1955 END DO
1956 ! compute gradient matrix_G
1957
1958 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_m, cmat_r, czero, cmat_t1) ! t1=(M^T)(R^T)
1959 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_r, cmat_t1, czero, cmat_t2) ! t2=(R)t1
1960 CALL cp_cfm_schur_product(cmat_t2, cmat_b, cmat_t1)
1961 CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_t2)
1962 CALL parallel_gemm('N', 'N', n, n, n, cone, cmat_r, cmat_t2, czero, cmat_t1)
1963 matrix_g%local_data = real(cmat_t1%local_data, kind=dp)
1964 CALL cp_fm_transpose(matrix_g, matrix_t)
1965 CALL cp_fm_scale_and_add(-1.0_dp, matrix_g, 1.0_dp, matrix_t)
1966 CALL cp_fm_maxabsval(matrix_g, tol)
1967
1968 ! from here on, minimizing technology
1969 IF (new_direction) THEN
1970 ! energy converged up to machine precision ?
1971 line_searches = line_searches + 1
1972 ! DO i=1,line_search_count
1973 ! write(15,*) pos(i),energy(i)
1974 ! ENDDO
1975 ! write(15,*) ""
1976 ! CALL m_flush(15)
1977 !write(16,*) evals(:)
1978 !write(17,*) matrix_A%local_data(:,:)
1979 !write(18,*) matrix_G%local_data(:,:)
1980 IF (output_unit > 0) THEN
1981 WRITE (output_unit, '(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, iterations, omega, tol, ds_min
1982 CALL m_flush(output_unit)
1983 END IF
1984 IF (tol < eps_localization .OR. iterations > max_iter) EXIT
1985
1986 IF (.true.) THEN ! do conjugate gradient CG
1987 CALL cp_fm_trace(matrix_g, matrix_g_old, normg_cross)
1988 normg_cross = normg_cross*0.5_dp ! takes into account the fact that A is antisymmetric
1989 ! apply the preconditioner
1990 DO icol = 1, ncol_local
1991 DO irow = 1, nrow_local
1992 matrix_g_old%local_data(irow, icol) = matrix_g%local_data(irow, icol)/matrix_h%local_data(irow, icol)
1993 END DO
1994 END DO
1995 CALL cp_fm_trace(matrix_g, matrix_g_old, normg)
1996 normg = normg*0.5_dp
1997 beta_pr = (normg - normg_cross)/normg_old
1998 normg_old = normg
1999 beta_pr = max(beta_pr, 0.0_dp)
2000 CALL cp_fm_scale_and_add(beta_pr, matrix_g_search, -1.0_dp, matrix_g_old)
2001 CALL cp_fm_trace(matrix_g_search, matrix_g_old, normg_cross)
2002 IF (normg_cross >= 0) THEN ! back to SD
2003 IF (matrix_a%matrix_struct%para_env%is_source()) THEN
2004 WRITE (cp_logger_get_default_unit_nr(), *) "!"
2005 END IF
2006 beta_pr = 0.0_dp
2007 CALL cp_fm_scale_and_add(beta_pr, matrix_g_search, -1.0_dp, matrix_g_old)
2008 END IF
2009 ELSE ! SD
2010 CALL cp_fm_scale_and_add(0.0_dp, matrix_g_search, -1.0_dp, matrix_g)
2011 END IF
2012 ! ds_min=1.0E-4_dp
2013 line_search_count = 0
2014 END IF
2015 line_search_count = line_search_count + 1
2016 energy(line_search_count) = omega
2017
2018 ! line search section
2019 SELECT CASE (3)
2020 CASE (1) ! two point line search
2021 SELECT CASE (line_search_count)
2022 CASE (1)
2023 pos(1) = 0.0_dp
2024 pos(2) = ds_min
2025 CALL cp_fm_trace(matrix_g, matrix_g_search, grad(1))
2026 grad(1) = grad(1)/2.0_dp
2027 new_direction = .false.
2028 CASE (2)
2029 new_direction = .true.
2030 x0 = pos(1) ! 0.0_dp
2031 c = energy(1)
2032 b = grad(1)
2033 x1 = pos(2)
2034 a = (energy(2) - b*x1 - c)/(x1**2)
2035 IF (a <= 0.0_dp) a = 1.0e-15_dp
2036 npos = -b/(2.0_dp*a)
2037 val = a*npos**2 + b*npos + c
2038 IF (val < energy(1) .AND. val <= energy(2)) THEN
2039 ! we go to a minimum, but ...
2040 ! we take a guard against too large steps
2041 pos(3) = min(npos, maxval(pos(1:2))*4.0_dp)
2042 ELSE ! just take an extended step
2043 pos(3) = maxval(pos(1:2))*2.0_dp
2044 END IF
2045 END SELECT
2046 CASE (2) ! 3 point line search
2047 SELECT CASE (line_search_count)
2048 CASE (1)
2049 new_direction = .false.
2050 pos(1) = 0.0_dp
2051 pos(2) = ds_min*0.8_dp
2052 CASE (2)
2053 new_direction = .false.
2054 IF (energy(2) > energy(1)) THEN
2055 pos(3) = ds_min*0.7_dp
2056 ELSE
2057 pos(3) = ds_min*1.4_dp
2058 END IF
2059 CASE (3)
2060 new_direction = .true.
2061 xa = pos(1)
2062 xb = pos(2)
2063 xc = pos(3)
2064 fa = energy(1)
2065 fb = energy(2)
2066 fc = energy(3)
2067 nom = (xb - xa)**2*(fb - fc) - (xb - xc)**2*(fb - fa)
2068 denom = (xb - xa)*(fb - fc) - (xb - xc)*(fb - fa)
2069 IF (abs(denom) <= 1.0e-18_dp*max(abs(fb - fc), abs(fb - fa))) THEN
2070 npos = xb
2071 ELSE
2072 npos = xb - 0.5_dp*nom/denom ! position of the stationary point
2073 END IF
2074 val = (npos - xa)*(npos - xb)*fc/((xc - xa)*(xc - xb)) + &
2075 (npos - xb)*(npos - xc)*fa/((xa - xb)*(xa - xc)) + &
2076 (npos - xc)*(npos - xa)*fb/((xb - xc)*(xb - xa))
2077 IF (val < fa .AND. val <= fb .AND. val <= fc) THEN ! OK, we go to a minimum
2078 ! we take a guard against too large steps
2079 pos(4) = max(maxval(pos(1:3))*0.01_dp, &
2080 min(npos, maxval(pos(1:3))*4.0_dp))
2081 ELSE ! just take an extended step
2082 pos(4) = maxval(pos(1:3))*2.0_dp
2083 END IF
2084 END SELECT
2085 CASE (3) ! golden section hunt
2086 new_direction = .false.
2087 IF (line_search_count == 1) THEN
2088 lsl = 1
2089 lsr = 0
2090 lsm = 1
2091 pos(1) = 0.0_dp
2092 pos(2) = ds_min/gold_sec
2093 ELSE
2094 IF (line_search_count == 150) cpabort("Too many")
2095 IF (lsr == 0) THEN
2096 IF (energy(line_search_count - 1) < energy(line_search_count)) THEN
2097 lsr = line_search_count
2098 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
2099 ELSE
2100 lsl = lsm
2101 lsm = line_search_count
2102 pos(line_search_count + 1) = pos(line_search_count)/gold_sec
2103 END IF
2104 ELSE
2105 IF (pos(line_search_count) < pos(lsm)) THEN
2106 IF (energy(line_search_count) < energy(lsm)) THEN
2107 lsr = lsm
2108 lsm = line_search_count
2109 ELSE
2110 lsl = line_search_count
2111 END IF
2112 ELSE
2113 IF (energy(line_search_count) < energy(lsm)) THEN
2114 lsl = lsm
2115 lsm = line_search_count
2116 ELSE
2117 lsr = line_search_count
2118 END IF
2119 END IF
2120 IF (pos(lsr) - pos(lsm) > pos(lsm) - pos(lsl)) THEN
2121 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
2122 ELSE
2123 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
2124 END IF
2125 IF ((pos(lsr) - pos(lsl)) < 1.0e-3_dp*pos(lsr)) THEN
2126 new_direction = .true.
2127 END IF
2128 END IF ! lsr .eq. 0
2129 END IF ! first step
2130 END SELECT
2131 ! now go to the suggested point
2132 ds_min = pos(line_search_count + 1)
2133 ds = pos(line_search_count + 1) - pos(line_search_count)
2134 CALL cp_fm_scale_and_add(1.0_dp, matrix_a, ds, matrix_g_search)
2135 END DO
2136
2137 ! collect rotation matrix
2138 matrix_h%local_data = real(cmat_u%local_data, kind=dp)
2139 CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
2140 CALL cp_fm_to_fm(matrix_t, matrix_r)
2141 CALL rotate_orbitals(matrix_r, vectors)
2142 CALL cp_fm_release(matrix_r)
2143
2144 CALL cp_fm_release(matrix_a)
2145 CALL cp_fm_release(matrix_g)
2146 CALL cp_fm_release(matrix_h)
2147 CALL cp_fm_release(matrix_t)
2148 CALL cp_fm_release(matrix_g_search)
2149 CALL cp_fm_release(matrix_g_old)
2150 CALL cp_cfm_release(cmat_a)
2151 CALL cp_cfm_release(cmat_u)
2152 CALL cp_cfm_release(cmat_r)
2153 CALL cp_cfm_release(cmat_t1)
2154 CALL cp_cfm_release(cmat_t2)
2155 CALL cp_cfm_release(cmat_b)
2156 CALL cp_cfm_release(cmat_m)
2157
2158 DEALLOCATE (evals, evals_exp, fval, fvald)
2159
2160 DO idim = 1, SIZE(c_zij)
2161 zij(1, idim)%local_data = real(c_zij(idim)%local_data, dp)
2162 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
2163 CALL cp_cfm_release(c_zij(idim))
2164 END DO
2165 DEALLOCATE (c_zij)
2166 DEALLOCATE (diag_z)
2167
2168 CALL timestop(handle)
2169
2170 END SUBROUTINE direct_mini
2171
2172! **************************************************************************************************
2173!> \brief Parallel algorithm for jacobi rotations
2174!> \param weights ...
2175!> \param zij ...
2176!> \param vectors ...
2177!> \param para_env ...
2178!> \param max_iter ...
2179!> \param eps_localization ...
2180!> \param sweeps ...
2181!> \param out_each ...
2182!> \param target_time ...
2183!> \param start_time ...
2184!> \param restricted ...
2185!> \par History
2186!> use allgather for improved performance
2187!> \author MI (11.2009)
2188! **************************************************************************************************
2189 SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
2190 sweeps, out_each, target_time, start_time, restricted)
2191
2192 REAL(kind=dp), INTENT(IN) :: weights(:)
2193 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
2194 TYPE(mp_para_env_type), POINTER :: para_env
2195 INTEGER, INTENT(IN) :: max_iter
2196 REAL(kind=dp), INTENT(IN) :: eps_localization
2197 INTEGER :: sweeps
2198 INTEGER, INTENT(IN) :: out_each
2199 REAL(dp) :: target_time, start_time
2200 INTEGER :: restricted
2201
2202 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rot_para'
2203
2204 INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2205 nblock, nblock_max, ns_me, nstate, &
2206 output_unit
2207 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2208 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_im, z_ij_loc_re
2209 REAL(kind=dp) :: xstate
2210 TYPE(cp_fm_type) :: rmat
2211 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2212
2213 CALL timeset(routinen, handle)
2214
2215 output_unit = cp_logger_get_default_io_unit()
2216
2217 NULLIFY (cz_ij_loc)
2218
2219 dim2 = SIZE(zij, 2)
2220
2221 CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
2222 CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
2223
2224 CALL cp_fm_get_info(rmat, nrow_global=nstate)
2225
2226 IF (restricted > 0) THEN
2227 IF (output_unit > 0) THEN
2228 WRITE (output_unit, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
2229 END IF
2230 nstate = nstate - restricted
2231 END IF
2232
2233 ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2234 xstate = real(nstate, dp)/real(para_env%num_pe, dp)
2235 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2236 DO ip = 1, para_env%num_pe
2237 ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2238 ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2239 END DO
2240 nblock_max = 0
2241 DO ip = 0, para_env%num_pe - 1
2242 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2243 nblock_max = max(nblock_max, nblock)
2244 END DO
2245
2246 ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2247 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2248 ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2249 ALLOCATE (cz_ij_loc(dim2))
2250 DO idim = 1, dim2
2251 DO ip = 0, para_env%num_pe - 1
2252 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2253 CALL cp_fm_get_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2254 CALL cp_fm_get_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2255 IF (para_env%mepos == ip) THEN
2256 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, nblock))
2257 DO i = 1, nblock
2258 DO j = 1, nstate
2259 cz_ij_loc(idim)%c_array(j, i) = cmplx(z_ij_loc_re(j, i), z_ij_loc_im(j, i), dp)
2260 END DO
2261 END DO
2262 END IF
2263 END DO ! ip
2264 END DO
2265 DEALLOCATE (z_ij_loc_re)
2266 DEALLOCATE (z_ij_loc_im)
2267
2268 ALLOCATE (rotmat(nstate, 2*nblock_max))
2269
2270 CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, ns_bound, &
2271 cz_ij_loc, rotmat, output_unit, eps_localization=eps_localization, &
2272 target_time=target_time, start_time=start_time)
2273
2274 ilow1 = ns_bound(para_env%mepos, 1)
2275 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2276 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2277 ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2278 DO idim = 1, dim2
2279 DO ip = 0, para_env%num_pe - 1
2280 z_ij_loc_re = 0.0_dp
2281 z_ij_loc_im = 0.0_dp
2282 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2283 IF (ip == para_env%mepos) THEN
2284 ns_me = nblock
2285 DO i = 1, ns_me
2286 ii = ilow1 + i - 1
2287 DO j = 1, nstate
2288 z_ij_loc_re(j, i) = real(cz_ij_loc(idim)%c_array(j, i), dp)
2289 z_ij_loc_im(j, i) = aimag(cz_ij_loc(idim)%c_array(j, i))
2290 END DO
2291 END DO
2292 END IF
2293 CALL para_env%bcast(z_ij_loc_re, ip)
2294 CALL para_env%bcast(z_ij_loc_im, ip)
2295 CALL cp_fm_set_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2296 CALL cp_fm_set_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2297 END DO ! ip
2298 END DO
2299
2300 DO ip = 0, para_env%num_pe - 1
2301 z_ij_loc_re = 0.0_dp
2302 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2303 IF (ip == para_env%mepos) THEN
2304 ns_me = nblock
2305 DO i = 1, ns_me
2306 ii = ilow1 + i - 1
2307 DO j = 1, nstate
2308 z_ij_loc_re(j, i) = rotmat(j, i)
2309 END DO
2310 END DO
2311 END IF
2312 CALL para_env%bcast(z_ij_loc_re, ip)
2313 CALL cp_fm_set_submatrix(rmat, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2314 END DO
2315
2316 DEALLOCATE (z_ij_loc_re)
2317 DEALLOCATE (z_ij_loc_im)
2318 DO idim = 1, dim2
2319 DEALLOCATE (cz_ij_loc(idim)%c_array)
2320 END DO
2321 DEALLOCATE (cz_ij_loc)
2322
2323 CALL para_env%sync()
2324 CALL rotate_orbitals(rmat, vectors)
2325 CALL cp_fm_release(rmat)
2326
2327 DEALLOCATE (rotmat)
2328 DEALLOCATE (ns_bound)
2329
2330 CALL timestop(handle)
2331
2332 END SUBROUTINE jacobi_rot_para
2333
2334! **************************************************************************************************
2335!> \brief almost identical to 'jacobi_rot_para' but with different inout variables
2336!> \param weights ...
2337!> \param czij ...
2338!> \param para_env ...
2339!> \param max_iter ...
2340!> \param rmat ...
2341!> \param tol_out ...
2342!> \author Soumya Ghosh (08/21)
2343! **************************************************************************************************
2344 SUBROUTINE jacobi_rot_para_1(weights, czij, para_env, max_iter, rmat, tol_out)
2345
2346 REAL(kind=dp), INTENT(IN) :: weights(:)
2347 TYPE(cp_cfm_type), INTENT(IN) :: czij(:)
2348 TYPE(mp_para_env_type), POINTER :: para_env
2349 INTEGER, INTENT(IN) :: max_iter
2350 TYPE(cp_cfm_type), INTENT(IN) :: rmat
2351 REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2352
2353 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rot_para_1'
2354
2355 COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: czij_array
2356 INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2357 nblock, nblock_max, ns_me, nstate, &
2358 sweeps
2359 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2360 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_re
2361 REAL(kind=dp) :: xstate
2362 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2363
2364 CALL timeset(routinen, handle)
2365
2366 dim2 = SIZE(czij)
2367
2368 CALL cp_cfm_set_all(rmat, cmplx(0._dp, 0._dp, dp), cmplx(1._dp, 0._dp, dp))
2369
2370 CALL cp_cfm_get_info(rmat, nrow_global=nstate)
2371
2372 ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2373 xstate = real(nstate, dp)/real(para_env%num_pe, dp)
2374 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2375 DO ip = 1, para_env%num_pe
2376 ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2377 ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2378 END DO
2379 nblock_max = 0
2380 DO ip = 0, para_env%num_pe - 1
2381 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2382 nblock_max = max(nblock_max, nblock)
2383 END DO
2384
2385 ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2386 ALLOCATE (czij_array(nstate, nblock_max))
2387 ALLOCATE (cz_ij_loc(dim2))
2388 DO idim = 1, dim2
2389 DO ip = 0, para_env%num_pe - 1
2390 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2391 ! cfm --> allocatable
2392 CALL cp_cfm_get_submatrix(czij(idim), czij_array, 1, ns_bound(ip, 1), nstate, nblock)
2393 IF (para_env%mepos == ip) THEN
2394 ns_me = nblock
2395 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me))
2396 DO i = 1, ns_me
2397 DO j = 1, nstate
2398 cz_ij_loc(idim)%c_array(j, i) = czij_array(j, i)
2399 END DO
2400 END DO
2401 END IF
2402 END DO ! ip
2403 END DO
2404 DEALLOCATE (czij_array)
2405
2406 ALLOCATE (rotmat(nstate, 2*nblock_max))
2407
2408 CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, 1, dim2, nstate, nblock_max, ns_bound, &
2409 cz_ij_loc, rotmat, 0, tol_out=tol_out)
2410
2411 ilow1 = ns_bound(para_env%mepos, 1)
2412 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2413 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2414
2415 DO ip = 0, para_env%num_pe - 1
2416 z_ij_loc_re = 0.0_dp
2417 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2418 IF (ip == para_env%mepos) THEN
2419 ns_me = nblock
2420 DO i = 1, ns_me
2421 ii = ilow1 + i - 1
2422 DO j = 1, nstate
2423 z_ij_loc_re(j, i) = rotmat(j, i)
2424 END DO
2425 END DO
2426 END IF
2427 CALL para_env%bcast(z_ij_loc_re, ip)
2428 CALL cp_cfm_set_submatrix(rmat, cmplx(z_ij_loc_re, 0.0_dp, dp), 1, ns_bound(ip, 1), nstate, nblock)
2429 END DO
2430
2431 DEALLOCATE (z_ij_loc_re)
2432 DO idim = 1, dim2
2433 DEALLOCATE (cz_ij_loc(idim)%c_array)
2434 END DO
2435 DEALLOCATE (cz_ij_loc)
2436
2437 CALL para_env%sync()
2438
2439 DEALLOCATE (rotmat)
2440 DEALLOCATE (ns_bound)
2441
2442 CALL timestop(handle)
2443
2444 END SUBROUTINE jacobi_rot_para_1
2445
2446! **************************************************************************************************
2447!> \brief Parallel algorithm for jacobi rotations
2448!> \param weights ...
2449!> \param para_env ...
2450!> \param max_iter ...
2451!> \param sweeps ...
2452!> \param out_each ...
2453!> \param dim2 ...
2454!> \param nstate ...
2455!> \param nblock_max ...
2456!> \param ns_bound ...
2457!> \param cz_ij_loc ...
2458!> \param rotmat ...
2459!> \param output_unit ...
2460!> \param tol_out ...
2461!> \param eps_localization ...
2462!> \param target_time ...
2463!> \param start_time ...
2464!> \par History
2465!> split out to reuse with different input types
2466!> \author HF (05.2022)
2467! **************************************************************************************************
2468 SUBROUTINE jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, &
2469 ns_bound, cz_ij_loc, rotmat, output_unit, tol_out, eps_localization, target_time, start_time)
2470
2471 REAL(kind=dp), INTENT(IN) :: weights(:)
2472 TYPE(mp_para_env_type), POINTER :: para_env
2473 INTEGER, INTENT(IN) :: max_iter
2474 INTEGER, INTENT(OUT) :: sweeps
2475 INTEGER, INTENT(IN) :: out_each, dim2, nstate, nblock_max
2476 INTEGER, DIMENSION(0:, :), INTENT(IN) :: ns_bound
2477 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2478 REAL(dp), DIMENSION(:, :), INTENT(OUT) :: rotmat
2479 INTEGER, INTENT(IN) :: output_unit
2480 REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2481 REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_localization
2482 REAL(dp), OPTIONAL :: target_time, start_time
2483
2484 COMPLEX(KIND=dp) :: zi, zj
2485 COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: c_array_me, c_array_partner
2486 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
2487 INTEGER :: i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, ip, ip_has_i, &
2488 ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, iu1, iu2, iup1, &
2489 iup2, j, jj, jstate, k, kk, lsweep, n1, n2, npair, nperm, ns_me, ns_partner, &
2490 ns_recv_from, ns_recv_partner
2491 INTEGER, ALLOCATABLE, DIMENSION(:) :: rcount, rdispl
2492 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: list_pair
2493 LOGICAL :: should_stop
2494 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: gmat, rmat_loc, rmat_recv, rmat_send
2495 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: rmat_recv_all
2496 REAL(kind=dp) :: ct, func, gmax, grad, ri, rj, st, t1, &
2497 t2, theta, tolerance, zc, zr
2498 TYPE(set_c_1d_type), DIMENSION(:), POINTER :: zdiag_all, zdiag_me
2499 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: xyz_mix, xyz_mix_ns
2500
2501 NULLIFY (zdiag_all, zdiag_me)
2502 NULLIFY (xyz_mix, xyz_mix_ns)
2503 NULLIFY (mii, mij, mjj)
2504
2505 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
2506
2507 ALLOCATE (rcount(para_env%num_pe), stat=istat)
2508 ALLOCATE (rdispl(para_env%num_pe), stat=istat)
2509
2510 tolerance = 1.0e10_dp
2511 sweeps = 0
2512
2513 ! number of processor pairs and number of permutations
2514 npair = (para_env%num_pe + 1)/2
2515 nperm = para_env%num_pe - mod(para_env%num_pe + 1, 2)
2516 ALLOCATE (list_pair(2, npair))
2517
2518 ! initialize rotation matrix
2519 rotmat = 0.0_dp
2520 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2521 ii = i - ns_bound(para_env%mepos, 1) + 1
2522 rotmat(i, ii) = 1.0_dp
2523 END DO
2524
2525 ALLOCATE (xyz_mix(dim2))
2526 ALLOCATE (xyz_mix_ns(dim2))
2527 ALLOCATE (zdiag_me(dim2))
2528 ALLOCATE (zdiag_all(dim2))
2529
2530 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2531 IF (ns_me /= 0) THEN
2532 ALLOCATE (c_array_me(nstate, ns_me, dim2))
2533 DO idim = 1, dim2
2534 ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me))
2535 END DO
2536 ALLOCATE (gmat(nstate, ns_me))
2537 END IF
2538
2539 DO idim = 1, dim2
2540 ALLOCATE (zdiag_me(idim)%c_array(nblock_max))
2541 zdiag_me(idim)%c_array = z_zero
2542 ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max))
2543 zdiag_all(idim)%c_array = z_zero
2544 END DO
2545 ALLOCATE (rmat_recv(nblock_max*2, nblock_max))
2546 ALLOCATE (rmat_send(nblock_max*2, nblock_max))
2547
2548 ! buffer for message passing
2549 ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1))
2550
2551 IF (output_unit > 0) THEN
2552 WRITE (output_unit, '(T4,A )') " Localization by iterative distributed Jacobi rotation"
2553 WRITE (output_unit, '(T20,A12,T32, A22,T60, A12,A8 )') "Iteration", "Functional", "Tolerance", " Time "
2554 END IF
2555
2556 DO lsweep = 1, max_iter + 1
2557 sweeps = lsweep
2558 IF (sweeps == max_iter + 1) THEN
2559 IF (output_unit > 0) THEN
2560 WRITE (output_unit, *) ' LOCALIZATION! loop did not converge within the maximum number of iterations.'
2561 WRITE (output_unit, *) ' Present Max. gradient = ', tolerance
2562 END IF
2563 EXIT
2564 END IF
2565 t1 = m_walltime()
2566
2567 DO iperm = 1, nperm
2568
2569 ! fix partners for this permutation, and get the number of states
2570 CALL eberlein(iperm, para_env, list_pair)
2571 ip_partner = -1
2572 ns_partner = 0
2573 DO ipair = 1, npair
2574 IF (list_pair(1, ipair) == para_env%mepos) THEN
2575 ip_partner = list_pair(2, ipair)
2576 EXIT
2577 ELSE IF (list_pair(2, ipair) == para_env%mepos) THEN
2578 ip_partner = list_pair(1, ipair)
2579 EXIT
2580 END IF
2581 END DO
2582 IF (ip_partner >= 0) THEN
2583 ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1
2584 ELSE
2585 ns_partner = 0
2586 END IF
2587
2588 ! if there is a non-zero block connecting two partners, jacobi-sweep it.
2589 IF (ns_partner*ns_me /= 0) THEN
2590
2591 ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner))
2592 rmat_loc = 0.0_dp
2593 DO i = 1, ns_me + ns_partner
2594 rmat_loc(i, i) = 1.0_dp
2595 END DO
2596
2597 ALLOCATE (c_array_partner(nstate, ns_partner, dim2))
2598
2599 DO idim = 1, dim2
2600 ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner))
2601 DO i = 1, ns_me
2602 c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i)
2603 END DO
2604 END DO
2605
2606 CALL para_env%sendrecv(msgin=c_array_me, dest=ip_partner, &
2607 msgout=c_array_partner, source=ip_partner)
2608
2609 n1 = ns_me
2610 n2 = ns_partner
2611 ilow1 = ns_bound(para_env%mepos, 1)
2612 iup1 = ns_bound(para_env%mepos, 1) + n1 - 1
2613 ilow2 = ns_bound(ip_partner, 1)
2614 iup2 = ns_bound(ip_partner, 1) + n2 - 1
2615 IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1)) THEN
2616 il1 = 1
2617 iu1 = n1
2618 iu1 = n1
2619 il2 = 1 + n1
2620 iu2 = n1 + n2
2621 ELSE
2622 il1 = 1 + n2
2623 iu1 = n1 + n2
2624 iu1 = n1 + n2
2625 il2 = 1
2626 iu2 = n2
2627 END IF
2628
2629 DO idim = 1, dim2
2630 DO i = 1, n1
2631 xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim)
2632 xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim)
2633 END DO
2634 DO i = 1, n2
2635 xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim)
2636 xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim)
2637 END DO
2638 END DO
2639
2640 DO istate = 1, n1 + n2
2641 DO jstate = istate + 1, n1 + n2
2642 DO idim = 1, dim2
2643 mii(idim) = xyz_mix(idim)%c_array(istate, istate)
2644 mij(idim) = xyz_mix(idim)%c_array(istate, jstate)
2645 mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate)
2646 END DO
2647 CALL get_angle(mii, mjj, mij, weights, theta)
2648 st = sin(theta)
2649 ct = cos(theta)
2650 DO idim = 1, dim2
2651 DO i = 1, n1 + n2
2652 zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate)
2653 zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate)
2654 xyz_mix(idim)%c_array(i, istate) = zi
2655 xyz_mix(idim)%c_array(i, jstate) = zj
2656 END DO
2657 DO i = 1, n1 + n2
2658 zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i)
2659 zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i)
2660 xyz_mix(idim)%c_array(istate, i) = zi
2661 xyz_mix(idim)%c_array(jstate, i) = zj
2662 END DO
2663 END DO
2664
2665 DO i = 1, n1 + n2
2666 ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate)
2667 rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate)
2668 rmat_loc(i, istate) = ri
2669 rmat_loc(i, jstate) = rj
2670 END DO
2671 END DO
2672 END DO
2673
2674 k = nblock_max + 1
2675 CALL para_env%sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, &
2676 rotmat(1:nstate, k:k + n2 - 1), ip_partner)
2677
2678 IF (ilow1 < ilow2) THEN
2679 ! no longer compiles in official sdgb:
2680 !CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1, k), nstate, rmat_loc(1 + n1, 1), n1 + n2, 0.0_dp, gmat, nstate)
2681 ! probably inefficient:
2682 CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, rmat_loc(1 + n1:, 1:n1), &
2683 n2, 0.0_dp, gmat(:, :), nstate)
2684 CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(1:, 1:), &
2685 n1 + n2, 1.0_dp, gmat(:, :), nstate)
2686 ELSE
2687 CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, &
2688 rmat_loc(1:, n2 + 1:), n1 + n2, 0.0_dp, gmat(:, :), nstate)
2689 ! no longer compiles in official sdgb:
2690 !CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1, 1), nstate, rmat_loc(n2 + 1, n2 + 1), n1 + n2, 1.0_dp, gmat, nstate)
2691 ! probably inefficient:
2692 CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(n2 + 1:, n2 + 1:), &
2693 n1, 1.0_dp, gmat(:, :), nstate)
2694 END IF
2695
2696 CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1)
2697
2698 DO idim = 1, dim2
2699 DO i = 1, n1
2700 xyz_mix_ns(idim)%c_array(1:nstate, i) = z_zero
2701 END DO
2702
2703 DO istate = 1, n1
2704 DO jstate = 1, nstate
2705 DO i = 1, n2
2706 xyz_mix_ns(idim)%c_array(jstate, istate) = &
2707 xyz_mix_ns(idim)%c_array(jstate, istate) + &
2708 c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1)
2709 END DO
2710 END DO
2711 END DO
2712 DO istate = 1, n1
2713 DO jstate = 1, nstate
2714 DO i = 1, n1
2715 xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + &
2716 c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1)
2717 END DO
2718 END DO
2719 END DO
2720 END DO ! idim
2721
2722 DEALLOCATE (c_array_partner)
2723
2724 ELSE ! save my data
2725 DO idim = 1, dim2
2726 DO i = 1, ns_me
2727 xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i)
2728 END DO
2729 END DO
2730 END IF
2731
2732 DO idim = 1, dim2
2733 DO i = 1, ns_me
2734 cz_ij_loc(idim)%c_array(1:nstate, i) = z_zero
2735 END DO
2736 END DO
2737
2738 IF (ns_partner*ns_me /= 0) THEN
2739 ! transpose rotation matrix rmat_loc
2740 DO i = 1, ns_me + ns_partner
2741 DO j = i + 1, ns_me + ns_partner
2742 ri = rmat_loc(i, j)
2743 rmat_loc(i, j) = rmat_loc(j, i)
2744 rmat_loc(j, i) = ri
2745 END DO
2746 END DO
2747
2748 ! prepare for distribution
2749 DO i = 1, n1
2750 rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1)
2751 END DO
2752 ik = nblock_max
2753 DO i = 1, n2
2754 rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1)
2755 END DO
2756 ELSE
2757 rmat_send = 0.0_dp
2758 END IF
2759
2760 ! collect data from all tasks (this takes some significant time)
2761 CALL para_env%allgather(rmat_send, rmat_recv_all)
2762
2763 ! update blocks everywhere
2764 DO ip = 0, para_env%num_pe - 1
2765
2766 ip_recv_from = mod(para_env%mepos - ip + para_env%num_pe, para_env%num_pe)
2767 rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from)
2768
2769 ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1
2770
2771 IF (ns_me /= 0) THEN
2772 IF (ns_recv_from /= 0) THEN
2773 !look for the partner of ip_recv_from
2774 ip_recv_partner = -1
2775 ns_recv_partner = 0
2776 DO ipair = 1, npair
2777 IF (list_pair(1, ipair) == ip_recv_from) THEN
2778 ip_recv_partner = list_pair(2, ipair)
2779 EXIT
2780 ELSE IF (list_pair(2, ipair) == ip_recv_from) THEN
2781 ip_recv_partner = list_pair(1, ipair)
2782 EXIT
2783 END IF
2784 END DO
2785
2786 IF (ip_recv_partner >= 0) THEN
2787 ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1
2788 END IF
2789 IF (ns_recv_partner > 0) THEN
2790 il1 = ns_bound(para_env%mepos, 1)
2791 il_recv = ns_bound(ip_recv_from, 1)
2792 il_recv_partner = ns_bound(ip_recv_partner, 1)
2793 ik = nblock_max
2794
2795 DO idim = 1, dim2
2796 DO i = 1, ns_recv_from
2797 ii = il_recv + i - 1
2798 DO j = 1, ns_me
2799 jj = j
2800 DO k = 1, ns_recv_from
2801 kk = il_recv + k - 1
2802 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2803 rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2804 END DO
2805 END DO
2806 END DO
2807 DO i = 1, ns_recv_from
2808 ii = il_recv + i - 1
2809 DO j = 1, ns_me
2810 jj = j
2811 DO k = 1, ns_recv_partner
2812 kk = il_recv_partner + k - 1
2813 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2814 rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2815 END DO
2816 END DO
2817 END DO
2818 END DO ! idim
2819 ELSE
2820 il1 = ns_bound(para_env%mepos, 1)
2821 il_recv = ns_bound(ip_recv_from, 1)
2822 DO idim = 1, dim2
2823 DO j = 1, ns_me
2824 jj = j
2825 DO i = 1, ns_recv_from
2826 ii = il_recv + i - 1
2827 cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j)
2828 END DO
2829 END DO
2830 END DO ! idim
2831 END IF
2832 END IF
2833 END IF ! ns_me
2834 END DO ! ip
2835
2836 IF (ns_partner*ns_me /= 0) THEN
2837 DEALLOCATE (rmat_loc)
2838 DO idim = 1, dim2
2839 DEALLOCATE (xyz_mix(idim)%c_array)
2840 END DO
2841 END IF
2842
2843 END DO ! iperm
2844
2845 ! calculate the max gradient
2846 DO idim = 1, dim2
2847 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2848 ii = i - ns_bound(para_env%mepos, 1) + 1
2849 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2850 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2851 END DO
2852 rcount(:) = SIZE(zdiag_me(idim)%c_array)
2853 rdispl(1) = 0
2854 DO ip = 2, para_env%num_pe
2855 rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
2856 END DO
2857 ! collect all the diagonal elements in a replicated 1d array
2858 CALL para_env%allgatherv(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl)
2859 END DO
2860
2861 gmax = 0.0_dp
2862 DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2863 k = j - ns_bound(para_env%mepos, 1) + 1
2864 DO i = 1, j - 1
2865 ! find the location of the diagonal element (i,i)
2866 DO ip = 0, para_env%num_pe - 1
2867 IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2)) THEN
2868 ip_has_i = ip
2869 EXIT
2870 END IF
2871 END DO
2872 ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1
2873 ! mepos has the diagonal element (j,j), as well as the off diagonal (i,j)
2874 jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1
2875 grad = 0.0_dp
2876 DO idim = 1, dim2
2877 zi = zdiag_all(idim)%c_array(ii)
2878 zj = zdiag_all(idim)%c_array(jj)
2879 grad = grad + weights(idim)*real(4.0_dp*conjg(cz_ij_loc(idim)%c_array(i, k))*(zj - zi), dp)
2880 END DO
2881 gmax = max(gmax, abs(grad))
2882 END DO
2883 END DO
2884
2885 CALL para_env%max(gmax)
2886 tolerance = gmax
2887 IF (PRESENT(tol_out)) tol_out = tolerance
2888
2889 func = 0.0_dp
2890 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2891 k = i - ns_bound(para_env%mepos, 1) + 1
2892 DO idim = 1, dim2
2893 zr = real(cz_ij_loc(idim)%c_array(i, k), dp)
2894 zc = aimag(cz_ij_loc(idim)%c_array(i, k))
2895 func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/twopi/twopi
2896 END DO
2897 END DO
2898 CALL para_env%sum(func)
2899 t2 = m_walltime()
2900
2901 IF (output_unit > 0 .AND. modulo(sweeps, out_each) == 0) THEN
2902 WRITE (output_unit, '(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1
2903 CALL m_flush(output_unit)
2904 END IF
2905 IF (PRESENT(eps_localization)) THEN
2906 IF (tolerance < eps_localization) EXIT
2907 END IF
2908 IF (PRESENT(target_time) .AND. PRESENT(start_time)) THEN
2909 CALL external_control(should_stop, "LOC", target_time=target_time, start_time=start_time)
2910 IF (should_stop) EXIT
2911 END IF
2912
2913 END DO ! lsweep
2914
2915 ! buffer for message passing
2916 DEALLOCATE (rmat_recv_all)
2917
2918 DEALLOCATE (rmat_recv)
2919 DEALLOCATE (rmat_send)
2920 IF (ns_me > 0) THEN
2921 DEALLOCATE (c_array_me)
2922 END IF
2923 DO idim = 1, dim2
2924 DEALLOCATE (zdiag_me(idim)%c_array)
2925 DEALLOCATE (zdiag_all(idim)%c_array)
2926 END DO
2927 DEALLOCATE (zdiag_me)
2928 DEALLOCATE (zdiag_all)
2929 DEALLOCATE (xyz_mix)
2930 DO idim = 1, dim2
2931 IF (ns_me /= 0) THEN
2932 DEALLOCATE (xyz_mix_ns(idim)%c_array)
2933 END IF
2934 END DO
2935 DEALLOCATE (xyz_mix_ns)
2936 IF (ns_me /= 0) THEN
2937 DEALLOCATE (gmat)
2938 END IF
2939 DEALLOCATE (mii)
2940 DEALLOCATE (mij)
2941 DEALLOCATE (mjj)
2942 DEALLOCATE (list_pair)
2943
2944 END SUBROUTINE jacobi_rot_para_core
2945
2946! **************************************************************************************************
2947!> \brief ...
2948!> \param iperm ...
2949!> \param para_env ...
2950!> \param list_pair ...
2951! **************************************************************************************************
2952 SUBROUTINE eberlein(iperm, para_env, list_pair)
2953 INTEGER, INTENT(IN) :: iperm
2954 TYPE(mp_para_env_type), POINTER :: para_env
2955 INTEGER, DIMENSION(:, :) :: list_pair
2956
2957 INTEGER :: i, ii, jj, npair
2958
2959 npair = (para_env%num_pe + 1)/2
2960 IF (iperm == 1) THEN
2961!..set up initial ordering
2962 DO i = 0, para_env%num_pe - 1
2963 ii = ((i + 1) + 1)/2
2964 jj = mod((i + 1) + 1, 2) + 1
2965 list_pair(jj, ii) = i
2966 END DO
2967 IF (mod(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1
2968 ELSEIF (mod(iperm, 2) == 0) THEN
2969!..a type shift
2970 jj = list_pair(1, npair)
2971 DO i = npair, 3, -1
2972 list_pair(1, i) = list_pair(1, i - 1)
2973 END DO
2974 list_pair(1, 2) = list_pair(2, 1)
2975 list_pair(2, 1) = jj
2976 ELSE
2977!..b type shift
2978 jj = list_pair(2, 1)
2979 DO i = 1, npair - 1
2980 list_pair(2, i) = list_pair(2, i + 1)
2981 END DO
2982 list_pair(2, npair) = jj
2983 END IF
2984
2985 END SUBROUTINE eberlein
2986
2987! **************************************************************************************************
2988!> \brief ...
2989!> \param vectors ...
2990!> \param op_sm_set ...
2991!> \param zij_fm_set ...
2992! **************************************************************************************************
2993 SUBROUTINE zij_matrix(vectors, op_sm_set, zij_fm_set)
2994
2995 TYPE(cp_fm_type), INTENT(IN) :: vectors
2996 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: op_sm_set
2997 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: zij_fm_set
2998
2999 CHARACTER(len=*), PARAMETER :: routinen = 'zij_matrix'
3000
3001 INTEGER :: handle, i, j, nao, nmoloc
3002 TYPE(cp_fm_type) :: opvec
3003
3004 CALL timeset(routinen, handle)
3005
3006 ! get rows and cols of the input
3007 CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc)
3008 ! replicate the input kind of matrix
3009 CALL cp_fm_create(opvec, vectors%matrix_struct)
3010
3011 ! Compute zij here
3012 DO i = 1, SIZE(zij_fm_set, 2)
3013 DO j = 1, SIZE(zij_fm_set, 1)
3014 CALL cp_fm_set_all(zij_fm_set(j, i), 0.0_dp)
3015 CALL cp_dbcsr_sm_fm_multiply(op_sm_set(j, i)%matrix, vectors, opvec, ncol=nmoloc)
3016 CALL parallel_gemm("T", "N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, &
3017 zij_fm_set(j, i))
3018 END DO
3019 END DO
3020
3021 CALL cp_fm_release(opvec)
3022 CALL timestop(handle)
3023
3024 END SUBROUTINE zij_matrix
3025
3026! **************************************************************************************************
3027!> \brief ...
3028!> \param vectors ...
3029! **************************************************************************************************
3030 SUBROUTINE scdm_qrfact(vectors)
3031
3032 TYPE(cp_fm_type), INTENT(IN) :: vectors
3033
3034 CHARACTER(len=*), PARAMETER :: routinen = 'scdm_qrfact'
3035
3036 INTEGER :: handle, ncolt, nrowt
3037 REAL(kind=dp), DIMENSION(:), POINTER :: tau
3038 TYPE(cp_fm_struct_type), POINTER :: cstruct
3039 TYPE(cp_fm_type) :: ctp, qf, tmp
3040
3041 CALL timeset(routinen, handle)
3042
3043 ! Create Transpose of Coefficient Matrix vectors
3044 nrowt = vectors%matrix_struct%ncol_global
3045 ncolt = vectors%matrix_struct%nrow_global
3046
3047 CALL cp_fm_struct_create(cstruct, template_fmstruct=vectors%matrix_struct, &
3048 nrow_global=nrowt, ncol_global=ncolt)
3049 CALL cp_fm_create(ctp, cstruct)
3050 CALL cp_fm_struct_release(cstruct)
3051
3052 ALLOCATE (tau(nrowt))
3053
3054 CALL cp_fm_transpose(vectors, ctp)
3055
3056 ! Get QR decomposition of CTs
3057 CALL cp_fm_pdgeqpf(ctp, tau, nrowt, ncolt, 1, 1)
3058
3059 ! Construction of Q from the scalapack output
3060 CALL cp_fm_struct_create(cstruct, para_env=ctp%matrix_struct%para_env, &
3061 context=ctp%matrix_struct%context, nrow_global=ctp%matrix_struct%nrow_global, &
3062 ncol_global=ctp%matrix_struct%nrow_global)
3063 CALL cp_fm_create(qf, cstruct)
3064 CALL cp_fm_struct_release(cstruct)
3065 CALL cp_fm_to_fm_submat(ctp, qf, nrowt, nrowt, 1, 1, 1, 1)
3066
3067 ! Get Q
3068 CALL cp_fm_pdorgqr(qf, tau, nrowt, 1, 1)
3069
3070 ! Transform original coefficient matrix vectors
3071 CALL cp_fm_create(tmp, vectors%matrix_struct)
3072 CALL cp_fm_set_all(tmp, 0.0_dp, 1.0_dp)
3073 CALL cp_fm_to_fm(vectors, tmp)
3074 CALL parallel_gemm('N', 'N', ncolt, nrowt, nrowt, 1.0_dp, tmp, qf, 0.0_dp, vectors)
3075
3076 ! Cleanup
3077 CALL cp_fm_release(ctp)
3078 CALL cp_fm_release(tmp)
3079 CALL cp_fm_release(qf)
3080 DEALLOCATE (tau)
3081
3082 CALL timestop(handle)
3083
3084 END SUBROUTINE scdm_qrfact
3085
3086END MODULE qs_localization_methods
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
Handles all functions related to the CELL.
Definition cell_types.F:15
methods related to the blacs parallel environment
Basic linear algebra operations for complex full matrices.
subroutine, public cp_cfm_scale_and_add(alpha, matrix_a, beta, matrix_b)
Scale and add two BLACS matrices (a = alpha*a + beta*b).
subroutine, public cp_cfm_rot_rows(matrix, irow, jrow, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
subroutine, public cp_cfm_schur_product(matrix_a, matrix_b, matrix_c)
Computes the element-wise (Schur) product of two matrices: C = A \circ B .
subroutine, public cp_cfm_column_scale(matrix_a, scaling)
Scales columns of the full matrix by corresponding factors.
subroutine, public cp_cfm_rot_cols(matrix, icol, jcol, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
subroutine, public cp_cfm_trace(matrix_a, matrix_b, trace)
Returns the trace of matrix_a^T matrix_b, i.e sum_{i,j}(matrix_a(i,j)*matrix_b(i,j)) .
used for collecting diagonalization schemes available for cp_cfm_type
Definition cp_cfm_diag.F:14
subroutine, public cp_cfm_heevd(matrix, eigenvectors, eigenvalues)
Perform a diagonalisation of a complex matrix.
Definition cp_cfm_diag.F:60
Represents a complex full matrix distributed on many processors.
subroutine, public cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
Extract a sub-matrix from the full matrix: op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n...
subroutine, public cp_cfm_get_element(matrix, irow_global, icol_global, alpha)
Get the matrix element by its global index.
subroutine, public cp_cfm_release(matrix)
Releases a full matrix.
subroutine, public cp_fm_to_cfm(msourcer, msourcei, mtarget)
Construct a complex full matrix by taking its real and imaginary parts from two separate real-value f...
subroutine, public cp_cfm_create(matrix, matrix_struct, name, nrow, ncol, set_zero)
Creates a new full matrix with the given structure.
subroutine, public cp_cfm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, matrix_struct, para_env)
Returns information about a full matrix.
subroutine, public cp_cfm_set_submatrix(matrix, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
Set a sub-matrix of the full matrix: matrix(start_row:start_row+n_rows,start_col:start_col+n_cols) = ...
subroutine, public cp_cfm_set_all(matrix, alpha, beta)
Set all elements of the full matrix to alpha. Besides, set all diagonal matrix elements to beta (if g...
subroutine, public cp_cfm_to_fm(msource, mtargetr, mtargeti)
Copy real and imaginary parts of a complex full matrix into separate real-value full matrices.
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_sm_fm_multiply(matrix, fm_in, fm_out, ncol, alpha, beta)
multiply a dbcsr with a fm matrix
Routines to handle the external control of CP2K.
subroutine, public external_control(should_stop, flag, globenv, target_time, start_time, force_check)
External manipulations during a run : when the <PROJECT_NAME>.EXIT_$runtype command is sent the progr...
Basic linear algebra operations for full matrices.
subroutine, public cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
compute a QR factorization with column pivoting of a M-by-N distributed matrix sub( A ) = A(IA:IA+M-1...
real(kind=dp) function, public cp_fm_frobenius_norm(matrix_a)
computes the Frobenius norm of matrix_a
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_scale(alpha, matrix_a)
scales a matrix matrix_a = alpha * matrix_b
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
subroutine, public cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1) with orthonormal column...
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_decompose(matrix, n, info_out)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
Definition cp_fm_diag.F:17
subroutine, public cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
Computes all eigenvalues and vectors of a real symmetric matrix significantly faster than syevx,...
Definition cp_fm_diag.F:443
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp, nrow, ncol, set_zero)
creates a new full matrix with the given structure
subroutine, public cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
returns an element of a fm this value is valid on every cpu using this call is expensive
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_maxabsrownorm(matrix, a_max)
find the maximum over the rows of the sum of the absolute values of the elements of a given row = || ...
subroutine, public cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
copy just a part ot the matrix
subroutine, public cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
find the maximum absolute value of the matrix element maxval(abs(matrix))
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_init_random(matrix, ncol, start_col)
fills a matrix with random numbers
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Definition kahan_sum.F:29
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition machine.F:136
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition machine.F:153
Definition of mathematical constants and functions.
real(kind=dp), parameter, public pi
real(kind=dp), parameter, public twopi
complex(kind=dp), parameter, public z_zero
Routines for calculating a complex matrix exponential.
Definition matrix_exp.F:13
subroutine, public exp_pade_real(exp_h, matrix, nsquare, npade)
exponential of a real matrix, calculated using pade approximation together with scaling and squaring
Definition matrix_exp.F:570
subroutine, public get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
optimization function for pade/taylor order and number of squaring steps
Definition matrix_exp.F:244
Interface to the message passing library MPI.
basic linear algebra operations for full matrixes
Localization methods such as 2x2 Jacobi rotations Steepest Decents Conjugate Gradient.
subroutine, public approx_l1_norm_sd(c, iterations, eps, converged, sweeps)
...
subroutine, public rotate_orbitals(rmat, vectors)
...
subroutine, public jacobi_rotations(weights, zij, vectors, para_env, max_iter, eps_localization, sweeps, out_each, target_time, start_time, restricted)
wrapper for the jacobi routines, should be removed if jacobi_rot_para can deal with serial para_envs.
subroutine, public zij_matrix(vectors, op_sm_set, zij_fm_set)
...
subroutine, public direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
use the exponential parametrization as described in to perform a direct mini Gerd Berghold et al....
subroutine, public initialize_weights(cell, weights)
...
subroutine, public crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, eps_localization, iterations, converged)
yet another crazy try, computes the angles needed to rotate the orbitals first and rotates them all a...
subroutine, public scdm_qrfact(vectors)
...
subroutine, public jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
combine jacobi rotations (serial) and conjugate gradient with golden section line search for partiall...
Exchange and Correlation functional calculations.
Definition xc.F:17
Type defining parameters related to the simulation cell.
Definition cell_types.F:60
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
Represent a complex full matrix.
keeps the information about the structure of a full matrix
represent a full matrix
stores all the informations relevant to an mpi environment