(git:374b731)
Loading...
Searching...
No Matches
qs_localization_methods.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Localization methods such as 2x2 Jacobi rotations
10!> Steepest Decents
11!> Conjugate Gradient
12!> \par History
13!> Initial parallellization of jacobi (JVDV 07.2003)
14!> direct minimization using exponential parametrization (JVDV 09.2003)
15!> crazy rotations go fast (JVDV 10.2003)
16!> \author CJM (04.2003)
17! **************************************************************************************************
19 USE cell_types, ONLY: cell_type
28 USE cp_cfm_diag, ONLY: cp_cfm_heevd
29 USE cp_cfm_types, ONLY: &
44 USE cp_fm_diag, ONLY: cp_fm_syevd
49 USE cp_fm_types, ONLY: &
55 USE dbcsr_api, ONLY: dbcsr_p_type
56 USE kahan_sum, ONLY: accurate_sum
57 USE kinds, ONLY: dp
58 USE machine, ONLY: m_flush,&
60 USE mathconstants, ONLY: pi,&
61 twopi
62 USE matrix_exp, ONLY: exp_pade_real,&
66#include "./base/base_uses.f90"
67
68 IMPLICIT NONE
72
73 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_localization_methods'
74
75 PRIVATE
76
77 TYPE set_c_1d_type
78 COMPLEX(KIND=dp), POINTER, DIMENSION(:) :: c_array
79 END TYPE
80 TYPE set_c_2d_type
81 COMPLEX(KIND=dp), POINTER, DIMENSION(:, :) :: c_array
82 END TYPE
83
84CONTAINS
85! **************************************************************************************************
86!> \brief ...
87!> \param C ...
88!> \param iterations ...
89!> \param eps ...
90!> \param converged ...
91!> \param sweeps ...
92! **************************************************************************************************
93 SUBROUTINE approx_l1_norm_sd(C, iterations, eps, converged, sweeps)
94 TYPE(cp_fm_type), INTENT(IN) :: c
95 INTEGER, INTENT(IN) :: iterations
96 REAL(kind=dp), INTENT(IN) :: eps
97 LOGICAL, INTENT(INOUT) :: converged
98 INTEGER, INTENT(INOUT) :: sweeps
99
100 CHARACTER(len=*), PARAMETER :: routinen = 'approx_l1_norm_sd'
101 INTEGER, PARAMETER :: taylor_order = 100
102 REAL(kind=dp), PARAMETER :: alpha = 0.1_dp, f2_eps = 0.01_dp
103
104 INTEGER :: handle, i, istep, k, n, ncol_local, &
105 nrow_local, output_unit, p
106 REAL(kind=dp) :: expfactor, f2, f2old, gnorm, tnorm
107 TYPE(cp_blacs_env_type), POINTER :: context
108 TYPE(cp_fm_struct_type), POINTER :: fm_struct_k_k
109 TYPE(cp_fm_type) :: ctmp, g, gp1, gp2, u
110 TYPE(mp_para_env_type), POINTER :: para_env
111
112 CALL timeset(routinen, handle)
113
114 NULLIFY (context, para_env, fm_struct_k_k)
115
116 output_unit = cp_logger_get_default_io_unit()
117
118 CALL cp_fm_struct_get(c%matrix_struct, nrow_global=n, ncol_global=k, &
119 nrow_local=nrow_local, ncol_local=ncol_local, &
120 para_env=para_env, context=context)
121 CALL cp_fm_struct_create(fm_struct_k_k, para_env=para_env, context=context, &
122 nrow_global=k, ncol_global=k)
123 CALL cp_fm_create(ctmp, c%matrix_struct)
124 CALL cp_fm_create(u, fm_struct_k_k)
125 CALL cp_fm_create(g, fm_struct_k_k)
126 CALL cp_fm_create(gp1, fm_struct_k_k)
127 CALL cp_fm_create(gp2, fm_struct_k_k)
128 !
129 ! printing
130 IF (output_unit > 0) THEN
131 WRITE (output_unit, '(1X)')
132 WRITE (output_unit, '(2X,A)') '-----------------------------------------------------------------------------'
133 WRITE (output_unit, '(A,I5)') ' Nbr iterations =', iterations
134 WRITE (output_unit, '(A,E10.2)') ' eps convergence =', eps
135 WRITE (output_unit, '(A,I5)') ' Max Taylor order =', taylor_order
136 WRITE (output_unit, '(A,E10.2)') ' f2 eps =', f2_eps
137 WRITE (output_unit, '(A,E10.2)') ' alpha =', alpha
138 WRITE (output_unit, '(A)') ' iteration approx_l1_norm g_norm rel_err'
139 END IF
140 !
141 f2old = 0.0_dp
142 converged = .false.
143 !
144 ! Start the steepest descent
145 DO istep = 1, iterations
146 !
147 !-------------------------------------------------------------------
148 ! compute f_2
149 ! f_2(x)=(x^2+eps)^1/2
150 f2 = 0.0_dp
151 DO p = 1, ncol_local ! p
152 DO i = 1, nrow_local ! i
153 f2 = f2 + sqrt(c%local_data(i, p)**2 + f2_eps)
154 END DO
155 END DO
156 CALL c%matrix_struct%para_env%sum(f2)
157 !write(*,*) 'qs_localize: f_2=',f2
158 !-------------------------------------------------------------------
159 ! compute the derivative of f_2
160 ! f_2(x)=(x^2+eps)^1/2
161 DO p = 1, ncol_local ! p
162 DO i = 1, nrow_local ! i
163 ctmp%local_data(i, p) = c%local_data(i, p)/sqrt(c%local_data(i, p)**2 + f2_eps)
164 END DO
165 END DO
166 CALL parallel_gemm('T', 'N', k, k, n, 1.0_dp, ctmp, c, 0.0_dp, g)
167 ! antisymmetrize
168 CALL cp_fm_transpose(g, u)
169 CALL cp_fm_scale_and_add(-0.5_dp, g, 0.5_dp, u)
170 !
171 !-------------------------------------------------------------------
172 !
173 gnorm = cp_fm_frobenius_norm(g)
174 !write(*,*) 'qs_localize: norm(G)=',gnorm
175 !
176 ! rescale for steepest descent
177 CALL cp_fm_scale(-alpha, g)
178 !
179 ! compute unitary transform
180 ! zeroth order
181 CALL cp_fm_set_all(u, 0.0_dp, 1.0_dp)
182 ! first order
183 expfactor = 1.0_dp
184 CALL cp_fm_scale_and_add(1.0_dp, u, expfactor, g)
185 tnorm = cp_fm_frobenius_norm(g)
186 !write(*,*) 'Taylor expansion i=',1,' norm(X^i)/i!=',tnorm
187 IF (tnorm .GT. 1.0e-10_dp) THEN
188 ! other orders
189 CALL cp_fm_to_fm(g, gp1)
190 DO i = 2, taylor_order
191 ! new power of G
192 CALL parallel_gemm('N', 'N', k, k, k, 1.0_dp, g, gp1, 0.0_dp, gp2)
193 CALL cp_fm_to_fm(gp2, gp1)
194 ! add to the taylor expansion so far
195 expfactor = expfactor/real(i, kind=dp)
196 CALL cp_fm_scale_and_add(1.0_dp, u, expfactor, gp1)
197 tnorm = cp_fm_frobenius_norm(gp1)
198 !write(*,*) 'Taylor expansion i=',i,' norm(X^i)/i!=',tnorm*expfactor
199 IF (tnorm*expfactor .LT. 1.0e-10_dp) EXIT
200 END DO
201 END IF
202 !
203 ! incrementaly rotate the MOs
204 CALL parallel_gemm('N', 'N', n, k, k, 1.0_dp, c, u, 0.0_dp, ctmp)
205 CALL cp_fm_to_fm(ctmp, c)
206 !
207 ! printing
208 IF (output_unit .GT. 0) THEN
209 WRITE (output_unit, '(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, abs((f2 - f2old)/f2)
210 END IF
211 !
212 ! Are we done?
213 sweeps = istep
214 !IF(gnorm.LE.grad_thresh.AND.ABS((f2-f2old)/f2).LE.f2_thresh.AND.istep.GT.1) THEN
215 IF (abs((f2 - f2old)/f2) .LE. eps .AND. istep .GT. 1) THEN
216 converged = .true.
217 EXIT
218 END IF
219 f2old = f2
220 END DO
221 !
222 ! here we should do one refine step to enforce C'*S*C=1 for any case
223 !
224 ! Print the final result
225 IF (output_unit .GT. 0) WRITE (output_unit, '(A,E16.10)') ' sparseness function f2 = ', f2
226 !
227 ! sparsity
228 !DO i=1,size(thresh,1)
229 ! gnorm = 0.0_dp
230 ! DO o=1,ncol_local
231 ! DO p=1,nrow_local
232 ! IF(ABS(C%local_data(p,o)).GT.thresh(i)) THEN
233 ! gnorm = gnorm + 1.0_dp
234 ! ENDIF
235 ! ENDDO
236 ! ENDDO
237 ! CALL C%matrix_struct%para_env%sum(gnorm)
238 ! IF(output_unit.GT.0) THEN
239 ! WRITE(output_unit,*) 'qs_localize: ratio2=',gnorm / ( REAL(k,KIND=dp)*REAL(n,KIND=dp) ),thresh(i)
240 ! ENDIF
241 !ENDDO
242 !
243 ! deallocate
244 CALL cp_fm_struct_release(fm_struct_k_k)
245 CALL cp_fm_release(ctmp)
246 CALL cp_fm_release(u)
247 CALL cp_fm_release(g)
248 CALL cp_fm_release(gp1)
249 CALL cp_fm_release(gp2)
250
251 CALL timestop(handle)
252
253 END SUBROUTINE approx_l1_norm_sd
254! **************************************************************************************************
255!> \brief ...
256!> \param cell ...
257!> \param weights ...
258! **************************************************************************************************
259 SUBROUTINE initialize_weights(cell, weights)
260
261 TYPE(cell_type), POINTER :: cell
262 REAL(kind=dp), DIMENSION(:) :: weights
263
264 REAL(kind=dp), DIMENSION(3, 3) :: metric
265
266 cpassert(ASSOCIATED(cell))
267
268 metric = 0.0_dp
269 CALL dgemm('T', 'N', 3, 3, 3, 1._dp, cell%hmat(:, :), 3, cell%hmat(:, :), 3, 0.0_dp, metric(:, :), 3)
270
271 weights(1) = metric(1, 1) - metric(1, 2) - metric(1, 3)
272 weights(2) = metric(2, 2) - metric(1, 2) - metric(2, 3)
273 weights(3) = metric(3, 3) - metric(1, 3) - metric(2, 3)
274 weights(4) = metric(1, 2)
275 weights(5) = metric(1, 3)
276 weights(6) = metric(2, 3)
277
278 END SUBROUTINE initialize_weights
279
280! **************************************************************************************************
281!> \brief wrapper for the jacobi routines, should be removed if jacobi_rot_para
282!> can deal with serial para_envs.
283!> \param weights ...
284!> \param zij ...
285!> \param vectors ...
286!> \param para_env ...
287!> \param max_iter ...
288!> \param eps_localization ...
289!> \param sweeps ...
290!> \param out_each ...
291!> \param target_time ...
292!> \param start_time ...
293!> \param restricted ...
294!> \par History
295!> \author Joost VandeVondele (02.2010)
296! **************************************************************************************************
297 SUBROUTINE jacobi_rotations(weights, zij, vectors, para_env, max_iter, &
298 eps_localization, sweeps, out_each, target_time, start_time, restricted)
299
300 REAL(kind=dp), INTENT(IN) :: weights(:)
301 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
302 TYPE(mp_para_env_type), POINTER :: para_env
303 INTEGER, INTENT(IN) :: max_iter
304 REAL(kind=dp), INTENT(IN) :: eps_localization
305 INTEGER :: sweeps
306 INTEGER, INTENT(IN) :: out_each
307 REAL(dp) :: target_time, start_time
308 INTEGER :: restricted
309
310 IF (para_env%num_pe == 1) THEN
311 CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, &
312 sweeps, out_each, restricted=restricted)
313 ELSE
314 CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
315 sweeps, out_each, target_time, start_time, restricted=restricted)
316 END IF
317
318 END SUBROUTINE jacobi_rotations
319
320! **************************************************************************************************
321!> \brief this routine, private to the module is a serial backup, till we have jacobi_rot_para to work in serial
322!> while the routine below works in parallel, it is too slow to be useful
323!> \param weights ...
324!> \param zij ...
325!> \param vectors ...
326!> \param max_iter ...
327!> \param eps_localization ...
328!> \param sweeps ...
329!> \param out_each ...
330!> \param restricted ...
331! **************************************************************************************************
332 SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, &
333 out_each, restricted)
334 REAL(kind=dp), INTENT(IN) :: weights(:)
335 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
336 INTEGER, INTENT(IN) :: max_iter
337 REAL(kind=dp), INTENT(IN) :: eps_localization
338 INTEGER :: sweeps
339 INTEGER, INTENT(IN) :: out_each
340 INTEGER :: restricted
341
342 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rotations_serial'
343
344 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
345 INTEGER :: dim2, handle, idim, istate, jstate, &
346 nstate, unit_nr
347 REAL(kind=dp) :: ct, st, t1, t2, theta, tolerance
348 TYPE(cp_cfm_type) :: c_rmat
349 TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
350 TYPE(cp_fm_type) :: rmat
351
352 CALL timeset(routinen, handle)
353
354 dim2 = SIZE(zij, 2)
355 ALLOCATE (c_zij(dim2))
356 NULLIFY (mii, mij, mjj)
357 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
358
359 CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
360 CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
361
362 CALL cp_cfm_create(c_rmat, zij(1, 1)%matrix_struct)
363 CALL cp_cfm_set_all(c_rmat, (0._dp, 0._dp), (1._dp, 0._dp))
364 DO idim = 1, dim2
365 CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
366 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
367 zij(2, idim)%local_data, dp)
368 END DO
369
370 CALL cp_fm_get_info(rmat, nrow_global=nstate)
371 tolerance = 1.0e10_dp
372
373 sweeps = 0
374 unit_nr = -1
375 IF (rmat%matrix_struct%para_env%is_source()) THEN
377 WRITE (unit_nr, '(T4,A )') " Localization by iterative Jacobi rotation"
378 END IF
379
380 IF (restricted > 0) THEN
382 WRITE (unit_nr, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
383 nstate = nstate - restricted
384 END IF
385
386 ! do jacobi sweeps until converged
387 DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
388 sweeps = sweeps + 1
389 t1 = m_walltime()
390
391 DO istate = 1, nstate
392 DO jstate = istate + 1, nstate
393 DO idim = 1, dim2
394 CALL cp_cfm_get_element(c_zij(idim), istate, istate, mii(idim))
395 CALL cp_cfm_get_element(c_zij(idim), istate, jstate, mij(idim))
396 CALL cp_cfm_get_element(c_zij(idim), jstate, jstate, mjj(idim))
397 END DO
398 CALL get_angle(mii, mjj, mij, weights, theta)
399 st = sin(theta)
400 ct = cos(theta)
401 CALL rotate_zij(istate, jstate, st, ct, c_zij)
402
403 CALL rotate_rmat(istate, jstate, st, ct, c_rmat)
404 END DO
405 END DO
406
407 CALL check_tolerance(c_zij, weights, tolerance)
408
409 t2 = m_walltime()
410 IF (unit_nr > 0 .AND. modulo(sweeps, out_each) == 0) THEN
411 WRITE (unit_nr, '(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') &
412 "Iteration:", sweeps, "Tolerance:", tolerance, "Time:", t2 - t1
413 CALL m_flush(unit_nr)
414 END IF
415
416 END DO
417
418 DO idim = 1, dim2
419 zij(1, idim)%local_data = real(c_zij(idim)%local_data, dp)
420 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
421 CALL cp_cfm_release(c_zij(idim))
422 END DO
423 DEALLOCATE (c_zij)
424 DEALLOCATE (mii, mij, mjj)
425 rmat%local_data = real(c_rmat%local_data, dp)
426 CALL cp_cfm_release(c_rmat)
427 CALL rotate_orbitals(rmat, vectors)
428 CALL cp_fm_release(rmat)
429
430 CALL timestop(handle)
431
432 END SUBROUTINE jacobi_rotations_serial
433! **************************************************************************************************
434!> \brief very similar to jacobi_rotations_serial with some extra output options
435!> \param weights ...
436!> \param c_zij ...
437!> \param max_iter ...
438!> \param c_rmat ...
439!> \param eps_localization ...
440!> \param tol_out ...
441!> \param jsweeps ...
442!> \param out_each ...
443!> \param c_zij_out ...
444!> \param grad_final ...
445! **************************************************************************************************
446 SUBROUTINE jacobi_rotations_serial_1(weights, c_zij, max_iter, c_rmat, eps_localization, &
447 tol_out, jsweeps, out_each, c_zij_out, grad_final)
448 REAL(kind=dp), INTENT(IN) :: weights(:)
449 TYPE(cp_cfm_type), INTENT(IN) :: c_zij(:)
450 INTEGER, INTENT(IN) :: max_iter
451 TYPE(cp_cfm_type), INTENT(IN) :: c_rmat
452 REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_localization
453 REAL(kind=dp), INTENT(OUT), OPTIONAL :: tol_out
454 INTEGER, INTENT(OUT), OPTIONAL :: jsweeps
455 INTEGER, INTENT(IN), OPTIONAL :: out_each
456 TYPE(cp_cfm_type), INTENT(IN), OPTIONAL :: c_zij_out(:)
457 TYPE(cp_fm_type), INTENT(OUT), OPTIONAL, POINTER :: grad_final
458
459 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rotations_serial_1'
460
461 COMPLEX(KIND=dp) :: mzii
462 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
463 INTEGER :: dim2, handle, idim, istate, jstate, &
464 nstate, sweeps, unit_nr
465 REAL(kind=dp) :: alpha, avg_spread_ii, ct, spread_ii, st, &
466 sum_spread_ii, t1, t2, theta, tolerance
467 TYPE(cp_cfm_type) :: c_rmat_local
468 TYPE(cp_cfm_type), ALLOCATABLE :: c_zij_local(:)
469
470 CALL timeset(routinen, handle)
471
472 dim2 = SIZE(c_zij)
473 NULLIFY (mii, mij, mjj)
474 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
475
476 ALLOCATE (c_zij_local(dim2))
477 CALL cp_cfm_create(c_rmat_local, c_rmat%matrix_struct)
478 CALL cp_cfm_set_all(c_rmat_local, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
479 DO idim = 1, dim2
480 CALL cp_cfm_create(c_zij_local(idim), c_zij(idim)%matrix_struct)
481 c_zij_local(idim)%local_data = c_zij(idim)%local_data
482 END DO
483
484 CALL cp_cfm_get_info(c_rmat_local, nrow_global=nstate)
485 tolerance = 1.0e10_dp
486
487 IF (PRESENT(grad_final)) CALL cp_fm_set_all(grad_final, 0.0_dp)
488
489 sweeps = 0
490 IF (PRESENT(out_each)) THEN
491 unit_nr = -1
492 IF (c_rmat_local%matrix_struct%para_env%is_source()) THEN
494 END IF
495 alpha = 0.0_dp
496 DO idim = 1, dim2
497 alpha = alpha + weights(idim)
498 END DO
499 END IF
500
501 ! do jacobi sweeps until converged
502 DO WHILE (sweeps < max_iter)
503 sweeps = sweeps + 1
504 IF (PRESENT(eps_localization)) THEN
505 IF (tolerance < eps_localization) EXIT
506 END IF
507 IF (PRESENT(out_each)) t1 = m_walltime()
508
509 DO istate = 1, nstate
510 DO jstate = istate + 1, nstate
511 DO idim = 1, dim2
512 CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mii(idim))
513 CALL cp_cfm_get_element(c_zij_local(idim), istate, jstate, mij(idim))
514 CALL cp_cfm_get_element(c_zij_local(idim), jstate, jstate, mjj(idim))
515 END DO
516 CALL get_angle(mii, mjj, mij, weights, theta)
517 st = sin(theta)
518 ct = cos(theta)
519 CALL rotate_zij(istate, jstate, st, ct, c_zij_local)
520
521 CALL rotate_rmat(istate, jstate, st, ct, c_rmat_local)
522 END DO
523 END DO
524
525 IF (PRESENT(grad_final)) THEN
526 CALL check_tolerance(c_zij_local, weights, tolerance, grad=grad_final)
527 ELSE
528 CALL check_tolerance(c_zij_local, weights, tolerance)
529 END IF
530 IF (PRESENT(tol_out)) tol_out = tolerance
531
532 IF (PRESENT(out_each)) THEN
533 t2 = m_walltime()
534 IF (unit_nr > 0 .AND. modulo(sweeps, out_each) == 0) THEN
535 sum_spread_ii = 0.0_dp
536 DO istate = 1, nstate
537 spread_ii = 0.0_dp
538 DO idim = 1, dim2
539 CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mzii)
540 spread_ii = spread_ii + weights(idim)* &
541 abs(mzii)**2/twopi/twopi
542 END DO
543 sum_spread_ii = sum_spread_ii + spread_ii
544 END DO
545 sum_spread_ii = alpha*nstate/twopi/twopi - sum_spread_ii
546 avg_spread_ii = sum_spread_ii/nstate
547 WRITE (unit_nr, '(T4,A,T26,A,T48,A,T64,A)') &
548 "Iteration", "Avg. Spread_ii", "Tolerance", "Time"
549 WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4,T60,F8.3)') &
550 sweeps, avg_spread_ii, tolerance, t2 - t1
551 CALL m_flush(unit_nr)
552 END IF
553 IF (PRESENT(jsweeps)) jsweeps = sweeps
554 END IF
555
556 END DO
557
558 IF (PRESENT(c_zij_out)) THEN
559 DO idim = 1, dim2
560 CALL cp_cfm_to_cfm(c_zij_local(idim), c_zij_out(idim))
561 END DO
562 END IF
563 CALL cp_cfm_to_cfm(c_rmat_local, c_rmat)
564
565 DEALLOCATE (mii, mij, mjj)
566 DO idim = 1, dim2
567 CALL cp_cfm_release(c_zij_local(idim))
568 END DO
569 DEALLOCATE (c_zij_local)
570 CALL cp_cfm_release(c_rmat_local)
571
572 CALL timestop(handle)
573
574 END SUBROUTINE jacobi_rotations_serial_1
575! **************************************************************************************************
576!> \brief combine jacobi rotations (serial) and conjugate gradient with golden section line search
577!> for partially occupied wannier functions
578!> \param para_env ...
579!> \param weights ...
580!> \param zij ...
581!> \param vectors ...
582!> \param max_iter ...
583!> \param eps_localization ...
584!> \param iter ...
585!> \param out_each ...
586!> \param nextra ...
587!> \param do_cg ...
588!> \param nmo ...
589!> \param vectors_2 ...
590!> \param mos_guess ...
591! **************************************************************************************************
592 SUBROUTINE jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, &
593 iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
594 TYPE(mp_para_env_type), POINTER :: para_env
595 REAL(kind=dp), INTENT(IN) :: weights(:)
596 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
597 INTEGER, INTENT(IN) :: max_iter
598 REAL(kind=dp), INTENT(IN) :: eps_localization
599 INTEGER :: iter
600 INTEGER, INTENT(IN) :: out_each, nextra
601 LOGICAL, INTENT(IN) :: do_cg
602 INTEGER, INTENT(IN), OPTIONAL :: nmo
603 TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: vectors_2, mos_guess
604
605 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_cg_edf_ls'
606 COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
607 czero = (0.0_dp, 0.0_dp)
608 REAL(kind=dp), PARAMETER :: gold_sec = 0.3819_dp
609
610 COMPLEX(KIND=dp) :: cnorm2_gct, cnorm2_gct_cross, mzii
611 COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_cmat
612 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: arr_zii
613 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: matrix_zii
614 INTEGER :: dim2, handle, icinit, idim, istate, line_search_count, line_searches, lsl, lsm, &
615 lsr, miniter, nao, ndummy, nocc, norextra, northo, nstate, unit_nr
616 INTEGER, DIMENSION(1) :: iloc
617 LOGICAL :: do_cinit_mo, do_cinit_random, &
618 do_u_guess_mo, new_direction
619 REAL(kind=dp) :: alpha, avg_spread_ii, beta, beta_pr, ds, ds_min, mintol, norm, norm2_gct, &
620 norm2_gct_cross, norm2_old, spread_ii, spread_sum, sum_spread_ii, t1, tol, tolc, weight
621 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: sum_spread
622 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_mat, tmp_mat_1
623 REAL(kind=dp), DIMENSION(50) :: energy, pos
624 REAL(kind=dp), DIMENSION(:), POINTER :: tmp_arr
625 TYPE(cp_blacs_env_type), POINTER :: context
626 TYPE(cp_cfm_type) :: c_tilde, ctrans_lambda, gct_old, &
627 grad_ctilde, skc, tmp_cfm, tmp_cfm_1, &
628 tmp_cfm_2, u, ul, v, vl, zdiag
629 TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij, zij_0
630 TYPE(cp_fm_struct_type), POINTER :: tmp_fm_struct
631 TYPE(cp_fm_type) :: id_nextra, matrix_u, matrix_v, &
632 matrix_v_all, rmat, tmp_fm, vectors_all
633
634 CALL timeset(routinen, handle)
635
636 dim2 = SIZE(zij, 2)
637 NULLIFY (context)
638 NULLIFY (matrix_zii, arr_zii)
639 NULLIFY (tmp_fm_struct)
640 NULLIFY (tmp_arr)
641
642 ALLOCATE (c_zij(dim2))
643
644 CALL cp_fm_get_info(zij(1, 1), nrow_global=nstate)
645
646 ALLOCATE (sum_spread(nstate))
647 ALLOCATE (matrix_zii(nstate, dim2))
648 matrix_zii = czero
649 sum_spread = 0.0_dp
650
651 alpha = 0.0_dp
652 DO idim = 1, dim2
653 alpha = alpha + weights(idim)
654 CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
655 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
656 zij(2, idim)%local_data, dp)
657 END DO
658
659 ALLOCATE (zij_0(dim2))
660
661 CALL cp_cfm_create(u, zij(1, 1)%matrix_struct)
662 CALL cp_fm_create(matrix_u, zij(1, 1)%matrix_struct)
663
664 CALL cp_cfm_set_all(u, czero, cone)
665 CALL cp_fm_set_all(matrix_u, 0.0_dp, 1.0_dp)
666
667 CALL cp_fm_get_info(vectors, nrow_global=nao)
668 IF (nextra > 0) THEN
669 IF (PRESENT(mos_guess)) THEN
670 do_cinit_random = .false.
671 do_cinit_mo = .true.
672 CALL cp_fm_get_info(mos_guess, ncol_global=ndummy)
673 ELSE
674 do_cinit_random = .true.
675 do_cinit_mo = .false.
676 ndummy = nstate
677 END IF
678
679 IF (do_cinit_random) THEN
680 icinit = 1
681 do_u_guess_mo = .false.
682 ELSEIF (do_cinit_mo) THEN
683 icinit = 2
684 do_u_guess_mo = .true.
685 END IF
686
687 nocc = nstate - nextra
688 northo = nmo - nocc
689 norextra = nmo - nstate
690 CALL cp_fm_struct_get(zij(1, 1)%matrix_struct, context=context)
691
692 ALLOCATE (tmp_cmat(nstate, nstate))
693 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nmo, &
694 para_env=para_env, context=context)
695 DO idim = 1, dim2
696 CALL cp_cfm_create(zij_0(idim), tmp_fm_struct)
697 CALL cp_cfm_set_all(zij_0(idim), czero, cone)
698 CALL cp_cfm_get_submatrix(c_zij(idim), tmp_cmat)
699 CALL cp_cfm_set_submatrix(zij_0(idim), tmp_cmat)
700 END DO
701 CALL cp_fm_struct_release(tmp_fm_struct)
702 DEALLOCATE (tmp_cmat)
703
704 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nstate, &
705 para_env=para_env, context=context)
706 CALL cp_cfm_create(v, tmp_fm_struct)
707 CALL cp_fm_create(matrix_v, tmp_fm_struct)
708 CALL cp_cfm_create(zdiag, tmp_fm_struct)
709 CALL cp_fm_create(rmat, tmp_fm_struct)
710 CALL cp_fm_struct_release(tmp_fm_struct)
711 CALL cp_cfm_set_all(v, czero, cone)
712 CALL cp_fm_set_all(matrix_v, 0.0_dp, 1.0_dp)
713
714 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=ndummy, &
715 para_env=para_env, context=context)
716 CALL cp_fm_create(matrix_v_all, tmp_fm_struct)
717 CALL cp_fm_struct_release(tmp_fm_struct)
718 CALL cp_fm_set_all(matrix_v_all, 0._dp, 1._dp)
719
720 ALLOCATE (arr_zii(nstate))
721
722 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
723 para_env=para_env, context=context)
724 CALL cp_cfm_create(c_tilde, tmp_fm_struct)
725 CALL cp_cfm_create(grad_ctilde, tmp_fm_struct)
726 CALL cp_cfm_create(gct_old, tmp_fm_struct)
727 CALL cp_cfm_create(skc, tmp_fm_struct)
728 CALL cp_fm_struct_release(tmp_fm_struct)
729 CALL cp_cfm_set_all(c_tilde, czero)
730 CALL cp_cfm_set_all(gct_old, czero)
731 CALL cp_cfm_set_all(skc, czero)
732
733 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nstate, &
734 para_env=para_env, context=context)
735 CALL cp_cfm_create(vl, tmp_fm_struct)
736 CALL cp_cfm_set_all(vl, czero)
737 CALL cp_fm_struct_release(tmp_fm_struct)
738
739 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nextra, &
740 para_env=para_env, context=context)
741 CALL cp_fm_create(id_nextra, tmp_fm_struct)
742 CALL cp_cfm_create(ctrans_lambda, tmp_fm_struct)
743 CALL cp_fm_struct_release(tmp_fm_struct)
744 CALL cp_cfm_set_all(ctrans_lambda, czero)
745 CALL cp_fm_set_all(id_nextra, 0.0_dp, 1.0_dp)
746
747 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nstate, &
748 para_env=para_env, context=context)
749 CALL cp_cfm_create(ul, tmp_fm_struct)
750 CALL cp_fm_struct_release(tmp_fm_struct)
751 CALL cp_cfm_set_all(ul, czero)
752
753 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nao, ncol_global=nmo, &
754 para_env=para_env, context=context)
755 CALL cp_fm_create(vectors_all, tmp_fm_struct)
756 CALL cp_fm_struct_release(tmp_fm_struct)
757 ALLOCATE (tmp_mat(nao, nstate))
758 CALL cp_fm_get_submatrix(vectors, tmp_mat)
759 CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, 1, nao, nstate)
760 DEALLOCATE (tmp_mat)
761 ALLOCATE (tmp_mat(nao, norextra))
762 CALL cp_fm_get_submatrix(vectors_2, tmp_mat)
763 CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, nstate + 1, nao, norextra)
764 DEALLOCATE (tmp_mat)
765
766 ! initialize c_tilde
767 SELECT CASE (icinit)
768 CASE (1) ! random coefficients
769 !WRITE (*, *) "RANDOM INITIAL GUESS FOR C"
770 CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
771 CALL cp_fm_init_random(tmp_fm, nextra)
772 CALL ortho_vectors(tmp_fm)
773 c_tilde%local_data = tmp_fm%local_data
774 CALL cp_fm_release(tmp_fm)
775 ALLOCATE (tmp_cmat(northo, nextra))
776 CALL cp_cfm_get_submatrix(c_tilde, tmp_cmat)
777 CALL cp_cfm_set_submatrix(v, tmp_cmat, nocc + 1, nocc + 1, northo, nextra)
778 DEALLOCATE (tmp_cmat)
779 CASE (2) ! MO based coeffs
780 CALL parallel_gemm("T", "N", nmo, ndummy, nao, 1.0_dp, vectors_all, mos_guess, 0.0_dp, matrix_v_all)
781 ALLOCATE (tmp_arr(nmo))
782 ALLOCATE (tmp_mat(nmo, ndummy))
783 ALLOCATE (tmp_mat_1(nmo, nstate))
784 ! normalize matrix_V_all
785 CALL cp_fm_get_submatrix(matrix_v_all, tmp_mat)
786 DO istate = 1, ndummy
787 tmp_arr(:) = tmp_mat(:, istate)
788 norm = norm2(tmp_arr)
789 tmp_arr(:) = tmp_arr(:)/norm
790 tmp_mat(:, istate) = tmp_arr(:)
791 END DO
792 CALL cp_fm_set_submatrix(matrix_v_all, tmp_mat)
793 CALL cp_fm_get_submatrix(matrix_v_all, tmp_mat_1, 1, 1, nmo, nstate)
794 CALL cp_fm_set_submatrix(matrix_v, tmp_mat_1)
795 DEALLOCATE (tmp_arr, tmp_mat, tmp_mat_1)
796 CALL cp_fm_to_cfm(msourcer=matrix_v, mtarget=v)
797 ALLOCATE (tmp_mat(northo, ndummy))
798 ALLOCATE (tmp_mat_1(northo, nextra))
799 CALL cp_fm_get_submatrix(matrix_v_all, tmp_mat, nocc + 1, 1, northo, ndummy)
800 ALLOCATE (tmp_arr(ndummy))
801 tmp_arr = 0.0_dp
802 DO istate = 1, ndummy
803 tmp_arr(istate) = norm2(tmp_mat(:, istate))
804 END DO
805 ! find edfs
806 DO istate = 1, nextra
807 iloc = maxloc(tmp_arr)
808 tmp_mat_1(:, istate) = tmp_mat(:, iloc(1))
809 tmp_arr(iloc(1)) = 0.0_dp
810 END DO
811
812 DEALLOCATE (tmp_arr, tmp_mat)
813
814 CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
815 para_env=para_env, context=context)
816 CALL cp_fm_create(tmp_fm, tmp_fm_struct)
817 CALL cp_fm_struct_release(tmp_fm_struct)
818 CALL cp_fm_set_submatrix(tmp_fm, tmp_mat_1)
819 DEALLOCATE (tmp_mat_1)
820 CALL ortho_vectors(tmp_fm)
821 CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
822 CALL cp_fm_release(tmp_fm)
823 ! initialize U
824 IF (do_u_guess_mo) THEN
825 ALLOCATE (tmp_cmat(nocc, nstate))
826 CALL cp_cfm_get_submatrix(v, tmp_cmat, 1, 1, nocc, nstate)
827 CALL cp_cfm_set_submatrix(u, tmp_cmat, 1, 1, nocc, nstate)
828 DEALLOCATE (tmp_cmat)
829 ALLOCATE (tmp_cmat(northo, nstate))
830 CALL cp_cfm_get_submatrix(v, tmp_cmat, nocc + 1, 1, northo, nstate)
831 CALL cp_cfm_set_submatrix(vl, tmp_cmat, 1, 1, northo, nstate)
832 DEALLOCATE (tmp_cmat)
833 CALL parallel_gemm("C", "N", nextra, nstate, northo, cone, c_tilde, vl, czero, ul)
834 ALLOCATE (tmp_cmat(nextra, nstate))
835 CALL cp_cfm_get_submatrix(ul, tmp_cmat, 1, 1, nextra, nstate)
836 CALL cp_cfm_set_submatrix(u, tmp_cmat, nocc + 1, 1, nextra, nstate)
837 DEALLOCATE (tmp_cmat)
838 CALL cp_fm_create(tmp_fm, u%matrix_struct)
839 tmp_fm%local_data = real(u%local_data, kind=dp)
840 CALL ortho_vectors(tmp_fm)
841 CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=u)
842 CALL cp_fm_release(tmp_fm)
843 CALL cp_cfm_to_fm(u, matrix_u)
844 END IF
845 ! reevaluate V
846 ALLOCATE (tmp_cmat(nocc, nstate))
847 CALL cp_cfm_get_submatrix(u, tmp_cmat, 1, 1, nocc, nstate)
848 CALL cp_cfm_set_submatrix(v, tmp_cmat, 1, 1, nocc, nstate)
849 DEALLOCATE (tmp_cmat)
850 ALLOCATE (tmp_cmat(nextra, nstate))
851 CALL cp_cfm_get_submatrix(u, tmp_cmat, nocc + 1, 1, nextra, nstate)
852 CALL cp_cfm_set_submatrix(ul, tmp_cmat, 1, 1, nextra, nstate)
853 DEALLOCATE (tmp_cmat)
854 CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
855 ALLOCATE (tmp_cmat(northo, nstate))
856 CALL cp_cfm_get_submatrix(vl, tmp_cmat)
857 CALL cp_cfm_set_submatrix(v, tmp_cmat, nocc + 1, 1, northo, nstate)
858 DEALLOCATE (tmp_cmat)
859 END SELECT
860 ELSE
861 DO idim = 1, dim2
862 CALL cp_cfm_create(zij_0(idim), zij(1, 1)%matrix_struct)
863 CALL cp_cfm_to_cfm(c_zij(idim), zij_0(idim))
864 END DO
865 CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
866 CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
867 END IF
868
869 unit_nr = -1
870 IF (rmat%matrix_struct%para_env%is_source()) THEN
872 WRITE (unit_nr, '(T4,A )') " Localization by combined Jacobi rotations and Non-Linear Conjugate Gradient"
873 END IF
874
875 norm2_old = 1.0e30_dp
876 ds_min = 1.0_dp
877 new_direction = .true.
878 iter = 0
879 line_searches = 0
880 line_search_count = 0
881 tol = 1.0e+20_dp
882 mintol = 1.0e+10_dp
883 miniter = 0
884
885 !IF (nextra > 0) WRITE(*,*) 'random_guess, MO_guess, U_guess, conjugate_gradient: ', &
886 ! do_cinit_random, do_cinit_mo, do_U_guess_mo, do_cg
887
888 ! do conjugate gradient until converged
889 DO WHILE (iter < max_iter)
890 iter = iter + 1
891 !WRITE(*,*) 'iter = ', iter
892 t1 = m_walltime()
893
894 IF (iter > 1) THEN
895 ! comput U
896 CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
897 CALL cp_cfm_create(tmp_cfm_2, zij(1, 1)%matrix_struct)
898 IF (para_env%num_pe == 1) THEN
899 CALL jacobi_rotations_serial_1(weights, c_zij, 1, tmp_cfm_2, tol_out=tol)
900 ELSE
901 CALL jacobi_rot_para_1(weights, c_zij, para_env, 1, tmp_cfm_2, tol_out=tol)
902 END IF
903 CALL parallel_gemm('N', 'N', nstate, nstate, nstate, cone, u, tmp_cfm_2, czero, tmp_cfm)
904 CALL cp_cfm_to_cfm(tmp_cfm, u)
905 CALL cp_cfm_release(tmp_cfm)
906 CALL cp_cfm_release(tmp_cfm_2)
907 END IF
908
909 IF (nextra > 0) THEN
910 ALLOCATE (tmp_cmat(nextra, nstate))
911 CALL cp_cfm_get_submatrix(u, tmp_cmat, nocc + 1, 1, nextra, nstate)
912 CALL cp_cfm_set_submatrix(ul, tmp_cmat)
913 DEALLOCATE (tmp_cmat)
914 IF (iter > 1) THEN
915 ! orthonormalize c_tilde
916 CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
917 tmp_fm%local_data = real(c_tilde%local_data, kind=dp)
918 CALL ortho_vectors(tmp_fm)
919 CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
920 CALL cp_fm_release(tmp_fm)
921
922 ALLOCATE (tmp_cmat(nocc, nstate))
923 CALL cp_cfm_get_submatrix(u, tmp_cmat, 1, 1, nocc, nstate)
924 CALL cp_cfm_set_submatrix(v, tmp_cmat, 1, 1, nocc, nstate)
925 DEALLOCATE (tmp_cmat)
926 CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
927 ALLOCATE (tmp_cmat(northo, nstate))
928 CALL cp_cfm_get_submatrix(vl, tmp_cmat)
929 CALL cp_cfm_set_submatrix(v, tmp_cmat, nocc + 1, 1, northo, nstate)
930 DEALLOCATE (tmp_cmat)
931 END IF
932
933 ! reset if new_direction
934 IF (new_direction .AND. mod(line_searches, 20) .EQ. 5) THEN
935 CALL cp_cfm_set_all(skc, czero)
936 CALL cp_cfm_set_all(gct_old, czero)
937 norm2_old = 1.0e30_dp
938 END IF
939
940 CALL cp_cfm_create(tmp_cfm, v%matrix_struct)
941 CALL cp_cfm_to_cfm(v, tmp_cfm)
942 CALL cp_cfm_create(tmp_cfm_1, v%matrix_struct)
943 ndummy = nmo
944 ELSE
945 CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
946 CALL cp_cfm_to_cfm(u, tmp_cfm)
947 CALL cp_cfm_create(tmp_cfm_1, zij(1, 1)%matrix_struct)
948 ndummy = nstate
949 END IF
950 ! update z_ij
951 DO idim = 1, dim2
952 ! 'tmp_cfm_1 = zij_0*tmp_cfm'
953 CALL parallel_gemm("N", "N", ndummy, nstate, ndummy, cone, zij_0(idim), &
954 tmp_cfm, czero, tmp_cfm_1)
955 ! 'c_zij = tmp_cfm_dagg*tmp_cfm_1'
956 CALL parallel_gemm("C", "N", nstate, nstate, ndummy, cone, tmp_cfm, tmp_cfm_1, &
957 czero, c_zij(idim))
958 END DO
959 CALL cp_cfm_release(tmp_cfm)
960 CALL cp_cfm_release(tmp_cfm_1)
961 ! compute spread
962 DO istate = 1, nstate
963 spread_ii = 0.0_dp
964 DO idim = 1, dim2
965 CALL cp_cfm_get_element(c_zij(idim), istate, istate, mzii)
966 spread_ii = spread_ii + weights(idim)* &
967 abs(mzii)**2/twopi/twopi
968 matrix_zii(istate, idim) = mzii
969 END DO
970 !WRITE(*,*) 'spread_ii', spread_ii
971 sum_spread(istate) = spread_ii
972 END DO
973 CALL c_zij(1)%matrix_struct%para_env%sum(spread_ii)
974 spread_sum = accurate_sum(sum_spread)
975
976 IF (nextra > 0) THEN
977 ! update c_tilde
978 CALL cp_cfm_set_all(zdiag, czero)
979 CALL cp_cfm_set_all(grad_ctilde, czero)
980 CALL cp_cfm_create(tmp_cfm, v%matrix_struct)
981 CALL cp_cfm_set_all(tmp_cfm, czero)
982 CALL cp_cfm_create(tmp_cfm_1, v%matrix_struct)
983 CALL cp_cfm_set_all(tmp_cfm_1, czero)
984 ALLOCATE (tmp_cmat(northo, nstate))
985 DO idim = 1, dim2
986 weight = weights(idim)
987 arr_zii = matrix_zii(:, idim)
988 ! tmp_cfm = zij_0*V
989 CALL parallel_gemm("N", "N", nmo, nstate, nmo, cone, &
990 zij_0(idim), v, czero, tmp_cfm)
991 ! tmp_cfm = tmp_cfm*diag_zij_dagg
992 CALL cp_cfm_column_scale(tmp_cfm, conjg(arr_zii))
993 ! tmp_cfm_1 = tmp_cfm*U_dagg
994 CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
995 u, czero, tmp_cfm_1)
996 CALL cp_cfm_scale(weight, tmp_cfm_1)
997 ! zdiag = zdiag + tmp_cfm_1'
998 CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
999
1000 ! tmp_cfm = zij_0_dagg*V
1001 CALL parallel_gemm("C", "N", nmo, nstate, nmo, cone, &
1002 zij_0(idim), v, czero, tmp_cfm)
1003
1004 ! tmp_cfm = tmp_cfm*diag_zij
1005 CALL cp_cfm_column_scale(tmp_cfm, arr_zii)
1006 ! tmp_cfm_1 = tmp_cfm*U_dagg
1007 CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
1008 u, czero, tmp_cfm_1)
1009 CALL cp_cfm_scale(weight, tmp_cfm_1)
1010 ! zdiag = zdiag + tmp_cfm_1'
1011 CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
1012 END DO ! idim
1013 CALL cp_cfm_release(tmp_cfm)
1014 CALL cp_cfm_release(tmp_cfm_1)
1015 DEALLOCATE (tmp_cmat)
1016 ALLOCATE (tmp_cmat(northo, nextra))
1017 CALL cp_cfm_get_submatrix(zdiag, tmp_cmat, nocc + 1, nocc + 1, &
1018 northo, nextra, .false.)
1019 ! 'grad_ctilde'
1020 CALL cp_cfm_set_submatrix(grad_ctilde, tmp_cmat)
1021 DEALLOCATE (tmp_cmat)
1022 ! ctrans_lambda = c_tilde_dagg*grad_ctilde
1023 CALL parallel_gemm("C", "N", nextra, nextra, northo, cone, c_tilde, grad_ctilde, czero, ctrans_lambda)
1024 !WRITE(*,*) "norm(ctrans_lambda) = ", cp_cfm_norm(ctrans_lambda, "F")
1025 ! 'grad_ctilde = - c_tilde*ctrans_lambda + grad_ctilde'
1026 CALL parallel_gemm("N", "N", northo, nextra, nextra, -cone, c_tilde, ctrans_lambda, cone, grad_ctilde)
1027 END IF ! nextra > 0
1028
1029 ! tolerance
1030 IF (nextra > 0) THEN
1031 tolc = 0.0_dp
1032 CALL cp_fm_create(tmp_fm, grad_ctilde%matrix_struct)
1033 CALL cp_cfm_to_fm(grad_ctilde, tmp_fm)
1034 CALL cp_fm_maxabsval(tmp_fm, tolc)
1035 CALL cp_fm_release(tmp_fm)
1036 !WRITE(*,*) 'tolc = ', tolc
1037 tol = tol + tolc
1038 END IF
1039 !WRITE(*,*) 'tol = ', tol
1040
1041 IF (nextra > 0) THEN
1042 !WRITE(*,*) 'new_direction: ', new_direction
1043 IF (new_direction) THEN
1044 line_searches = line_searches + 1
1045 IF (mintol > tol) THEN
1046 mintol = tol
1047 miniter = iter
1048 END IF
1049
1050 IF (unit_nr > 0 .AND. modulo(iter, out_each) == 0) THEN
1051 sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1052 avg_spread_ii = sum_spread_ii/nstate
1053 WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1054 "Iteration", "Avg. Spread_ii", "Tolerance"
1055 WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1056 iter, avg_spread_ii, tol
1057 CALL m_flush(unit_nr)
1058 END IF
1059 IF (tol < eps_localization) EXIT
1060
1061 IF (do_cg) THEN
1062 cnorm2_gct = czero
1063 cnorm2_gct_cross = czero
1064 CALL cp_cfm_trace(grad_ctilde, gct_old, cnorm2_gct_cross)
1065 norm2_gct_cross = real(cnorm2_gct_cross, kind=dp)
1066 gct_old%local_data = grad_ctilde%local_data
1067 CALL cp_cfm_trace(grad_ctilde, gct_old, cnorm2_gct)
1068 norm2_gct = real(cnorm2_gct, kind=dp)
1069 ! compute beta_pr
1070 beta_pr = (norm2_gct - norm2_gct_cross)/norm2_old
1071 norm2_old = norm2_gct
1072 beta = max(0.0_dp, beta_pr)
1073 !WRITE(*,*) 'beta = ', beta
1074 ! compute skc / ska = beta * skc / ska + grad_ctilde / G
1075 CALL cp_cfm_scale(beta, skc)
1076 CALL cp_cfm_scale_and_add(cone, skc, cone, gct_old)
1077 CALL cp_cfm_trace(skc, gct_old, cnorm2_gct_cross)
1078 norm2_gct_cross = real(cnorm2_gct_cross, kind=dp)
1079 IF (norm2_gct_cross .LE. 0.0_dp) THEN ! back to steepest ascent
1080 CALL cp_cfm_scale_and_add(czero, skc, cone, gct_old)
1081 END IF
1082 ELSE
1083 CALL cp_cfm_scale_and_add(czero, skc, cone, grad_ctilde)
1084 END IF
1085 line_search_count = 0
1086 END IF
1087
1088 line_search_count = line_search_count + 1
1089 !WRITE(*,*) 'line_search_count = ', line_search_count
1090 energy(line_search_count) = spread_sum
1091
1092 ! gold line search
1093 new_direction = .false.
1094 IF (line_search_count .EQ. 1) THEN
1095 lsl = 1
1096 lsr = 0
1097 lsm = 1
1098 pos(1) = 0.0_dp
1099 pos(2) = ds_min/gold_sec
1100 ds = pos(2)
1101 ELSE
1102 IF (line_search_count .EQ. 50) THEN
1103 IF (abs(energy(line_search_count) - energy(line_search_count - 1)) < 1.0e-4_dp) THEN
1104 cpwarn("Line search failed to converge properly")
1105 ds_min = 0.1_dp
1106 new_direction = .true.
1107 ds = pos(line_search_count)
1108 line_search_count = 0
1109 ELSE
1110 cpabort("No. of line searches exceeds 50")
1111 END IF
1112 ELSE
1113 IF (lsr .EQ. 0) THEN
1114 IF (energy(line_search_count - 1) .GT. energy(line_search_count)) THEN
1115 lsr = line_search_count
1116 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
1117 ELSE
1118 lsl = lsm
1119 lsm = line_search_count
1120 pos(line_search_count + 1) = pos(line_search_count)/gold_sec
1121 END IF
1122 ELSE
1123 IF (pos(line_search_count) .LT. pos(lsm)) THEN
1124 IF (energy(line_search_count) .GT. energy(lsm)) THEN
1125 lsr = lsm
1126 lsm = line_search_count
1127 ELSE
1128 lsl = line_search_count
1129 END IF
1130 ELSE
1131 IF (energy(line_search_count) .GT. energy(lsm)) THEN
1132 lsl = lsm
1133 lsm = line_search_count
1134 ELSE
1135 lsr = line_search_count
1136 END IF
1137 END IF
1138 IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl)) THEN
1139 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
1140 ELSE
1141 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
1142 END IF
1143 IF ((pos(lsr) - pos(lsl)) .LT. 1.0e-3_dp*pos(lsr)) THEN
1144 new_direction = .true.
1145 END IF
1146 END IF ! lsr .eq. 0
1147 END IF ! line_search_count .eq. 50
1148 ! now go to the suggested point
1149 ds = pos(line_search_count + 1) - pos(line_search_count)
1150 !WRITE(*,*) 'lsl, lsr, lsm, ds = ', lsl, lsr, lsm, ds
1151 IF ((abs(ds) < 1.0e-10_dp) .AND. (lsl == 1)) THEN
1152 new_direction = .true.
1153 ds_min = 0.5_dp/alpha
1154 ELSEIF (abs(ds) > 10.0_dp) THEN
1155 new_direction = .true.
1156 ds_min = 0.5_dp/alpha
1157 ELSE
1158 ds_min = pos(line_search_count + 1)
1159 END IF
1160 END IF ! first step
1161 ! 'c_tilde = c_tilde + d*skc'
1162 CALL cp_cfm_scale(ds, skc)
1163 CALL cp_cfm_scale_and_add(cone, c_tilde, cone, skc)
1164 ELSE
1165 IF (mintol > tol) THEN
1166 mintol = tol
1167 miniter = iter
1168 END IF
1169 IF (unit_nr > 0 .AND. modulo(iter, out_each) == 0) THEN
1170 sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1171 avg_spread_ii = sum_spread_ii/nstate
1172 WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1173 "Iteration", "Avg. Spread_ii", "Tolerance"
1174 WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1175 iter, avg_spread_ii, tol
1176 CALL m_flush(unit_nr)
1177 END IF
1178 IF (tol < eps_localization) EXIT
1179 END IF ! nextra > 0
1180
1181 END DO ! iteration
1182
1183 IF ((unit_nr > 0) .AND. (iter == max_iter)) THEN
1184 WRITE (unit_nr, '(T4,A,T4,A)') "Min. Itr.", "Min. Tol."
1185 WRITE (unit_nr, '(T4,I7,T4,E12.4)') miniter, mintol
1186 CALL m_flush(unit_nr)
1187 END IF
1188
1189 CALL cp_cfm_to_fm(u, matrix_u)
1190
1191 IF (nextra > 0) THEN
1192 rmat%local_data = real(v%local_data, kind=dp)
1193 CALL rotate_orbitals_edf(rmat, vectors_all, vectors)
1194
1195 CALL cp_cfm_release(c_tilde)
1196 CALL cp_cfm_release(grad_ctilde)
1197 CALL cp_cfm_release(gct_old)
1198 CALL cp_cfm_release(skc)
1199 CALL cp_cfm_release(ul)
1200 CALL cp_cfm_release(zdiag)
1201 CALL cp_cfm_release(ctrans_lambda)
1202 CALL cp_fm_release(id_nextra)
1203 CALL cp_fm_release(vectors_all)
1204 CALL cp_cfm_release(v)
1205 CALL cp_fm_release(matrix_v)
1206 CALL cp_fm_release(matrix_v_all)
1207 CALL cp_cfm_release(vl)
1208 DEALLOCATE (arr_zii)
1209 ELSE
1210 rmat%local_data = matrix_u%local_data
1211 CALL rotate_orbitals(rmat, vectors)
1212 END IF
1213 DO idim = 1, dim2
1214 CALL cp_cfm_release(zij_0(idim))
1215 END DO
1216 DEALLOCATE (zij_0)
1217
1218 DO idim = 1, dim2
1219 zij(1, idim)%local_data = real(c_zij(idim)%local_data, dp)
1220 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
1221 CALL cp_cfm_release(c_zij(idim))
1222 END DO
1223 DEALLOCATE (c_zij)
1224 CALL cp_fm_release(rmat)
1225 CALL cp_cfm_release(u)
1226 CALL cp_fm_release(matrix_u)
1227 DEALLOCATE (matrix_zii, sum_spread)
1228
1229 CALL timestop(handle)
1230
1231 END SUBROUTINE jacobi_cg_edf_ls
1232
1233! **************************************************************************************************
1234!> \brief ...
1235!> \param vmatrix ...
1236! **************************************************************************************************
1237 SUBROUTINE ortho_vectors(vmatrix)
1238
1239 TYPE(cp_fm_type), INTENT(IN) :: vmatrix
1240
1241 CHARACTER(LEN=*), PARAMETER :: routinen = 'ortho_vectors'
1242
1243 INTEGER :: handle, n, ncol
1244 TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
1245 TYPE(cp_fm_type) :: overlap_vv
1246
1247 CALL timeset(routinen, handle)
1248
1249 NULLIFY (fm_struct_tmp)
1250
1251 CALL cp_fm_get_info(matrix=vmatrix, nrow_global=n, ncol_global=ncol)
1252
1253 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ncol, ncol_global=ncol, &
1254 para_env=vmatrix%matrix_struct%para_env, &
1255 context=vmatrix%matrix_struct%context)
1256 CALL cp_fm_create(overlap_vv, fm_struct_tmp, "overlap_vv")
1257 CALL cp_fm_struct_release(fm_struct_tmp)
1258
1259 CALL parallel_gemm('T', 'N', ncol, ncol, n, 1.0_dp, vmatrix, vmatrix, 0.0_dp, overlap_vv)
1260 CALL cp_fm_cholesky_decompose(overlap_vv)
1261 CALL cp_fm_triangular_multiply(overlap_vv, vmatrix, n_cols=ncol, side='R', invert_tr=.true.)
1262
1263 CALL cp_fm_release(overlap_vv)
1264
1265 CALL timestop(handle)
1266
1267 END SUBROUTINE ortho_vectors
1268
1269! **************************************************************************************************
1270!> \brief ...
1271!> \param istate ...
1272!> \param jstate ...
1273!> \param st ...
1274!> \param ct ...
1275!> \param zij ...
1276! **************************************************************************************************
1277 SUBROUTINE rotate_zij(istate, jstate, st, ct, zij)
1278 INTEGER, INTENT(IN) :: istate, jstate
1279 REAL(kind=dp), INTENT(IN) :: st, ct
1280 TYPE(cp_cfm_type) :: zij(:)
1281
1282 INTEGER :: id
1283
1284! Locals
1285
1286 DO id = 1, SIZE(zij, 1)
1287 CALL cp_cfm_rot_cols(zij(id), istate, jstate, ct, st)
1288 CALL cp_cfm_rot_rows(zij(id), istate, jstate, ct, st)
1289 END DO
1290
1291 END SUBROUTINE rotate_zij
1292! **************************************************************************************************
1293!> \brief ...
1294!> \param istate ...
1295!> \param jstate ...
1296!> \param st ...
1297!> \param ct ...
1298!> \param rmat ...
1299! **************************************************************************************************
1300 SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat)
1301 INTEGER, INTENT(IN) :: istate, jstate
1302 REAL(kind=dp), INTENT(IN) :: st, ct
1303 TYPE(cp_cfm_type), INTENT(IN) :: rmat
1304
1305 CALL cp_cfm_rot_cols(rmat, istate, jstate, ct, st)
1306
1307 END SUBROUTINE rotate_rmat
1308! **************************************************************************************************
1309!> \brief ...
1310!> \param mii ...
1311!> \param mjj ...
1312!> \param mij ...
1313!> \param weights ...
1314!> \param theta ...
1315!> \param grad_ij ...
1316!> \param step ...
1317! **************************************************************************************************
1318 SUBROUTINE get_angle(mii, mjj, mij, weights, theta, grad_ij, step)
1319 COMPLEX(KIND=dp), POINTER :: mii(:), mjj(:), mij(:)
1320 REAL(kind=dp), INTENT(IN) :: weights(:)
1321 REAL(kind=dp), INTENT(OUT) :: theta
1322 REAL(kind=dp), INTENT(IN), OPTIONAL :: grad_ij, step
1323
1324 COMPLEX(KIND=dp) :: z11, z12, z22
1325 INTEGER :: dim_m, idim
1326 REAL(kind=dp) :: a12, b12, d2, ratio
1327
1328 a12 = 0.0_dp
1329 b12 = 0.0_dp
1330 dim_m = SIZE(mii)
1331 DO idim = 1, dim_m
1332 z11 = mii(idim)
1333 z22 = mjj(idim)
1334 z12 = mij(idim)
1335 a12 = a12 + weights(idim)*real(conjg(z12)*(z11 - z22), kind=dp)
1336 b12 = b12 + weights(idim)*real((z12*conjg(z12) - &
1337 0.25_dp*(z11 - z22)*(conjg(z11) - conjg(z22))), kind=dp)
1338 END DO
1339 IF (abs(b12) > 1.e-10_dp) THEN
1340 ratio = -a12/b12
1341 theta = 0.25_dp*atan(ratio)
1342 ELSEIF (abs(b12) < 1.e-10_dp) THEN
1343 b12 = 0.0_dp
1344 theta = 0.0_dp
1345 ELSE
1346 theta = 0.25_dp*pi
1347 END IF
1348 IF (PRESENT(grad_ij)) theta = theta + step*grad_ij
1349! Check second derivative info
1350 d2 = a12*sin(4._dp*theta) - b12*cos(4._dp*theta)
1351 IF (d2 <= 0._dp) THEN ! go to the maximum, not the minimum
1352 IF (theta > 0.0_dp) THEN ! make theta as small as possible
1353 theta = theta - 0.25_dp*pi
1354 ELSE
1355 theta = theta + 0.25_dp*pi
1356 END IF
1357 END IF
1358 END SUBROUTINE get_angle
1359! **************************************************************************************************
1360!> \brief ...
1361!> \param zij ...
1362!> \param weights ...
1363!> \param tolerance ...
1364!> \param grad ...
1365! **************************************************************************************************
1366 SUBROUTINE check_tolerance(zij, weights, tolerance, grad)
1367 TYPE(cp_cfm_type) :: zij(:)
1368 REAL(kind=dp), INTENT(IN) :: weights(:)
1369 REAL(kind=dp), INTENT(OUT) :: tolerance
1370 TYPE(cp_fm_type), INTENT(OUT), OPTIONAL :: grad
1371
1372 CHARACTER(len=*), PARAMETER :: routinen = 'check_tolerance'
1373
1374 INTEGER :: handle
1375 TYPE(cp_fm_type) :: force
1376
1377 CALL timeset(routinen, handle)
1378
1379! compute gradient at t=0
1380
1381 CALL cp_fm_create(force, zij(1)%matrix_struct)
1382 CALL cp_fm_set_all(force, 0._dp)
1383 CALL grad_at_0(zij, weights, force)
1384 CALL cp_fm_maxabsval(force, tolerance)
1385 IF (PRESENT(grad)) CALL cp_fm_to_fm(force, grad)
1386 CALL cp_fm_release(force)
1387
1388 CALL timestop(handle)
1389
1390 END SUBROUTINE check_tolerance
1391! **************************************************************************************************
1392!> \brief ...
1393!> \param rmat ...
1394!> \param vectors ...
1395! **************************************************************************************************
1396 SUBROUTINE rotate_orbitals(rmat, vectors)
1397 TYPE(cp_fm_type), INTENT(IN) :: rmat, vectors
1398
1399 INTEGER :: k, n
1400 TYPE(cp_fm_type) :: wf
1401
1402 CALL cp_fm_create(wf, vectors%matrix_struct)
1403 CALL cp_fm_get_info(vectors, nrow_global=n, ncol_global=k)
1404 CALL parallel_gemm("N", "N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf)
1405 CALL cp_fm_to_fm(wf, vectors)
1406 CALL cp_fm_release(wf)
1407 END SUBROUTINE rotate_orbitals
1408! **************************************************************************************************
1409!> \brief ...
1410!> \param rmat ...
1411!> \param vec_all ...
1412!> \param vectors ...
1413! **************************************************************************************************
1414 SUBROUTINE rotate_orbitals_edf(rmat, vec_all, vectors)
1415 TYPE(cp_fm_type), INTENT(IN) :: rmat, vec_all, vectors
1416
1417 INTEGER :: k, l, n
1418 TYPE(cp_fm_type) :: wf
1419
1420 CALL cp_fm_create(wf, vectors%matrix_struct)
1421 CALL cp_fm_get_info(vec_all, nrow_global=n, ncol_global=k)
1422 CALL cp_fm_get_info(rmat, ncol_global=l)
1423
1424 CALL parallel_gemm("N", "N", n, l, k, 1.0_dp, vec_all, rmat, 0.0_dp, wf)
1425 CALL cp_fm_to_fm(wf, vectors)
1426 CALL cp_fm_release(wf)
1427 END SUBROUTINE rotate_orbitals_edf
1428! **************************************************************************************************
1429!> \brief ...
1430!> \param diag ...
1431!> \param weights ...
1432!> \param matrix ...
1433!> \param ndim ...
1434! **************************************************************************************************
1435 SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim)
1436 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1437 REAL(kind=dp), INTENT(IN) :: weights(:)
1438 TYPE(cp_fm_type), INTENT(IN) :: matrix
1439 INTEGER, INTENT(IN) :: ndim
1440
1441 COMPLEX(KIND=dp) :: zii, zjj
1442 INTEGER :: idim, istate, jstate, ncol_local, &
1443 nrow_global, nrow_local
1444 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1445 REAL(kind=dp) :: gradsq_ij
1446
1447 CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1448 ncol_local=ncol_local, nrow_global=nrow_global, &
1449 row_indices=row_indices, col_indices=col_indices)
1450
1451 DO istate = 1, nrow_local
1452 DO jstate = 1, ncol_local
1453! get real and imaginary parts
1454 gradsq_ij = 0.0_dp
1455 DO idim = 1, ndim
1456 zii = diag(row_indices(istate), idim)
1457 zjj = diag(col_indices(jstate), idim)
1458 gradsq_ij = gradsq_ij + weights(idim)* &
1459 4.0_dp*real((conjg(zii)*zii + conjg(zjj)*zjj), kind=dp)
1460 END DO
1461 matrix%local_data(istate, jstate) = gradsq_ij
1462 END DO
1463 END DO
1464 END SUBROUTINE gradsq_at_0
1465! **************************************************************************************************
1466!> \brief ...
1467!> \param matrix_p ...
1468!> \param weights ...
1469!> \param matrix ...
1470! **************************************************************************************************
1471 SUBROUTINE grad_at_0(matrix_p, weights, matrix)
1472 TYPE(cp_cfm_type) :: matrix_p(:)
1473 REAL(kind=dp), INTENT(IN) :: weights(:)
1474 TYPE(cp_fm_type), INTENT(IN) :: matrix
1475
1476 COMPLEX(KIND=dp) :: zii, zij, zjj
1477 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1478 INTEGER :: dim_m, idim, istate, jstate, ncol_local, &
1479 nrow_global, nrow_local
1480 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1481 REAL(kind=dp) :: grad_ij
1482
1483 NULLIFY (diag)
1484 CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1485 ncol_local=ncol_local, nrow_global=nrow_global, &
1486 row_indices=row_indices, col_indices=col_indices)
1487 dim_m = SIZE(matrix_p, 1)
1488 ALLOCATE (diag(nrow_global, dim_m))
1489
1490 DO idim = 1, dim_m
1491 DO istate = 1, nrow_global
1492 CALL cp_cfm_get_element(matrix_p(idim), istate, istate, diag(istate, idim))
1493 END DO
1494 END DO
1495
1496 DO istate = 1, nrow_local
1497 DO jstate = 1, ncol_local
1498! get real and imaginary parts
1499 grad_ij = 0.0_dp
1500 DO idim = 1, dim_m
1501 zii = diag(row_indices(istate), idim)
1502 zjj = diag(col_indices(jstate), idim)
1503 zij = matrix_p(idim)%local_data(istate, jstate)
1504 grad_ij = grad_ij + weights(idim)* &
1505 REAL(4.0_dp*conjg(zij)*(zjj - zii), dp)
1506 END DO
1507 matrix%local_data(istate, jstate) = grad_ij
1508 END DO
1509 END DO
1510 DEALLOCATE (diag)
1511 END SUBROUTINE grad_at_0
1512
1513! return energy and maximum gradient in the current point
1514! **************************************************************************************************
1515!> \brief ...
1516!> \param weights ...
1517!> \param zij ...
1518!> \param tolerance ...
1519!> \param value ...
1520! **************************************************************************************************
1521 SUBROUTINE check_tolerance_new(weights, zij, tolerance, value)
1522 REAL(kind=dp), INTENT(IN) :: weights(:)
1523 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :)
1524 REAL(kind=dp) :: tolerance, value
1525
1526 COMPLEX(KIND=dp) :: kii, kij, kjj
1527 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1528 INTEGER :: idim, istate, jstate, ncol_local, &
1529 nrow_global, nrow_local
1530 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1531 REAL(kind=dp) :: grad_ij, ra, rb
1532
1533 NULLIFY (diag)
1534 CALL cp_fm_get_info(zij(1, 1), nrow_local=nrow_local, &
1535 ncol_local=ncol_local, nrow_global=nrow_global, &
1536 row_indices=row_indices, col_indices=col_indices)
1537 ALLOCATE (diag(nrow_global, SIZE(zij, 2)))
1538 value = 0.0_dp
1539 DO idim = 1, SIZE(zij, 2)
1540 DO istate = 1, nrow_global
1541 CALL cp_fm_get_element(zij(1, idim), istate, istate, ra)
1542 CALL cp_fm_get_element(zij(2, idim), istate, istate, rb)
1543 diag(istate, idim) = cmplx(ra, rb, dp)
1544 value = value + weights(idim) - weights(idim)*abs(diag(istate, idim))**2
1545 END DO
1546 END DO
1547 tolerance = 0.0_dp
1548 DO istate = 1, nrow_local
1549 DO jstate = 1, ncol_local
1550 grad_ij = 0.0_dp
1551 DO idim = 1, SIZE(zij, 2)
1552 kii = diag(row_indices(istate), idim)
1553 kjj = diag(col_indices(jstate), idim)
1554 ra = zij(1, idim)%local_data(istate, jstate)
1555 rb = zij(2, idim)%local_data(istate, jstate)
1556 kij = cmplx(ra, rb, dp)
1557 grad_ij = grad_ij + weights(idim)* &
1558 REAL(4.0_dp*conjg(kij)*(kjj - kii), dp)
1559 END DO
1560 tolerance = max(abs(grad_ij), tolerance)
1561 END DO
1562 END DO
1563 CALL zij(1, 1)%matrix_struct%para_env%max(tolerance)
1564
1565 DEALLOCATE (diag)
1566
1567 END SUBROUTINE check_tolerance_new
1568
1569! **************************************************************************************************
1570!> \brief yet another crazy try, computes the angles needed to rotate the orbitals first
1571!> and rotates them all at the same time (hoping for the best of course)
1572!> \param weights ...
1573!> \param zij ...
1574!> \param vectors ...
1575!> \param max_iter ...
1576!> \param max_crazy_angle ...
1577!> \param crazy_scale ...
1578!> \param crazy_use_diag ...
1579!> \param eps_localization ...
1580!> \param iterations ...
1581!> \param converged ...
1582! **************************************************************************************************
1583 SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, &
1584 eps_localization, iterations, converged)
1585 REAL(kind=dp), INTENT(IN) :: weights(:)
1586 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1587 INTEGER, INTENT(IN) :: max_iter
1588 REAL(kind=dp), INTENT(IN) :: max_crazy_angle
1589 REAL(kind=dp) :: crazy_scale
1590 LOGICAL :: crazy_use_diag
1591 REAL(kind=dp), INTENT(IN) :: eps_localization
1592 INTEGER :: iterations
1593 LOGICAL, INTENT(out), OPTIONAL :: converged
1594
1595 CHARACTER(len=*), PARAMETER :: routinen = 'crazy_rotations'
1596 COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1597 czero = (0.0_dp, 0.0_dp)
1598
1599 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1600 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1601 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
1602 INTEGER :: dim2, handle, i, icol, idim, irow, &
1603 method, ncol_global, ncol_local, &
1604 norder, nrow_global, nrow_local, &
1605 nsquare, unit_nr
1606 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1607 LOGICAL :: do_emd
1608 REAL(kind=dp) :: eps_exp, limit_crazy_angle, maxeval, &
1609 norm, ra, rb, theta, tolerance, value
1610 REAL(kind=dp), DIMENSION(:), POINTER :: evals
1611 TYPE(cp_cfm_type) :: cmat_a, cmat_r, cmat_t1
1612 TYPE(cp_fm_type) :: mat_r, mat_t, mat_theta, mat_u
1613
1614 CALL timeset(routinen, handle)
1615 NULLIFY (row_indices, col_indices)
1616 CALL cp_fm_get_info(zij(1, 1), nrow_global=nrow_global, &
1617 ncol_global=ncol_global, &
1618 row_indices=row_indices, col_indices=col_indices, &
1619 nrow_local=nrow_local, ncol_local=ncol_local)
1620
1621 limit_crazy_angle = max_crazy_angle
1622
1623 NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj)
1624 dim2 = SIZE(zij, 2)
1625 ALLOCATE (diag_z(nrow_global, dim2))
1626 ALLOCATE (evals(nrow_global))
1627 ALLOCATE (evals_exp(nrow_global))
1628
1629 CALL cp_cfm_create(cmat_a, zij(1, 1)%matrix_struct)
1630 CALL cp_cfm_create(cmat_r, zij(1, 1)%matrix_struct)
1631 CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1632
1633 CALL cp_fm_create(mat_u, zij(1, 1)%matrix_struct)
1634 CALL cp_fm_create(mat_t, zij(1, 1)%matrix_struct)
1635 CALL cp_fm_create(mat_r, zij(1, 1)%matrix_struct)
1636
1637 CALL cp_fm_create(mat_theta, zij(1, 1)%matrix_struct)
1638
1639 CALL cp_fm_set_all(mat_r, 0.0_dp, 1.0_dp)
1640 CALL cp_fm_set_all(mat_t, 0.0_dp)
1641 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
1642 DO idim = 1, dim2
1643 CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(1, idim))
1644 CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(2, idim))
1645 END DO
1646 CALL cp_fm_syevd(mat_t, mat_u, evals)
1647 DO idim = 1, dim2
1648 ! rotate z's
1649 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1650 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1651 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1652 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1653 END DO
1654 ! collect rotation matrix
1655 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1656 CALL cp_fm_to_fm(mat_t, mat_r)
1657
1658 unit_nr = -1
1659 IF (cmat_a%matrix_struct%para_env%is_source()) THEN
1661 WRITE (unit_nr, '(T2,A7,A6,1X,A20,A12,A12,A12)') &
1662 "CRAZY| ", "Iter", "value ", "gradient", "Max. eval", "limit"
1663 END IF
1664
1665 iterations = 0
1666 tolerance = 1.0_dp
1667
1668 DO
1669 iterations = iterations + 1
1670 DO idim = 1, dim2
1671 DO i = 1, nrow_global
1672 CALL cp_fm_get_element(zij(1, idim), i, i, ra)
1673 CALL cp_fm_get_element(zij(2, idim), i, i, rb)
1674 diag_z(i, idim) = cmplx(ra, rb, dp)
1675 END DO
1676 END DO
1677 DO irow = 1, nrow_local
1678 DO icol = 1, ncol_local
1679 DO idim = 1, dim2
1680 ra = zij(1, idim)%local_data(irow, icol)
1681 rb = zij(2, idim)%local_data(irow, icol)
1682 mij(idim) = cmplx(ra, rb, dp)
1683 mii(idim) = diag_z(row_indices(irow), idim)
1684 mjj(idim) = diag_z(col_indices(icol), idim)
1685 END DO
1686 IF (row_indices(irow) .NE. col_indices(icol)) THEN
1687 CALL get_angle(mii, mjj, mij, weights, theta)
1688 theta = crazy_scale*theta
1689 IF (theta .GT. limit_crazy_angle) theta = limit_crazy_angle
1690 IF (theta .LT. -limit_crazy_angle) theta = -limit_crazy_angle
1691 IF (crazy_use_diag) THEN
1692 cmat_a%local_data(irow, icol) = -cmplx(0.0_dp, theta, dp)
1693 ELSE
1694 mat_theta%local_data(irow, icol) = -theta
1695 END IF
1696 ELSE
1697 IF (crazy_use_diag) THEN
1698 cmat_a%local_data(irow, icol) = czero
1699 ELSE
1700 mat_theta%local_data(irow, icol) = 0.0_dp
1701 END IF
1702 END IF
1703 END DO
1704 END DO
1705
1706 ! construct rotation matrix U based on A using diagonalization
1707 ! alternatively, exp based on repeated squaring could be faster
1708 IF (crazy_use_diag) THEN
1709 CALL cp_cfm_heevd(cmat_a, cmat_r, evals)
1710 maxeval = maxval(abs(evals))
1711 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1712 CALL cp_cfm_to_cfm(cmat_r, cmat_t1)
1713 CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1714 CALL parallel_gemm('N', 'C', nrow_global, nrow_global, nrow_global, cone, &
1715 cmat_t1, cmat_r, czero, cmat_a)
1716 mat_u%local_data = real(cmat_a%local_data, kind=dp) ! U is a real matrix
1717 ELSE
1718 do_emd = .false.
1719 method = 2
1720 eps_exp = 1.0_dp*epsilon(eps_exp)
1721 CALL cp_fm_maxabsrownorm(mat_theta, norm)
1722 maxeval = norm ! an upper bound
1723 CALL get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
1724 CALL exp_pade_real(mat_u, mat_theta, nsquare, norder)
1725 END IF
1726
1727 DO idim = 1, dim2
1728 ! rotate z's
1729 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1730 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1731 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1732 CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1733 END DO
1734 ! collect rotation matrix
1735 CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1736 CALL cp_fm_to_fm(mat_t, mat_r)
1737
1738 CALL check_tolerance_new(weights, zij, tolerance, value)
1739
1740 IF (unit_nr > 0) THEN
1741 WRITE (unit_nr, '(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') &
1742 "CRAZY| ", iterations, value, tolerance, maxeval, limit_crazy_angle
1743 CALL m_flush(unit_nr)
1744 END IF
1745 IF (tolerance .LT. eps_localization .OR. iterations .GE. max_iter) EXIT
1746 END DO
1747
1748 IF (PRESENT(converged)) converged = (tolerance .LT. eps_localization)
1749
1750 CALL cp_cfm_release(cmat_a)
1751 CALL cp_cfm_release(cmat_r)
1752 CALL cp_cfm_release(cmat_t1)
1753
1754 CALL cp_fm_release(mat_u)
1755 CALL cp_fm_release(mat_t)
1756 CALL cp_fm_release(mat_theta)
1757
1758 CALL rotate_orbitals(mat_r, vectors)
1759
1760 CALL cp_fm_release(mat_r)
1761 DEALLOCATE (evals_exp, evals, diag_z)
1762 DEALLOCATE (mii, mij, mjj)
1763
1764 CALL timestop(handle)
1765
1766 END SUBROUTINE crazy_rotations
1767
1768! **************************************************************************************************
1769!> \brief use the exponential parametrization as described in to perform a direct mini
1770!> Gerd Berghold et al. PRB 61 (15), pag. 10040 (2000)
1771!> none of the input is modified for the time being, just finds the rotations
1772!> that minimizes, and throws it away afterwards :-)
1773!> apart from being expensive and not cleaned, this works fine
1774!> useful to try different spread functionals
1775!> \param weights ...
1776!> \param zij ...
1777!> \param vectors ...
1778!> \param max_iter ...
1779!> \param eps_localization ...
1780!> \param iterations ...
1781! **************************************************************************************************
1782 SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
1783 REAL(kind=dp), INTENT(IN) :: weights(:)
1784 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1785 INTEGER, INTENT(IN) :: max_iter
1786 REAL(kind=dp), INTENT(IN) :: eps_localization
1787 INTEGER :: iterations
1788
1789 CHARACTER(len=*), PARAMETER :: routinen = 'direct_mini'
1790 COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1791 czero = (0.0_dp, 0.0_dp)
1792 REAL(kind=dp), PARAMETER :: gold_sec = 0.3819_dp
1793
1794 COMPLEX(KIND=dp) :: lk, ll, tmp
1795 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1796 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1797 INTEGER :: handle, i, icol, idim, irow, &
1798 line_search_count, line_searches, lsl, &
1799 lsm, lsr, n, ncol_local, ndim, &
1800 nrow_local, output_unit
1801 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1802 LOGICAL :: new_direction
1803 REAL(kind=dp) :: a, b, beta_pr, c, denom, ds, ds_min, fa, &
1804 fb, fc, nom, normg, normg_cross, &
1805 normg_old, npos, omega, tol, val, x0, &
1806 x1, xa, xb, xc
1807 REAL(kind=dp), DIMENSION(150) :: energy, grad, pos
1808 REAL(kind=dp), DIMENSION(:), POINTER :: evals, fval, fvald
1809 TYPE(cp_cfm_type) :: cmat_a, cmat_b, cmat_m, cmat_r, cmat_t1, &
1810 cmat_t2, cmat_u
1811 TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
1812 TYPE(cp_fm_type) :: matrix_a, matrix_g, matrix_g_old, &
1813 matrix_g_search, matrix_h, matrix_r, &
1814 matrix_t
1815
1816 NULLIFY (evals, evals_exp, diag_z, fval, fvald)
1817
1818 CALL timeset(routinen, handle)
1819 output_unit = cp_logger_get_default_io_unit()
1820
1821 n = zij(1, 1)%matrix_struct%nrow_global
1822 ndim = (SIZE(zij, 2))
1823
1824 IF (output_unit > 0) THEN
1825 WRITE (output_unit, '(T4,A )') "Localization by direct minimization of the functional; "
1826 WRITE (output_unit, '(T5,2A13,A20,A20,A10 )') " Line search ", " Iteration ", " Functional ", " Tolerance ", " ds Min "
1827 END IF
1828
1829 ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n))
1830 ALLOCATE (c_zij(ndim))
1831
1832 ! create the three complex matrices Z
1833 DO idim = 1, ndim
1834 CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
1835 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
1836 zij(2, idim)%local_data, dp)
1837 END DO
1838
1839 CALL cp_fm_create(matrix_a, zij(1, 1)%matrix_struct)
1840 CALL cp_fm_create(matrix_g, zij(1, 1)%matrix_struct)
1841 CALL cp_fm_create(matrix_t, zij(1, 1)%matrix_struct)
1842 CALL cp_fm_create(matrix_h, zij(1, 1)%matrix_struct)
1843 CALL cp_fm_create(matrix_g_search, zij(1, 1)%matrix_struct)
1844 CALL cp_fm_create(matrix_g_old, zij(1, 1)%matrix_struct)
1845 CALL cp_fm_create(matrix_r, zij(1, 1)%matrix_struct)
1846 CALL cp_fm_set_all(matrix_r, 0.0_dp, 1.0_dp)
1847
1848 CALL cp_fm_set_all(matrix_a, 0.0_dp)
1849! CALL cp_fm_init_random ( matrix_A )
1850
1851 CALL cp_cfm_create(cmat_a, zij(1, 1)%matrix_struct)
1852 CALL cp_cfm_create(cmat_u, zij(1, 1)%matrix_struct)
1853 CALL cp_cfm_create(cmat_r, zij(1, 1)%matrix_struct)
1854 CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1855 CALL cp_cfm_create(cmat_t2, zij(1, 1)%matrix_struct)
1856 CALL cp_cfm_create(cmat_b, zij(1, 1)%matrix_struct)
1857 CALL cp_cfm_create(cmat_m, zij(1, 1)%matrix_struct)
1858
1859 CALL cp_cfm_get_info(cmat_b, nrow_local=nrow_local, ncol_local=ncol_local, &
1860 row_indices=row_indices, col_indices=col_indices)
1861
1862 CALL cp_fm_set_all(matrix_g_old, 0.0_dp)
1863 CALL cp_fm_set_all(matrix_g_search, 0.0_dp)
1864 normg_old = 1.0e30_dp
1865 ds_min = 1.0_dp
1866 new_direction = .true.
1867 iterations = 0
1868 line_searches = 0
1869 line_search_count = 0
1870 DO
1871 iterations = iterations + 1
1872 ! compute U,R,evals given A
1873 cmat_a%local_data = cmplx(0.0_dp, matrix_a%local_data, dp) ! cmat_A is hermitian, evals are reals
1874 CALL cp_cfm_heevd(cmat_a, cmat_r, evals)
1875 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1876 CALL cp_cfm_to_cfm(cmat_r, cmat_t1)
1877 CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1878 CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_u)
1879 cmat_u%local_data = real(cmat_u%local_data, kind=dp) ! enforce numerics, U is a real matrix
1880
1881 IF (new_direction .AND. mod(line_searches, 20) .EQ. 5) THEN ! reset with A .eq. 0
1882 DO idim = 1, ndim
1883 CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1)
1884 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_u, cmat_t1, czero, c_zij(idim))
1885 END DO
1886 ! collect rotation matrix
1887 matrix_h%local_data = real(cmat_u%local_data, kind=dp)
1888 CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
1889 CALL cp_fm_to_fm(matrix_t, matrix_r)
1890
1891 CALL cp_cfm_set_all(cmat_u, czero, cone)
1892 CALL cp_cfm_set_all(cmat_r, czero, cone)
1893 CALL cp_cfm_set_all(cmat_a, czero)
1894 CALL cp_fm_set_all(matrix_a, 0.0_dp)
1895 evals(:) = 0.0_dp
1896 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1897 CALL cp_fm_set_all(matrix_g_old, 0.0_dp)
1898 CALL cp_fm_set_all(matrix_g_search, 0.0_dp)
1899 normg_old = 1.0e30_dp
1900 END IF
1901
1902 ! compute Omega and M
1903 CALL cp_cfm_set_all(cmat_m, czero)
1904 omega = 0.0_dp
1905 DO idim = 1, ndim
1906 CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1) ! t1=ZU
1907 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_u, cmat_t1, czero, cmat_t2) ! t2=(U^T)ZU
1908 DO i = 1, n
1909 CALL cp_cfm_get_element(cmat_t2, i, i, diag_z(i, idim))
1910 SELECT CASE (2) ! allows for selection of different spread functionals
1911 CASE (1)
1912 fval(i) = -weights(idim)*log(abs(diag_z(i, idim))**2)
1913 fvald(i) = -weights(idim)/(abs(diag_z(i, idim))**2)
1914 CASE (2) ! corresponds to the jacobi setup
1915 fval(i) = weights(idim) - weights(idim)*abs(diag_z(i, idim))**2
1916 fvald(i) = -weights(idim)
1917 END SELECT
1918 omega = omega + fval(i)
1919 END DO
1920 DO icol = 1, ncol_local
1921 DO irow = 1, nrow_local
1922 tmp = cmat_t1%local_data(irow, icol)*conjg(diag_z(col_indices(icol), idim))
1923 cmat_m%local_data(irow, icol) = cmat_m%local_data(irow, icol) &
1924 + 4.0_dp*fvald(col_indices(icol))*real(tmp, kind=dp)
1925 END DO
1926 END DO
1927 END DO
1928
1929 ! compute Hessian diagonal approximation for the preconditioner
1930 IF (.true.) THEN
1931 CALL gradsq_at_0(diag_z, weights, matrix_h, ndim)
1932 ELSE
1933 CALL cp_fm_set_all(matrix_h, 1.0_dp)
1934 END IF
1935
1936 ! compute B
1937 DO icol = 1, ncol_local
1938 DO irow = 1, nrow_local
1939 ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow))
1940 lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol))
1941 IF (abs(ll - lk) .LT. 0.5_dp) THEN ! use a series expansion to avoid loss of precision
1942 tmp = 1.0_dp
1943 cmat_b%local_data(irow, icol) = 0.0_dp
1944 DO i = 1, 16
1945 cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol) + tmp
1946 tmp = tmp*(ll - lk)/(i + 1)
1947 END DO
1948 cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol)*exp(lk)
1949 ELSE
1950 cmat_b%local_data(irow, icol) = (exp(lk) - exp(ll))/(lk - ll)
1951 END IF
1952 END DO
1953 END DO
1954 ! compute gradient matrix_G
1955
1956 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_m, cmat_r, czero, cmat_t1) ! t1=(M^T)(R^T)
1957 CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_r, cmat_t1, czero, cmat_t2) ! t2=(R)t1
1958 CALL cp_cfm_schur_product(cmat_t2, cmat_b, cmat_t1)
1959 CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_t2)
1960 CALL parallel_gemm('N', 'N', n, n, n, cone, cmat_r, cmat_t2, czero, cmat_t1)
1961 matrix_g%local_data = real(cmat_t1%local_data, kind=dp)
1962 CALL cp_fm_transpose(matrix_g, matrix_t)
1963 CALL cp_fm_scale_and_add(-1.0_dp, matrix_g, 1.0_dp, matrix_t)
1964 CALL cp_fm_maxabsval(matrix_g, tol)
1965
1966 ! from here on, minimizing technology
1967 IF (new_direction) THEN
1968 ! energy converged up to machine precision ?
1969 line_searches = line_searches + 1
1970 ! DO i=1,line_search_count
1971 ! write(15,*) pos(i),energy(i)
1972 ! ENDDO
1973 ! write(15,*) ""
1974 ! CALL m_flush(15)
1975 !write(16,*) evals(:)
1976 !write(17,*) matrix_A%local_data(:,:)
1977 !write(18,*) matrix_G%local_data(:,:)
1978 IF (output_unit > 0) THEN
1979 WRITE (output_unit, '(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, iterations, omega, tol, ds_min
1980 CALL m_flush(output_unit)
1981 END IF
1982 IF (tol < eps_localization .OR. iterations > max_iter) EXIT
1983
1984 IF (.true.) THEN ! do conjugate gradient CG
1985 CALL cp_fm_trace(matrix_g, matrix_g_old, normg_cross)
1986 normg_cross = normg_cross*0.5_dp ! takes into account the fact that A is antisymmetric
1987 ! apply the preconditioner
1988 DO icol = 1, ncol_local
1989 DO irow = 1, nrow_local
1990 matrix_g_old%local_data(irow, icol) = matrix_g%local_data(irow, icol)/matrix_h%local_data(irow, icol)
1991 END DO
1992 END DO
1993 CALL cp_fm_trace(matrix_g, matrix_g_old, normg)
1994 normg = normg*0.5_dp
1995 beta_pr = (normg - normg_cross)/normg_old
1996 normg_old = normg
1997 beta_pr = max(beta_pr, 0.0_dp)
1998 CALL cp_fm_scale_and_add(beta_pr, matrix_g_search, -1.0_dp, matrix_g_old)
1999 CALL cp_fm_trace(matrix_g_search, matrix_g_old, normg_cross)
2000 IF (normg_cross .GE. 0) THEN ! back to SD
2001 IF (matrix_a%matrix_struct%para_env%is_source()) THEN
2002 WRITE (cp_logger_get_default_unit_nr(), *) "!"
2003 END IF
2004 beta_pr = 0.0_dp
2005 CALL cp_fm_scale_and_add(beta_pr, matrix_g_search, -1.0_dp, matrix_g_old)
2006 END IF
2007 ELSE ! SD
2008 CALL cp_fm_scale_and_add(0.0_dp, matrix_g_search, -1.0_dp, matrix_g)
2009 END IF
2010 ! ds_min=1.0E-4_dp
2011 line_search_count = 0
2012 END IF
2013 line_search_count = line_search_count + 1
2014 energy(line_search_count) = omega
2015
2016 ! line search section
2017 SELECT CASE (3)
2018 CASE (1) ! two point line search
2019 SELECT CASE (line_search_count)
2020 CASE (1)
2021 pos(1) = 0.0_dp
2022 pos(2) = ds_min
2023 CALL cp_fm_trace(matrix_g, matrix_g_search, grad(1))
2024 grad(1) = grad(1)/2.0_dp
2025 new_direction = .false.
2026 CASE (2)
2027 new_direction = .true.
2028 x0 = pos(1) ! 0.0_dp
2029 c = energy(1)
2030 b = grad(1)
2031 x1 = pos(2)
2032 a = (energy(2) - b*x1 - c)/(x1**2)
2033 IF (a .LE. 0.0_dp) a = 1.0e-15_dp
2034 npos = -b/(2.0_dp*a)
2035 val = a*npos**2 + b*npos + c
2036 IF (val .LT. energy(1) .AND. val .LE. energy(2)) THEN
2037 ! we go to a minimum, but ...
2038 ! we take a guard against too large steps
2039 pos(3) = min(npos, maxval(pos(1:2))*4.0_dp)
2040 ELSE ! just take an extended step
2041 pos(3) = maxval(pos(1:2))*2.0_dp
2042 END IF
2043 END SELECT
2044 CASE (2) ! 3 point line search
2045 SELECT CASE (line_search_count)
2046 CASE (1)
2047 new_direction = .false.
2048 pos(1) = 0.0_dp
2049 pos(2) = ds_min*0.8_dp
2050 CASE (2)
2051 new_direction = .false.
2052 IF (energy(2) .GT. energy(1)) THEN
2053 pos(3) = ds_min*0.7_dp
2054 ELSE
2055 pos(3) = ds_min*1.4_dp
2056 END IF
2057 CASE (3)
2058 new_direction = .true.
2059 xa = pos(1)
2060 xb = pos(2)
2061 xc = pos(3)
2062 fa = energy(1)
2063 fb = energy(2)
2064 fc = energy(3)
2065 nom = (xb - xa)**2*(fb - fc) - (xb - xc)**2*(fb - fa)
2066 denom = (xb - xa)*(fb - fc) - (xb - xc)*(fb - fa)
2067 IF (abs(denom) .LE. 1.0e-18_dp*max(abs(fb - fc), abs(fb - fa))) THEN
2068 npos = xb
2069 ELSE
2070 npos = xb - 0.5_dp*nom/denom ! position of the stationary point
2071 END IF
2072 val = (npos - xa)*(npos - xb)*fc/((xc - xa)*(xc - xb)) + &
2073 (npos - xb)*(npos - xc)*fa/((xa - xb)*(xa - xc)) + &
2074 (npos - xc)*(npos - xa)*fb/((xb - xc)*(xb - xa))
2075 IF (val .LT. fa .AND. val .LE. fb .AND. val .LE. fc) THEN ! OK, we go to a minimum
2076 ! we take a guard against too large steps
2077 pos(4) = max(maxval(pos(1:3))*0.01_dp, &
2078 min(npos, maxval(pos(1:3))*4.0_dp))
2079 ELSE ! just take an extended step
2080 pos(4) = maxval(pos(1:3))*2.0_dp
2081 END IF
2082 END SELECT
2083 CASE (3) ! golden section hunt
2084 new_direction = .false.
2085 IF (line_search_count .EQ. 1) THEN
2086 lsl = 1
2087 lsr = 0
2088 lsm = 1
2089 pos(1) = 0.0_dp
2090 pos(2) = ds_min/gold_sec
2091 ELSE
2092 IF (line_search_count .EQ. 150) cpabort("Too many")
2093 IF (lsr .EQ. 0) THEN
2094 IF (energy(line_search_count - 1) .LT. energy(line_search_count)) THEN
2095 lsr = line_search_count
2096 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
2097 ELSE
2098 lsl = lsm
2099 lsm = line_search_count
2100 pos(line_search_count + 1) = pos(line_search_count)/gold_sec
2101 END IF
2102 ELSE
2103 IF (pos(line_search_count) .LT. pos(lsm)) THEN
2104 IF (energy(line_search_count) .LT. energy(lsm)) THEN
2105 lsr = lsm
2106 lsm = line_search_count
2107 ELSE
2108 lsl = line_search_count
2109 END IF
2110 ELSE
2111 IF (energy(line_search_count) .LT. energy(lsm)) THEN
2112 lsl = lsm
2113 lsm = line_search_count
2114 ELSE
2115 lsr = line_search_count
2116 END IF
2117 END IF
2118 IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl)) THEN
2119 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
2120 ELSE
2121 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
2122 END IF
2123 IF ((pos(lsr) - pos(lsl)) .LT. 1.0e-3_dp*pos(lsr)) THEN
2124 new_direction = .true.
2125 END IF
2126 END IF ! lsr .eq. 0
2127 END IF ! first step
2128 END SELECT
2129 ! now go to the suggested point
2130 ds_min = pos(line_search_count + 1)
2131 ds = pos(line_search_count + 1) - pos(line_search_count)
2132 CALL cp_fm_scale_and_add(1.0_dp, matrix_a, ds, matrix_g_search)
2133 END DO
2134
2135 ! collect rotation matrix
2136 matrix_h%local_data = real(cmat_u%local_data, kind=dp)
2137 CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
2138 CALL cp_fm_to_fm(matrix_t, matrix_r)
2139 CALL rotate_orbitals(matrix_r, vectors)
2140 CALL cp_fm_release(matrix_r)
2141
2142 CALL cp_fm_release(matrix_a)
2143 CALL cp_fm_release(matrix_g)
2144 CALL cp_fm_release(matrix_h)
2145 CALL cp_fm_release(matrix_t)
2146 CALL cp_fm_release(matrix_g_search)
2147 CALL cp_fm_release(matrix_g_old)
2148 CALL cp_cfm_release(cmat_a)
2149 CALL cp_cfm_release(cmat_u)
2150 CALL cp_cfm_release(cmat_r)
2151 CALL cp_cfm_release(cmat_t1)
2152 CALL cp_cfm_release(cmat_t2)
2153 CALL cp_cfm_release(cmat_b)
2154 CALL cp_cfm_release(cmat_m)
2155
2156 DEALLOCATE (evals, evals_exp, fval, fvald)
2157
2158 DO idim = 1, SIZE(c_zij)
2159 zij(1, idim)%local_data = real(c_zij(idim)%local_data, dp)
2160 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
2161 CALL cp_cfm_release(c_zij(idim))
2162 END DO
2163 DEALLOCATE (c_zij)
2164 DEALLOCATE (diag_z)
2165
2166 CALL timestop(handle)
2167
2168 END SUBROUTINE
2169
2170! **************************************************************************************************
2171!> \brief Parallel algorithm for jacobi rotations
2172!> \param weights ...
2173!> \param zij ...
2174!> \param vectors ...
2175!> \param para_env ...
2176!> \param max_iter ...
2177!> \param eps_localization ...
2178!> \param sweeps ...
2179!> \param out_each ...
2180!> \param target_time ...
2181!> \param start_time ...
2182!> \param restricted ...
2183!> \par History
2184!> use allgather for improved performance
2185!> \author MI (11.2009)
2186! **************************************************************************************************
2187 SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
2188 sweeps, out_each, target_time, start_time, restricted)
2189
2190 REAL(kind=dp), INTENT(IN) :: weights(:)
2191 TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
2192 TYPE(mp_para_env_type), POINTER :: para_env
2193 INTEGER, INTENT(IN) :: max_iter
2194 REAL(kind=dp), INTENT(IN) :: eps_localization
2195 INTEGER :: sweeps
2196 INTEGER, INTENT(IN) :: out_each
2197 REAL(dp) :: target_time, start_time
2198 INTEGER :: restricted
2199
2200 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rot_para'
2201
2202 INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2203 nblock, nblock_max, ns_me, nstate, &
2204 output_unit
2205 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2206 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_im, z_ij_loc_re
2207 REAL(kind=dp) :: xstate
2208 TYPE(cp_fm_type) :: rmat
2209 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2210
2211 CALL timeset(routinen, handle)
2212
2213 output_unit = cp_logger_get_default_io_unit()
2214
2215 NULLIFY (cz_ij_loc)
2216
2217 dim2 = SIZE(zij, 2)
2218
2219 CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
2220 CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
2221
2222 CALL cp_fm_get_info(rmat, nrow_global=nstate)
2223
2224 IF (restricted > 0) THEN
2225 IF (output_unit > 0) THEN
2226 WRITE (output_unit, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
2227 END IF
2228 nstate = nstate - restricted
2229 END IF
2230
2231 ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2232 xstate = real(nstate, dp)/real(para_env%num_pe, dp)
2233 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2234 DO ip = 1, para_env%num_pe
2235 ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2236 ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2237 END DO
2238 nblock_max = 0
2239 DO ip = 0, para_env%num_pe - 1
2240 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2241 nblock_max = max(nblock_max, nblock)
2242 END DO
2243
2244 ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2245 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2246 ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2247 ALLOCATE (cz_ij_loc(dim2))
2248 DO idim = 1, dim2
2249 DO ip = 0, para_env%num_pe - 1
2250 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2251 CALL cp_fm_get_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2252 CALL cp_fm_get_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2253 IF (para_env%mepos == ip) THEN
2254 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, nblock))
2255 DO i = 1, nblock
2256 DO j = 1, nstate
2257 cz_ij_loc(idim)%c_array(j, i) = cmplx(z_ij_loc_re(j, i), z_ij_loc_im(j, i), dp)
2258 END DO
2259 END DO
2260 END IF
2261 END DO ! ip
2262 END DO
2263 DEALLOCATE (z_ij_loc_re)
2264 DEALLOCATE (z_ij_loc_im)
2265
2266 ALLOCATE (rotmat(nstate, 2*nblock_max))
2267
2268 CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, ns_bound, &
2269 cz_ij_loc, rotmat, output_unit, eps_localization=eps_localization, &
2270 target_time=target_time, start_time=start_time)
2271
2272 ilow1 = ns_bound(para_env%mepos, 1)
2273 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2274 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2275 ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2276 DO idim = 1, dim2
2277 DO ip = 0, para_env%num_pe - 1
2278 z_ij_loc_re = 0.0_dp
2279 z_ij_loc_im = 0.0_dp
2280 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2281 IF (ip == para_env%mepos) THEN
2282 ns_me = nblock
2283 DO i = 1, ns_me
2284 ii = ilow1 + i - 1
2285 DO j = 1, nstate
2286 z_ij_loc_re(j, i) = real(cz_ij_loc(idim)%c_array(j, i), dp)
2287 z_ij_loc_im(j, i) = aimag(cz_ij_loc(idim)%c_array(j, i))
2288 END DO
2289 END DO
2290 END IF
2291 CALL para_env%bcast(z_ij_loc_re, ip)
2292 CALL para_env%bcast(z_ij_loc_im, ip)
2293 CALL cp_fm_set_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2294 CALL cp_fm_set_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2295 END DO ! ip
2296 END DO
2297
2298 DO ip = 0, para_env%num_pe - 1
2299 z_ij_loc_re = 0.0_dp
2300 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2301 IF (ip == para_env%mepos) THEN
2302 ns_me = nblock
2303 DO i = 1, ns_me
2304 ii = ilow1 + i - 1
2305 DO j = 1, nstate
2306 z_ij_loc_re(j, i) = rotmat(j, i)
2307 END DO
2308 END DO
2309 END IF
2310 CALL para_env%bcast(z_ij_loc_re, ip)
2311 CALL cp_fm_set_submatrix(rmat, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2312 END DO
2313
2314 DEALLOCATE (z_ij_loc_re)
2315 DEALLOCATE (z_ij_loc_im)
2316 DO idim = 1, dim2
2317 DEALLOCATE (cz_ij_loc(idim)%c_array)
2318 END DO
2319 DEALLOCATE (cz_ij_loc)
2320
2321 CALL para_env%sync()
2322 CALL rotate_orbitals(rmat, vectors)
2323 CALL cp_fm_release(rmat)
2324
2325 DEALLOCATE (rotmat)
2326 DEALLOCATE (ns_bound)
2327
2328 CALL timestop(handle)
2329
2330 END SUBROUTINE jacobi_rot_para
2331
2332! **************************************************************************************************
2333!> \brief almost identical to 'jacobi_rot_para' but with different inout variables
2334!> \param weights ...
2335!> \param czij ...
2336!> \param para_env ...
2337!> \param max_iter ...
2338!> \param rmat ...
2339!> \param tol_out ...
2340!> \author Soumya Ghosh (08/21)
2341! **************************************************************************************************
2342 SUBROUTINE jacobi_rot_para_1(weights, czij, para_env, max_iter, rmat, tol_out)
2343
2344 REAL(kind=dp), INTENT(IN) :: weights(:)
2345 TYPE(cp_cfm_type), INTENT(IN) :: czij(:)
2346 TYPE(mp_para_env_type), POINTER :: para_env
2347 INTEGER, INTENT(IN) :: max_iter
2348 TYPE(cp_cfm_type), INTENT(IN) :: rmat
2349 REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2350
2351 CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rot_para_1'
2352
2353 COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: czij_array
2354 INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2355 nblock, nblock_max, ns_me, nstate, &
2356 sweeps
2357 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2358 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_re
2359 REAL(kind=dp) :: xstate
2360 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2361
2362 CALL timeset(routinen, handle)
2363
2364 dim2 = SIZE(czij)
2365
2366 CALL cp_cfm_set_all(rmat, cmplx(0._dp, 0._dp, dp), cmplx(1._dp, 0._dp, dp))
2367
2368 CALL cp_cfm_get_info(rmat, nrow_global=nstate)
2369
2370 ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2371 xstate = real(nstate, dp)/real(para_env%num_pe, dp)
2372 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2373 DO ip = 1, para_env%num_pe
2374 ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2375 ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2376 END DO
2377 nblock_max = 0
2378 DO ip = 0, para_env%num_pe - 1
2379 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2380 nblock_max = max(nblock_max, nblock)
2381 END DO
2382
2383 ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2384 ALLOCATE (czij_array(nstate, nblock_max))
2385 ALLOCATE (cz_ij_loc(dim2))
2386 DO idim = 1, dim2
2387 DO ip = 0, para_env%num_pe - 1
2388 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2389 ! cfm --> allocatable
2390 CALL cp_cfm_get_submatrix(czij(idim), czij_array, 1, ns_bound(ip, 1), nstate, nblock)
2391 IF (para_env%mepos == ip) THEN
2392 ns_me = nblock
2393 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me))
2394 DO i = 1, ns_me
2395 DO j = 1, nstate
2396 cz_ij_loc(idim)%c_array(j, i) = czij_array(j, i)
2397 END DO
2398 END DO
2399 END IF
2400 END DO ! ip
2401 END DO
2402 DEALLOCATE (czij_array)
2403
2404 ALLOCATE (rotmat(nstate, 2*nblock_max))
2405
2406 CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, 1, dim2, nstate, nblock_max, ns_bound, &
2407 cz_ij_loc, rotmat, 0, tol_out=tol_out)
2408
2409 ilow1 = ns_bound(para_env%mepos, 1)
2410 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2411 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2412
2413 DO ip = 0, para_env%num_pe - 1
2414 z_ij_loc_re = 0.0_dp
2415 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2416 IF (ip == para_env%mepos) THEN
2417 ns_me = nblock
2418 DO i = 1, ns_me
2419 ii = ilow1 + i - 1
2420 DO j = 1, nstate
2421 z_ij_loc_re(j, i) = rotmat(j, i)
2422 END DO
2423 END DO
2424 END IF
2425 CALL para_env%bcast(z_ij_loc_re, ip)
2426 CALL cp_cfm_set_submatrix(rmat, cmplx(z_ij_loc_re, 0.0_dp, dp), 1, ns_bound(ip, 1), nstate, nblock)
2427 END DO
2428
2429 DEALLOCATE (z_ij_loc_re)
2430 DO idim = 1, dim2
2431 DEALLOCATE (cz_ij_loc(idim)%c_array)
2432 END DO
2433 DEALLOCATE (cz_ij_loc)
2434
2435 CALL para_env%sync()
2436
2437 DEALLOCATE (rotmat)
2438 DEALLOCATE (ns_bound)
2439
2440 CALL timestop(handle)
2441
2442 END SUBROUTINE jacobi_rot_para_1
2443
2444! **************************************************************************************************
2445!> \brief Parallel algorithm for jacobi rotations
2446!> \param weights ...
2447!> \param para_env ...
2448!> \param max_iter ...
2449!> \param sweeps ...
2450!> \param out_each ...
2451!> \param dim2 ...
2452!> \param nstate ...
2453!> \param nblock_max ...
2454!> \param ns_bound ...
2455!> \param cz_ij_loc ...
2456!> \param rotmat ...
2457!> \param output_unit ...
2458!> \param tol_out ...
2459!> \param eps_localization ...
2460!> \param target_time ...
2461!> \param start_time ...
2462!> \par History
2463!> split out to reuse with different input types
2464!> \author HF (05.2022)
2465! **************************************************************************************************
2466 SUBROUTINE jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, &
2467 ns_bound, cz_ij_loc, rotmat, output_unit, tol_out, eps_localization, target_time, start_time)
2468
2469 REAL(kind=dp), INTENT(IN) :: weights(:)
2470 TYPE(mp_para_env_type), POINTER :: para_env
2471 INTEGER, INTENT(IN) :: max_iter
2472 INTEGER, INTENT(OUT) :: sweeps
2473 INTEGER, INTENT(IN) :: out_each, dim2, nstate, nblock_max
2474 INTEGER, DIMENSION(0:, :), INTENT(IN) :: ns_bound
2475 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2476 REAL(dp), DIMENSION(:, :), INTENT(OUT) :: rotmat
2477 INTEGER, INTENT(IN) :: output_unit
2478 REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2479 REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_localization
2480 REAL(dp), OPTIONAL :: target_time, start_time
2481
2482 COMPLEX(KIND=dp) :: zi, zj
2483 COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: c_array_me, c_array_partner
2484 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
2485 INTEGER :: i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, ip, ip_has_i, &
2486 ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, iu1, iu2, iup1, &
2487 iup2, j, jj, jstate, k, kk, lsweep, n1, n2, npair, nperm, ns_me, ns_partner, &
2488 ns_recv_from, ns_recv_partner
2489 INTEGER, ALLOCATABLE, DIMENSION(:) :: rcount, rdispl
2490 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: list_pair
2491 LOGICAL :: should_stop
2492 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: gmat, rmat_loc, rmat_recv, rmat_send
2493 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: rmat_recv_all
2494 REAL(kind=dp) :: ct, func, gmax, grad, ri, rj, st, t1, &
2495 t2, theta, tolerance, zc, zr
2496 TYPE(set_c_1d_type), DIMENSION(:), POINTER :: zdiag_all, zdiag_me
2497 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: xyz_mix, xyz_mix_ns
2498
2499 NULLIFY (zdiag_all, zdiag_me)
2500 NULLIFY (xyz_mix, xyz_mix_ns)
2501 NULLIFY (mii, mij, mjj)
2502
2503 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
2504
2505 ALLOCATE (rcount(para_env%num_pe), stat=istat)
2506 ALLOCATE (rdispl(para_env%num_pe), stat=istat)
2507
2508 tolerance = 1.0e10_dp
2509 sweeps = 0
2510
2511 ! number of processor pairs and number of permutations
2512 npair = (para_env%num_pe + 1)/2
2513 nperm = para_env%num_pe - mod(para_env%num_pe + 1, 2)
2514 ALLOCATE (list_pair(2, npair))
2515
2516 ! initialize rotation matrix
2517 rotmat = 0.0_dp
2518 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2519 ii = i - ns_bound(para_env%mepos, 1) + 1
2520 rotmat(i, ii) = 1.0_dp
2521 END DO
2522
2523 ALLOCATE (xyz_mix(dim2))
2524 ALLOCATE (xyz_mix_ns(dim2))
2525 ALLOCATE (zdiag_me(dim2))
2526 ALLOCATE (zdiag_all(dim2))
2527
2528 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2529 IF (ns_me /= 0) THEN
2530 ALLOCATE (c_array_me(nstate, ns_me, dim2))
2531 DO idim = 1, dim2
2532 ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me))
2533 END DO
2534 ALLOCATE (gmat(nstate, ns_me))
2535 END IF
2536
2537 DO idim = 1, dim2
2538 ALLOCATE (zdiag_me(idim)%c_array(nblock_max))
2539 zdiag_me(idim)%c_array = cmplx(0.0_dp, 0.0_dp, dp)
2540 ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max))
2541 zdiag_all(idim)%c_array = cmplx(0.0_dp, 0.0_dp, dp)
2542 END DO
2543 ALLOCATE (rmat_recv(nblock_max*2, nblock_max))
2544 ALLOCATE (rmat_send(nblock_max*2, nblock_max))
2545
2546 ! buffer for message passing
2547 ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1))
2548
2549 IF (output_unit > 0) THEN
2550 WRITE (output_unit, '(T4,A )') " Localization by iterative distributed Jacobi rotation"
2551 WRITE (output_unit, '(T20,A12,T32, A22,T60, A12,A8 )') "Iteration", "Functional", "Tolerance", " Time "
2552 END IF
2553
2554 DO lsweep = 1, max_iter + 1
2555 sweeps = lsweep
2556 IF (sweeps == max_iter + 1) THEN
2557 IF (output_unit > 0) THEN
2558 WRITE (output_unit, *) ' LOCALIZATION! loop did not converge within the maximum number of iterations.'
2559 WRITE (output_unit, *) ' Present Max. gradient = ', tolerance
2560 END IF
2561 EXIT
2562 END IF
2563 t1 = m_walltime()
2564
2565 DO iperm = 1, nperm
2566
2567 ! fix partners for this permutation, and get the number of states
2568 CALL eberlein(iperm, para_env, list_pair)
2569 ip_partner = -1
2570 ns_partner = 0
2571 DO ipair = 1, npair
2572 IF (list_pair(1, ipair) == para_env%mepos) THEN
2573 ip_partner = list_pair(2, ipair)
2574 EXIT
2575 ELSE IF (list_pair(2, ipair) == para_env%mepos) THEN
2576 ip_partner = list_pair(1, ipair)
2577 EXIT
2578 END IF
2579 END DO
2580 IF (ip_partner >= 0) THEN
2581 ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1
2582 ELSE
2583 ns_partner = 0
2584 END IF
2585
2586 ! if there is a non-zero block connecting two partners, jacobi-sweep it.
2587 IF (ns_partner*ns_me /= 0) THEN
2588
2589 ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner))
2590 rmat_loc = 0.0_dp
2591 DO i = 1, ns_me + ns_partner
2592 rmat_loc(i, i) = 1.0_dp
2593 END DO
2594
2595 ALLOCATE (c_array_partner(nstate, ns_partner, dim2))
2596
2597 DO idim = 1, dim2
2598 ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner))
2599 DO i = 1, ns_me
2600 c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i)
2601 END DO
2602 END DO
2603
2604 CALL para_env%sendrecv(msgin=c_array_me, dest=ip_partner, &
2605 msgout=c_array_partner, source=ip_partner)
2606
2607 n1 = ns_me
2608 n2 = ns_partner
2609 ilow1 = ns_bound(para_env%mepos, 1)
2610 iup1 = ns_bound(para_env%mepos, 1) + n1 - 1
2611 ilow2 = ns_bound(ip_partner, 1)
2612 iup2 = ns_bound(ip_partner, 1) + n2 - 1
2613 IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1)) THEN
2614 il1 = 1
2615 iu1 = n1
2616 iu1 = n1
2617 il2 = 1 + n1
2618 iu2 = n1 + n2
2619 ELSE
2620 il1 = 1 + n2
2621 iu1 = n1 + n2
2622 iu1 = n1 + n2
2623 il2 = 1
2624 iu2 = n2
2625 END IF
2626
2627 DO idim = 1, dim2
2628 DO i = 1, n1
2629 xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim)
2630 xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim)
2631 END DO
2632 DO i = 1, n2
2633 xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim)
2634 xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim)
2635 END DO
2636 END DO
2637
2638 DO istate = 1, n1 + n2
2639 DO jstate = istate + 1, n1 + n2
2640 DO idim = 1, dim2
2641 mii(idim) = xyz_mix(idim)%c_array(istate, istate)
2642 mij(idim) = xyz_mix(idim)%c_array(istate, jstate)
2643 mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate)
2644 END DO
2645 CALL get_angle(mii, mjj, mij, weights, theta)
2646 st = sin(theta)
2647 ct = cos(theta)
2648 DO idim = 1, dim2
2649 DO i = 1, n1 + n2
2650 zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate)
2651 zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate)
2652 xyz_mix(idim)%c_array(i, istate) = zi
2653 xyz_mix(idim)%c_array(i, jstate) = zj
2654 END DO
2655 DO i = 1, n1 + n2
2656 zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i)
2657 zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i)
2658 xyz_mix(idim)%c_array(istate, i) = zi
2659 xyz_mix(idim)%c_array(jstate, i) = zj
2660 END DO
2661 END DO
2662
2663 DO i = 1, n1 + n2
2664 ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate)
2665 rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate)
2666 rmat_loc(i, istate) = ri
2667 rmat_loc(i, jstate) = rj
2668 END DO
2669 END DO
2670 END DO
2671
2672 k = nblock_max + 1
2673 CALL para_env%sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, &
2674 rotmat(1:nstate, k:k + n2 - 1), ip_partner)
2675
2676 IF (ilow1 < ilow2) THEN
2677 ! no longer compiles in official sdgb:
2678 !CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1, k), nstate, rmat_loc(1 + n1, 1), n1 + n2, 0.0_dp, gmat, nstate)
2679 ! probably inefficient:
2680 CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, rmat_loc(1 + n1:, 1:n1), &
2681 n2, 0.0_dp, gmat(:, :), nstate)
2682 CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(1:, 1:), &
2683 n1 + n2, 1.0_dp, gmat(:, :), nstate)
2684 ELSE
2685 CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, &
2686 rmat_loc(1:, n2 + 1:), n1 + n2, 0.0_dp, gmat(:, :), nstate)
2687 ! no longer compiles in official sdgb:
2688 !CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1, 1), nstate, rmat_loc(n2 + 1, n2 + 1), n1 + n2, 1.0_dp, gmat, nstate)
2689 ! probably inefficient:
2690 CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(n2 + 1:, n2 + 1:), &
2691 n1, 1.0_dp, gmat(:, :), nstate)
2692 END IF
2693
2694 CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1)
2695
2696 DO idim = 1, dim2
2697 DO i = 1, n1
2698 xyz_mix_ns(idim)%c_array(1:nstate, i) = cmplx(0.0_dp, 0.0_dp, dp)
2699 END DO
2700
2701 DO istate = 1, n1
2702 DO jstate = 1, nstate
2703 DO i = 1, n2
2704 xyz_mix_ns(idim)%c_array(jstate, istate) = &
2705 xyz_mix_ns(idim)%c_array(jstate, istate) + &
2706 c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1)
2707 END DO
2708 END DO
2709 END DO
2710 DO istate = 1, n1
2711 DO jstate = 1, nstate
2712 DO i = 1, n1
2713 xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + &
2714 c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1)
2715 END DO
2716 END DO
2717 END DO
2718 END DO ! idim
2719
2720 DEALLOCATE (c_array_partner)
2721
2722 ELSE ! save my data
2723 DO idim = 1, dim2
2724 DO i = 1, ns_me
2725 xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i)
2726 END DO
2727 END DO
2728 END IF
2729
2730 DO idim = 1, dim2
2731 DO i = 1, ns_me
2732 cz_ij_loc(idim)%c_array(1:nstate, i) = cmplx(0.0_dp, 0.0_dp, dp)
2733 END DO
2734 END DO
2735
2736 IF (ns_partner*ns_me /= 0) THEN
2737 ! transpose rotation matrix rmat_loc
2738 DO i = 1, ns_me + ns_partner
2739 DO j = i + 1, ns_me + ns_partner
2740 ri = rmat_loc(i, j)
2741 rmat_loc(i, j) = rmat_loc(j, i)
2742 rmat_loc(j, i) = ri
2743 END DO
2744 END DO
2745
2746 ! prepare for distribution
2747 DO i = 1, n1
2748 rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1)
2749 END DO
2750 ik = nblock_max
2751 DO i = 1, n2
2752 rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1)
2753 END DO
2754 ELSE
2755 rmat_send = 0.0_dp
2756 END IF
2757
2758 ! collect data from all tasks (this takes some significant time)
2759 CALL para_env%allgather(rmat_send, rmat_recv_all)
2760
2761 ! update blocks everywhere
2762 DO ip = 0, para_env%num_pe - 1
2763
2764 ip_recv_from = mod(para_env%mepos - ip + para_env%num_pe, para_env%num_pe)
2765 rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from)
2766
2767 ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1
2768
2769 IF (ns_me /= 0) THEN
2770 IF (ns_recv_from /= 0) THEN
2771 !look for the partner of ip_recv_from
2772 ip_recv_partner = -1
2773 ns_recv_partner = 0
2774 DO ipair = 1, npair
2775 IF (list_pair(1, ipair) == ip_recv_from) THEN
2776 ip_recv_partner = list_pair(2, ipair)
2777 EXIT
2778 ELSE IF (list_pair(2, ipair) == ip_recv_from) THEN
2779 ip_recv_partner = list_pair(1, ipair)
2780 EXIT
2781 END IF
2782 END DO
2783
2784 IF (ip_recv_partner >= 0) THEN
2785 ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1
2786 END IF
2787 IF (ns_recv_partner > 0) THEN
2788 il1 = ns_bound(para_env%mepos, 1)
2789 il_recv = ns_bound(ip_recv_from, 1)
2790 il_recv_partner = ns_bound(ip_recv_partner, 1)
2791 ik = nblock_max
2792
2793 DO idim = 1, dim2
2794 DO i = 1, ns_recv_from
2795 ii = il_recv + i - 1
2796 DO j = 1, ns_me
2797 jj = j
2798 DO k = 1, ns_recv_from
2799 kk = il_recv + k - 1
2800 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2801 rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2802 END DO
2803 END DO
2804 END DO
2805 DO i = 1, ns_recv_from
2806 ii = il_recv + i - 1
2807 DO j = 1, ns_me
2808 jj = j
2809 DO k = 1, ns_recv_partner
2810 kk = il_recv_partner + k - 1
2811 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2812 rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2813 END DO
2814 END DO
2815 END DO
2816 END DO ! idim
2817 ELSE
2818 il1 = ns_bound(para_env%mepos, 1)
2819 il_recv = ns_bound(ip_recv_from, 1)
2820 DO idim = 1, dim2
2821 DO j = 1, ns_me
2822 jj = j
2823 DO i = 1, ns_recv_from
2824 ii = il_recv + i - 1
2825 cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j)
2826 END DO
2827 END DO
2828 END DO ! idim
2829 END IF
2830 END IF
2831 END IF ! ns_me
2832 END DO ! ip
2833
2834 IF (ns_partner*ns_me /= 0) THEN
2835 DEALLOCATE (rmat_loc)
2836 DO idim = 1, dim2
2837 DEALLOCATE (xyz_mix(idim)%c_array)
2838 END DO
2839 END IF
2840
2841 END DO ! iperm
2842
2843 ! calculate the max gradient
2844 DO idim = 1, dim2
2845 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2846 ii = i - ns_bound(para_env%mepos, 1) + 1
2847 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2848 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2849 END DO
2850 rcount(:) = SIZE(zdiag_me(idim)%c_array)
2851 rdispl(1) = 0
2852 DO ip = 2, para_env%num_pe
2853 rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
2854 END DO
2855 ! collect all the diagonal elements in a replicated 1d array
2856 CALL para_env%allgatherv(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl)
2857 END DO
2858
2859 gmax = 0.0_dp
2860 DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2861 k = j - ns_bound(para_env%mepos, 1) + 1
2862 DO i = 1, j - 1
2863 ! find the location of the diagonal element (i,i)
2864 DO ip = 0, para_env%num_pe - 1
2865 IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2)) THEN
2866 ip_has_i = ip
2867 EXIT
2868 END IF
2869 END DO
2870 ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1
2871 ! mepos has the diagonal element (j,j), as well as the off diagonal (i,j)
2872 jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1
2873 grad = 0.0_dp
2874 DO idim = 1, dim2
2875 zi = zdiag_all(idim)%c_array(ii)
2876 zj = zdiag_all(idim)%c_array(jj)
2877 grad = grad + weights(idim)*real(4.0_dp*conjg(cz_ij_loc(idim)%c_array(i, k))*(zj - zi), dp)
2878 END DO
2879 gmax = max(gmax, abs(grad))
2880 END DO
2881 END DO
2882
2883 CALL para_env%max(gmax)
2884 tolerance = gmax
2885 IF (PRESENT(tol_out)) tol_out = tolerance
2886
2887 func = 0.0_dp
2888 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2889 k = i - ns_bound(para_env%mepos, 1) + 1
2890 DO idim = 1, dim2
2891 zr = real(cz_ij_loc(idim)%c_array(i, k), dp)
2892 zc = aimag(cz_ij_loc(idim)%c_array(i, k))
2893 func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/twopi/twopi
2894 END DO
2895 END DO
2896 CALL para_env%sum(func)
2897 t2 = m_walltime()
2898
2899 IF (output_unit > 0 .AND. modulo(sweeps, out_each) == 0) THEN
2900 WRITE (output_unit, '(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1
2901 CALL m_flush(output_unit)
2902 END IF
2903 IF (PRESENT(eps_localization)) THEN
2904 IF (tolerance < eps_localization) EXIT
2905 END IF
2906 IF (PRESENT(target_time) .AND. PRESENT(start_time)) THEN
2907 CALL external_control(should_stop, "LOC", target_time=target_time, start_time=start_time)
2908 IF (should_stop) EXIT
2909 END IF
2910
2911 END DO ! lsweep
2912
2913 ! buffer for message passing
2914 DEALLOCATE (rmat_recv_all)
2915
2916 DEALLOCATE (rmat_recv)
2917 DEALLOCATE (rmat_send)
2918 IF (ns_me > 0) THEN
2919 DEALLOCATE (c_array_me)
2920 END IF
2921 DO idim = 1, dim2
2922 DEALLOCATE (zdiag_me(idim)%c_array)
2923 DEALLOCATE (zdiag_all(idim)%c_array)
2924 END DO
2925 DEALLOCATE (zdiag_me)
2926 DEALLOCATE (zdiag_all)
2927 DEALLOCATE (xyz_mix)
2928 DO idim = 1, dim2
2929 IF (ns_me /= 0) THEN
2930 DEALLOCATE (xyz_mix_ns(idim)%c_array)
2931 END IF
2932 END DO
2933 DEALLOCATE (xyz_mix_ns)
2934 IF (ns_me /= 0) THEN
2935 DEALLOCATE (gmat)
2936 END IF
2937 DEALLOCATE (mii)
2938 DEALLOCATE (mij)
2939 DEALLOCATE (mjj)
2940 DEALLOCATE (list_pair)
2941
2942 END SUBROUTINE jacobi_rot_para_core
2943
2944! **************************************************************************************************
2945!> \brief ...
2946!> \param iperm ...
2947!> \param para_env ...
2948!> \param list_pair ...
2949! **************************************************************************************************
2950 SUBROUTINE eberlein(iperm, para_env, list_pair)
2951 INTEGER, INTENT(IN) :: iperm
2952 TYPE(mp_para_env_type), POINTER :: para_env
2953 INTEGER, DIMENSION(:, :) :: list_pair
2954
2955 INTEGER :: i, ii, jj, npair
2956
2957 npair = (para_env%num_pe + 1)/2
2958 IF (iperm == 1) THEN
2959!..set up initial ordering
2960 DO i = 0, para_env%num_pe - 1
2961 ii = ((i + 1) + 1)/2
2962 jj = mod((i + 1) + 1, 2) + 1
2963 list_pair(jj, ii) = i
2964 END DO
2965 IF (mod(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1
2966 ELSEIF (mod(iperm, 2) == 0) THEN
2967!..a type shift
2968 jj = list_pair(1, npair)
2969 DO i = npair, 3, -1
2970 list_pair(1, i) = list_pair(1, i - 1)
2971 END DO
2972 list_pair(1, 2) = list_pair(2, 1)
2973 list_pair(2, 1) = jj
2974 ELSE
2975!..b type shift
2976 jj = list_pair(2, 1)
2977 DO i = 1, npair - 1
2978 list_pair(2, i) = list_pair(2, i + 1)
2979 END DO
2980 list_pair(2, npair) = jj
2981 END IF
2982
2983 END SUBROUTINE eberlein
2984
2985! **************************************************************************************************
2986!> \brief ...
2987!> \param vectors ...
2988!> \param op_sm_set ...
2989!> \param zij_fm_set ...
2990! **************************************************************************************************
2991 SUBROUTINE zij_matrix(vectors, op_sm_set, zij_fm_set)
2992
2993 TYPE(cp_fm_type), INTENT(IN) :: vectors
2994 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: op_sm_set
2995 TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: zij_fm_set
2996
2997 CHARACTER(len=*), PARAMETER :: routinen = 'zij_matrix'
2998
2999 INTEGER :: handle, i, j, nao, nmoloc
3000 TYPE(cp_fm_type) :: opvec
3001
3002 CALL timeset(routinen, handle)
3003
3004 ! get rows and cols of the input
3005 CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc)
3006 ! replicate the input kind of matrix
3007 CALL cp_fm_create(opvec, vectors%matrix_struct)
3008
3009 ! Compute zij here
3010 DO i = 1, SIZE(zij_fm_set, 2)
3011 DO j = 1, SIZE(zij_fm_set, 1)
3012 CALL cp_fm_set_all(zij_fm_set(j, i), 0.0_dp)
3013 CALL cp_dbcsr_sm_fm_multiply(op_sm_set(j, i)%matrix, vectors, opvec, ncol=nmoloc)
3014 CALL parallel_gemm("T", "N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, &
3015 zij_fm_set(j, i))
3016 END DO
3017 END DO
3018
3019 CALL cp_fm_release(opvec)
3020 CALL timestop(handle)
3021
3022 END SUBROUTINE zij_matrix
3023
3024! **************************************************************************************************
3025!> \brief ...
3026!> \param vectors ...
3027! **************************************************************************************************
3028 SUBROUTINE scdm_qrfact(vectors)
3029
3030 TYPE(cp_fm_type), INTENT(IN) :: vectors
3031
3032 CHARACTER(len=*), PARAMETER :: routinen = 'scdm_qrfact'
3033
3034 INTEGER :: handle, ncolt, nrowt
3035 REAL(kind=dp), DIMENSION(:), POINTER :: tau
3036 TYPE(cp_fm_struct_type), POINTER :: cstruct
3037 TYPE(cp_fm_type) :: ctp, qf, tmp
3038
3039 CALL timeset(routinen, handle)
3040
3041 ! Create Transpose of Coefficient Matrix vectors
3042 nrowt = vectors%matrix_struct%ncol_global
3043 ncolt = vectors%matrix_struct%nrow_global
3044
3045 CALL cp_fm_struct_create(cstruct, vectors%matrix_struct%para_env, &
3046 vectors%matrix_struct%context, nrowt, ncolt, vectors%matrix_struct%ncol_block, &
3047 vectors%matrix_struct%nrow_block, first_p_pos=vectors%matrix_struct%first_p_pos)
3048 CALL cp_fm_create(ctp, cstruct)
3049 CALL cp_fm_struct_release(cstruct)
3050
3051 ALLOCATE (tau(nrowt))
3052
3053 CALL cp_fm_transpose(vectors, ctp)
3054
3055 ! Get QR decomposition of CTs
3056 CALL cp_fm_pdgeqpf(ctp, tau, nrowt, ncolt, 1, 1)
3057
3058 ! Construction of Q from the scalapack output
3059 CALL cp_fm_struct_create(cstruct, para_env=ctp%matrix_struct%para_env, &
3060 context=ctp%matrix_struct%context, nrow_global=ctp%matrix_struct%nrow_global, &
3061 ncol_global=ctp%matrix_struct%nrow_global)
3062 CALL cp_fm_create(qf, cstruct)
3063 CALL cp_fm_struct_release(cstruct)
3064 CALL cp_fm_to_fm_submat(ctp, qf, nrowt, nrowt, 1, 1, 1, 1)
3065
3066 ! Get Q
3067 CALL cp_fm_pdorgqr(qf, tau, nrowt, 1, 1)
3068
3069 ! Transform original coefficient matrix vectors
3070 CALL cp_fm_create(tmp, vectors%matrix_struct)
3071 CALL cp_fm_set_all(tmp, 0.0_dp, 1.0_dp)
3072 CALL cp_fm_to_fm(vectors, tmp)
3073 CALL parallel_gemm('N', 'N', ncolt, nrowt, nrowt, 1.0_dp, tmp, qf, 0.0_dp, vectors)
3074
3075 ! Cleanup
3076 CALL cp_fm_release(ctp)
3077 CALL cp_fm_release(tmp)
3078 CALL cp_fm_release(qf)
3079 DEALLOCATE (tau)
3080
3081 CALL timestop(handle)
3082
3083 END SUBROUTINE scdm_qrfact
3084
3085END MODULE qs_localization_methods
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
Handles all functions related to the CELL.
Definition cell_types.F:15
methods related to the blacs parallel environment
Basic linear algebra operations for complex full matrices.
subroutine, public cp_cfm_scale_and_add(alpha, matrix_a, beta, matrix_b)
Scale and add two BLACS matrices (a = alpha*a + beta*b).
subroutine, public cp_cfm_rot_rows(matrix, irow, jrow, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
subroutine, public cp_cfm_schur_product(matrix_a, matrix_b, matrix_c)
Computes the element-wise (Schur) product of two matrices: C = A \circ B .
subroutine, public cp_cfm_column_scale(matrix_a, scaling)
Scales columns of the full matrix by corresponding factors.
subroutine, public cp_cfm_rot_cols(matrix, icol, jcol, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
subroutine, public cp_cfm_trace(matrix_a, matrix_b, trace)
Returns the trace of matrix_a^T matrix_b, i.e sum_{i,j}(matrix_a(i,j)*matrix_b(i,j)) .
used for collecting diagonalization schemes available for cp_cfm_type
Definition cp_cfm_diag.F:14
subroutine, public cp_cfm_heevd(matrix, eigenvectors, eigenvalues)
Perform a diagonalisation of a complex matrix.
Definition cp_cfm_diag.F:52
Represents a complex full matrix distributed on many processors.
subroutine, public cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
Extract a sub-matrix from the full matrix: op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n...
subroutine, public cp_cfm_get_element(matrix, irow_global, icol_global, alpha)
Get the matrix element by its global index.
subroutine, public cp_cfm_create(matrix, matrix_struct, name)
Creates a new full matrix with the given structure.
subroutine, public cp_cfm_release(matrix)
Releases a full matrix.
subroutine, public cp_fm_to_cfm(msourcer, msourcei, mtarget)
Construct a complex full matrix by taking its real and imaginary parts from two separate real-value f...
subroutine, public cp_cfm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, matrix_struct, para_env)
Returns information about a full matrix.
subroutine, public cp_cfm_set_submatrix(matrix, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
Set a sub-matrix of the full matrix: matrix(start_row:start_row+n_rows,start_col:start_col+n_cols) = ...
subroutine, public cp_cfm_set_all(matrix, alpha, beta)
Set all elements of the full matrix to alpha. Besides, set all diagonal matrix elements to beta (if g...
subroutine, public cp_cfm_to_fm(msource, mtargetr, mtargeti)
Copy real and imaginary parts of a complex full matrix into separate real-value full matrices.
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_sm_fm_multiply(matrix, fm_in, fm_out, ncol, alpha, beta)
multiply a dbcsr with a fm matrix
Routines to handle the external control of CP2K.
subroutine, public external_control(should_stop, flag, globenv, target_time, start_time, force_check)
External manipulations during a run : when the <PROJECT_NAME>.EXIT_$runtype command is sent the progr...
basic linear algebra operations for full matrices
subroutine, public cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
compute a QR factorization with column pivoting of a M-by-N distributed matrix sub( A ) = A(IA:IA+M-1...
real(kind=dp) function, public cp_fm_frobenius_norm(matrix_a)
computes the Frobenius norm of matrix_a
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_scale(alpha, matrix_a)
scales a matrix matrix_a = alpha * matrix_b
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
subroutine, public cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1) with orthonormal column...
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_decompose(matrix, n, info_out)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
Definition cp_fm_diag.F:17
subroutine, public cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
Computes all eigenvalues and vectors of a real symmetric matrix significantly faster than syevx,...
Definition cp_fm_diag.F:413
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
returns an element of a fm this value is valid on every cpu using this call is expensive
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_maxabsrownorm(matrix, a_max)
find the maximum over the rows of the sum of the absolute values of the elements of a given row = || ...
subroutine, public cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
copy just a part ot the matrix
subroutine, public cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
find the maximum absolute value of the matrix element maxval(abs(matrix))
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_init_random(matrix, ncol, start_col)
fills a matrix with random numbers
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Definition kahan_sum.F:29
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition machine.F:106
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition machine.F:123
Definition of mathematical constants and functions.
real(kind=dp), parameter, public pi
real(kind=dp), parameter, public twopi
Routines for calculating a complex matrix exponential.
Definition matrix_exp.F:13
subroutine, public exp_pade_real(exp_h, matrix, nsquare, npade)
exponential of a real matrix, calculated using pade approximation together with scaling and squaring
Definition matrix_exp.F:570
subroutine, public get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
optimization function for pade/taylor order and number of squaring steps
Definition matrix_exp.F:244
Interface to the message passing library MPI.
basic linear algebra operations for full matrixes
Localization methods such as 2x2 Jacobi rotations Steepest Decents Conjugate Gradient.
subroutine, public approx_l1_norm_sd(c, iterations, eps, converged, sweeps)
...
subroutine, public rotate_orbitals(rmat, vectors)
...
subroutine, public jacobi_rotations(weights, zij, vectors, para_env, max_iter, eps_localization, sweeps, out_each, target_time, start_time, restricted)
wrapper for the jacobi routines, should be removed if jacobi_rot_para can deal with serial para_envs.
subroutine, public zij_matrix(vectors, op_sm_set, zij_fm_set)
...
subroutine, public direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
use the exponential parametrization as described in to perform a direct mini Gerd Berghold et al....
subroutine, public initialize_weights(cell, weights)
...
subroutine, public crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, eps_localization, iterations, converged)
yet another crazy try, computes the angles needed to rotate the orbitals first and rotates them all a...
subroutine, public scdm_qrfact(vectors)
...
subroutine, public jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
combine jacobi rotations (serial) and conjugate gradient with golden section line search for partiall...
Exchange and Correlation functional calculations.
Definition xc.F:17
Type defining parameters related to the simulation cell.
Definition cell_types.F:55
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
Represent a complex full matrix.
keeps the information about the structure of a full matrix
represent a full matrix
stores all the informations relevant to an mpi environment