(git:ccc2433)
qs_localization_methods.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \brief Localization methods such as 2x2 Jacobi rotations
10 !> Steepest Decents
11 !> Conjugate Gradient
12 !> \par History
13 !> Initial parallellization of jacobi (JVDV 07.2003)
14 !> direct minimization using exponential parametrization (JVDV 09.2003)
15 !> crazy rotations go fast (JVDV 10.2003)
16 !> \author CJM (04.2003)
17 ! **************************************************************************************************
19  USE cell_types, ONLY: cell_type
20  USE cp_blacs_env, ONLY: cp_blacs_env_type
24  cp_cfm_scale,&
28  USE cp_cfm_diag, ONLY: cp_cfm_heevd
29  USE cp_cfm_types, ONLY: &
31  cp_cfm_set_all, cp_cfm_set_submatrix, cp_cfm_to_cfm, cp_cfm_to_fm, cp_cfm_type, &
38  cp_fm_scale,&
40  cp_fm_trace,&
44  USE cp_fm_diag, ONLY: cp_fm_syevd
48  cp_fm_struct_type
49  USE cp_fm_types, ONLY: &
52  cp_fm_to_fm, cp_fm_to_fm_submat, cp_fm_type
55  USE dbcsr_api, ONLY: dbcsr_p_type
56  USE kahan_sum, ONLY: accurate_sum
57  USE kinds, ONLY: dp
58  USE machine, ONLY: m_flush,&
60  USE mathconstants, ONLY: pi,&
61  twopi
62  USE matrix_exp, ONLY: exp_pade_real,&
64  USE message_passing, ONLY: mp_para_env_type
65  USE parallel_gemm_api, ONLY: parallel_gemm
66 #include "./base/base_uses.f90"
67 
68  IMPLICIT NONE
72 
73  CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_localization_methods'
74 
75  PRIVATE
76 
77  TYPE set_c_1d_type
78  COMPLEX(KIND=dp), POINTER, DIMENSION(:) :: c_array
79  END TYPE
80  TYPE set_c_2d_type
81  COMPLEX(KIND=dp), POINTER, DIMENSION(:, :) :: c_array
82  END TYPE
83 
84 CONTAINS
85 ! **************************************************************************************************
86 !> \brief ...
87 !> \param C ...
88 !> \param iterations ...
89 !> \param eps ...
90 !> \param converged ...
91 !> \param sweeps ...
92 ! **************************************************************************************************
93  SUBROUTINE approx_l1_norm_sd(C, iterations, eps, converged, sweeps)
94  TYPE(cp_fm_type), INTENT(IN) :: c
95  INTEGER, INTENT(IN) :: iterations
96  REAL(kind=dp), INTENT(IN) :: eps
97  LOGICAL, INTENT(INOUT) :: converged
98  INTEGER, INTENT(INOUT) :: sweeps
99 
100  CHARACTER(len=*), PARAMETER :: routinen = 'approx_l1_norm_sd'
101  INTEGER, PARAMETER :: taylor_order = 100
102  REAL(kind=dp), PARAMETER :: alpha = 0.1_dp, f2_eps = 0.01_dp
103 
104  INTEGER :: handle, i, istep, k, n, ncol_local, &
105  nrow_local, output_unit, p
106  REAL(kind=dp) :: expfactor, f2, f2old, gnorm, tnorm
107  TYPE(cp_blacs_env_type), POINTER :: context
108  TYPE(cp_fm_struct_type), POINTER :: fm_struct_k_k
109  TYPE(cp_fm_type) :: ctmp, g, gp1, gp2, u
110  TYPE(mp_para_env_type), POINTER :: para_env
111 
112  CALL timeset(routinen, handle)
113 
114  NULLIFY (context, para_env, fm_struct_k_k)
115 
116  output_unit = cp_logger_get_default_io_unit()
117 
118  CALL cp_fm_struct_get(c%matrix_struct, nrow_global=n, ncol_global=k, &
119  nrow_local=nrow_local, ncol_local=ncol_local, &
120  para_env=para_env, context=context)
121  CALL cp_fm_struct_create(fm_struct_k_k, para_env=para_env, context=context, &
122  nrow_global=k, ncol_global=k)
123  CALL cp_fm_create(ctmp, c%matrix_struct)
124  CALL cp_fm_create(u, fm_struct_k_k)
125  CALL cp_fm_create(g, fm_struct_k_k)
126  CALL cp_fm_create(gp1, fm_struct_k_k)
127  CALL cp_fm_create(gp2, fm_struct_k_k)
128  !
129  ! printing
130  IF (output_unit > 0) THEN
131  WRITE (output_unit, '(1X)')
132  WRITE (output_unit, '(2X,A)') '-----------------------------------------------------------------------------'
133  WRITE (output_unit, '(A,I5)') ' Nbr iterations =', iterations
134  WRITE (output_unit, '(A,E10.2)') ' eps convergence =', eps
135  WRITE (output_unit, '(A,I5)') ' Max Taylor order =', taylor_order
136  WRITE (output_unit, '(A,E10.2)') ' f2 eps =', f2_eps
137  WRITE (output_unit, '(A,E10.2)') ' alpha =', alpha
138  WRITE (output_unit, '(A)') ' iteration approx_l1_norm g_norm rel_err'
139  END IF
140  !
141  f2old = 0.0_dp
142  converged = .false.
143  !
144  ! Start the steepest descent
145  DO istep = 1, iterations
146  !
147  !-------------------------------------------------------------------
148  ! compute f_2
149  ! f_2(x)=(x^2+eps)^1/2
150  f2 = 0.0_dp
151  DO p = 1, ncol_local ! p
152  DO i = 1, nrow_local ! i
153  f2 = f2 + sqrt(c%local_data(i, p)**2 + f2_eps)
154  END DO
155  END DO
156  CALL c%matrix_struct%para_env%sum(f2)
157  !write(*,*) 'qs_localize: f_2=',f2
158  !-------------------------------------------------------------------
159  ! compute the derivative of f_2
160  ! f_2(x)=(x^2+eps)^1/2
161  DO p = 1, ncol_local ! p
162  DO i = 1, nrow_local ! i
163  ctmp%local_data(i, p) = c%local_data(i, p)/sqrt(c%local_data(i, p)**2 + f2_eps)
164  END DO
165  END DO
166  CALL parallel_gemm('T', 'N', k, k, n, 1.0_dp, ctmp, c, 0.0_dp, g)
167  ! antisymmetrize
168  CALL cp_fm_transpose(g, u)
169  CALL cp_fm_scale_and_add(-0.5_dp, g, 0.5_dp, u)
170  !
171  !-------------------------------------------------------------------
172  !
173  gnorm = cp_fm_frobenius_norm(g)
174  !write(*,*) 'qs_localize: norm(G)=',gnorm
175  !
176  ! rescale for steepest descent
177  CALL cp_fm_scale(-alpha, g)
178  !
179  ! compute unitary transform
180  ! zeroth order
181  CALL cp_fm_set_all(u, 0.0_dp, 1.0_dp)
182  ! first order
183  expfactor = 1.0_dp
184  CALL cp_fm_scale_and_add(1.0_dp, u, expfactor, g)
185  tnorm = cp_fm_frobenius_norm(g)
186  !write(*,*) 'Taylor expansion i=',1,' norm(X^i)/i!=',tnorm
187  IF (tnorm .GT. 1.0e-10_dp) THEN
188  ! other orders
189  CALL cp_fm_to_fm(g, gp1)
190  DO i = 2, taylor_order
191  ! new power of G
192  CALL parallel_gemm('N', 'N', k, k, k, 1.0_dp, g, gp1, 0.0_dp, gp2)
193  CALL cp_fm_to_fm(gp2, gp1)
194  ! add to the taylor expansion so far
195  expfactor = expfactor/real(i, kind=dp)
196  CALL cp_fm_scale_and_add(1.0_dp, u, expfactor, gp1)
197  tnorm = cp_fm_frobenius_norm(gp1)
198  !write(*,*) 'Taylor expansion i=',i,' norm(X^i)/i!=',tnorm*expfactor
199  IF (tnorm*expfactor .LT. 1.0e-10_dp) EXIT
200  END DO
201  END IF
202  !
203  ! incrementaly rotate the MOs
204  CALL parallel_gemm('N', 'N', n, k, k, 1.0_dp, c, u, 0.0_dp, ctmp)
205  CALL cp_fm_to_fm(ctmp, c)
206  !
207  ! printing
208  IF (output_unit .GT. 0) THEN
209  WRITE (output_unit, '(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, abs((f2 - f2old)/f2)
210  END IF
211  !
212  ! Are we done?
213  sweeps = istep
214  !IF(gnorm.LE.grad_thresh.AND.ABS((f2-f2old)/f2).LE.f2_thresh.AND.istep.GT.1) THEN
215  IF (abs((f2 - f2old)/f2) .LE. eps .AND. istep .GT. 1) THEN
216  converged = .true.
217  EXIT
218  END IF
219  f2old = f2
220  END DO
221  !
222  ! here we should do one refine step to enforce C'*S*C=1 for any case
223  !
224  ! Print the final result
225  IF (output_unit .GT. 0) WRITE (output_unit, '(A,E16.10)') ' sparseness function f2 = ', f2
226  !
227  ! sparsity
228  !DO i=1,size(thresh,1)
229  ! gnorm = 0.0_dp
230  ! DO o=1,ncol_local
231  ! DO p=1,nrow_local
232  ! IF(ABS(C%local_data(p,o)).GT.thresh(i)) THEN
233  ! gnorm = gnorm + 1.0_dp
234  ! ENDIF
235  ! ENDDO
236  ! ENDDO
237  ! CALL C%matrix_struct%para_env%sum(gnorm)
238  ! IF(output_unit.GT.0) THEN
239  ! WRITE(output_unit,*) 'qs_localize: ratio2=',gnorm / ( REAL(k,KIND=dp)*REAL(n,KIND=dp) ),thresh(i)
240  ! ENDIF
241  !ENDDO
242  !
243  ! deallocate
244  CALL cp_fm_struct_release(fm_struct_k_k)
245  CALL cp_fm_release(ctmp)
246  CALL cp_fm_release(u)
247  CALL cp_fm_release(g)
248  CALL cp_fm_release(gp1)
249  CALL cp_fm_release(gp2)
250 
251  CALL timestop(handle)
252 
253  END SUBROUTINE approx_l1_norm_sd
254 ! **************************************************************************************************
255 !> \brief ...
256 !> \param cell ...
257 !> \param weights ...
258 ! **************************************************************************************************
259  SUBROUTINE initialize_weights(cell, weights)
260 
261  TYPE(cell_type), POINTER :: cell
262  REAL(kind=dp), DIMENSION(:) :: weights
263 
264  REAL(kind=dp), DIMENSION(3, 3) :: metric
265 
266  cpassert(ASSOCIATED(cell))
267 
268  metric = 0.0_dp
269  CALL dgemm('T', 'N', 3, 3, 3, 1._dp, cell%hmat(:, :), 3, cell%hmat(:, :), 3, 0.0_dp, metric(:, :), 3)
270 
271  weights(1) = metric(1, 1) - metric(1, 2) - metric(1, 3)
272  weights(2) = metric(2, 2) - metric(1, 2) - metric(2, 3)
273  weights(3) = metric(3, 3) - metric(1, 3) - metric(2, 3)
274  weights(4) = metric(1, 2)
275  weights(5) = metric(1, 3)
276  weights(6) = metric(2, 3)
277 
278  END SUBROUTINE initialize_weights
279 
280 ! **************************************************************************************************
281 !> \brief wrapper for the jacobi routines, should be removed if jacobi_rot_para
282 !> can deal with serial para_envs.
283 !> \param weights ...
284 !> \param zij ...
285 !> \param vectors ...
286 !> \param para_env ...
287 !> \param max_iter ...
288 !> \param eps_localization ...
289 !> \param sweeps ...
290 !> \param out_each ...
291 !> \param target_time ...
292 !> \param start_time ...
293 !> \param restricted ...
294 !> \par History
295 !> \author Joost VandeVondele (02.2010)
296 ! **************************************************************************************************
297  SUBROUTINE jacobi_rotations(weights, zij, vectors, para_env, max_iter, &
298  eps_localization, sweeps, out_each, target_time, start_time, restricted)
299 
300  REAL(kind=dp), INTENT(IN) :: weights(:)
301  TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
302  TYPE(mp_para_env_type), POINTER :: para_env
303  INTEGER, INTENT(IN) :: max_iter
304  REAL(kind=dp), INTENT(IN) :: eps_localization
305  INTEGER :: sweeps
306  INTEGER, INTENT(IN) :: out_each
307  REAL(dp) :: target_time, start_time
308  INTEGER :: restricted
309 
310  IF (para_env%num_pe == 1) THEN
311  CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, &
312  sweeps, out_each, restricted=restricted)
313  ELSE
314  CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
315  sweeps, out_each, target_time, start_time, restricted=restricted)
316  END IF
317 
318  END SUBROUTINE jacobi_rotations
319 
320 ! **************************************************************************************************
321 !> \brief this routine, private to the module is a serial backup, till we have jacobi_rot_para to work in serial
322 !> while the routine below works in parallel, it is too slow to be useful
323 !> \param weights ...
324 !> \param zij ...
325 !> \param vectors ...
326 !> \param max_iter ...
327 !> \param eps_localization ...
328 !> \param sweeps ...
329 !> \param out_each ...
330 !> \param restricted ...
331 ! **************************************************************************************************
332  SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, &
333  out_each, restricted)
334  REAL(kind=dp), INTENT(IN) :: weights(:)
335  TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
336  INTEGER, INTENT(IN) :: max_iter
337  REAL(kind=dp), INTENT(IN) :: eps_localization
338  INTEGER :: sweeps
339  INTEGER, INTENT(IN) :: out_each
340  INTEGER :: restricted
341 
342  CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rotations_serial'
343 
344  COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
345  INTEGER :: dim2, handle, idim, istate, jstate, &
346  nstate, unit_nr
347  REAL(kind=dp) :: ct, st, t1, t2, theta, tolerance
348  TYPE(cp_cfm_type) :: c_rmat
349  TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
350  TYPE(cp_fm_type) :: rmat
351 
352  CALL timeset(routinen, handle)
353 
354  dim2 = SIZE(zij, 2)
355  ALLOCATE (c_zij(dim2))
356  NULLIFY (mii, mij, mjj)
357  ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
358 
359  CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
360  CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
361 
362  CALL cp_cfm_create(c_rmat, zij(1, 1)%matrix_struct)
363  CALL cp_cfm_set_all(c_rmat, (0._dp, 0._dp), (1._dp, 0._dp))
364  DO idim = 1, dim2
365  CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
366  c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
367  zij(2, idim)%local_data, dp)
368  END DO
369 
370  CALL cp_fm_get_info(rmat, nrow_global=nstate)
371  tolerance = 1.0e10_dp
372 
373  sweeps = 0
374  unit_nr = -1
375  IF (rmat%matrix_struct%para_env%is_source()) THEN
377  WRITE (unit_nr, '(T4,A )') " Localization by iterative Jacobi rotation"
378  END IF
379 
380  IF (restricted > 0) THEN
382  WRITE (unit_nr, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
383  nstate = nstate - restricted
384  END IF
385 
386  ! do jacobi sweeps until converged
387  DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
388  sweeps = sweeps + 1
389  t1 = m_walltime()
390 
391  DO istate = 1, nstate
392  DO jstate = istate + 1, nstate
393  DO idim = 1, dim2
394  CALL cp_cfm_get_element(c_zij(idim), istate, istate, mii(idim))
395  CALL cp_cfm_get_element(c_zij(idim), istate, jstate, mij(idim))
396  CALL cp_cfm_get_element(c_zij(idim), jstate, jstate, mjj(idim))
397  END DO
398  CALL get_angle(mii, mjj, mij, weights, theta)
399  st = sin(theta)
400  ct = cos(theta)
401  CALL rotate_zij(istate, jstate, st, ct, c_zij)
402 
403  CALL rotate_rmat(istate, jstate, st, ct, c_rmat)
404  END DO
405  END DO
406 
407  CALL check_tolerance(c_zij, weights, tolerance)
408 
409  t2 = m_walltime()
410  IF (unit_nr > 0 .AND. modulo(sweeps, out_each) == 0) THEN
411  WRITE (unit_nr, '(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') &
412  "Iteration:", sweeps, "Tolerance:", tolerance, "Time:", t2 - t1
413  CALL m_flush(unit_nr)
414  END IF
415 
416  END DO
417 
418  DO idim = 1, dim2
419  zij(1, idim)%local_data = real(c_zij(idim)%local_data, dp)
420  zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
421  CALL cp_cfm_release(c_zij(idim))
422  END DO
423  DEALLOCATE (c_zij)
424  DEALLOCATE (mii, mij, mjj)
425  rmat%local_data = real(c_rmat%local_data, dp)
426  CALL cp_cfm_release(c_rmat)
427  CALL rotate_orbitals(rmat, vectors)
428  CALL cp_fm_release(rmat)
429 
430  CALL timestop(handle)
431 
432  END SUBROUTINE jacobi_rotations_serial
433 ! **************************************************************************************************
434 !> \brief very similar to jacobi_rotations_serial with some extra output options
435 !> \param weights ...
436 !> \param c_zij ...
437 !> \param max_iter ...
438 !> \param c_rmat ...
439 !> \param eps_localization ...
440 !> \param tol_out ...
441 !> \param jsweeps ...
442 !> \param out_each ...
443 !> \param c_zij_out ...
444 !> \param grad_final ...
445 ! **************************************************************************************************
446  SUBROUTINE jacobi_rotations_serial_1(weights, c_zij, max_iter, c_rmat, eps_localization, &
447  tol_out, jsweeps, out_each, c_zij_out, grad_final)
448  REAL(kind=dp), INTENT(IN) :: weights(:)
449  TYPE(cp_cfm_type), INTENT(IN) :: c_zij(:)
450  INTEGER, INTENT(IN) :: max_iter
451  TYPE(cp_cfm_type), INTENT(IN) :: c_rmat
452  REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_localization
453  REAL(kind=dp), INTENT(OUT), OPTIONAL :: tol_out
454  INTEGER, INTENT(OUT), OPTIONAL :: jsweeps
455  INTEGER, INTENT(IN), OPTIONAL :: out_each
456  TYPE(cp_cfm_type), INTENT(IN), OPTIONAL :: c_zij_out(:)
457  TYPE(cp_fm_type), INTENT(OUT), OPTIONAL, POINTER :: grad_final
458 
459  CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rotations_serial_1'
460 
461  COMPLEX(KIND=dp) :: mzii
462  COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
463  INTEGER :: dim2, handle, idim, istate, jstate, &
464  nstate, sweeps, unit_nr
465  REAL(kind=dp) :: alpha, avg_spread_ii, ct, spread_ii, st, &
466  sum_spread_ii, t1, t2, theta, tolerance
467  TYPE(cp_cfm_type) :: c_rmat_local
468  TYPE(cp_cfm_type), ALLOCATABLE :: c_zij_local(:)
469 
470  CALL timeset(routinen, handle)
471 
472  dim2 = SIZE(c_zij)
473  NULLIFY (mii, mij, mjj)
474  ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
475 
476  ALLOCATE (c_zij_local(dim2))
477  CALL cp_cfm_create(c_rmat_local, c_rmat%matrix_struct)
478  CALL cp_cfm_set_all(c_rmat_local, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
479  DO idim = 1, dim2
480  CALL cp_cfm_create(c_zij_local(idim), c_zij(idim)%matrix_struct)
481  c_zij_local(idim)%local_data = c_zij(idim)%local_data
482  END DO
483 
484  CALL cp_cfm_get_info(c_rmat_local, nrow_global=nstate)
485  tolerance = 1.0e10_dp
486 
487  IF (PRESENT(grad_final)) CALL cp_fm_set_all(grad_final, 0.0_dp)
488 
489  sweeps = 0
490  IF (PRESENT(out_each)) THEN
491  unit_nr = -1
492  IF (c_rmat_local%matrix_struct%para_env%is_source()) THEN
494  END IF
495  alpha = 0.0_dp
496  DO idim = 1, dim2
497  alpha = alpha + weights(idim)
498  END DO
499  END IF
500 
501  ! do jacobi sweeps until converged
502  DO WHILE (sweeps < max_iter)
503  sweeps = sweeps + 1
504  IF (PRESENT(eps_localization)) THEN
505  IF (tolerance < eps_localization) EXIT
506  END IF
507  IF (PRESENT(out_each)) t1 = m_walltime()
508 
509  DO istate = 1, nstate
510  DO jstate = istate + 1, nstate
511  DO idim = 1, dim2
512  CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mii(idim))
513  CALL cp_cfm_get_element(c_zij_local(idim), istate, jstate, mij(idim))
514  CALL cp_cfm_get_element(c_zij_local(idim), jstate, jstate, mjj(idim))
515  END DO
516  CALL get_angle(mii, mjj, mij, weights, theta)
517  st = sin(theta)
518  ct = cos(theta)
519  CALL rotate_zij(istate, jstate, st, ct, c_zij_local)
520 
521  CALL rotate_rmat(istate, jstate, st, ct, c_rmat_local)
522  END DO
523  END DO
524 
525  IF (PRESENT(grad_final)) THEN
526  CALL check_tolerance(c_zij_local, weights, tolerance, grad=grad_final)
527  ELSE
528  CALL check_tolerance(c_zij_local, weights, tolerance)
529  END IF
530  IF (PRESENT(tol_out)) tol_out = tolerance
531 
532  IF (PRESENT(out_each)) THEN
533  t2 = m_walltime()
534  IF (unit_nr > 0 .AND. modulo(sweeps, out_each) == 0) THEN
535  sum_spread_ii = 0.0_dp
536  DO istate = 1, nstate
537  spread_ii = 0.0_dp
538  DO idim = 1, dim2
539  CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mzii)
540  spread_ii = spread_ii + weights(idim)* &
541  abs(mzii)**2/twopi/twopi
542  END DO
543  sum_spread_ii = sum_spread_ii + spread_ii
544  END DO
545  sum_spread_ii = alpha*nstate/twopi/twopi - sum_spread_ii
546  avg_spread_ii = sum_spread_ii/nstate
547  WRITE (unit_nr, '(T4,A,T26,A,T48,A,T64,A)') &
548  "Iteration", "Avg. Spread_ii", "Tolerance", "Time"
549  WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4,T60,F8.3)') &
550  sweeps, avg_spread_ii, tolerance, t2 - t1
551  CALL m_flush(unit_nr)
552  END IF
553  IF (PRESENT(jsweeps)) jsweeps = sweeps
554  END IF
555 
556  END DO
557 
558  IF (PRESENT(c_zij_out)) THEN
559  DO idim = 1, dim2
560  CALL cp_cfm_to_cfm(c_zij_local(idim), c_zij_out(idim))
561  END DO
562  END IF
563  CALL cp_cfm_to_cfm(c_rmat_local, c_rmat)
564 
565  DEALLOCATE (mii, mij, mjj)
566  DO idim = 1, dim2
567  CALL cp_cfm_release(c_zij_local(idim))
568  END DO
569  DEALLOCATE (c_zij_local)
570  CALL cp_cfm_release(c_rmat_local)
571 
572  CALL timestop(handle)
573 
574  END SUBROUTINE jacobi_rotations_serial_1
575 ! **************************************************************************************************
576 !> \brief combine jacobi rotations (serial) and conjugate gradient with golden section line search
577 !> for partially occupied wannier functions
578 !> \param para_env ...
579 !> \param weights ...
580 !> \param zij ...
581 !> \param vectors ...
582 !> \param max_iter ...
583 !> \param eps_localization ...
584 !> \param iter ...
585 !> \param out_each ...
586 !> \param nextra ...
587 !> \param do_cg ...
588 !> \param nmo ...
589 !> \param vectors_2 ...
590 !> \param mos_guess ...
591 ! **************************************************************************************************
592  SUBROUTINE jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, &
593  iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
594  TYPE(mp_para_env_type), POINTER :: para_env
595  REAL(kind=dp), INTENT(IN) :: weights(:)
596  TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
597  INTEGER, INTENT(IN) :: max_iter
598  REAL(kind=dp), INTENT(IN) :: eps_localization
599  INTEGER :: iter
600  INTEGER, INTENT(IN) :: out_each, nextra
601  LOGICAL, INTENT(IN) :: do_cg
602  INTEGER, INTENT(IN), OPTIONAL :: nmo
603  TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: vectors_2, mos_guess
604 
605  CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_cg_edf_ls'
606  COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
607  czero = (0.0_dp, 0.0_dp)
608  REAL(kind=dp), PARAMETER :: gold_sec = 0.3819_dp
609 
610  COMPLEX(KIND=dp) :: cnorm2_gct, cnorm2_gct_cross, mzii
611  COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_cmat
612  COMPLEX(KIND=dp), DIMENSION(:), POINTER :: arr_zii
613  COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: matrix_zii
614  INTEGER :: dim2, handle, icinit, idim, istate, line_search_count, line_searches, lsl, lsm, &
615  lsr, miniter, nao, ndummy, nocc, norextra, northo, nstate, unit_nr
616  INTEGER, DIMENSION(1) :: iloc
617  LOGICAL :: do_cinit_mo, do_cinit_random, &
618  do_u_guess_mo, new_direction
619  REAL(kind=dp) :: alpha, avg_spread_ii, beta, beta_pr, ds, ds_min, mintol, norm, norm2_gct, &
620  norm2_gct_cross, norm2_old, spread_ii, spread_sum, sum_spread_ii, t1, tol, tolc, weight
621  REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: sum_spread
622  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_mat, tmp_mat_1
623  REAL(kind=dp), DIMENSION(50) :: energy, pos
624  REAL(kind=dp), DIMENSION(:), POINTER :: tmp_arr
625  TYPE(cp_blacs_env_type), POINTER :: context
626  TYPE(cp_cfm_type) :: c_tilde, ctrans_lambda, gct_old, &
627  grad_ctilde, skc, tmp_cfm, tmp_cfm_1, &
628  tmp_cfm_2, u, ul, v, vl, zdiag
629  TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij, zij_0
630  TYPE(cp_fm_struct_type), POINTER :: tmp_fm_struct
631  TYPE(cp_fm_type) :: id_nextra, matrix_u, matrix_v, &
632  matrix_v_all, rmat, tmp_fm, vectors_all
633 
634  CALL timeset(routinen, handle)
635 
636  dim2 = SIZE(zij, 2)
637  NULLIFY (context)
638  NULLIFY (matrix_zii, arr_zii)
639  NULLIFY (tmp_fm_struct)
640  NULLIFY (tmp_arr)
641 
642  ALLOCATE (c_zij(dim2))
643 
644  CALL cp_fm_get_info(zij(1, 1), nrow_global=nstate)
645 
646  ALLOCATE (sum_spread(nstate))
647  ALLOCATE (matrix_zii(nstate, dim2))
648  matrix_zii = czero
649  sum_spread = 0.0_dp
650 
651  alpha = 0.0_dp
652  DO idim = 1, dim2
653  alpha = alpha + weights(idim)
654  CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
655  c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
656  zij(2, idim)%local_data, dp)
657  END DO
658 
659  ALLOCATE (zij_0(dim2))
660 
661  CALL cp_cfm_create(u, zij(1, 1)%matrix_struct)
662  CALL cp_fm_create(matrix_u, zij(1, 1)%matrix_struct)
663 
664  CALL cp_cfm_set_all(u, czero, cone)
665  CALL cp_fm_set_all(matrix_u, 0.0_dp, 1.0_dp)
666 
667  CALL cp_fm_get_info(vectors, nrow_global=nao)
668  IF (nextra > 0) THEN
669  IF (PRESENT(mos_guess)) THEN
670  do_cinit_random = .false.
671  do_cinit_mo = .true.
672  CALL cp_fm_get_info(mos_guess, ncol_global=ndummy)
673  ELSE
674  do_cinit_random = .true.
675  do_cinit_mo = .false.
676  ndummy = nstate
677  END IF
678 
679  IF (do_cinit_random) THEN
680  icinit = 1
681  do_u_guess_mo = .false.
682  ELSEIF (do_cinit_mo) THEN
683  icinit = 2
684  do_u_guess_mo = .true.
685  END IF
686 
687  nocc = nstate - nextra
688  northo = nmo - nocc
689  norextra = nmo - nstate
690  CALL cp_fm_struct_get(zij(1, 1)%matrix_struct, context=context)
691 
692  ALLOCATE (tmp_cmat(nstate, nstate))
693  CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nmo, &
694  para_env=para_env, context=context)
695  DO idim = 1, dim2
696  CALL cp_cfm_create(zij_0(idim), tmp_fm_struct)
697  CALL cp_cfm_set_all(zij_0(idim), czero, cone)
698  CALL cp_cfm_get_submatrix(c_zij(idim), tmp_cmat)
699  CALL cp_cfm_set_submatrix(zij_0(idim), tmp_cmat)
700  END DO
701  CALL cp_fm_struct_release(tmp_fm_struct)
702  DEALLOCATE (tmp_cmat)
703 
704  CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nstate, &
705  para_env=para_env, context=context)
706  CALL cp_cfm_create(v, tmp_fm_struct)
707  CALL cp_fm_create(matrix_v, tmp_fm_struct)
708  CALL cp_cfm_create(zdiag, tmp_fm_struct)
709  CALL cp_fm_create(rmat, tmp_fm_struct)
710  CALL cp_fm_struct_release(tmp_fm_struct)
711  CALL cp_cfm_set_all(v, czero, cone)
712  CALL cp_fm_set_all(matrix_v, 0.0_dp, 1.0_dp)
713 
714  CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=ndummy, &
715  para_env=para_env, context=context)
716  CALL cp_fm_create(matrix_v_all, tmp_fm_struct)
717  CALL cp_fm_struct_release(tmp_fm_struct)
718  CALL cp_fm_set_all(matrix_v_all, 0._dp, 1._dp)
719 
720  ALLOCATE (arr_zii(nstate))
721 
722  CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
723  para_env=para_env, context=context)
724  CALL cp_cfm_create(c_tilde, tmp_fm_struct)
725  CALL cp_cfm_create(grad_ctilde, tmp_fm_struct)
726  CALL cp_cfm_create(gct_old, tmp_fm_struct)
727  CALL cp_cfm_create(skc, tmp_fm_struct)
728  CALL cp_fm_struct_release(tmp_fm_struct)
729  CALL cp_cfm_set_all(c_tilde, czero)
730  CALL cp_cfm_set_all(gct_old, czero)
731  CALL cp_cfm_set_all(skc, czero)
732 
733  CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nstate, &
734  para_env=para_env, context=context)
735  CALL cp_cfm_create(vl, tmp_fm_struct)
736  CALL cp_cfm_set_all(vl, czero)
737  CALL cp_fm_struct_release(tmp_fm_struct)
738 
739  CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nextra, &
740  para_env=para_env, context=context)
741  CALL cp_fm_create(id_nextra, tmp_fm_struct)
742  CALL cp_cfm_create(ctrans_lambda, tmp_fm_struct)
743  CALL cp_fm_struct_release(tmp_fm_struct)
744  CALL cp_cfm_set_all(ctrans_lambda, czero)
745  CALL cp_fm_set_all(id_nextra, 0.0_dp, 1.0_dp)
746 
747  CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nstate, &
748  para_env=para_env, context=context)
749  CALL cp_cfm_create(ul, tmp_fm_struct)
750  CALL cp_fm_struct_release(tmp_fm_struct)
751  CALL cp_cfm_set_all(ul, czero)
752 
753  CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nao, ncol_global=nmo, &
754  para_env=para_env, context=context)
755  CALL cp_fm_create(vectors_all, tmp_fm_struct)
756  CALL cp_fm_struct_release(tmp_fm_struct)
757  ALLOCATE (tmp_mat(nao, nstate))
758  CALL cp_fm_get_submatrix(vectors, tmp_mat)
759  CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, 1, nao, nstate)
760  DEALLOCATE (tmp_mat)
761  ALLOCATE (tmp_mat(nao, norextra))
762  CALL cp_fm_get_submatrix(vectors_2, tmp_mat)
763  CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, nstate + 1, nao, norextra)
764  DEALLOCATE (tmp_mat)
765 
766  ! initialize c_tilde
767  SELECT CASE (icinit)
768  CASE (1) ! random coefficients
769  !WRITE (*, *) "RANDOM INITIAL GUESS FOR C"
770  CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
771  CALL cp_fm_init_random(tmp_fm, nextra)
772  CALL ortho_vectors(tmp_fm)
773  c_tilde%local_data = tmp_fm%local_data
774  CALL cp_fm_release(tmp_fm)
775  ALLOCATE (tmp_cmat(northo, nextra))
776  CALL cp_cfm_get_submatrix(c_tilde, tmp_cmat)
777  CALL cp_cfm_set_submatrix(v, tmp_cmat, nocc + 1, nocc + 1, northo, nextra)
778  DEALLOCATE (tmp_cmat)
779  CASE (2) ! MO based coeffs
780  CALL parallel_gemm("T", "N", nmo, ndummy, nao, 1.0_dp, vectors_all, mos_guess, 0.0_dp, matrix_v_all)
781  ALLOCATE (tmp_arr(nmo))
782  ALLOCATE (tmp_mat(nmo, ndummy))
783  ALLOCATE (tmp_mat_1(nmo, nstate))
784  ! normalize matrix_V_all
785  CALL cp_fm_get_submatrix(matrix_v_all, tmp_mat)
786  DO istate = 1, ndummy
787  tmp_arr(:) = tmp_mat(:, istate)
788  norm = norm2(tmp_arr)
789  tmp_arr(:) = tmp_arr(:)/norm
790  tmp_mat(:, istate) = tmp_arr(:)
791  END DO
792  CALL cp_fm_set_submatrix(matrix_v_all, tmp_mat)
793  CALL cp_fm_get_submatrix(matrix_v_all, tmp_mat_1, 1, 1, nmo, nstate)
794  CALL cp_fm_set_submatrix(matrix_v, tmp_mat_1)
795  DEALLOCATE (tmp_arr, tmp_mat, tmp_mat_1)
796  CALL cp_fm_to_cfm(msourcer=matrix_v, mtarget=v)
797  ALLOCATE (tmp_mat(northo, ndummy))
798  ALLOCATE (tmp_mat_1(northo, nextra))
799  CALL cp_fm_get_submatrix(matrix_v_all, tmp_mat, nocc + 1, 1, northo, ndummy)
800  ALLOCATE (tmp_arr(ndummy))
801  tmp_arr = 0.0_dp
802  DO istate = 1, ndummy
803  tmp_arr(istate) = norm2(tmp_mat(:, istate))
804  END DO
805  ! find edfs
806  DO istate = 1, nextra
807  iloc = maxloc(tmp_arr)
808  tmp_mat_1(:, istate) = tmp_mat(:, iloc(1))
809  tmp_arr(iloc(1)) = 0.0_dp
810  END DO
811 
812  DEALLOCATE (tmp_arr, tmp_mat)
813 
814  CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
815  para_env=para_env, context=context)
816  CALL cp_fm_create(tmp_fm, tmp_fm_struct)
817  CALL cp_fm_struct_release(tmp_fm_struct)
818  CALL cp_fm_set_submatrix(tmp_fm, tmp_mat_1)
819  DEALLOCATE (tmp_mat_1)
820  CALL ortho_vectors(tmp_fm)
821  CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
822  CALL cp_fm_release(tmp_fm)
823  ! initialize U
824  IF (do_u_guess_mo) THEN
825  ALLOCATE (tmp_cmat(nocc, nstate))
826  CALL cp_cfm_get_submatrix(v, tmp_cmat, 1, 1, nocc, nstate)
827  CALL cp_cfm_set_submatrix(u, tmp_cmat, 1, 1, nocc, nstate)
828  DEALLOCATE (tmp_cmat)
829  ALLOCATE (tmp_cmat(northo, nstate))
830  CALL cp_cfm_get_submatrix(v, tmp_cmat, nocc + 1, 1, northo, nstate)
831  CALL cp_cfm_set_submatrix(vl, tmp_cmat, 1, 1, northo, nstate)
832  DEALLOCATE (tmp_cmat)
833  CALL parallel_gemm("C", "N", nextra, nstate, northo, cone, c_tilde, vl, czero, ul)
834  ALLOCATE (tmp_cmat(nextra, nstate))
835  CALL cp_cfm_get_submatrix(ul, tmp_cmat, 1, 1, nextra, nstate)
836  CALL cp_cfm_set_submatrix(u, tmp_cmat, nocc + 1, 1, nextra, nstate)
837  DEALLOCATE (tmp_cmat)
838  CALL cp_fm_create(tmp_fm, u%matrix_struct)
839  tmp_fm%local_data = real(u%local_data, kind=dp)
840  CALL ortho_vectors(tmp_fm)
841  CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=u)
842  CALL cp_fm_release(tmp_fm)
843  CALL cp_cfm_to_fm(u, matrix_u)
844  END IF
845  ! reevaluate V
846  ALLOCATE (tmp_cmat(nocc, nstate))
847  CALL cp_cfm_get_submatrix(u, tmp_cmat, 1, 1, nocc, nstate)
848  CALL cp_cfm_set_submatrix(v, tmp_cmat, 1, 1, nocc, nstate)
849  DEALLOCATE (tmp_cmat)
850  ALLOCATE (tmp_cmat(nextra, nstate))
851  CALL cp_cfm_get_submatrix(u, tmp_cmat, nocc + 1, 1, nextra, nstate)
852  CALL cp_cfm_set_submatrix(ul, tmp_cmat, 1, 1, nextra, nstate)
853  DEALLOCATE (tmp_cmat)
854  CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
855  ALLOCATE (tmp_cmat(northo, nstate))
856  CALL cp_cfm_get_submatrix(vl, tmp_cmat)
857  CALL cp_cfm_set_submatrix(v, tmp_cmat, nocc + 1, 1, northo, nstate)
858  DEALLOCATE (tmp_cmat)
859  END SELECT
860  ELSE
861  DO idim = 1, dim2
862  CALL cp_cfm_create(zij_0(idim), zij(1, 1)%matrix_struct)
863  CALL cp_cfm_to_cfm(c_zij(idim), zij_0(idim))
864  END DO
865  CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
866  CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
867  END IF
868 
869  unit_nr = -1
870  IF (rmat%matrix_struct%para_env%is_source()) THEN
872  WRITE (unit_nr, '(T4,A )') " Localization by combined Jacobi rotations and Non-Linear Conjugate Gradient"
873  END IF
874 
875  norm2_old = 1.0e30_dp
876  ds_min = 1.0_dp
877  new_direction = .true.
878  iter = 0
879  line_searches = 0
880  line_search_count = 0
881  tol = 1.0e+20_dp
882  mintol = 1.0e+10_dp
883  miniter = 0
884 
885  !IF (nextra > 0) WRITE(*,*) 'random_guess, MO_guess, U_guess, conjugate_gradient: ', &
886  ! do_cinit_random, do_cinit_mo, do_U_guess_mo, do_cg
887 
888  ! do conjugate gradient until converged
889  DO WHILE (iter < max_iter)
890  iter = iter + 1
891  !WRITE(*,*) 'iter = ', iter
892  t1 = m_walltime()
893 
894  IF (iter > 1) THEN
895  ! comput U
896  CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
897  CALL cp_cfm_create(tmp_cfm_2, zij(1, 1)%matrix_struct)
898  IF (para_env%num_pe == 1) THEN
899  CALL jacobi_rotations_serial_1(weights, c_zij, 1, tmp_cfm_2, tol_out=tol)
900  ELSE
901  CALL jacobi_rot_para_1(weights, c_zij, para_env, 1, tmp_cfm_2, tol_out=tol)
902  END IF
903  CALL parallel_gemm('N', 'N', nstate, nstate, nstate, cone, u, tmp_cfm_2, czero, tmp_cfm)
904  CALL cp_cfm_to_cfm(tmp_cfm, u)
905  CALL cp_cfm_release(tmp_cfm)
906  CALL cp_cfm_release(tmp_cfm_2)
907  END IF
908 
909  IF (nextra > 0) THEN
910  ALLOCATE (tmp_cmat(nextra, nstate))
911  CALL cp_cfm_get_submatrix(u, tmp_cmat, nocc + 1, 1, nextra, nstate)
912  CALL cp_cfm_set_submatrix(ul, tmp_cmat)
913  DEALLOCATE (tmp_cmat)
914  IF (iter > 1) THEN
915  ! orthonormalize c_tilde
916  CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
917  tmp_fm%local_data = real(c_tilde%local_data, kind=dp)
918  CALL ortho_vectors(tmp_fm)
919  CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
920  CALL cp_fm_release(tmp_fm)
921 
922  ALLOCATE (tmp_cmat(nocc, nstate))
923  CALL cp_cfm_get_submatrix(u, tmp_cmat, 1, 1, nocc, nstate)
924  CALL cp_cfm_set_submatrix(v, tmp_cmat, 1, 1, nocc, nstate)
925  DEALLOCATE (tmp_cmat)
926  CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
927  ALLOCATE (tmp_cmat(northo, nstate))
928  CALL cp_cfm_get_submatrix(vl, tmp_cmat)
929  CALL cp_cfm_set_submatrix(v, tmp_cmat, nocc + 1, 1, northo, nstate)
930  DEALLOCATE (tmp_cmat)
931  END IF
932 
933  ! reset if new_direction
934  IF (new_direction .AND. mod(line_searches, 20) .EQ. 5) THEN
935  CALL cp_cfm_set_all(skc, czero)
936  CALL cp_cfm_set_all(gct_old, czero)
937  norm2_old = 1.0e30_dp
938  END IF
939 
940  CALL cp_cfm_create(tmp_cfm, v%matrix_struct)
941  CALL cp_cfm_to_cfm(v, tmp_cfm)
942  CALL cp_cfm_create(tmp_cfm_1, v%matrix_struct)
943  ndummy = nmo
944  ELSE
945  CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
946  CALL cp_cfm_to_cfm(u, tmp_cfm)
947  CALL cp_cfm_create(tmp_cfm_1, zij(1, 1)%matrix_struct)
948  ndummy = nstate
949  END IF
950  ! update z_ij
951  DO idim = 1, dim2
952  ! 'tmp_cfm_1 = zij_0*tmp_cfm'
953  CALL parallel_gemm("N", "N", ndummy, nstate, ndummy, cone, zij_0(idim), &
954  tmp_cfm, czero, tmp_cfm_1)
955  ! 'c_zij = tmp_cfm_dagg*tmp_cfm_1'
956  CALL parallel_gemm("C", "N", nstate, nstate, ndummy, cone, tmp_cfm, tmp_cfm_1, &
957  czero, c_zij(idim))
958  END DO
959  CALL cp_cfm_release(tmp_cfm)
960  CALL cp_cfm_release(tmp_cfm_1)
961  ! compute spread
962  DO istate = 1, nstate
963  spread_ii = 0.0_dp
964  DO idim = 1, dim2
965  CALL cp_cfm_get_element(c_zij(idim), istate, istate, mzii)
966  spread_ii = spread_ii + weights(idim)* &
967  abs(mzii)**2/twopi/twopi
968  matrix_zii(istate, idim) = mzii
969  END DO
970  !WRITE(*,*) 'spread_ii', spread_ii
971  sum_spread(istate) = spread_ii
972  END DO
973  CALL c_zij(1)%matrix_struct%para_env%sum(spread_ii)
974  spread_sum = accurate_sum(sum_spread)
975 
976  IF (nextra > 0) THEN
977  ! update c_tilde
978  CALL cp_cfm_set_all(zdiag, czero)
979  CALL cp_cfm_set_all(grad_ctilde, czero)
980  CALL cp_cfm_create(tmp_cfm, v%matrix_struct)
981  CALL cp_cfm_set_all(tmp_cfm, czero)
982  CALL cp_cfm_create(tmp_cfm_1, v%matrix_struct)
983  CALL cp_cfm_set_all(tmp_cfm_1, czero)
984  ALLOCATE (tmp_cmat(northo, nstate))
985  DO idim = 1, dim2
986  weight = weights(idim)
987  arr_zii = matrix_zii(:, idim)
988  ! tmp_cfm = zij_0*V
989  CALL parallel_gemm("N", "N", nmo, nstate, nmo, cone, &
990  zij_0(idim), v, czero, tmp_cfm)
991  ! tmp_cfm = tmp_cfm*diag_zij_dagg
992  CALL cp_cfm_column_scale(tmp_cfm, conjg(arr_zii))
993  ! tmp_cfm_1 = tmp_cfm*U_dagg
994  CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
995  u, czero, tmp_cfm_1)
996  CALL cp_cfm_scale(weight, tmp_cfm_1)
997  ! zdiag = zdiag + tmp_cfm_1'
998  CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
999 
1000  ! tmp_cfm = zij_0_dagg*V
1001  CALL parallel_gemm("C", "N", nmo, nstate, nmo, cone, &
1002  zij_0(idim), v, czero, tmp_cfm)
1003 
1004  ! tmp_cfm = tmp_cfm*diag_zij
1005  CALL cp_cfm_column_scale(tmp_cfm, arr_zii)
1006  ! tmp_cfm_1 = tmp_cfm*U_dagg
1007  CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
1008  u, czero, tmp_cfm_1)
1009  CALL cp_cfm_scale(weight, tmp_cfm_1)
1010  ! zdiag = zdiag + tmp_cfm_1'
1011  CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
1012  END DO ! idim
1013  CALL cp_cfm_release(tmp_cfm)
1014  CALL cp_cfm_release(tmp_cfm_1)
1015  DEALLOCATE (tmp_cmat)
1016  ALLOCATE (tmp_cmat(northo, nextra))
1017  CALL cp_cfm_get_submatrix(zdiag, tmp_cmat, nocc + 1, nocc + 1, &
1018  northo, nextra, .false.)
1019  ! 'grad_ctilde'
1020  CALL cp_cfm_set_submatrix(grad_ctilde, tmp_cmat)
1021  DEALLOCATE (tmp_cmat)
1022  ! ctrans_lambda = c_tilde_dagg*grad_ctilde
1023  CALL parallel_gemm("C", "N", nextra, nextra, northo, cone, c_tilde, grad_ctilde, czero, ctrans_lambda)
1024  !WRITE(*,*) "norm(ctrans_lambda) = ", cp_cfm_norm(ctrans_lambda, "F")
1025  ! 'grad_ctilde = - c_tilde*ctrans_lambda + grad_ctilde'
1026  CALL parallel_gemm("N", "N", northo, nextra, nextra, -cone, c_tilde, ctrans_lambda, cone, grad_ctilde)
1027  END IF ! nextra > 0
1028 
1029  ! tolerance
1030  IF (nextra > 0) THEN
1031  tolc = 0.0_dp
1032  CALL cp_fm_create(tmp_fm, grad_ctilde%matrix_struct)
1033  CALL cp_cfm_to_fm(grad_ctilde, tmp_fm)
1034  CALL cp_fm_maxabsval(tmp_fm, tolc)
1035  CALL cp_fm_release(tmp_fm)
1036  !WRITE(*,*) 'tolc = ', tolc
1037  tol = tol + tolc
1038  END IF
1039  !WRITE(*,*) 'tol = ', tol
1040 
1041  IF (nextra > 0) THEN
1042  !WRITE(*,*) 'new_direction: ', new_direction
1043  IF (new_direction) THEN
1044  line_searches = line_searches + 1
1045  IF (mintol > tol) THEN
1046  mintol = tol
1047  miniter = iter
1048  END IF
1049 
1050  IF (unit_nr > 0 .AND. modulo(iter, out_each) == 0) THEN
1051  sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1052  avg_spread_ii = sum_spread_ii/nstate
1053  WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1054  "Iteration", "Avg. Spread_ii", "Tolerance"
1055  WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1056  iter, avg_spread_ii, tol
1057  CALL m_flush(unit_nr)
1058  END IF
1059  IF (tol < eps_localization) EXIT
1060 
1061  IF (do_cg) THEN
1062  cnorm2_gct = czero
1063  cnorm2_gct_cross = czero
1064  CALL cp_cfm_trace(grad_ctilde, gct_old, cnorm2_gct_cross)
1065  norm2_gct_cross = real(cnorm2_gct_cross, kind=dp)
1066  gct_old%local_data = grad_ctilde%local_data
1067  CALL cp_cfm_trace(grad_ctilde, gct_old, cnorm2_gct)
1068  norm2_gct = real(cnorm2_gct, kind=dp)
1069  ! compute beta_pr
1070  beta_pr = (norm2_gct - norm2_gct_cross)/norm2_old
1071  norm2_old = norm2_gct
1072  beta = max(0.0_dp, beta_pr)
1073  !WRITE(*,*) 'beta = ', beta
1074  ! compute skc / ska = beta * skc / ska + grad_ctilde / G
1075  CALL cp_cfm_scale(beta, skc)
1076  CALL cp_cfm_scale_and_add(cone, skc, cone, gct_old)
1077  CALL cp_cfm_trace(skc, gct_old, cnorm2_gct_cross)
1078  norm2_gct_cross = real(cnorm2_gct_cross, kind=dp)
1079  IF (norm2_gct_cross .LE. 0.0_dp) THEN ! back to steepest ascent
1080  CALL cp_cfm_scale_and_add(czero, skc, cone, gct_old)
1081  END IF
1082  ELSE
1083  CALL cp_cfm_scale_and_add(czero, skc, cone, grad_ctilde)
1084  END IF
1085  line_search_count = 0
1086  END IF
1087 
1088  line_search_count = line_search_count + 1
1089  !WRITE(*,*) 'line_search_count = ', line_search_count
1090  energy(line_search_count) = spread_sum
1091 
1092  ! gold line search
1093  new_direction = .false.
1094  IF (line_search_count .EQ. 1) THEN
1095  lsl = 1
1096  lsr = 0
1097  lsm = 1
1098  pos(1) = 0.0_dp
1099  pos(2) = ds_min/gold_sec
1100  ds = pos(2)
1101  ELSE
1102  IF (line_search_count .EQ. 50) THEN
1103  IF (abs(energy(line_search_count) - energy(line_search_count - 1)) < 1.0e-4_dp) THEN
1104  cpwarn("Line search failed to converge properly")
1105  ds_min = 0.1_dp
1106  new_direction = .true.
1107  ds = pos(line_search_count)
1108  line_search_count = 0
1109  ELSE
1110  cpabort("No. of line searches exceeds 50")
1111  END IF
1112  ELSE
1113  IF (lsr .EQ. 0) THEN
1114  IF (energy(line_search_count - 1) .GT. energy(line_search_count)) THEN
1115  lsr = line_search_count
1116  pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
1117  ELSE
1118  lsl = lsm
1119  lsm = line_search_count
1120  pos(line_search_count + 1) = pos(line_search_count)/gold_sec
1121  END IF
1122  ELSE
1123  IF (pos(line_search_count) .LT. pos(lsm)) THEN
1124  IF (energy(line_search_count) .GT. energy(lsm)) THEN
1125  lsr = lsm
1126  lsm = line_search_count
1127  ELSE
1128  lsl = line_search_count
1129  END IF
1130  ELSE
1131  IF (energy(line_search_count) .GT. energy(lsm)) THEN
1132  lsl = lsm
1133  lsm = line_search_count
1134  ELSE
1135  lsr = line_search_count
1136  END IF
1137  END IF
1138  IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl)) THEN
1139  pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
1140  ELSE
1141  pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
1142  END IF
1143  IF ((pos(lsr) - pos(lsl)) .LT. 1.0e-3_dp*pos(lsr)) THEN
1144  new_direction = .true.
1145  END IF
1146  END IF ! lsr .eq. 0
1147  END IF ! line_search_count .eq. 50
1148  ! now go to the suggested point
1149  ds = pos(line_search_count + 1) - pos(line_search_count)
1150  !WRITE(*,*) 'lsl, lsr, lsm, ds = ', lsl, lsr, lsm, ds
1151  IF ((abs(ds) < 1.0e-10_dp) .AND. (lsl == 1)) THEN
1152  new_direction = .true.
1153  ds_min = 0.5_dp/alpha
1154  ELSEIF (abs(ds) > 10.0_dp) THEN
1155  new_direction = .true.
1156  ds_min = 0.5_dp/alpha
1157  ELSE
1158  ds_min = pos(line_search_count + 1)
1159  END IF
1160  END IF ! first step
1161  ! 'c_tilde = c_tilde + d*skc'
1162  CALL cp_cfm_scale(ds, skc)
1163  CALL cp_cfm_scale_and_add(cone, c_tilde, cone, skc)
1164  ELSE
1165  IF (mintol > tol) THEN
1166  mintol = tol
1167  miniter = iter
1168  END IF
1169  IF (unit_nr > 0 .AND. modulo(iter, out_each) == 0) THEN
1170  sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1171  avg_spread_ii = sum_spread_ii/nstate
1172  WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1173  "Iteration", "Avg. Spread_ii", "Tolerance"
1174  WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1175  iter, avg_spread_ii, tol
1176  CALL m_flush(unit_nr)
1177  END IF
1178  IF (tol < eps_localization) EXIT
1179  END IF ! nextra > 0
1180 
1181  END DO ! iteration
1182 
1183  IF ((unit_nr > 0) .AND. (iter == max_iter)) THEN
1184  WRITE (unit_nr, '(T4,A,T4,A)') "Min. Itr.", "Min. Tol."
1185  WRITE (unit_nr, '(T4,I7,T4,E12.4)') miniter, mintol
1186  CALL m_flush(unit_nr)
1187  END IF
1188 
1189  CALL cp_cfm_to_fm(u, matrix_u)
1190 
1191  IF (nextra > 0) THEN
1192  rmat%local_data = real(v%local_data, kind=dp)
1193  CALL rotate_orbitals_edf(rmat, vectors_all, vectors)
1194 
1195  CALL cp_cfm_release(c_tilde)
1196  CALL cp_cfm_release(grad_ctilde)
1197  CALL cp_cfm_release(gct_old)
1198  CALL cp_cfm_release(skc)
1199  CALL cp_cfm_release(ul)
1200  CALL cp_cfm_release(zdiag)
1201  CALL cp_cfm_release(ctrans_lambda)
1202  CALL cp_fm_release(id_nextra)
1203  CALL cp_fm_release(vectors_all)
1204  CALL cp_cfm_release(v)
1205  CALL cp_fm_release(matrix_v)
1206  CALL cp_fm_release(matrix_v_all)
1207  CALL cp_cfm_release(vl)
1208  DEALLOCATE (arr_zii)
1209  ELSE
1210  rmat%local_data = matrix_u%local_data
1211  CALL rotate_orbitals(rmat, vectors)
1212  END IF
1213  DO idim = 1, dim2
1214  CALL cp_cfm_release(zij_0(idim))
1215  END DO
1216  DEALLOCATE (zij_0)
1217 
1218  DO idim = 1, dim2
1219  zij(1, idim)%local_data = real(c_zij(idim)%local_data, dp)
1220  zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
1221  CALL cp_cfm_release(c_zij(idim))
1222  END DO
1223  DEALLOCATE (c_zij)
1224  CALL cp_fm_release(rmat)
1225  CALL cp_cfm_release(u)
1226  CALL cp_fm_release(matrix_u)
1227  DEALLOCATE (matrix_zii, sum_spread)
1228 
1229  CALL timestop(handle)
1230 
1231  END SUBROUTINE jacobi_cg_edf_ls
1232 
1233 ! **************************************************************************************************
1234 !> \brief ...
1235 !> \param vmatrix ...
1236 ! **************************************************************************************************
1237  SUBROUTINE ortho_vectors(vmatrix)
1238 
1239  TYPE(cp_fm_type), INTENT(IN) :: vmatrix
1240 
1241  CHARACTER(LEN=*), PARAMETER :: routinen = 'ortho_vectors'
1242 
1243  INTEGER :: handle, n, ncol
1244  TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
1245  TYPE(cp_fm_type) :: overlap_vv
1246 
1247  CALL timeset(routinen, handle)
1248 
1249  NULLIFY (fm_struct_tmp)
1250 
1251  CALL cp_fm_get_info(matrix=vmatrix, nrow_global=n, ncol_global=ncol)
1252 
1253  CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ncol, ncol_global=ncol, &
1254  para_env=vmatrix%matrix_struct%para_env, &
1255  context=vmatrix%matrix_struct%context)
1256  CALL cp_fm_create(overlap_vv, fm_struct_tmp, "overlap_vv")
1257  CALL cp_fm_struct_release(fm_struct_tmp)
1258 
1259  CALL parallel_gemm('T', 'N', ncol, ncol, n, 1.0_dp, vmatrix, vmatrix, 0.0_dp, overlap_vv)
1260  CALL cp_fm_cholesky_decompose(overlap_vv)
1261  CALL cp_fm_triangular_multiply(overlap_vv, vmatrix, n_cols=ncol, side='R', invert_tr=.true.)
1262 
1263  CALL cp_fm_release(overlap_vv)
1264 
1265  CALL timestop(handle)
1266 
1267  END SUBROUTINE ortho_vectors
1268 
1269 ! **************************************************************************************************
1270 !> \brief ...
1271 !> \param istate ...
1272 !> \param jstate ...
1273 !> \param st ...
1274 !> \param ct ...
1275 !> \param zij ...
1276 ! **************************************************************************************************
1277  SUBROUTINE rotate_zij(istate, jstate, st, ct, zij)
1278  INTEGER, INTENT(IN) :: istate, jstate
1279  REAL(kind=dp), INTENT(IN) :: st, ct
1280  TYPE(cp_cfm_type) :: zij(:)
1281 
1282  INTEGER :: id
1283 
1284 ! Locals
1285 
1286  DO id = 1, SIZE(zij, 1)
1287  CALL cp_cfm_rot_cols(zij(id), istate, jstate, ct, st)
1288  CALL cp_cfm_rot_rows(zij(id), istate, jstate, ct, st)
1289  END DO
1290 
1291  END SUBROUTINE rotate_zij
1292 ! **************************************************************************************************
1293 !> \brief ...
1294 !> \param istate ...
1295 !> \param jstate ...
1296 !> \param st ...
1297 !> \param ct ...
1298 !> \param rmat ...
1299 ! **************************************************************************************************
1300  SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat)
1301  INTEGER, INTENT(IN) :: istate, jstate
1302  REAL(kind=dp), INTENT(IN) :: st, ct
1303  TYPE(cp_cfm_type), INTENT(IN) :: rmat
1304 
1305  CALL cp_cfm_rot_cols(rmat, istate, jstate, ct, st)
1306 
1307  END SUBROUTINE rotate_rmat
1308 ! **************************************************************************************************
1309 !> \brief ...
1310 !> \param mii ...
1311 !> \param mjj ...
1312 !> \param mij ...
1313 !> \param weights ...
1314 !> \param theta ...
1315 !> \param grad_ij ...
1316 !> \param step ...
1317 ! **************************************************************************************************
1318  SUBROUTINE get_angle(mii, mjj, mij, weights, theta, grad_ij, step)
1319  COMPLEX(KIND=dp), POINTER :: mii(:), mjj(:), mij(:)
1320  REAL(kind=dp), INTENT(IN) :: weights(:)
1321  REAL(kind=dp), INTENT(OUT) :: theta
1322  REAL(kind=dp), INTENT(IN), OPTIONAL :: grad_ij, step
1323 
1324  COMPLEX(KIND=dp) :: z11, z12, z22
1325  INTEGER :: dim_m, idim
1326  REAL(kind=dp) :: a12, b12, d2, ratio
1327 
1328  a12 = 0.0_dp
1329  b12 = 0.0_dp
1330  dim_m = SIZE(mii)
1331  DO idim = 1, dim_m
1332  z11 = mii(idim)
1333  z22 = mjj(idim)
1334  z12 = mij(idim)
1335  a12 = a12 + weights(idim)*real(conjg(z12)*(z11 - z22), kind=dp)
1336  b12 = b12 + weights(idim)*real((z12*conjg(z12) - &
1337  0.25_dp*(z11 - z22)*(conjg(z11) - conjg(z22))), kind=dp)
1338  END DO
1339  IF (abs(b12) > 1.e-10_dp) THEN
1340  ratio = -a12/b12
1341  theta = 0.25_dp*atan(ratio)
1342  ELSEIF (abs(b12) < 1.e-10_dp) THEN
1343  b12 = 0.0_dp
1344  theta = 0.0_dp
1345  ELSE
1346  theta = 0.25_dp*pi
1347  END IF
1348  IF (PRESENT(grad_ij)) theta = theta + step*grad_ij
1349 ! Check second derivative info
1350  d2 = a12*sin(4._dp*theta) - b12*cos(4._dp*theta)
1351  IF (d2 <= 0._dp) THEN ! go to the maximum, not the minimum
1352  IF (theta > 0.0_dp) THEN ! make theta as small as possible
1353  theta = theta - 0.25_dp*pi
1354  ELSE
1355  theta = theta + 0.25_dp*pi
1356  END IF
1357  END IF
1358  END SUBROUTINE get_angle
1359 ! **************************************************************************************************
1360 !> \brief ...
1361 !> \param zij ...
1362 !> \param weights ...
1363 !> \param tolerance ...
1364 !> \param grad ...
1365 ! **************************************************************************************************
1366  SUBROUTINE check_tolerance(zij, weights, tolerance, grad)
1367  TYPE(cp_cfm_type) :: zij(:)
1368  REAL(kind=dp), INTENT(IN) :: weights(:)
1369  REAL(kind=dp), INTENT(OUT) :: tolerance
1370  TYPE(cp_fm_type), INTENT(OUT), OPTIONAL :: grad
1371 
1372  CHARACTER(len=*), PARAMETER :: routinen = 'check_tolerance'
1373 
1374  INTEGER :: handle
1375  TYPE(cp_fm_type) :: force
1376 
1377  CALL timeset(routinen, handle)
1378 
1379 ! compute gradient at t=0
1380 
1381  CALL cp_fm_create(force, zij(1)%matrix_struct)
1382  CALL cp_fm_set_all(force, 0._dp)
1383  CALL grad_at_0(zij, weights, force)
1384  CALL cp_fm_maxabsval(force, tolerance)
1385  IF (PRESENT(grad)) CALL cp_fm_to_fm(force, grad)
1386  CALL cp_fm_release(force)
1387 
1388  CALL timestop(handle)
1389 
1390  END SUBROUTINE check_tolerance
1391 ! **************************************************************************************************
1392 !> \brief ...
1393 !> \param rmat ...
1394 !> \param vectors ...
1395 ! **************************************************************************************************
1396  SUBROUTINE rotate_orbitals(rmat, vectors)
1397  TYPE(cp_fm_type), INTENT(IN) :: rmat, vectors
1398 
1399  INTEGER :: k, n
1400  TYPE(cp_fm_type) :: wf
1401 
1402  CALL cp_fm_create(wf, vectors%matrix_struct)
1403  CALL cp_fm_get_info(vectors, nrow_global=n, ncol_global=k)
1404  CALL parallel_gemm("N", "N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf)
1405  CALL cp_fm_to_fm(wf, vectors)
1406  CALL cp_fm_release(wf)
1407  END SUBROUTINE rotate_orbitals
1408 ! **************************************************************************************************
1409 !> \brief ...
1410 !> \param rmat ...
1411 !> \param vec_all ...
1412 !> \param vectors ...
1413 ! **************************************************************************************************
1414  SUBROUTINE rotate_orbitals_edf(rmat, vec_all, vectors)
1415  TYPE(cp_fm_type), INTENT(IN) :: rmat, vec_all, vectors
1416 
1417  INTEGER :: k, l, n
1418  TYPE(cp_fm_type) :: wf
1419 
1420  CALL cp_fm_create(wf, vectors%matrix_struct)
1421  CALL cp_fm_get_info(vec_all, nrow_global=n, ncol_global=k)
1422  CALL cp_fm_get_info(rmat, ncol_global=l)
1423 
1424  CALL parallel_gemm("N", "N", n, l, k, 1.0_dp, vec_all, rmat, 0.0_dp, wf)
1425  CALL cp_fm_to_fm(wf, vectors)
1426  CALL cp_fm_release(wf)
1427  END SUBROUTINE rotate_orbitals_edf
1428 ! **************************************************************************************************
1429 !> \brief ...
1430 !> \param diag ...
1431 !> \param weights ...
1432 !> \param matrix ...
1433 !> \param ndim ...
1434 ! **************************************************************************************************
1435  SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim)
1436  COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1437  REAL(kind=dp), INTENT(IN) :: weights(:)
1438  TYPE(cp_fm_type), INTENT(IN) :: matrix
1439  INTEGER, INTENT(IN) :: ndim
1440 
1441  COMPLEX(KIND=dp) :: zii, zjj
1442  INTEGER :: idim, istate, jstate, ncol_local, &
1443  nrow_global, nrow_local
1444  INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1445  REAL(kind=dp) :: gradsq_ij
1446 
1447  CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1448  ncol_local=ncol_local, nrow_global=nrow_global, &
1449  row_indices=row_indices, col_indices=col_indices)
1450 
1451  DO istate = 1, nrow_local
1452  DO jstate = 1, ncol_local
1453 ! get real and imaginary parts
1454  gradsq_ij = 0.0_dp
1455  DO idim = 1, ndim
1456  zii = diag(row_indices(istate), idim)
1457  zjj = diag(col_indices(jstate), idim)
1458  gradsq_ij = gradsq_ij + weights(idim)* &
1459  4.0_dp*real((conjg(zii)*zii + conjg(zjj)*zjj), kind=dp)
1460  END DO
1461  matrix%local_data(istate, jstate) = gradsq_ij
1462  END DO
1463  END DO
1464  END SUBROUTINE gradsq_at_0
1465 ! **************************************************************************************************
1466 !> \brief ...
1467 !> \param matrix_p ...
1468 !> \param weights ...
1469 !> \param matrix ...
1470 ! **************************************************************************************************
1471  SUBROUTINE grad_at_0(matrix_p, weights, matrix)
1472  TYPE(cp_cfm_type) :: matrix_p(:)
1473  REAL(kind=dp), INTENT(IN) :: weights(:)
1474  TYPE(cp_fm_type), INTENT(IN) :: matrix
1475 
1476  COMPLEX(KIND=dp) :: zii, zij, zjj
1477  COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1478  INTEGER :: dim_m, idim, istate, jstate, ncol_local, &
1479  nrow_global, nrow_local
1480  INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1481  REAL(kind=dp) :: grad_ij
1482 
1483  NULLIFY (diag)
1484  CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1485  ncol_local=ncol_local, nrow_global=nrow_global, &
1486  row_indices=row_indices, col_indices=col_indices)
1487  dim_m = SIZE(matrix_p, 1)
1488  ALLOCATE (diag(nrow_global, dim_m))
1489 
1490  DO idim = 1, dim_m
1491  DO istate = 1, nrow_global
1492  CALL cp_cfm_get_element(matrix_p(idim), istate, istate, diag(istate, idim))
1493  END DO
1494  END DO
1495 
1496  DO istate = 1, nrow_local
1497  DO jstate = 1, ncol_local
1498 ! get real and imaginary parts
1499  grad_ij = 0.0_dp
1500  DO idim = 1, dim_m
1501  zii = diag(row_indices(istate), idim)
1502  zjj = diag(col_indices(jstate), idim)
1503  zij = matrix_p(idim)%local_data(istate, jstate)
1504  grad_ij = grad_ij + weights(idim)* &
1505  REAL(4.0_dp*conjg(zij)*(zjj - zii), dp)
1506  END DO
1507  matrix%local_data(istate, jstate) = grad_ij
1508  END DO
1509  END DO
1510  DEALLOCATE (diag)
1511  END SUBROUTINE grad_at_0
1512 
1513 ! return energy and maximum gradient in the current point
1514 ! **************************************************************************************************
1515 !> \brief ...
1516 !> \param weights ...
1517 !> \param zij ...
1518 !> \param tolerance ...
1519 !> \param value ...
1520 ! **************************************************************************************************
1521  SUBROUTINE check_tolerance_new(weights, zij, tolerance, value)
1522  REAL(kind=dp), INTENT(IN) :: weights(:)
1523  TYPE(cp_fm_type), INTENT(IN) :: zij(:, :)
1524  REAL(kind=dp) :: tolerance, value
1525 
1526  COMPLEX(KIND=dp) :: kii, kij, kjj
1527  COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1528  INTEGER :: idim, istate, jstate, ncol_local, &
1529  nrow_global, nrow_local
1530  INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1531  REAL(kind=dp) :: grad_ij, ra, rb
1532 
1533  NULLIFY (diag)
1534  CALL cp_fm_get_info(zij(1, 1), nrow_local=nrow_local, &
1535  ncol_local=ncol_local, nrow_global=nrow_global, &
1536  row_indices=row_indices, col_indices=col_indices)
1537  ALLOCATE (diag(nrow_global, SIZE(zij, 2)))
1538  value = 0.0_dp
1539  DO idim = 1, SIZE(zij, 2)
1540  DO istate = 1, nrow_global
1541  CALL cp_fm_get_element(zij(1, idim), istate, istate, ra)
1542  CALL cp_fm_get_element(zij(2, idim), istate, istate, rb)
1543  diag(istate, idim) = cmplx(ra, rb, dp)
1544  value = value + weights(idim) - weights(idim)*abs(diag(istate, idim))**2
1545  END DO
1546  END DO
1547  tolerance = 0.0_dp
1548  DO istate = 1, nrow_local
1549  DO jstate = 1, ncol_local
1550  grad_ij = 0.0_dp
1551  DO idim = 1, SIZE(zij, 2)
1552  kii = diag(row_indices(istate), idim)
1553  kjj = diag(col_indices(jstate), idim)
1554  ra = zij(1, idim)%local_data(istate, jstate)
1555  rb = zij(2, idim)%local_data(istate, jstate)
1556  kij = cmplx(ra, rb, dp)
1557  grad_ij = grad_ij + weights(idim)* &
1558  REAL(4.0_dp*conjg(kij)*(kjj - kii), dp)
1559  END DO
1560  tolerance = max(abs(grad_ij), tolerance)
1561  END DO
1562  END DO
1563  CALL zij(1, 1)%matrix_struct%para_env%max(tolerance)
1564 
1565  DEALLOCATE (diag)
1566 
1567  END SUBROUTINE check_tolerance_new
1568 
1569 ! **************************************************************************************************
1570 !> \brief yet another crazy try, computes the angles needed to rotate the orbitals first
1571 !> and rotates them all at the same time (hoping for the best of course)
1572 !> \param weights ...
1573 !> \param zij ...
1574 !> \param vectors ...
1575 !> \param max_iter ...
1576 !> \param max_crazy_angle ...
1577 !> \param crazy_scale ...
1578 !> \param crazy_use_diag ...
1579 !> \param eps_localization ...
1580 !> \param iterations ...
1581 !> \param converged ...
1582 ! **************************************************************************************************
1583  SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, &
1584  eps_localization, iterations, converged)
1585  REAL(kind=dp), INTENT(IN) :: weights(:)
1586  TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1587  INTEGER, INTENT(IN) :: max_iter
1588  REAL(kind=dp), INTENT(IN) :: max_crazy_angle
1589  REAL(kind=dp) :: crazy_scale
1590  LOGICAL :: crazy_use_diag
1591  REAL(kind=dp), INTENT(IN) :: eps_localization
1592  INTEGER :: iterations
1593  LOGICAL, INTENT(out), OPTIONAL :: converged
1594 
1595  CHARACTER(len=*), PARAMETER :: routinen = 'crazy_rotations'
1596  COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1597  czero = (0.0_dp, 0.0_dp)
1598 
1599  COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1600  COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1601  COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
1602  INTEGER :: dim2, handle, i, icol, idim, irow, &
1603  method, ncol_global, ncol_local, &
1604  norder, nrow_global, nrow_local, &
1605  nsquare, unit_nr
1606  INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1607  LOGICAL :: do_emd
1608  REAL(kind=dp) :: eps_exp, limit_crazy_angle, maxeval, &
1609  norm, ra, rb, theta, tolerance, value
1610  REAL(kind=dp), DIMENSION(:), POINTER :: evals
1611  TYPE(cp_cfm_type) :: cmat_a, cmat_r, cmat_t1
1612  TYPE(cp_fm_type) :: mat_r, mat_t, mat_theta, mat_u
1613 
1614  CALL timeset(routinen, handle)
1615  NULLIFY (row_indices, col_indices)
1616  CALL cp_fm_get_info(zij(1, 1), nrow_global=nrow_global, &
1617  ncol_global=ncol_global, &
1618  row_indices=row_indices, col_indices=col_indices, &
1619  nrow_local=nrow_local, ncol_local=ncol_local)
1620 
1621  limit_crazy_angle = max_crazy_angle
1622 
1623  NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj)
1624  dim2 = SIZE(zij, 2)
1625  ALLOCATE (diag_z(nrow_global, dim2))
1626  ALLOCATE (evals(nrow_global))
1627  ALLOCATE (evals_exp(nrow_global))
1628 
1629  CALL cp_cfm_create(cmat_a, zij(1, 1)%matrix_struct)
1630  CALL cp_cfm_create(cmat_r, zij(1, 1)%matrix_struct)
1631  CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1632 
1633  CALL cp_fm_create(mat_u, zij(1, 1)%matrix_struct)
1634  CALL cp_fm_create(mat_t, zij(1, 1)%matrix_struct)
1635  CALL cp_fm_create(mat_r, zij(1, 1)%matrix_struct)
1636 
1637  CALL cp_fm_create(mat_theta, zij(1, 1)%matrix_struct)
1638 
1639  CALL cp_fm_set_all(mat_r, 0.0_dp, 1.0_dp)
1640  CALL cp_fm_set_all(mat_t, 0.0_dp)
1641  ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
1642  DO idim = 1, dim2
1643  CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(1, idim))
1644  CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(2, idim))
1645  END DO
1646  CALL cp_fm_syevd(mat_t, mat_u, evals)
1647  DO idim = 1, dim2
1648  ! rotate z's
1649  CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1650  CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1651  CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1652  CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1653  END DO
1654  ! collect rotation matrix
1655  CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1656  CALL cp_fm_to_fm(mat_t, mat_r)
1657 
1658  unit_nr = -1
1659  IF (cmat_a%matrix_struct%para_env%is_source()) THEN
1660  unit_nr = cp_logger_get_default_unit_nr()
1661  WRITE (unit_nr, '(T2,A7,A6,1X,A20,A12,A12,A12)') &
1662  "CRAZY| ", "Iter", "value ", "gradient", "Max. eval", "limit"
1663  END IF
1664 
1665  iterations = 0
1666  tolerance = 1.0_dp
1667 
1668  DO
1669  iterations = iterations + 1
1670  DO idim = 1, dim2
1671  DO i = 1, nrow_global
1672  CALL cp_fm_get_element(zij(1, idim), i, i, ra)
1673  CALL cp_fm_get_element(zij(2, idim), i, i, rb)
1674  diag_z(i, idim) = cmplx(ra, rb, dp)
1675  END DO
1676  END DO
1677  DO irow = 1, nrow_local
1678  DO icol = 1, ncol_local
1679  DO idim = 1, dim2
1680  ra = zij(1, idim)%local_data(irow, icol)
1681  rb = zij(2, idim)%local_data(irow, icol)
1682  mij(idim) = cmplx(ra, rb, dp)
1683  mii(idim) = diag_z(row_indices(irow), idim)
1684  mjj(idim) = diag_z(col_indices(icol), idim)
1685  END DO
1686  IF (row_indices(irow) .NE. col_indices(icol)) THEN
1687  CALL get_angle(mii, mjj, mij, weights, theta)
1688  theta = crazy_scale*theta
1689  IF (theta .GT. limit_crazy_angle) theta = limit_crazy_angle
1690  IF (theta .LT. -limit_crazy_angle) theta = -limit_crazy_angle
1691  IF (crazy_use_diag) THEN
1692  cmat_a%local_data(irow, icol) = -cmplx(0.0_dp, theta, dp)
1693  ELSE
1694  mat_theta%local_data(irow, icol) = -theta
1695  END IF
1696  ELSE
1697  IF (crazy_use_diag) THEN
1698  cmat_a%local_data(irow, icol) = czero
1699  ELSE
1700  mat_theta%local_data(irow, icol) = 0.0_dp
1701  END IF
1702  END IF
1703  END DO
1704  END DO
1705 
1706  ! construct rotation matrix U based on A using diagonalization
1707  ! alternatively, exp based on repeated squaring could be faster
1708  IF (crazy_use_diag) THEN
1709  CALL cp_cfm_heevd(cmat_a, cmat_r, evals)
1710  maxeval = maxval(abs(evals))
1711  evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1712  CALL cp_cfm_to_cfm(cmat_r, cmat_t1)
1713  CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1714  CALL parallel_gemm('N', 'C', nrow_global, nrow_global, nrow_global, cone, &
1715  cmat_t1, cmat_r, czero, cmat_a)
1716  mat_u%local_data = real(cmat_a%local_data, kind=dp) ! U is a real matrix
1717  ELSE
1718  do_emd = .false.
1719  method = 2
1720  eps_exp = 1.0_dp*epsilon(eps_exp)
1721  CALL cp_fm_maxabsrownorm(mat_theta, norm)
1722  maxeval = norm ! an upper bound
1723  CALL get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
1724  CALL exp_pade_real(mat_u, mat_theta, nsquare, norder)
1725  END IF
1726 
1727  DO idim = 1, dim2
1728  ! rotate z's
1729  CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1730  CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1731  CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1732  CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1733  END DO
1734  ! collect rotation matrix
1735  CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1736  CALL cp_fm_to_fm(mat_t, mat_r)
1737 
1738  CALL check_tolerance_new(weights, zij, tolerance, value)
1739 
1740  IF (unit_nr > 0) THEN
1741  WRITE (unit_nr, '(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') &
1742  "CRAZY| ", iterations, value, tolerance, maxeval, limit_crazy_angle
1743  CALL m_flush(unit_nr)
1744  END IF
1745  IF (tolerance .LT. eps_localization .OR. iterations .GE. max_iter) EXIT
1746  END DO
1747 
1748  IF (PRESENT(converged)) converged = (tolerance .LT. eps_localization)
1749 
1750  CALL cp_cfm_release(cmat_a)
1751  CALL cp_cfm_release(cmat_r)
1752  CALL cp_cfm_release(cmat_t1)
1753 
1754  CALL cp_fm_release(mat_u)
1755  CALL cp_fm_release(mat_t)
1756  CALL cp_fm_release(mat_theta)
1757 
1758  CALL rotate_orbitals(mat_r, vectors)
1759 
1760  CALL cp_fm_release(mat_r)
1761  DEALLOCATE (evals_exp, evals, diag_z)
1762  DEALLOCATE (mii, mij, mjj)
1763 
1764  CALL timestop(handle)
1765 
1766  END SUBROUTINE crazy_rotations
1767 
1768 ! **************************************************************************************************
1769 !> \brief use the exponential parametrization as described in to perform a direct mini
1770 !> Gerd Berghold et al. PRB 61 (15), pag. 10040 (2000)
1771 !> none of the input is modified for the time being, just finds the rotations
1772 !> that minimizes, and throws it away afterwards :-)
1773 !> apart from being expensive and not cleaned, this works fine
1774 !> useful to try different spread functionals
1775 !> \param weights ...
1776 !> \param zij ...
1777 !> \param vectors ...
1778 !> \param max_iter ...
1779 !> \param eps_localization ...
1780 !> \param iterations ...
1781 ! **************************************************************************************************
1782  SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
1783  REAL(kind=dp), INTENT(IN) :: weights(:)
1784  TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1785  INTEGER, INTENT(IN) :: max_iter
1786  REAL(kind=dp), INTENT(IN) :: eps_localization
1787  INTEGER :: iterations
1788 
1789  CHARACTER(len=*), PARAMETER :: routinen = 'direct_mini'
1790  COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1791  czero = (0.0_dp, 0.0_dp)
1792  REAL(kind=dp), PARAMETER :: gold_sec = 0.3819_dp
1793 
1794  COMPLEX(KIND=dp) :: lk, ll, tmp
1795  COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1796  COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1797  INTEGER :: handle, i, icol, idim, irow, &
1798  line_search_count, line_searches, lsl, &
1799  lsm, lsr, n, ncol_local, ndim, &
1800  nrow_local, output_unit
1801  INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1802  LOGICAL :: new_direction
1803  REAL(kind=dp) :: a, b, beta_pr, c, denom, ds, ds_min, fa, &
1804  fb, fc, nom, normg, normg_cross, &
1805  normg_old, npos, omega, tol, val, x0, &
1806  x1, xa, xb, xc
1807  REAL(kind=dp), DIMENSION(150) :: energy, grad, pos
1808  REAL(kind=dp), DIMENSION(:), POINTER :: evals, fval, fvald
1809  TYPE(cp_cfm_type) :: cmat_a, cmat_b, cmat_m, cmat_r, cmat_t1, &
1810  cmat_t2, cmat_u
1811  TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
1812  TYPE(cp_fm_type) :: matrix_a, matrix_g, matrix_g_old, &
1813  matrix_g_search, matrix_h, matrix_r, &
1814  matrix_t
1815 
1816  NULLIFY (evals, evals_exp, diag_z, fval, fvald)
1817 
1818  CALL timeset(routinen, handle)
1819  output_unit = cp_logger_get_default_io_unit()
1820 
1821  n = zij(1, 1)%matrix_struct%nrow_global
1822  ndim = (SIZE(zij, 2))
1823 
1824  IF (output_unit > 0) THEN
1825  WRITE (output_unit, '(T4,A )') "Localization by direct minimization of the functional; "
1826  WRITE (output_unit, '(T5,2A13,A20,A20,A10 )') " Line search ", " Iteration ", " Functional ", " Tolerance ", " ds Min "
1827  END IF
1828 
1829  ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n))
1830  ALLOCATE (c_zij(ndim))
1831 
1832  ! create the three complex matrices Z
1833  DO idim = 1, ndim
1834  CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
1835  c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
1836  zij(2, idim)%local_data, dp)
1837  END DO
1838 
1839  CALL cp_fm_create(matrix_a, zij(1, 1)%matrix_struct)
1840  CALL cp_fm_create(matrix_g, zij(1, 1)%matrix_struct)
1841  CALL cp_fm_create(matrix_t, zij(1, 1)%matrix_struct)
1842  CALL cp_fm_create(matrix_h, zij(1, 1)%matrix_struct)
1843  CALL cp_fm_create(matrix_g_search, zij(1, 1)%matrix_struct)
1844  CALL cp_fm_create(matrix_g_old, zij(1, 1)%matrix_struct)
1845  CALL cp_fm_create(matrix_r, zij(1, 1)%matrix_struct)
1846  CALL cp_fm_set_all(matrix_r, 0.0_dp, 1.0_dp)
1847 
1848  CALL cp_fm_set_all(matrix_a, 0.0_dp)
1849 ! CALL cp_fm_init_random ( matrix_A )
1850 
1851  CALL cp_cfm_create(cmat_a, zij(1, 1)%matrix_struct)
1852  CALL cp_cfm_create(cmat_u, zij(1, 1)%matrix_struct)
1853  CALL cp_cfm_create(cmat_r, zij(1, 1)%matrix_struct)
1854  CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1855  CALL cp_cfm_create(cmat_t2, zij(1, 1)%matrix_struct)
1856  CALL cp_cfm_create(cmat_b, zij(1, 1)%matrix_struct)
1857  CALL cp_cfm_create(cmat_m, zij(1, 1)%matrix_struct)
1858 
1859  CALL cp_cfm_get_info(cmat_b, nrow_local=nrow_local, ncol_local=ncol_local, &
1860  row_indices=row_indices, col_indices=col_indices)
1861 
1862  CALL cp_fm_set_all(matrix_g_old, 0.0_dp)
1863  CALL cp_fm_set_all(matrix_g_search, 0.0_dp)
1864  normg_old = 1.0e30_dp
1865  ds_min = 1.0_dp
1866  new_direction = .true.
1867  iterations = 0
1868  line_searches = 0
1869  line_search_count = 0
1870  DO
1871  iterations = iterations + 1
1872  ! compute U,R,evals given A
1873  cmat_a%local_data = cmplx(0.0_dp, matrix_a%local_data, dp) ! cmat_A is hermitian, evals are reals
1874  CALL cp_cfm_heevd(cmat_a, cmat_r, evals)
1875  evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1876  CALL cp_cfm_to_cfm(cmat_r, cmat_t1)
1877  CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1878  CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_u)
1879  cmat_u%local_data = real(cmat_u%local_data, kind=dp) ! enforce numerics, U is a real matrix
1880 
1881  IF (new_direction .AND. mod(line_searches, 20) .EQ. 5) THEN ! reset with A .eq. 0
1882  DO idim = 1, ndim
1883  CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1)
1884  CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_u, cmat_t1, czero, c_zij(idim))
1885  END DO
1886  ! collect rotation matrix
1887  matrix_h%local_data = real(cmat_u%local_data, kind=dp)
1888  CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
1889  CALL cp_fm_to_fm(matrix_t, matrix_r)
1890 
1891  CALL cp_cfm_set_all(cmat_u, czero, cone)
1892  CALL cp_cfm_set_all(cmat_r, czero, cone)
1893  CALL cp_cfm_set_all(cmat_a, czero)
1894  CALL cp_fm_set_all(matrix_a, 0.0_dp)
1895  evals(:) = 0.0_dp
1896  evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1897  CALL cp_fm_set_all(matrix_g_old, 0.0_dp)
1898  CALL cp_fm_set_all(matrix_g_search, 0.0_dp)
1899  normg_old = 1.0e30_dp
1900  END IF
1901 
1902  ! compute Omega and M
1903  CALL cp_cfm_set_all(cmat_m, czero)
1904  omega = 0.0_dp
1905  DO idim = 1, ndim
1906  CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1) ! t1=ZU
1907  CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_u, cmat_t1, czero, cmat_t2) ! t2=(U^T)ZU
1908  DO i = 1, n
1909  CALL cp_cfm_get_element(cmat_t2, i, i, diag_z(i, idim))
1910  SELECT CASE (2) ! allows for selection of different spread functionals
1911  CASE (1)
1912  fval(i) = -weights(idim)*log(abs(diag_z(i, idim))**2)
1913  fvald(i) = -weights(idim)/(abs(diag_z(i, idim))**2)
1914  CASE (2) ! corresponds to the jacobi setup
1915  fval(i) = weights(idim) - weights(idim)*abs(diag_z(i, idim))**2
1916  fvald(i) = -weights(idim)
1917  END SELECT
1918  omega = omega + fval(i)
1919  END DO
1920  DO icol = 1, ncol_local
1921  DO irow = 1, nrow_local
1922  tmp = cmat_t1%local_data(irow, icol)*conjg(diag_z(col_indices(icol), idim))
1923  cmat_m%local_data(irow, icol) = cmat_m%local_data(irow, icol) &
1924  + 4.0_dp*fvald(col_indices(icol))*real(tmp, kind=dp)
1925  END DO
1926  END DO
1927  END DO
1928 
1929  ! compute Hessian diagonal approximation for the preconditioner
1930  IF (.true.) THEN
1931  CALL gradsq_at_0(diag_z, weights, matrix_h, ndim)
1932  ELSE
1933  CALL cp_fm_set_all(matrix_h, 1.0_dp)
1934  END IF
1935 
1936  ! compute B
1937  DO icol = 1, ncol_local
1938  DO irow = 1, nrow_local
1939  ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow))
1940  lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol))
1941  IF (abs(ll - lk) .LT. 0.5_dp) THEN ! use a series expansion to avoid loss of precision
1942  tmp = 1.0_dp
1943  cmat_b%local_data(irow, icol) = 0.0_dp
1944  DO i = 1, 16
1945  cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol) + tmp
1946  tmp = tmp*(ll - lk)/(i + 1)
1947  END DO
1948  cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol)*exp(lk)
1949  ELSE
1950  cmat_b%local_data(irow, icol) = (exp(lk) - exp(ll))/(lk - ll)
1951  END IF
1952  END DO
1953  END DO
1954  ! compute gradient matrix_G
1955 
1956  CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_m, cmat_r, czero, cmat_t1) ! t1=(M^T)(R^T)
1957  CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_r, cmat_t1, czero, cmat_t2) ! t2=(R)t1
1958  CALL cp_cfm_schur_product(cmat_t2, cmat_b, cmat_t1)
1959  CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_t2)
1960  CALL parallel_gemm('N', 'N', n, n, n, cone, cmat_r, cmat_t2, czero, cmat_t1)
1961  matrix_g%local_data = real(cmat_t1%local_data, kind=dp)
1962  CALL cp_fm_transpose(matrix_g, matrix_t)
1963  CALL cp_fm_scale_and_add(-1.0_dp, matrix_g, 1.0_dp, matrix_t)
1964  CALL cp_fm_maxabsval(matrix_g, tol)
1965 
1966  ! from here on, minimizing technology
1967  IF (new_direction) THEN
1968  ! energy converged up to machine precision ?
1969  line_searches = line_searches + 1
1970  ! DO i=1,line_search_count
1971  ! write(15,*) pos(i),energy(i)
1972  ! ENDDO
1973  ! write(15,*) ""
1974  ! CALL m_flush(15)
1975  !write(16,*) evals(:)
1976  !write(17,*) matrix_A%local_data(:,:)
1977  !write(18,*) matrix_G%local_data(:,:)
1978  IF (output_unit > 0) THEN
1979  WRITE (output_unit, '(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, iterations, omega, tol, ds_min
1980  CALL m_flush(output_unit)
1981  END IF
1982  IF (tol < eps_localization .OR. iterations > max_iter) EXIT
1983 
1984  IF (.true.) THEN ! do conjugate gradient CG
1985  CALL cp_fm_trace(matrix_g, matrix_g_old, normg_cross)
1986  normg_cross = normg_cross*0.5_dp ! takes into account the fact that A is antisymmetric
1987  ! apply the preconditioner
1988  DO icol = 1, ncol_local
1989  DO irow = 1, nrow_local
1990  matrix_g_old%local_data(irow, icol) = matrix_g%local_data(irow, icol)/matrix_h%local_data(irow, icol)
1991  END DO
1992  END DO
1993  CALL cp_fm_trace(matrix_g, matrix_g_old, normg)
1994  normg = normg*0.5_dp
1995  beta_pr = (normg - normg_cross)/normg_old
1996  normg_old = normg
1997  beta_pr = max(beta_pr, 0.0_dp)
1998  CALL cp_fm_scale_and_add(beta_pr, matrix_g_search, -1.0_dp, matrix_g_old)
1999  CALL cp_fm_trace(matrix_g_search, matrix_g_old, normg_cross)
2000  IF (normg_cross .GE. 0) THEN ! back to SD
2001  IF (matrix_a%matrix_struct%para_env%is_source()) THEN
2002  WRITE (cp_logger_get_default_unit_nr(), *) "!"
2003  END IF
2004  beta_pr = 0.0_dp
2005  CALL cp_fm_scale_and_add(beta_pr, matrix_g_search, -1.0_dp, matrix_g_old)
2006  END IF
2007  ELSE ! SD
2008  CALL cp_fm_scale_and_add(0.0_dp, matrix_g_search, -1.0_dp, matrix_g)
2009  END IF
2010  ! ds_min=1.0E-4_dp
2011  line_search_count = 0
2012  END IF
2013  line_search_count = line_search_count + 1
2014  energy(line_search_count) = omega
2015 
2016  ! line search section
2017  SELECT CASE (3)
2018  CASE (1) ! two point line search
2019  SELECT CASE (line_search_count)
2020  CASE (1)
2021  pos(1) = 0.0_dp
2022  pos(2) = ds_min
2023  CALL cp_fm_trace(matrix_g, matrix_g_search, grad(1))
2024  grad(1) = grad(1)/2.0_dp
2025  new_direction = .false.
2026  CASE (2)
2027  new_direction = .true.
2028  x0 = pos(1) ! 0.0_dp
2029  c = energy(1)
2030  b = grad(1)
2031  x1 = pos(2)
2032  a = (energy(2) - b*x1 - c)/(x1**2)
2033  IF (a .LE. 0.0_dp) a = 1.0e-15_dp
2034  npos = -b/(2.0_dp*a)
2035  val = a*npos**2 + b*npos + c
2036  IF (val .LT. energy(1) .AND. val .LE. energy(2)) THEN
2037  ! we go to a minimum, but ...
2038  ! we take a guard against too large steps
2039  pos(3) = min(npos, maxval(pos(1:2))*4.0_dp)
2040  ELSE ! just take an extended step
2041  pos(3) = maxval(pos(1:2))*2.0_dp
2042  END IF
2043  END SELECT
2044  CASE (2) ! 3 point line search
2045  SELECT CASE (line_search_count)
2046  CASE (1)
2047  new_direction = .false.
2048  pos(1) = 0.0_dp
2049  pos(2) = ds_min*0.8_dp
2050  CASE (2)
2051  new_direction = .false.
2052  IF (energy(2) .GT. energy(1)) THEN
2053  pos(3) = ds_min*0.7_dp
2054  ELSE
2055  pos(3) = ds_min*1.4_dp
2056  END IF
2057  CASE (3)
2058  new_direction = .true.
2059  xa = pos(1)
2060  xb = pos(2)
2061  xc = pos(3)
2062  fa = energy(1)
2063  fb = energy(2)
2064  fc = energy(3)
2065  nom = (xb - xa)**2*(fb - fc) - (xb - xc)**2*(fb - fa)
2066  denom = (xb - xa)*(fb - fc) - (xb - xc)*(fb - fa)
2067  IF (abs(denom) .LE. 1.0e-18_dp*max(abs(fb - fc), abs(fb - fa))) THEN
2068  npos = xb
2069  ELSE
2070  npos = xb - 0.5_dp*nom/denom ! position of the stationary point
2071  END IF
2072  val = (npos - xa)*(npos - xb)*fc/((xc - xa)*(xc - xb)) + &
2073  (npos - xb)*(npos - xc)*fa/((xa - xb)*(xa - xc)) + &
2074  (npos - xc)*(npos - xa)*fb/((xb - xc)*(xb - xa))
2075  IF (val .LT. fa .AND. val .LE. fb .AND. val .LE. fc) THEN ! OK, we go to a minimum
2076  ! we take a guard against too large steps
2077  pos(4) = max(maxval(pos(1:3))*0.01_dp, &
2078  min(npos, maxval(pos(1:3))*4.0_dp))
2079  ELSE ! just take an extended step
2080  pos(4) = maxval(pos(1:3))*2.0_dp
2081  END IF
2082  END SELECT
2083  CASE (3) ! golden section hunt
2084  new_direction = .false.
2085  IF (line_search_count .EQ. 1) THEN
2086  lsl = 1
2087  lsr = 0
2088  lsm = 1
2089  pos(1) = 0.0_dp
2090  pos(2) = ds_min/gold_sec
2091  ELSE
2092  IF (line_search_count .EQ. 150) cpabort("Too many")
2093  IF (lsr .EQ. 0) THEN
2094  IF (energy(line_search_count - 1) .LT. energy(line_search_count)) THEN
2095  lsr = line_search_count
2096  pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
2097  ELSE
2098  lsl = lsm
2099  lsm = line_search_count
2100  pos(line_search_count + 1) = pos(line_search_count)/gold_sec
2101  END IF
2102  ELSE
2103  IF (pos(line_search_count) .LT. pos(lsm)) THEN
2104  IF (energy(line_search_count) .LT. energy(lsm)) THEN
2105  lsr = lsm
2106  lsm = line_search_count
2107  ELSE
2108  lsl = line_search_count
2109  END IF
2110  ELSE
2111  IF (energy(line_search_count) .LT. energy(lsm)) THEN
2112  lsl = lsm
2113  lsm = line_search_count
2114  ELSE
2115  lsr = line_search_count
2116  END IF
2117  END IF
2118  IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl)) THEN
2119  pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
2120  ELSE
2121  pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
2122  END IF
2123  IF ((pos(lsr) - pos(lsl)) .LT. 1.0e-3_dp*pos(lsr)) THEN
2124  new_direction = .true.
2125  END IF
2126  END IF ! lsr .eq. 0
2127  END IF ! first step
2128  END SELECT
2129  ! now go to the suggested point
2130  ds_min = pos(line_search_count + 1)
2131  ds = pos(line_search_count + 1) - pos(line_search_count)
2132  CALL cp_fm_scale_and_add(1.0_dp, matrix_a, ds, matrix_g_search)
2133  END DO
2134 
2135  ! collect rotation matrix
2136  matrix_h%local_data = real(cmat_u%local_data, kind=dp)
2137  CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
2138  CALL cp_fm_to_fm(matrix_t, matrix_r)
2139  CALL rotate_orbitals(matrix_r, vectors)
2140  CALL cp_fm_release(matrix_r)
2141 
2142  CALL cp_fm_release(matrix_a)
2143  CALL cp_fm_release(matrix_g)
2144  CALL cp_fm_release(matrix_h)
2145  CALL cp_fm_release(matrix_t)
2146  CALL cp_fm_release(matrix_g_search)
2147  CALL cp_fm_release(matrix_g_old)
2148  CALL cp_cfm_release(cmat_a)
2149  CALL cp_cfm_release(cmat_u)
2150  CALL cp_cfm_release(cmat_r)
2151  CALL cp_cfm_release(cmat_t1)
2152  CALL cp_cfm_release(cmat_t2)
2153  CALL cp_cfm_release(cmat_b)
2154  CALL cp_cfm_release(cmat_m)
2155 
2156  DEALLOCATE (evals, evals_exp, fval, fvald)
2157 
2158  DO idim = 1, SIZE(c_zij)
2159  zij(1, idim)%local_data = real(c_zij(idim)%local_data, dp)
2160  zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
2161  CALL cp_cfm_release(c_zij(idim))
2162  END DO
2163  DEALLOCATE (c_zij)
2164  DEALLOCATE (diag_z)
2165 
2166  CALL timestop(handle)
2167 
2168  END SUBROUTINE
2169 
2170 ! **************************************************************************************************
2171 !> \brief Parallel algorithm for jacobi rotations
2172 !> \param weights ...
2173 !> \param zij ...
2174 !> \param vectors ...
2175 !> \param para_env ...
2176 !> \param max_iter ...
2177 !> \param eps_localization ...
2178 !> \param sweeps ...
2179 !> \param out_each ...
2180 !> \param target_time ...
2181 !> \param start_time ...
2182 !> \param restricted ...
2183 !> \par History
2184 !> use allgather for improved performance
2185 !> \author MI (11.2009)
2186 ! **************************************************************************************************
2187  SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
2188  sweeps, out_each, target_time, start_time, restricted)
2189 
2190  REAL(kind=dp), INTENT(IN) :: weights(:)
2191  TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
2192  TYPE(mp_para_env_type), POINTER :: para_env
2193  INTEGER, INTENT(IN) :: max_iter
2194  REAL(kind=dp), INTENT(IN) :: eps_localization
2195  INTEGER :: sweeps
2196  INTEGER, INTENT(IN) :: out_each
2197  REAL(dp) :: target_time, start_time
2198  INTEGER :: restricted
2199 
2200  CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rot_para'
2201 
2202  INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2203  nblock, nblock_max, ns_me, nstate, &
2204  output_unit
2205  INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2206  REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_im, z_ij_loc_re
2207  REAL(kind=dp) :: xstate
2208  TYPE(cp_fm_type) :: rmat
2209  TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2210 
2211  CALL timeset(routinen, handle)
2212 
2213  output_unit = cp_logger_get_default_io_unit()
2214 
2215  NULLIFY (cz_ij_loc)
2216 
2217  dim2 = SIZE(zij, 2)
2218 
2219  CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
2220  CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
2221 
2222  CALL cp_fm_get_info(rmat, nrow_global=nstate)
2223 
2224  IF (restricted > 0) THEN
2225  IF (output_unit > 0) THEN
2226  WRITE (output_unit, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
2227  END IF
2228  nstate = nstate - restricted
2229  END IF
2230 
2231  ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2232  xstate = real(nstate, dp)/real(para_env%num_pe, dp)
2233  ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2234  DO ip = 1, para_env%num_pe
2235  ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2236  ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2237  END DO
2238  nblock_max = 0
2239  DO ip = 0, para_env%num_pe - 1
2240  nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2241  nblock_max = max(nblock_max, nblock)
2242  END DO
2243 
2244  ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2245  ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2246  ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2247  ALLOCATE (cz_ij_loc(dim2))
2248  DO idim = 1, dim2
2249  DO ip = 0, para_env%num_pe - 1
2250  nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2251  CALL cp_fm_get_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2252  CALL cp_fm_get_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2253  IF (para_env%mepos == ip) THEN
2254  ALLOCATE (cz_ij_loc(idim)%c_array(nstate, nblock))
2255  DO i = 1, nblock
2256  DO j = 1, nstate
2257  cz_ij_loc(idim)%c_array(j, i) = cmplx(z_ij_loc_re(j, i), z_ij_loc_im(j, i), dp)
2258  END DO
2259  END DO
2260  END IF
2261  END DO ! ip
2262  END DO
2263  DEALLOCATE (z_ij_loc_re)
2264  DEALLOCATE (z_ij_loc_im)
2265 
2266  ALLOCATE (rotmat(nstate, 2*nblock_max))
2267 
2268  CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, ns_bound, &
2269  cz_ij_loc, rotmat, output_unit, eps_localization=eps_localization, &
2270  target_time=target_time, start_time=start_time)
2271 
2272  ilow1 = ns_bound(para_env%mepos, 1)
2273  ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2274  ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2275  ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2276  DO idim = 1, dim2
2277  DO ip = 0, para_env%num_pe - 1
2278  z_ij_loc_re = 0.0_dp
2279  z_ij_loc_im = 0.0_dp
2280  nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2281  IF (ip == para_env%mepos) THEN
2282  ns_me = nblock
2283  DO i = 1, ns_me
2284  ii = ilow1 + i - 1
2285  DO j = 1, nstate
2286  z_ij_loc_re(j, i) = real(cz_ij_loc(idim)%c_array(j, i), dp)
2287  z_ij_loc_im(j, i) = aimag(cz_ij_loc(idim)%c_array(j, i))
2288  END DO
2289  END DO
2290  END IF
2291  CALL para_env%bcast(z_ij_loc_re, ip)
2292  CALL para_env%bcast(z_ij_loc_im, ip)
2293  CALL cp_fm_set_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2294  CALL cp_fm_set_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2295  END DO ! ip
2296  END DO
2297 
2298  DO ip = 0, para_env%num_pe - 1
2299  z_ij_loc_re = 0.0_dp
2300  nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2301  IF (ip == para_env%mepos) THEN
2302  ns_me = nblock
2303  DO i = 1, ns_me
2304  ii = ilow1 + i - 1
2305  DO j = 1, nstate
2306  z_ij_loc_re(j, i) = rotmat(j, i)
2307  END DO
2308  END DO
2309  END IF
2310  CALL para_env%bcast(z_ij_loc_re, ip)
2311  CALL cp_fm_set_submatrix(rmat, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2312  END DO
2313 
2314  DEALLOCATE (z_ij_loc_re)
2315  DEALLOCATE (z_ij_loc_im)
2316  DO idim = 1, dim2
2317  DEALLOCATE (cz_ij_loc(idim)%c_array)
2318  END DO
2319  DEALLOCATE (cz_ij_loc)
2320 
2321  CALL para_env%sync()
2322  CALL rotate_orbitals(rmat, vectors)
2323  CALL cp_fm_release(rmat)
2324 
2325  DEALLOCATE (rotmat)
2326  DEALLOCATE (ns_bound)
2327 
2328  CALL timestop(handle)
2329 
2330  END SUBROUTINE jacobi_rot_para
2331 
2332 ! **************************************************************************************************
2333 !> \brief almost identical to 'jacobi_rot_para' but with different inout variables
2334 !> \param weights ...
2335 !> \param czij ...
2336 !> \param para_env ...
2337 !> \param max_iter ...
2338 !> \param rmat ...
2339 !> \param tol_out ...
2340 !> \author Soumya Ghosh (08/21)
2341 ! **************************************************************************************************
2342  SUBROUTINE jacobi_rot_para_1(weights, czij, para_env, max_iter, rmat, tol_out)
2343 
2344  REAL(kind=dp), INTENT(IN) :: weights(:)
2345  TYPE(cp_cfm_type), INTENT(IN) :: czij(:)
2346  TYPE(mp_para_env_type), POINTER :: para_env
2347  INTEGER, INTENT(IN) :: max_iter
2348  TYPE(cp_cfm_type), INTENT(IN) :: rmat
2349  REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2350 
2351  CHARACTER(len=*), PARAMETER :: routinen = 'jacobi_rot_para_1'
2352 
2353  COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: czij_array
2354  INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2355  nblock, nblock_max, ns_me, nstate, &
2356  sweeps
2357  INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2358  REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_re
2359  REAL(kind=dp) :: xstate
2360  TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2361 
2362  CALL timeset(routinen, handle)
2363 
2364  dim2 = SIZE(czij)
2365 
2366  CALL cp_cfm_set_all(rmat, cmplx(0._dp, 0._dp, dp), cmplx(1._dp, 0._dp, dp))
2367 
2368  CALL cp_cfm_get_info(rmat, nrow_global=nstate)
2369 
2370  ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2371  xstate = real(nstate, dp)/real(para_env%num_pe, dp)
2372  ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2373  DO ip = 1, para_env%num_pe
2374  ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2375  ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2376  END DO
2377  nblock_max = 0
2378  DO ip = 0, para_env%num_pe - 1
2379  nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2380  nblock_max = max(nblock_max, nblock)
2381  END DO
2382 
2383  ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2384  ALLOCATE (czij_array(nstate, nblock_max))
2385  ALLOCATE (cz_ij_loc(dim2))
2386  DO idim = 1, dim2
2387  DO ip = 0, para_env%num_pe - 1
2388  nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2389  ! cfm --> allocatable
2390  CALL cp_cfm_get_submatrix(czij(idim), czij_array, 1, ns_bound(ip, 1), nstate, nblock)
2391  IF (para_env%mepos == ip) THEN
2392  ns_me = nblock
2393  ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me))
2394  DO i = 1, ns_me
2395  DO j = 1, nstate
2396  cz_ij_loc(idim)%c_array(j, i) = czij_array(j, i)
2397  END DO
2398  END DO
2399  END IF
2400  END DO ! ip
2401  END DO
2402  DEALLOCATE (czij_array)
2403 
2404  ALLOCATE (rotmat(nstate, 2*nblock_max))
2405 
2406  CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, 1, dim2, nstate, nblock_max, ns_bound, &
2407  cz_ij_loc, rotmat, 0, tol_out=tol_out)
2408 
2409  ilow1 = ns_bound(para_env%mepos, 1)
2410  ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2411  ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2412 
2413  DO ip = 0, para_env%num_pe - 1
2414  z_ij_loc_re = 0.0_dp
2415  nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2416  IF (ip == para_env%mepos) THEN
2417  ns_me = nblock
2418  DO i = 1, ns_me
2419  ii = ilow1 + i - 1
2420  DO j = 1, nstate
2421  z_ij_loc_re(j, i) = rotmat(j, i)
2422  END DO
2423  END DO
2424  END IF
2425  CALL para_env%bcast(z_ij_loc_re, ip)
2426  CALL cp_cfm_set_submatrix(rmat, cmplx(z_ij_loc_re, 0.0_dp, dp), 1, ns_bound(ip, 1), nstate, nblock)
2427  END DO
2428 
2429  DEALLOCATE (z_ij_loc_re)
2430  DO idim = 1, dim2
2431  DEALLOCATE (cz_ij_loc(idim)%c_array)
2432  END DO
2433  DEALLOCATE (cz_ij_loc)
2434 
2435  CALL para_env%sync()
2436 
2437  DEALLOCATE (rotmat)
2438  DEALLOCATE (ns_bound)
2439 
2440  CALL timestop(handle)
2441 
2442  END SUBROUTINE jacobi_rot_para_1
2443 
2444 ! **************************************************************************************************
2445 !> \brief Parallel algorithm for jacobi rotations
2446 !> \param weights ...
2447 !> \param para_env ...
2448 !> \param max_iter ...
2449 !> \param sweeps ...
2450 !> \param out_each ...
2451 !> \param dim2 ...
2452 !> \param nstate ...
2453 !> \param nblock_max ...
2454 !> \param ns_bound ...
2455 !> \param cz_ij_loc ...
2456 !> \param rotmat ...
2457 !> \param output_unit ...
2458 !> \param tol_out ...
2459 !> \param eps_localization ...
2460 !> \param target_time ...
2461 !> \param start_time ...
2462 !> \par History
2463 !> split out to reuse with different input types
2464 !> \author HF (05.2022)
2465 ! **************************************************************************************************
2466  SUBROUTINE jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, &
2467  ns_bound, cz_ij_loc, rotmat, output_unit, tol_out, eps_localization, target_time, start_time)
2468 
2469  REAL(kind=dp), INTENT(IN) :: weights(:)
2470  TYPE(mp_para_env_type), POINTER :: para_env
2471  INTEGER, INTENT(IN) :: max_iter
2472  INTEGER, INTENT(OUT) :: sweeps
2473  INTEGER, INTENT(IN) :: out_each, dim2, nstate, nblock_max
2474  INTEGER, DIMENSION(0:, :), INTENT(IN) :: ns_bound
2475  TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2476  REAL(dp), DIMENSION(:, :), INTENT(OUT) :: rotmat
2477  INTEGER, INTENT(IN) :: output_unit
2478  REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2479  REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_localization
2480  REAL(dp), OPTIONAL :: target_time, start_time
2481 
2482  COMPLEX(KIND=dp) :: zi, zj
2483  COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: c_array_me, c_array_partner
2484  COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
2485  INTEGER :: i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, ip, ip_has_i, &
2486  ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, iu1, iu2, iup1, &
2487  iup2, j, jj, jstate, k, kk, lsweep, n1, n2, npair, nperm, ns_me, ns_partner, &
2488  ns_recv_from, ns_recv_partner
2489  INTEGER, ALLOCATABLE, DIMENSION(:) :: rcount, rdispl
2490  INTEGER, ALLOCATABLE, DIMENSION(:, :) :: list_pair
2491  LOGICAL :: should_stop
2492  REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: gmat, rmat_loc, rmat_recv, rmat_send
2493  REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: rmat_recv_all
2494  REAL(kind=dp) :: ct, func, gmax, grad, ri, rj, st, t1, &
2495  t2, theta, tolerance, zc, zr
2496  TYPE(set_c_1d_type), DIMENSION(:), POINTER :: zdiag_all, zdiag_me
2497  TYPE(set_c_2d_type), DIMENSION(:), POINTER :: xyz_mix, xyz_mix_ns
2498 
2499  NULLIFY (zdiag_all, zdiag_me)
2500  NULLIFY (xyz_mix, xyz_mix_ns)
2501  NULLIFY (mii, mij, mjj)
2502 
2503  ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
2504 
2505  ALLOCATE (rcount(para_env%num_pe), stat=istat)
2506  ALLOCATE (rdispl(para_env%num_pe), stat=istat)
2507 
2508  tolerance = 1.0e10_dp
2509  sweeps = 0
2510 
2511  ! number of processor pairs and number of permutations
2512  npair = (para_env%num_pe + 1)/2
2513  nperm = para_env%num_pe - mod(para_env%num_pe + 1, 2)
2514  ALLOCATE (list_pair(2, npair))
2515 
2516  ! initialize rotation matrix
2517  rotmat = 0.0_dp
2518  DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2519  ii = i - ns_bound(para_env%mepos, 1) + 1
2520  rotmat(i, ii) = 1.0_dp
2521  END DO
2522 
2523  ALLOCATE (xyz_mix(dim2))
2524  ALLOCATE (xyz_mix_ns(dim2))
2525  ALLOCATE (zdiag_me(dim2))
2526  ALLOCATE (zdiag_all(dim2))
2527 
2528  ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2529  IF (ns_me /= 0) THEN
2530  ALLOCATE (c_array_me(nstate, ns_me, dim2))
2531  DO idim = 1, dim2
2532  ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me))
2533  END DO
2534  ALLOCATE (gmat(nstate, ns_me))
2535  END IF
2536 
2537  DO idim = 1, dim2
2538  ALLOCATE (zdiag_me(idim)%c_array(nblock_max))
2539  zdiag_me(idim)%c_array = cmplx(0.0_dp, 0.0_dp, dp)
2540  ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max))
2541  zdiag_all(idim)%c_array = cmplx(0.0_dp, 0.0_dp, dp)
2542  END DO
2543  ALLOCATE (rmat_recv(nblock_max*2, nblock_max))
2544  ALLOCATE (rmat_send(nblock_max*2, nblock_max))
2545 
2546  ! buffer for message passing
2547  ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1))
2548 
2549  IF (output_unit > 0) THEN
2550  WRITE (output_unit, '(T4,A )') " Localization by iterative distributed Jacobi rotation"
2551  WRITE (output_unit, '(T20,A12,T32, A22,T60, A12,A8 )') "Iteration", "Functional", "Tolerance", " Time "
2552  END IF
2553 
2554  DO lsweep = 1, max_iter + 1
2555  sweeps = lsweep
2556  IF (sweeps == max_iter + 1) THEN
2557  IF (output_unit > 0) THEN
2558  WRITE (output_unit, *) ' LOCALIZATION! loop did not converge within the maximum number of iterations.'
2559  WRITE (output_unit, *) ' Present Max. gradient = ', tolerance
2560  END IF
2561  EXIT
2562  END IF
2563  t1 = m_walltime()
2564 
2565  DO iperm = 1, nperm
2566 
2567  ! fix partners for this permutation, and get the number of states
2568  CALL eberlein(iperm, para_env, list_pair)
2569  ip_partner = -1
2570  ns_partner = 0
2571  DO ipair = 1, npair
2572  IF (list_pair(1, ipair) == para_env%mepos) THEN
2573  ip_partner = list_pair(2, ipair)
2574  EXIT
2575  ELSE IF (list_pair(2, ipair) == para_env%mepos) THEN
2576  ip_partner = list_pair(1, ipair)
2577  EXIT
2578  END IF
2579  END DO
2580  IF (ip_partner >= 0) THEN
2581  ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1
2582  ELSE
2583  ns_partner = 0
2584  END IF
2585 
2586  ! if there is a non-zero block connecting two partners, jacobi-sweep it.
2587  IF (ns_partner*ns_me /= 0) THEN
2588 
2589  ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner))
2590  rmat_loc = 0.0_dp
2591  DO i = 1, ns_me + ns_partner
2592  rmat_loc(i, i) = 1.0_dp
2593  END DO
2594 
2595  ALLOCATE (c_array_partner(nstate, ns_partner, dim2))
2596 
2597  DO idim = 1, dim2
2598  ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner))
2599  DO i = 1, ns_me
2600  c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i)
2601  END DO
2602  END DO
2603 
2604  CALL para_env%sendrecv(msgin=c_array_me, dest=ip_partner, &
2605  msgout=c_array_partner, source=ip_partner)
2606 
2607  n1 = ns_me
2608  n2 = ns_partner
2609  ilow1 = ns_bound(para_env%mepos, 1)
2610  iup1 = ns_bound(para_env%mepos, 1) + n1 - 1
2611  ilow2 = ns_bound(ip_partner, 1)
2612  iup2 = ns_bound(ip_partner, 1) + n2 - 1
2613  IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1)) THEN
2614  il1 = 1
2615  iu1 = n1
2616  iu1 = n1
2617  il2 = 1 + n1
2618  iu2 = n1 + n2
2619  ELSE
2620  il1 = 1 + n2
2621  iu1 = n1 + n2
2622  iu1 = n1 + n2
2623  il2 = 1
2624  iu2 = n2
2625  END IF
2626 
2627  DO idim = 1, dim2
2628  DO i = 1, n1
2629  xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim)
2630  xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim)
2631  END DO
2632  DO i = 1, n2
2633  xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim)
2634  xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim)
2635  END DO
2636  END DO
2637 
2638  DO istate = 1, n1 + n2
2639  DO jstate = istate + 1, n1 + n2
2640  DO idim = 1, dim2
2641  mii(idim) = xyz_mix(idim)%c_array(istate, istate)
2642  mij(idim) = xyz_mix(idim)%c_array(istate, jstate)
2643  mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate)
2644  END DO
2645  CALL get_angle(mii, mjj, mij, weights, theta)
2646  st = sin(theta)
2647  ct = cos(theta)
2648  DO idim = 1, dim2
2649  DO i = 1, n1 + n2
2650  zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate)
2651  zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate)
2652  xyz_mix(idim)%c_array(i, istate) = zi
2653  xyz_mix(idim)%c_array(i, jstate) = zj
2654  END DO
2655  DO i = 1, n1 + n2
2656  zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i)
2657  zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i)
2658  xyz_mix(idim)%c_array(istate, i) = zi
2659  xyz_mix(idim)%c_array(jstate, i) = zj
2660  END DO
2661  END DO
2662 
2663  DO i = 1, n1 + n2
2664  ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate)
2665  rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate)
2666  rmat_loc(i, istate) = ri
2667  rmat_loc(i, jstate) = rj
2668  END DO
2669  END DO
2670  END DO
2671 
2672  k = nblock_max + 1
2673  CALL para_env%sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, &
2674  rotmat(1:nstate, k:k + n2 - 1), ip_partner)
2675 
2676  IF (ilow1 < ilow2) THEN
2677  ! no longer compiles in official sdgb:
2678  !CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1, k), nstate, rmat_loc(1 + n1, 1), n1 + n2, 0.0_dp, gmat, nstate)
2679  ! probably inefficient:
2680  CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, rmat_loc(1 + n1:, 1:n1), &
2681  n2, 0.0_dp, gmat(:, :), nstate)
2682  CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(1:, 1:), &
2683  n1 + n2, 1.0_dp, gmat(:, :), nstate)
2684  ELSE
2685  CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, &
2686  rmat_loc(1:, n2 + 1:), n1 + n2, 0.0_dp, gmat(:, :), nstate)
2687  ! no longer compiles in official sdgb:
2688  !CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1, 1), nstate, rmat_loc(n2 + 1, n2 + 1), n1 + n2, 1.0_dp, gmat, nstate)
2689  ! probably inefficient:
2690  CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(n2 + 1:, n2 + 1:), &
2691  n1, 1.0_dp, gmat(:, :), nstate)
2692  END IF
2693 
2694  CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1)
2695 
2696  DO idim = 1, dim2
2697  DO i = 1, n1
2698  xyz_mix_ns(idim)%c_array(1:nstate, i) = cmplx(0.0_dp, 0.0_dp, dp)
2699  END DO
2700 
2701  DO istate = 1, n1
2702  DO jstate = 1, nstate
2703  DO i = 1, n2
2704  xyz_mix_ns(idim)%c_array(jstate, istate) = &
2705  xyz_mix_ns(idim)%c_array(jstate, istate) + &
2706  c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1)
2707  END DO
2708  END DO
2709  END DO
2710  DO istate = 1, n1
2711  DO jstate = 1, nstate
2712  DO i = 1, n1
2713  xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + &
2714  c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1)
2715  END DO
2716  END DO
2717  END DO
2718  END DO ! idim
2719 
2720  DEALLOCATE (c_array_partner)
2721 
2722  ELSE ! save my data
2723  DO idim = 1, dim2
2724  DO i = 1, ns_me
2725  xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i)
2726  END DO
2727  END DO
2728  END IF
2729 
2730  DO idim = 1, dim2
2731  DO i = 1, ns_me
2732  cz_ij_loc(idim)%c_array(1:nstate, i) = cmplx(0.0_dp, 0.0_dp, dp)
2733  END DO
2734  END DO
2735 
2736  IF (ns_partner*ns_me /= 0) THEN
2737  ! transpose rotation matrix rmat_loc
2738  DO i = 1, ns_me + ns_partner
2739  DO j = i + 1, ns_me + ns_partner
2740  ri = rmat_loc(i, j)
2741  rmat_loc(i, j) = rmat_loc(j, i)
2742  rmat_loc(j, i) = ri
2743  END DO
2744  END DO
2745 
2746  ! prepare for distribution
2747  DO i = 1, n1
2748  rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1)
2749  END DO
2750  ik = nblock_max
2751  DO i = 1, n2
2752  rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1)
2753  END DO
2754  ELSE
2755  rmat_send = 0.0_dp
2756  END IF
2757 
2758  ! collect data from all tasks (this takes some significant time)
2759  CALL para_env%allgather(rmat_send, rmat_recv_all)
2760 
2761  ! update blocks everywhere
2762  DO ip = 0, para_env%num_pe - 1
2763 
2764  ip_recv_from = mod(para_env%mepos - ip + para_env%num_pe, para_env%num_pe)
2765  rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from)
2766 
2767  ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1
2768 
2769  IF (ns_me /= 0) THEN
2770  IF (ns_recv_from /= 0) THEN
2771  !look for the partner of ip_recv_from
2772  ip_recv_partner = -1
2773  ns_recv_partner = 0
2774  DO ipair = 1, npair
2775  IF (list_pair(1, ipair) == ip_recv_from) THEN
2776  ip_recv_partner = list_pair(2, ipair)
2777  EXIT
2778  ELSE IF (list_pair(2, ipair) == ip_recv_from) THEN
2779  ip_recv_partner = list_pair(1, ipair)
2780  EXIT
2781  END IF
2782  END DO
2783 
2784  IF (ip_recv_partner >= 0) THEN
2785  ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1
2786  END IF
2787  IF (ns_recv_partner > 0) THEN
2788  il1 = ns_bound(para_env%mepos, 1)
2789  il_recv = ns_bound(ip_recv_from, 1)
2790  il_recv_partner = ns_bound(ip_recv_partner, 1)
2791  ik = nblock_max
2792 
2793  DO idim = 1, dim2
2794  DO i = 1, ns_recv_from
2795  ii = il_recv + i - 1
2796  DO j = 1, ns_me
2797  jj = j
2798  DO k = 1, ns_recv_from
2799  kk = il_recv + k - 1
2800  cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2801  rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2802  END DO
2803  END DO
2804  END DO
2805  DO i = 1, ns_recv_from
2806  ii = il_recv + i - 1
2807  DO j = 1, ns_me
2808  jj = j
2809  DO k = 1, ns_recv_partner
2810  kk = il_recv_partner + k - 1
2811  cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2812  rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2813  END DO
2814  END DO
2815  END DO
2816  END DO ! idim
2817  ELSE
2818  il1 = ns_bound(para_env%mepos, 1)
2819  il_recv = ns_bound(ip_recv_from, 1)
2820  DO idim = 1, dim2
2821  DO j = 1, ns_me
2822  jj = j
2823  DO i = 1, ns_recv_from
2824  ii = il_recv + i - 1
2825  cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j)
2826  END DO
2827  END DO
2828  END DO ! idim
2829  END IF
2830  END IF
2831  END IF ! ns_me
2832  END DO ! ip
2833 
2834  IF (ns_partner*ns_me /= 0) THEN
2835  DEALLOCATE (rmat_loc)
2836  DO idim = 1, dim2
2837  DEALLOCATE (xyz_mix(idim)%c_array)
2838  END DO
2839  END IF
2840 
2841  END DO ! iperm
2842 
2843  ! calculate the max gradient
2844  DO idim = 1, dim2
2845  DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2846  ii = i - ns_bound(para_env%mepos, 1) + 1
2847  zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2848  zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2849  END DO
2850  rcount(:) = SIZE(zdiag_me(idim)%c_array)
2851  rdispl(1) = 0
2852  DO ip = 2, para_env%num_pe
2853  rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
2854  END DO
2855  ! collect all the diagonal elements in a replicated 1d array
2856  CALL para_env%allgatherv(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl)
2857  END DO
2858 
2859  gmax = 0.0_dp
2860  DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2861  k = j - ns_bound(para_env%mepos, 1) + 1
2862  DO i = 1, j - 1
2863  ! find the location of the diagonal element (i,i)
2864  DO ip = 0, para_env%num_pe - 1
2865  IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2)) THEN
2866  ip_has_i = ip
2867  EXIT
2868  END IF
2869  END DO
2870  ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1
2871  ! mepos has the diagonal element (j,j), as well as the off diagonal (i,j)
2872  jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1
2873  grad = 0.0_dp
2874  DO idim = 1, dim2
2875  zi = zdiag_all(idim)%c_array(ii)
2876  zj = zdiag_all(idim)%c_array(jj)
2877  grad = grad + weights(idim)*real(4.0_dp*conjg(cz_ij_loc(idim)%c_array(i, k))*(zj - zi), dp)
2878  END DO
2879  gmax = max(gmax, abs(grad))
2880  END DO
2881  END DO
2882 
2883  CALL para_env%max(gmax)
2884  tolerance = gmax
2885  IF (PRESENT(tol_out)) tol_out = tolerance
2886 
2887  func = 0.0_dp
2888  DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2889  k = i - ns_bound(para_env%mepos, 1) + 1
2890  DO idim = 1, dim2
2891  zr = real(cz_ij_loc(idim)%c_array(i, k), dp)
2892  zc = aimag(cz_ij_loc(idim)%c_array(i, k))
2893  func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/twopi/twopi
2894  END DO
2895  END DO
2896  CALL para_env%sum(func)
2897  t2 = m_walltime()
2898 
2899  IF (output_unit > 0 .AND. modulo(sweeps, out_each) == 0) THEN
2900  WRITE (output_unit, '(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1
2901  CALL m_flush(output_unit)
2902  END IF
2903  IF (PRESENT(eps_localization)) THEN
2904  IF (tolerance < eps_localization) EXIT
2905  END IF
2906  IF (PRESENT(target_time) .AND. PRESENT(start_time)) THEN
2907  CALL external_control(should_stop, "LOC", target_time=target_time, start_time=start_time)
2908  IF (should_stop) EXIT
2909  END IF
2910 
2911  END DO ! lsweep
2912 
2913  ! buffer for message passing
2914  DEALLOCATE (rmat_recv_all)
2915 
2916  DEALLOCATE (rmat_recv)
2917  DEALLOCATE (rmat_send)
2918  IF (ns_me > 0) THEN
2919  DEALLOCATE (c_array_me)
2920  END IF
2921  DO idim = 1, dim2
2922  DEALLOCATE (zdiag_me(idim)%c_array)
2923  DEALLOCATE (zdiag_all(idim)%c_array)
2924  END DO
2925  DEALLOCATE (zdiag_me)
2926  DEALLOCATE (zdiag_all)
2927  DEALLOCATE (xyz_mix)
2928  DO idim = 1, dim2
2929  IF (ns_me /= 0) THEN
2930  DEALLOCATE (xyz_mix_ns(idim)%c_array)
2931  END IF
2932  END DO
2933  DEALLOCATE (xyz_mix_ns)
2934  IF (ns_me /= 0) THEN
2935  DEALLOCATE (gmat)
2936  END IF
2937  DEALLOCATE (mii)
2938  DEALLOCATE (mij)
2939  DEALLOCATE (mjj)
2940  DEALLOCATE (list_pair)
2941 
2942  END SUBROUTINE jacobi_rot_para_core
2943 
2944 ! **************************************************************************************************
2945 !> \brief ...
2946 !> \param iperm ...
2947 !> \param para_env ...
2948 !> \param list_pair ...
2949 ! **************************************************************************************************
2950  SUBROUTINE eberlein(iperm, para_env, list_pair)
2951  INTEGER, INTENT(IN) :: iperm
2952  TYPE(mp_para_env_type), POINTER :: para_env
2953  INTEGER, DIMENSION(:, :) :: list_pair
2954 
2955  INTEGER :: i, ii, jj, npair
2956 
2957  npair = (para_env%num_pe + 1)/2
2958  IF (iperm == 1) THEN
2959 !..set up initial ordering
2960  DO i = 0, para_env%num_pe - 1
2961  ii = ((i + 1) + 1)/2
2962  jj = mod((i + 1) + 1, 2) + 1
2963  list_pair(jj, ii) = i
2964  END DO
2965  IF (mod(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1
2966  ELSEIF (mod(iperm, 2) == 0) THEN
2967 !..a type shift
2968  jj = list_pair(1, npair)
2969  DO i = npair, 3, -1
2970  list_pair(1, i) = list_pair(1, i - 1)
2971  END DO
2972  list_pair(1, 2) = list_pair(2, 1)
2973  list_pair(2, 1) = jj
2974  ELSE
2975 !..b type shift
2976  jj = list_pair(2, 1)
2977  DO i = 1, npair - 1
2978  list_pair(2, i) = list_pair(2, i + 1)
2979  END DO
2980  list_pair(2, npair) = jj
2981  END IF
2982 
2983  END SUBROUTINE eberlein
2984 
2985 ! **************************************************************************************************
2986 !> \brief ...
2987 !> \param vectors ...
2988 !> \param op_sm_set ...
2989 !> \param zij_fm_set ...
2990 ! **************************************************************************************************
2991  SUBROUTINE zij_matrix(vectors, op_sm_set, zij_fm_set)
2992 
2993  TYPE(cp_fm_type), INTENT(IN) :: vectors
2994  TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: op_sm_set
2995  TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: zij_fm_set
2996 
2997  CHARACTER(len=*), PARAMETER :: routinen = 'zij_matrix'
2998 
2999  INTEGER :: handle, i, j, nao, nmoloc
3000  TYPE(cp_fm_type) :: opvec
3001 
3002  CALL timeset(routinen, handle)
3003 
3004  ! get rows and cols of the input
3005  CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc)
3006  ! replicate the input kind of matrix
3007  CALL cp_fm_create(opvec, vectors%matrix_struct)
3008 
3009  ! Compute zij here
3010  DO i = 1, SIZE(zij_fm_set, 2)
3011  DO j = 1, SIZE(zij_fm_set, 1)
3012  CALL cp_fm_set_all(zij_fm_set(j, i), 0.0_dp)
3013  CALL cp_dbcsr_sm_fm_multiply(op_sm_set(j, i)%matrix, vectors, opvec, ncol=nmoloc)
3014  CALL parallel_gemm("T", "N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, &
3015  zij_fm_set(j, i))
3016  END DO
3017  END DO
3018 
3019  CALL cp_fm_release(opvec)
3020  CALL timestop(handle)
3021 
3022  END SUBROUTINE zij_matrix
3023 
3024 ! **************************************************************************************************
3025 !> \brief ...
3026 !> \param vectors ...
3027 ! **************************************************************************************************
3028  SUBROUTINE scdm_qrfact(vectors)
3029 
3030  TYPE(cp_fm_type), INTENT(IN) :: vectors
3031 
3032  CHARACTER(len=*), PARAMETER :: routinen = 'scdm_qrfact'
3033 
3034  INTEGER :: handle, ncolt, nrowt
3035  REAL(kind=dp), DIMENSION(:), POINTER :: tau
3036  TYPE(cp_fm_struct_type), POINTER :: cstruct
3037  TYPE(cp_fm_type) :: ctp, qf, tmp
3038 
3039  CALL timeset(routinen, handle)
3040 
3041  ! Create Transpose of Coefficient Matrix vectors
3042  nrowt = vectors%matrix_struct%ncol_global
3043  ncolt = vectors%matrix_struct%nrow_global
3044 
3045  CALL cp_fm_struct_create(cstruct, vectors%matrix_struct%para_env, &
3046  vectors%matrix_struct%context, nrowt, ncolt, vectors%matrix_struct%ncol_block, &
3047  vectors%matrix_struct%nrow_block, first_p_pos=vectors%matrix_struct%first_p_pos)
3048  CALL cp_fm_create(ctp, cstruct)
3049  CALL cp_fm_struct_release(cstruct)
3050 
3051  ALLOCATE (tau(nrowt))
3052 
3053  CALL cp_fm_transpose(vectors, ctp)
3054 
3055  ! Get QR decomposition of CTs
3056  CALL cp_fm_pdgeqpf(ctp, tau, nrowt, ncolt, 1, 1)
3057 
3058  ! Construction of Q from the scalapack output
3059  CALL cp_fm_struct_create(cstruct, para_env=ctp%matrix_struct%para_env, &
3060  context=ctp%matrix_struct%context, nrow_global=ctp%matrix_struct%nrow_global, &
3061  ncol_global=ctp%matrix_struct%nrow_global)
3062  CALL cp_fm_create(qf, cstruct)
3063  CALL cp_fm_struct_release(cstruct)
3064  CALL cp_fm_to_fm_submat(ctp, qf, nrowt, nrowt, 1, 1, 1, 1)
3065 
3066  ! Get Q
3067  CALL cp_fm_pdorgqr(qf, tau, nrowt, 1, 1)
3068 
3069  ! Transform original coefficient matrix vectors
3070  CALL cp_fm_create(tmp, vectors%matrix_struct)
3071  CALL cp_fm_set_all(tmp, 0.0_dp, 1.0_dp)
3072  CALL cp_fm_to_fm(vectors, tmp)
3073  CALL parallel_gemm('N', 'N', ncolt, nrowt, nrowt, 1.0_dp, tmp, qf, 0.0_dp, vectors)
3074 
3075  ! Cleanup
3076  CALL cp_fm_release(ctp)
3077  CALL cp_fm_release(tmp)
3078  CALL cp_fm_release(qf)
3079  DEALLOCATE (tau)
3080 
3081  CALL timestop(handle)
3082 
3083  END SUBROUTINE scdm_qrfact
3084 
3085 END MODULE qs_localization_methods
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
Definition: grid_common.h:117
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
Handles all functions related to the CELL.
Definition: cell_types.F:15
methods related to the blacs parallel environment
Definition: cp_blacs_env.F:15
Basic linear algebra operations for complex full matrices.
subroutine, public cp_cfm_scale_and_add(alpha, matrix_a, beta, matrix_b)
Scale and add two BLACS matrices (a = alpha*a + beta*b).
subroutine, public cp_cfm_rot_rows(matrix, irow, jrow, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
subroutine, public cp_cfm_schur_product(matrix_a, matrix_b, matrix_c)
Computes the element-wise (Schur) product of two matrices: C = A \circ B .
subroutine, public cp_cfm_column_scale(matrix_a, scaling)
Scales columns of the full matrix by corresponding factors.
subroutine, public cp_cfm_rot_cols(matrix, icol, jcol, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
subroutine, public cp_cfm_trace(matrix_a, matrix_b, trace)
Returns the trace of matrix_a^T matrix_b, i.e sum_{i,j}(matrix_a(i,j)*matrix_b(i,j)) .
used for collecting diagonalization schemes available for cp_cfm_type
Definition: cp_cfm_diag.F:14
subroutine, public cp_cfm_heevd(matrix, eigenvectors, eigenvalues)
Perform a diagonalisation of a complex matrix.
Definition: cp_cfm_diag.F:52
Represents a complex full matrix distributed on many processors.
Definition: cp_cfm_types.F:12
subroutine, public cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
Extract a sub-matrix from the full matrix: op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n...
Definition: cp_cfm_types.F:333
subroutine, public cp_cfm_get_element(matrix, irow_global, icol_global, alpha)
Get the matrix element by its global index.
Definition: cp_cfm_types.F:232
subroutine, public cp_cfm_create(matrix, matrix_struct, name)
Creates a new full matrix with the given structure.
Definition: cp_cfm_types.F:121
subroutine, public cp_cfm_release(matrix)
Releases a full matrix.
Definition: cp_cfm_types.F:159
subroutine, public cp_fm_to_cfm(msourcer, msourcei, mtarget)
Construct a complex full matrix by taking its real and imaginary parts from two separate real-value f...
Definition: cp_cfm_types.F:817
subroutine, public cp_cfm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, matrix_struct, para_env)
Returns information about a full matrix.
Definition: cp_cfm_types.F:607
subroutine, public cp_cfm_set_submatrix(matrix, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
Set a sub-matrix of the full matrix: matrix(start_row:start_row+n_rows,start_col:start_col+n_cols) = ...
Definition: cp_cfm_types.F:470
subroutine, public cp_cfm_set_all(matrix, alpha, beta)
Set all elements of the full matrix to alpha. Besides, set all diagonal matrix elements to beta (if g...
Definition: cp_cfm_types.F:179
subroutine, public cp_cfm_to_fm(msource, mtargetr, mtargeti)
Copy real and imaginary parts of a complex full matrix into separate real-value full matrices.
Definition: cp_cfm_types.F:765
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_sm_fm_multiply(matrix, fm_in, fm_out, ncol, alpha, beta)
multiply a dbcsr with a fm matrix
Routines to handle the external control of CP2K.
subroutine, public external_control(should_stop, flag, globenv, target_time, start_time, force_check)
External manipulations during a run : when the <PROJECT_NAME>.EXIT_$runtype command is sent the progr...
basic linear algebra operations for full matrices
subroutine, public cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
compute a QR factorization with column pivoting of a M-by-N distributed matrix sub( A ) = A(IA:IA+M-1...
real(kind=dp) function, public cp_fm_frobenius_norm(matrix_a)
computes the Frobenius norm of matrix_a
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_scale(alpha, matrix_a)
scales a matrix matrix_a = alpha * matrix_b
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
subroutine, public cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1) with orthonormal column...
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_decompose(matrix, n, info_out)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
Definition: cp_fm_diag.F:17
subroutine, public cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
Computes all eigenvalues and vectors of a real symmetric matrix significantly faster than syevx,...
Definition: cp_fm_diag.F:413
represent the structure of a full matrix
Definition: cp_fm_struct.F:14
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
Definition: cp_fm_struct.F:125
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
Definition: cp_fm_struct.F:409
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
Definition: cp_fm_struct.F:320
represent a full matrix distributed on many processors
Definition: cp_fm_types.F:15
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
Definition: cp_fm_types.F:1016
subroutine, public cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
returns an element of a fm this value is valid on every cpu using this call is expensive
Definition: cp_fm_types.F:643
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
Definition: cp_fm_types.F:768
subroutine, public cp_fm_maxabsrownorm(matrix, a_max)
find the maximum over the rows of the sum of the absolute values of the elements of a given row = || ...
Definition: cp_fm_types.F:1166
subroutine, public cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
copy just a part ot the matrix
Definition: cp_fm_types.F:1473
subroutine, public cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
find the maximum absolute value of the matrix element maxval(abs(matrix))
Definition: cp_fm_types.F:1064
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
Definition: cp_fm_types.F:535
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
Definition: cp_fm_types.F:901
subroutine, public cp_fm_init_random(matrix, ncol, start_col)
fills a matrix with random numbers
Definition: cp_fm_types.F:452
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
Definition: cp_fm_types.F:167
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Definition: kahan_sum.F:29
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public dp
Definition: kinds.F:34
Machine interface based on Fortran 2003 and POSIX.
Definition: machine.F:17
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition: machine.F:106
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition: machine.F:123
Definition of mathematical constants and functions.
Definition: mathconstants.F:16
real(kind=dp), parameter, public pi
real(kind=dp), parameter, public twopi
Routines for calculating a complex matrix exponential.
Definition: matrix_exp.F:13
subroutine, public get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
optimization function for pade/taylor order and number of squaring steps
Definition: matrix_exp.F:244
subroutine, public exp_pade_real(exp_H, matrix, nsquare, npade)
exponential of a real matrix, calculated using pade approximation together with scaling and squaring
Definition: matrix_exp.F:570
Interface to the message passing library MPI.
basic linear algebra operations for full matrixes
Localization methods such as 2x2 Jacobi rotations Steepest Decents Conjugate Gradient.
subroutine, public rotate_orbitals(rmat, vectors)
...
subroutine, public jacobi_rotations(weights, zij, vectors, para_env, max_iter, eps_localization, sweeps, out_each, target_time, start_time, restricted)
wrapper for the jacobi routines, should be removed if jacobi_rot_para can deal with serial para_envs.
subroutine, public zij_matrix(vectors, op_sm_set, zij_fm_set)
...
subroutine, public direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
use the exponential parametrization as described in to perform a direct mini Gerd Berghold et al....
subroutine, public initialize_weights(cell, weights)
...
subroutine, public crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, eps_localization, iterations, converged)
yet another crazy try, computes the angles needed to rotate the orbitals first and rotates them all a...
subroutine, public approx_l1_norm_sd(C, iterations, eps, converged, sweeps)
...
subroutine, public scdm_qrfact(vectors)
...
subroutine, public jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
combine jacobi rotations (serial) and conjugate gradient with golden section line search for partiall...
Exchange and Correlation functional calculations.
Definition: xc.F:17