67#include "./base/base_uses.f90"
74 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'qs_localization_methods'
79 COMPLEX(KIND=dp),
POINTER,
DIMENSION(:) :: c_array => null()
83 COMPLEX(KIND=dp),
POINTER,
DIMENSION(:, :) :: c_array => null()
97 INTEGER,
INTENT(IN) :: iterations
98 REAL(kind=
dp),
INTENT(IN) :: eps
99 LOGICAL,
INTENT(INOUT) :: converged
100 INTEGER,
INTENT(INOUT) :: sweeps
102 CHARACTER(len=*),
PARAMETER :: routinen =
'approx_l1_norm_sd'
103 INTEGER,
PARAMETER :: taylor_order = 100
104 REAL(kind=
dp),
PARAMETER :: alpha = 0.1_dp, f2_eps = 0.01_dp
106 INTEGER :: handle, i, istep, k, n, ncol_local, &
107 nrow_local, output_unit, p
108 REAL(kind=
dp) :: expfactor, f2, f2old, gnorm, tnorm
114 CALL timeset(routinen, handle)
116 NULLIFY (context, para_env, fm_struct_k_k)
121 nrow_local=nrow_local, ncol_local=ncol_local, &
122 para_env=para_env, context=context)
124 nrow_global=k, ncol_global=k)
132 IF (output_unit > 0)
THEN
133 WRITE (output_unit,
'(1X)')
134 WRITE (output_unit,
'(2X,A)')
'-----------------------------------------------------------------------------'
135 WRITE (output_unit,
'(A,I5)')
' Nbr iterations =', iterations
136 WRITE (output_unit,
'(A,E10.2)')
' eps convergence =', eps
137 WRITE (output_unit,
'(A,I5)')
' Max Taylor order =', taylor_order
138 WRITE (output_unit,
'(A,E10.2)')
' f2 eps =', f2_eps
139 WRITE (output_unit,
'(A,E10.2)')
' alpha =', alpha
140 WRITE (output_unit,
'(A)')
' iteration approx_l1_norm g_norm rel_err'
147 DO istep = 1, iterations
155 f2 = f2 + sqrt(c%local_data(i, p)**2 + f2_eps)
158 CALL c%matrix_struct%para_env%sum(f2)
165 ctmp%local_data(i, p) = c%local_data(i, p)/sqrt(c%local_data(i, p)**2 + f2_eps)
168 CALL parallel_gemm(
'T',
'N', k, k, n, 1.0_dp, ctmp, c, 0.0_dp, g)
189 IF (tnorm .GT. 1.0e-10_dp)
THEN
192 DO i = 2, taylor_order
194 CALL parallel_gemm(
'N',
'N', k, k, k, 1.0_dp, g, gp1, 0.0_dp, gp2)
197 expfactor = expfactor/real(i, kind=
dp)
201 IF (tnorm*expfactor .LT. 1.0e-10_dp)
EXIT
206 CALL parallel_gemm(
'N',
'N', n, k, k, 1.0_dp, c, u, 0.0_dp, ctmp)
210 IF (output_unit .GT. 0)
THEN
211 WRITE (output_unit,
'(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, abs((f2 - f2old)/f2)
217 IF (abs((f2 - f2old)/f2) .LE. eps .AND. istep .GT. 1)
THEN
227 IF (output_unit .GT. 0)
WRITE (output_unit,
'(A,E16.10)')
' sparseness function f2 = ', f2
253 CALL timestop(handle)
264 REAL(kind=
dp),
DIMENSION(:) :: weights
266 REAL(kind=
dp),
DIMENSION(3, 3) :: metric
268 cpassert(
ASSOCIATED(cell))
271 CALL dgemm(
'T',
'N', 3, 3, 3, 1._dp, cell%hmat(:, :), 3, cell%hmat(:, :), 3, 0.0_dp, metric(:, :), 3)
273 weights(1) = metric(1, 1) - metric(1, 2) - metric(1, 3)
274 weights(2) = metric(2, 2) - metric(1, 2) - metric(2, 3)
275 weights(3) = metric(3, 3) - metric(1, 3) - metric(2, 3)
276 weights(4) = metric(1, 2)
277 weights(5) = metric(1, 3)
278 weights(6) = metric(2, 3)
300 eps_localization, sweeps, out_each, target_time, start_time, restricted)
302 REAL(kind=
dp),
INTENT(IN) :: weights(:)
303 TYPE(
cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
305 INTEGER,
INTENT(IN) :: max_iter
306 REAL(kind=
dp),
INTENT(IN) :: eps_localization
308 INTEGER,
INTENT(IN) :: out_each
309 REAL(
dp) :: target_time, start_time
310 INTEGER :: restricted
312 IF (para_env%num_pe == 1)
THEN
313 CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, &
314 sweeps, out_each, restricted=restricted)
316 CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
317 sweeps, out_each, target_time, start_time, restricted=restricted)
334 SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, &
335 out_each, restricted)
336 REAL(kind=
dp),
INTENT(IN) :: weights(:)
337 TYPE(
cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
338 INTEGER,
INTENT(IN) :: max_iter
339 REAL(kind=
dp),
INTENT(IN) :: eps_localization
341 INTEGER,
INTENT(IN) :: out_each
342 INTEGER :: restricted
344 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_rotations_serial'
346 COMPLEX(KIND=dp),
POINTER :: mii(:), mij(:), mjj(:)
347 INTEGER :: dim2, handle, idim, istate, jstate, &
349 REAL(kind=
dp) :: ct, st, t1, t2, theta, tolerance
351 TYPE(
cp_cfm_type),
ALLOCATABLE,
DIMENSION(:) :: c_zij
354 CALL timeset(routinen, handle)
357 ALLOCATE (c_zij(dim2))
358 NULLIFY (mii, mij, mjj)
359 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
368 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
369 zij(2, idim)%local_data,
dp)
373 tolerance = 1.0e10_dp
377 IF (rmat%matrix_struct%para_env%is_source())
THEN
379 WRITE (unit_nr,
'(T4,A )')
" Localization by iterative Jacobi rotation"
382 IF (restricted > 0)
THEN
384 WRITE (unit_nr,
'(T4,A,I2,A )')
"JACOBI: for the ROKS method, the last ", restricted,
" orbitals DO NOT ROTATE"
385 nstate = nstate - restricted
389 DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
393 DO istate = 1, nstate
394 DO jstate = istate + 1, nstate
400 CALL get_angle(mii, mjj, mij, weights, theta)
403 CALL rotate_zij(istate, jstate, st, ct, c_zij)
405 CALL rotate_rmat(istate, jstate, st, ct, c_rmat)
409 CALL check_tolerance(c_zij, weights, tolerance)
412 IF (unit_nr > 0 .AND.
modulo(sweeps, out_each) == 0)
THEN
413 WRITE (unit_nr,
'(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') &
414 "Iteration:", sweeps,
"Tolerance:", tolerance,
"Time:", t2 - t1
421 zij(1, idim)%local_data = real(c_zij(idim)%local_data,
dp)
422 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
426 DEALLOCATE (mii, mij, mjj)
427 rmat%local_data = real(c_rmat%local_data,
dp)
432 CALL timestop(handle)
434 END SUBROUTINE jacobi_rotations_serial
448 SUBROUTINE jacobi_rotations_serial_1(weights, c_zij, max_iter, c_rmat, eps_localization, &
449 tol_out, jsweeps, out_each, c_zij_out, grad_final)
450 REAL(kind=
dp),
INTENT(IN) :: weights(:)
452 INTEGER,
INTENT(IN) :: max_iter
454 REAL(kind=
dp),
INTENT(IN),
OPTIONAL :: eps_localization
455 REAL(kind=
dp),
INTENT(OUT),
OPTIONAL :: tol_out
456 INTEGER,
INTENT(OUT),
OPTIONAL :: jsweeps
457 INTEGER,
INTENT(IN),
OPTIONAL :: out_each
458 TYPE(
cp_cfm_type),
INTENT(IN),
OPTIONAL :: c_zij_out(:)
459 TYPE(
cp_fm_type),
INTENT(OUT),
OPTIONAL,
POINTER :: grad_final
461 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_rotations_serial_1'
463 COMPLEX(KIND=dp) :: mzii
464 COMPLEX(KIND=dp),
POINTER :: mii(:), mij(:), mjj(:)
465 INTEGER :: dim2, handle, idim, istate, jstate, &
466 nstate, sweeps, unit_nr
467 REAL(kind=
dp) :: alpha, avg_spread_ii, ct, spread_ii, st, &
468 sum_spread_ii, t1, t2, theta, tolerance
472 CALL timeset(routinen, handle)
475 NULLIFY (mii, mij, mjj)
476 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
478 ALLOCATE (c_zij_local(dim2))
480 CALL cp_cfm_set_all(c_rmat_local, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
482 CALL cp_cfm_create(c_zij_local(idim), c_zij(idim)%matrix_struct)
483 c_zij_local(idim)%local_data = c_zij(idim)%local_data
487 tolerance = 1.0e10_dp
489 IF (
PRESENT(grad_final))
CALL cp_fm_set_all(grad_final, 0.0_dp)
492 IF (
PRESENT(out_each))
THEN
494 IF (c_rmat_local%matrix_struct%para_env%is_source())
THEN
499 alpha = alpha + weights(idim)
504 DO WHILE (sweeps < max_iter)
506 IF (
PRESENT(eps_localization))
THEN
507 IF (tolerance < eps_localization)
EXIT
511 DO istate = 1, nstate
512 DO jstate = istate + 1, nstate
518 CALL get_angle(mii, mjj, mij, weights, theta)
521 CALL rotate_zij(istate, jstate, st, ct, c_zij_local)
523 CALL rotate_rmat(istate, jstate, st, ct, c_rmat_local)
527 IF (
PRESENT(grad_final))
THEN
528 CALL check_tolerance(c_zij_local, weights, tolerance, grad=grad_final)
530 CALL check_tolerance(c_zij_local, weights, tolerance)
532 IF (
PRESENT(tol_out)) tol_out = tolerance
534 IF (
PRESENT(out_each))
THEN
536 IF (unit_nr > 0 .AND.
modulo(sweeps, out_each) == 0)
THEN
537 sum_spread_ii = 0.0_dp
538 DO istate = 1, nstate
542 spread_ii = spread_ii + weights(idim)* &
545 sum_spread_ii = sum_spread_ii + spread_ii
547 sum_spread_ii = alpha*nstate/
twopi/
twopi - sum_spread_ii
548 avg_spread_ii = sum_spread_ii/nstate
549 WRITE (unit_nr,
'(T4,A,T26,A,T48,A,T64,A)') &
550 "Iteration",
"Avg. Spread_ii",
"Tolerance",
"Time"
551 WRITE (unit_nr,
'(T4,I7,T20,F20.10,T45,E12.4,T60,F8.3)') &
552 sweeps, avg_spread_ii, tolerance, t2 - t1
555 IF (
PRESENT(jsweeps)) jsweeps = sweeps
560 IF (
PRESENT(c_zij_out))
THEN
567 DEALLOCATE (mii, mij, mjj)
571 DEALLOCATE (c_zij_local)
574 CALL timestop(handle)
576 END SUBROUTINE jacobi_rotations_serial_1
595 iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
597 REAL(kind=
dp),
INTENT(IN) :: weights(:)
598 TYPE(
cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
599 INTEGER,
INTENT(IN) :: max_iter
600 REAL(kind=
dp),
INTENT(IN) :: eps_localization
602 INTEGER,
INTENT(IN) :: out_each, nextra
603 LOGICAL,
INTENT(IN) :: do_cg
604 INTEGER,
INTENT(IN),
OPTIONAL :: nmo
605 TYPE(
cp_fm_type),
INTENT(IN),
OPTIONAL :: vectors_2, mos_guess
607 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_cg_edf_ls'
608 COMPLEX(KIND=dp),
PARAMETER :: cone = (1.0_dp, 0.0_dp), &
609 czero = (0.0_dp, 0.0_dp)
610 REAL(kind=
dp),
PARAMETER :: gold_sec = 0.3819_dp
612 COMPLEX(KIND=dp) :: cnorm2_gct, cnorm2_gct_cross, mzii
613 COMPLEX(KIND=dp),
ALLOCATABLE,
DIMENSION(:, :) :: tmp_cmat
614 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER :: arr_zii
615 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: matrix_zii
616 INTEGER :: dim2, handle, icinit, idim, istate, line_search_count, line_searches, lsl, lsm, &
617 lsr, miniter, nao, ndummy, nocc, norextra, northo, nstate, unit_nr
618 INTEGER,
DIMENSION(1) :: iloc
619 LOGICAL :: do_cinit_mo, do_cinit_random, &
620 do_u_guess_mo, new_direction
621 REAL(kind=
dp) :: alpha, avg_spread_ii, beta, beta_pr, ds, ds_min, mintol, norm, norm2_gct, &
622 norm2_gct_cross, norm2_old, spread_ii, spread_sum, sum_spread_ii, t1, tol, tolc, weight
623 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: sum_spread
624 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: tmp_mat, tmp_mat_1
625 REAL(kind=
dp),
DIMENSION(50) :: energy, pos
626 REAL(kind=
dp),
DIMENSION(:),
POINTER :: tmp_arr
628 TYPE(
cp_cfm_type) :: c_tilde, ctrans_lambda, gct_old, &
629 grad_ctilde, skc, tmp_cfm, tmp_cfm_1, &
630 tmp_cfm_2, u, ul, v, vl, zdiag
631 TYPE(
cp_cfm_type),
ALLOCATABLE,
DIMENSION(:) :: c_zij, zij_0
633 TYPE(
cp_fm_type) :: id_nextra, matrix_u, matrix_v, &
634 matrix_v_all, rmat, tmp_fm, vectors_all
636 CALL timeset(routinen, handle)
640 NULLIFY (matrix_zii, arr_zii)
641 NULLIFY (tmp_fm_struct)
644 ALLOCATE (c_zij(dim2))
648 ALLOCATE (sum_spread(nstate))
649 ALLOCATE (matrix_zii(nstate, dim2))
655 alpha = alpha + weights(idim)
657 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
658 zij(2, idim)%local_data,
dp)
661 ALLOCATE (zij_0(dim2))
671 IF (
PRESENT(mos_guess))
THEN
672 do_cinit_random = .false.
676 do_cinit_random = .true.
677 do_cinit_mo = .false.
681 IF (do_cinit_random)
THEN
683 do_u_guess_mo = .false.
684 ELSEIF (do_cinit_mo)
THEN
686 do_u_guess_mo = .true.
689 nocc = nstate - nextra
691 norextra = nmo - nstate
694 ALLOCATE (tmp_cmat(nstate, nstate))
696 para_env=para_env, context=context)
704 DEALLOCATE (tmp_cmat)
707 para_env=para_env, context=context)
717 para_env=para_env, context=context)
722 ALLOCATE (arr_zii(nstate))
725 para_env=para_env, context=context)
736 para_env=para_env, context=context)
742 para_env=para_env, context=context)
750 para_env=para_env, context=context)
756 para_env=para_env, context=context)
759 ALLOCATE (tmp_mat(nao, nstate))
763 ALLOCATE (tmp_mat(nao, norextra))
774 CALL ortho_vectors(tmp_fm)
775 c_tilde%local_data = tmp_fm%local_data
777 ALLOCATE (tmp_cmat(northo, nextra))
780 DEALLOCATE (tmp_cmat)
782 CALL parallel_gemm(
"T",
"N", nmo, ndummy, nao, 1.0_dp, vectors_all, mos_guess, 0.0_dp, matrix_v_all)
783 ALLOCATE (tmp_arr(nmo))
784 ALLOCATE (tmp_mat(nmo, ndummy))
785 ALLOCATE (tmp_mat_1(nmo, nstate))
788 DO istate = 1, ndummy
789 tmp_arr(:) = tmp_mat(:, istate)
790 norm = norm2(tmp_arr)
791 tmp_arr(:) = tmp_arr(:)/norm
792 tmp_mat(:, istate) = tmp_arr(:)
797 DEALLOCATE (tmp_arr, tmp_mat, tmp_mat_1)
799 ALLOCATE (tmp_mat(northo, ndummy))
800 ALLOCATE (tmp_mat_1(northo, nextra))
802 ALLOCATE (tmp_arr(ndummy))
804 DO istate = 1, ndummy
805 tmp_arr(istate) = norm2(tmp_mat(:, istate))
808 DO istate = 1, nextra
809 iloc = maxloc(tmp_arr)
810 tmp_mat_1(:, istate) = tmp_mat(:, iloc(1))
811 tmp_arr(iloc(1)) = 0.0_dp
814 DEALLOCATE (tmp_arr, tmp_mat)
817 para_env=para_env, context=context)
821 DEALLOCATE (tmp_mat_1)
822 CALL ortho_vectors(tmp_fm)
826 IF (do_u_guess_mo)
THEN
827 ALLOCATE (tmp_cmat(nocc, nstate))
830 DEALLOCATE (tmp_cmat)
831 ALLOCATE (tmp_cmat(northo, nstate))
834 DEALLOCATE (tmp_cmat)
835 CALL parallel_gemm(
"C",
"N", nextra, nstate, northo, cone, c_tilde, vl, czero, ul)
836 ALLOCATE (tmp_cmat(nextra, nstate))
839 DEALLOCATE (tmp_cmat)
841 tmp_fm%local_data = real(u%local_data, kind=
dp)
842 CALL ortho_vectors(tmp_fm)
848 ALLOCATE (tmp_cmat(nocc, nstate))
851 DEALLOCATE (tmp_cmat)
852 ALLOCATE (tmp_cmat(nextra, nstate))
855 DEALLOCATE (tmp_cmat)
856 CALL parallel_gemm(
"N",
"N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
857 ALLOCATE (tmp_cmat(northo, nstate))
860 DEALLOCATE (tmp_cmat)
872 IF (rmat%matrix_struct%para_env%is_source())
THEN
874 WRITE (unit_nr,
'(T4,A )')
" Localization by combined Jacobi rotations and Non-Linear Conjugate Gradient"
877 norm2_old = 1.0e30_dp
879 new_direction = .true.
882 line_search_count = 0
891 DO WHILE (iter < max_iter)
900 IF (para_env%num_pe == 1)
THEN
901 CALL jacobi_rotations_serial_1(weights, c_zij, 1, tmp_cfm_2, tol_out=tol)
903 CALL jacobi_rot_para_1(weights, c_zij, para_env, 1, tmp_cfm_2, tol_out=tol)
905 CALL parallel_gemm(
'N',
'N', nstate, nstate, nstate, cone, u, tmp_cfm_2, czero, tmp_cfm)
912 ALLOCATE (tmp_cmat(nextra, nstate))
915 DEALLOCATE (tmp_cmat)
919 tmp_fm%local_data = real(c_tilde%local_data, kind=
dp)
920 CALL ortho_vectors(tmp_fm)
924 ALLOCATE (tmp_cmat(nocc, nstate))
927 DEALLOCATE (tmp_cmat)
928 CALL parallel_gemm(
"N",
"N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
929 ALLOCATE (tmp_cmat(northo, nstate))
932 DEALLOCATE (tmp_cmat)
936 IF (new_direction .AND. mod(line_searches, 20) .EQ. 5)
THEN
939 norm2_old = 1.0e30_dp
955 CALL parallel_gemm(
"N",
"N", ndummy, nstate, ndummy, cone, zij_0(idim), &
956 tmp_cfm, czero, tmp_cfm_1)
958 CALL parallel_gemm(
"C",
"N", nstate, nstate, ndummy, cone, tmp_cfm, tmp_cfm_1, &
964 DO istate = 1, nstate
968 spread_ii = spread_ii + weights(idim)* &
970 matrix_zii(istate, idim) = mzii
973 sum_spread(istate) = spread_ii
975 CALL c_zij(1)%matrix_struct%para_env%sum(spread_ii)
986 ALLOCATE (tmp_cmat(northo, nstate))
988 weight = weights(idim)
989 arr_zii = matrix_zii(:, idim)
992 zij_0(idim), v, czero, tmp_cfm)
996 CALL parallel_gemm(
"N",
"C", nmo, nstate, nstate, cone, tmp_cfm, &
1004 zij_0(idim), v, czero, tmp_cfm)
1009 CALL parallel_gemm(
"N",
"C", nmo, nstate, nstate, cone, tmp_cfm, &
1010 u, czero, tmp_cfm_1)
1017 DEALLOCATE (tmp_cmat)
1018 ALLOCATE (tmp_cmat(northo, nextra))
1020 northo, nextra, .false.)
1023 DEALLOCATE (tmp_cmat)
1025 CALL parallel_gemm(
"C",
"N", nextra, nextra, northo, cone, c_tilde, grad_ctilde, czero, ctrans_lambda)
1028 CALL parallel_gemm(
"N",
"N", northo, nextra, nextra, -cone, c_tilde, ctrans_lambda, cone, grad_ctilde)
1032 IF (nextra > 0)
THEN
1043 IF (nextra > 0)
THEN
1045 IF (new_direction)
THEN
1046 line_searches = line_searches + 1
1047 IF (mintol > tol)
THEN
1052 IF (unit_nr > 0 .AND.
modulo(iter, out_each) == 0)
THEN
1053 sum_spread_ii = alpha*nstate/
twopi/
twopi - spread_sum
1054 avg_spread_ii = sum_spread_ii/nstate
1055 WRITE (unit_nr,
'(T4,A,T26,A,T48,A)') &
1056 "Iteration",
"Avg. Spread_ii",
"Tolerance"
1057 WRITE (unit_nr,
'(T4,I7,T20,F20.10,T45,E12.4)') &
1058 iter, avg_spread_ii, tol
1061 IF (tol < eps_localization)
EXIT
1065 cnorm2_gct_cross = czero
1066 CALL cp_cfm_trace(grad_ctilde, gct_old, cnorm2_gct_cross)
1067 norm2_gct_cross = real(cnorm2_gct_cross, kind=
dp)
1068 gct_old%local_data = grad_ctilde%local_data
1070 norm2_gct = real(cnorm2_gct, kind=
dp)
1072 beta_pr = (norm2_gct - norm2_gct_cross)/norm2_old
1073 norm2_old = norm2_gct
1074 beta = max(0.0_dp, beta_pr)
1080 norm2_gct_cross = real(cnorm2_gct_cross, kind=
dp)
1081 IF (norm2_gct_cross .LE. 0.0_dp)
THEN
1087 line_search_count = 0
1090 line_search_count = line_search_count + 1
1092 energy(line_search_count) = spread_sum
1095 new_direction = .false.
1096 IF (line_search_count .EQ. 1)
THEN
1101 pos(2) = ds_min/gold_sec
1104 IF (line_search_count .EQ. 50)
THEN
1105 IF (abs(energy(line_search_count) - energy(line_search_count - 1)) < 1.0e-4_dp)
THEN
1106 cpwarn(
"Line search failed to converge properly")
1108 new_direction = .true.
1109 ds = pos(line_search_count)
1110 line_search_count = 0
1112 cpabort(
"No. of line searches exceeds 50")
1115 IF (lsr .EQ. 0)
THEN
1116 IF (energy(line_search_count - 1) .GT. energy(line_search_count))
THEN
1117 lsr = line_search_count
1118 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
1121 lsm = line_search_count
1122 pos(line_search_count + 1) = pos(line_search_count)/gold_sec
1125 IF (pos(line_search_count) .LT. pos(lsm))
THEN
1126 IF (energy(line_search_count) .GT. energy(lsm))
THEN
1128 lsm = line_search_count
1130 lsl = line_search_count
1133 IF (energy(line_search_count) .GT. energy(lsm))
THEN
1135 lsm = line_search_count
1137 lsr = line_search_count
1140 IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl))
THEN
1141 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
1143 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
1145 IF ((pos(lsr) - pos(lsl)) .LT. 1.0e-3_dp*pos(lsr))
THEN
1146 new_direction = .true.
1151 ds = pos(line_search_count + 1) - pos(line_search_count)
1153 IF ((abs(ds) < 1.0e-10_dp) .AND. (lsl == 1))
THEN
1154 new_direction = .true.
1155 ds_min = 0.5_dp/alpha
1156 ELSEIF (abs(ds) > 10.0_dp)
THEN
1157 new_direction = .true.
1158 ds_min = 0.5_dp/alpha
1160 ds_min = pos(line_search_count + 1)
1167 IF (mintol > tol)
THEN
1171 IF (unit_nr > 0 .AND.
modulo(iter, out_each) == 0)
THEN
1172 sum_spread_ii = alpha*nstate/
twopi/
twopi - spread_sum
1173 avg_spread_ii = sum_spread_ii/nstate
1174 WRITE (unit_nr,
'(T4,A,T26,A,T48,A)') &
1175 "Iteration",
"Avg. Spread_ii",
"Tolerance"
1176 WRITE (unit_nr,
'(T4,I7,T20,F20.10,T45,E12.4)') &
1177 iter, avg_spread_ii, tol
1180 IF (tol < eps_localization)
EXIT
1185 IF ((unit_nr > 0) .AND. (iter == max_iter))
THEN
1186 WRITE (unit_nr,
'(T4,A,T4,A)')
"Min. Itr.",
"Min. Tol."
1187 WRITE (unit_nr,
'(T4,I7,T4,E12.4)') miniter, mintol
1193 IF (nextra > 0)
THEN
1194 rmat%local_data = real(v%local_data, kind=
dp)
1195 CALL rotate_orbitals_edf(rmat, vectors_all, vectors)
1210 DEALLOCATE (arr_zii)
1212 rmat%local_data = matrix_u%local_data
1221 zij(1, idim)%local_data = real(c_zij(idim)%local_data,
dp)
1222 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
1229 DEALLOCATE (matrix_zii, sum_spread)
1231 CALL timestop(handle)
1239 SUBROUTINE ortho_vectors(vmatrix)
1243 CHARACTER(LEN=*),
PARAMETER :: routinen =
'ortho_vectors'
1245 INTEGER :: handle, n, ncol
1249 CALL timeset(routinen, handle)
1251 NULLIFY (fm_struct_tmp)
1253 CALL cp_fm_get_info(matrix=vmatrix, nrow_global=n, ncol_global=ncol)
1256 para_env=vmatrix%matrix_struct%para_env, &
1257 context=vmatrix%matrix_struct%context)
1258 CALL cp_fm_create(overlap_vv, fm_struct_tmp,
"overlap_vv")
1261 CALL parallel_gemm(
'T',
'N', ncol, ncol, n, 1.0_dp, vmatrix, vmatrix, 0.0_dp, overlap_vv)
1267 CALL timestop(handle)
1269 END SUBROUTINE ortho_vectors
1279 SUBROUTINE rotate_zij(istate, jstate, st, ct, zij)
1280 INTEGER,
INTENT(IN) :: istate, jstate
1281 REAL(kind=
dp),
INTENT(IN) :: st, ct
1288 DO id = 1,
SIZE(zij, 1)
1293 END SUBROUTINE rotate_zij
1302 SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat)
1303 INTEGER,
INTENT(IN) :: istate, jstate
1304 REAL(kind=
dp),
INTENT(IN) :: st, ct
1309 END SUBROUTINE rotate_rmat
1320 SUBROUTINE get_angle(mii, mjj, mij, weights, theta, grad_ij, step)
1321 COMPLEX(KIND=dp),
POINTER :: mii(:), mjj(:), mij(:)
1322 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1323 REAL(kind=
dp),
INTENT(OUT) :: theta
1324 REAL(kind=
dp),
INTENT(IN),
OPTIONAL :: grad_ij, step
1326 COMPLEX(KIND=dp) :: z11, z12, z22
1327 INTEGER :: dim_m, idim
1328 REAL(kind=
dp) :: a12, b12, d2, ratio
1337 a12 = a12 + weights(idim)*real(conjg(z12)*(z11 - z22), kind=
dp)
1338 b12 = b12 + weights(idim)*real((z12*conjg(z12) - &
1339 0.25_dp*(z11 - z22)*(conjg(z11) - conjg(z22))), kind=
dp)
1341 IF (abs(b12) > 1.e-10_dp)
THEN
1343 theta = 0.25_dp*atan(ratio)
1344 ELSEIF (abs(b12) < 1.e-10_dp)
THEN
1350 IF (
PRESENT(grad_ij)) theta = theta + step*grad_ij
1352 d2 = a12*sin(4._dp*theta) - b12*cos(4._dp*theta)
1353 IF (d2 <= 0._dp)
THEN
1354 IF (theta > 0.0_dp)
THEN
1355 theta = theta - 0.25_dp*
pi
1357 theta = theta + 0.25_dp*
pi
1360 END SUBROUTINE get_angle
1368 SUBROUTINE check_tolerance(zij, weights, tolerance, grad)
1370 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1371 REAL(kind=
dp),
INTENT(OUT) :: tolerance
1372 TYPE(
cp_fm_type),
INTENT(OUT),
OPTIONAL :: grad
1374 CHARACTER(len=*),
PARAMETER :: routinen =
'check_tolerance'
1379 CALL timeset(routinen, handle)
1385 CALL grad_at_0(zij, weights, force)
1390 CALL timestop(handle)
1392 END SUBROUTINE check_tolerance
1399 TYPE(
cp_fm_type),
INTENT(IN) :: rmat, vectors
1406 CALL parallel_gemm(
"N",
"N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf)
1416 SUBROUTINE rotate_orbitals_edf(rmat, vec_all, vectors)
1417 TYPE(
cp_fm_type),
INTENT(IN) :: rmat, vec_all, vectors
1426 CALL parallel_gemm(
"N",
"N", n, l, k, 1.0_dp, vec_all, rmat, 0.0_dp, wf)
1429 END SUBROUTINE rotate_orbitals_edf
1437 SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim)
1438 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag
1439 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1441 INTEGER,
INTENT(IN) :: ndim
1443 COMPLEX(KIND=dp) :: zii, zjj
1444 INTEGER :: idim, istate, jstate, ncol_local, &
1445 nrow_global, nrow_local
1446 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1447 REAL(kind=
dp) :: gradsq_ij
1450 ncol_local=ncol_local, nrow_global=nrow_global, &
1451 row_indices=row_indices, col_indices=col_indices)
1453 DO istate = 1, nrow_local
1454 DO jstate = 1, ncol_local
1458 zii = diag(row_indices(istate), idim)
1459 zjj = diag(col_indices(jstate), idim)
1460 gradsq_ij = gradsq_ij + weights(idim)* &
1461 4.0_dp*real((conjg(zii)*zii + conjg(zjj)*zjj), kind=
dp)
1463 matrix%local_data(istate, jstate) = gradsq_ij
1466 END SUBROUTINE gradsq_at_0
1473 SUBROUTINE grad_at_0(matrix_p, weights, matrix)
1475 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1478 COMPLEX(KIND=dp) :: zii, zij, zjj
1479 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag
1480 INTEGER :: dim_m, idim, istate, jstate, ncol_local, &
1481 nrow_global, nrow_local
1482 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1483 REAL(kind=
dp) :: grad_ij
1487 ncol_local=ncol_local, nrow_global=nrow_global, &
1488 row_indices=row_indices, col_indices=col_indices)
1489 dim_m =
SIZE(matrix_p, 1)
1490 ALLOCATE (diag(nrow_global, dim_m))
1493 DO istate = 1, nrow_global
1498 DO istate = 1, nrow_local
1499 DO jstate = 1, ncol_local
1503 zii = diag(row_indices(istate), idim)
1504 zjj = diag(col_indices(jstate), idim)
1505 zij = matrix_p(idim)%local_data(istate, jstate)
1506 grad_ij = grad_ij + weights(idim)* &
1507 REAL(4.0_dp*conjg(zij)*(zjj - zii),
dp)
1509 matrix%local_data(istate, jstate) = grad_ij
1513 END SUBROUTINE grad_at_0
1523 SUBROUTINE check_tolerance_new(weights, zij, tolerance, value)
1524 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1526 REAL(kind=
dp) :: tolerance,
value
1528 COMPLEX(KIND=dp) :: kii, kij, kjj
1529 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag
1530 INTEGER :: idim, istate, jstate, ncol_local, &
1531 nrow_global, nrow_local
1532 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1533 REAL(kind=
dp) :: grad_ij, ra, rb
1537 ncol_local=ncol_local, nrow_global=nrow_global, &
1538 row_indices=row_indices, col_indices=col_indices)
1539 ALLOCATE (diag(nrow_global,
SIZE(zij, 2)))
1541 DO idim = 1,
SIZE(zij, 2)
1542 DO istate = 1, nrow_global
1545 diag(istate, idim) = cmplx(ra, rb,
dp)
1546 value =
value + weights(idim) - weights(idim)*abs(diag(istate, idim))**2
1550 DO istate = 1, nrow_local
1551 DO jstate = 1, ncol_local
1553 DO idim = 1,
SIZE(zij, 2)
1554 kii = diag(row_indices(istate), idim)
1555 kjj = diag(col_indices(jstate), idim)
1556 ra = zij(1, idim)%local_data(istate, jstate)
1557 rb = zij(2, idim)%local_data(istate, jstate)
1558 kij = cmplx(ra, rb,
dp)
1559 grad_ij = grad_ij + weights(idim)* &
1560 REAL(4.0_dp*conjg(kij)*(kjj - kii),
dp)
1562 tolerance = max(abs(grad_ij), tolerance)
1565 CALL zij(1, 1)%matrix_struct%para_env%max(tolerance)
1569 END SUBROUTINE check_tolerance_new
1585 SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, &
1586 eps_localization, iterations, converged)
1587 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1588 TYPE(
cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
1589 INTEGER,
INTENT(IN) :: max_iter
1590 REAL(kind=
dp),
INTENT(IN) :: max_crazy_angle
1591 REAL(kind=
dp) :: crazy_scale
1592 LOGICAL :: crazy_use_diag
1593 REAL(kind=
dp),
INTENT(IN) :: eps_localization
1594 INTEGER :: iterations
1595 LOGICAL,
INTENT(out),
OPTIONAL :: converged
1597 CHARACTER(len=*),
PARAMETER :: routinen =
'crazy_rotations'
1598 COMPLEX(KIND=dp),
PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1599 czero = (0.0_dp, 0.0_dp)
1601 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER :: evals_exp
1602 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag_z
1603 COMPLEX(KIND=dp),
POINTER :: mii(:), mij(:), mjj(:)
1604 INTEGER :: dim2, handle, i, icol, idim, irow, &
1605 method, ncol_global, ncol_local, &
1606 norder, nrow_global, nrow_local, &
1608 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1610 REAL(kind=
dp) :: eps_exp, limit_crazy_angle, maxeval, &
1611 norm, ra, rb, theta, tolerance,
value
1612 REAL(kind=
dp),
DIMENSION(:),
POINTER :: evals
1614 TYPE(
cp_fm_type) :: mat_r, mat_t, mat_theta, mat_u
1616 CALL timeset(routinen, handle)
1617 NULLIFY (row_indices, col_indices)
1619 ncol_global=ncol_global, &
1620 row_indices=row_indices, col_indices=col_indices, &
1621 nrow_local=nrow_local, ncol_local=ncol_local)
1623 limit_crazy_angle = max_crazy_angle
1625 NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj)
1627 ALLOCATE (diag_z(nrow_global, dim2))
1628 ALLOCATE (evals(nrow_global))
1629 ALLOCATE (evals_exp(nrow_global))
1643 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
1651 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1652 CALL parallel_gemm(
'T',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1653 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1654 CALL parallel_gemm(
'T',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1657 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1661 IF (cmat_a%matrix_struct%para_env%is_source())
THEN
1663 WRITE (unit_nr,
'(T2,A7,A6,1X,A20,A12,A12,A12)') &
1664 "CRAZY| ",
"Iter",
"value ",
"gradient",
"Max. eval",
"limit"
1671 iterations = iterations + 1
1673 DO i = 1, nrow_global
1676 diag_z(i, idim) = cmplx(ra, rb,
dp)
1679 DO irow = 1, nrow_local
1680 DO icol = 1, ncol_local
1682 ra = zij(1, idim)%local_data(irow, icol)
1683 rb = zij(2, idim)%local_data(irow, icol)
1684 mij(idim) = cmplx(ra, rb,
dp)
1685 mii(idim) = diag_z(row_indices(irow), idim)
1686 mjj(idim) = diag_z(col_indices(icol), idim)
1688 IF (row_indices(irow) .NE. col_indices(icol))
THEN
1689 CALL get_angle(mii, mjj, mij, weights, theta)
1690 theta = crazy_scale*theta
1691 IF (theta .GT. limit_crazy_angle) theta = limit_crazy_angle
1692 IF (theta .LT. -limit_crazy_angle) theta = -limit_crazy_angle
1693 IF (crazy_use_diag)
THEN
1694 cmat_a%local_data(irow, icol) = -cmplx(0.0_dp, theta,
dp)
1696 mat_theta%local_data(irow, icol) = -theta
1699 IF (crazy_use_diag)
THEN
1700 cmat_a%local_data(irow, icol) = czero
1702 mat_theta%local_data(irow, icol) = 0.0_dp
1710 IF (crazy_use_diag)
THEN
1712 maxeval = maxval(abs(evals))
1713 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1716 CALL parallel_gemm(
'N',
'C', nrow_global, nrow_global, nrow_global, cone, &
1717 cmat_t1, cmat_r, czero, cmat_a)
1718 mat_u%local_data = real(cmat_a%local_data, kind=
dp)
1722 eps_exp = 1.0_dp*epsilon(eps_exp)
1731 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1732 CALL parallel_gemm(
'T',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1733 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1734 CALL parallel_gemm(
'T',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1737 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1740 CALL check_tolerance_new(weights, zij, tolerance,
value)
1742 IF (unit_nr > 0)
THEN
1743 WRITE (unit_nr,
'(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') &
1744 "CRAZY| ", iterations,
value, tolerance, maxeval, limit_crazy_angle
1747 IF (tolerance .LT. eps_localization .OR. iterations .GE. max_iter)
EXIT
1750 IF (
PRESENT(converged)) converged = (tolerance .LT. eps_localization)
1763 DEALLOCATE (evals_exp, evals, diag_z)
1764 DEALLOCATE (mii, mij, mjj)
1766 CALL timestop(handle)
1784 SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
1785 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1786 TYPE(
cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
1787 INTEGER,
INTENT(IN) :: max_iter
1788 REAL(kind=
dp),
INTENT(IN) :: eps_localization
1789 INTEGER :: iterations
1791 CHARACTER(len=*),
PARAMETER :: routinen =
'direct_mini'
1792 COMPLEX(KIND=dp),
PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1793 czero = (0.0_dp, 0.0_dp)
1794 REAL(kind=
dp),
PARAMETER :: gold_sec = 0.3819_dp
1796 COMPLEX(KIND=dp) :: lk, ll, tmp
1797 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER :: evals_exp
1798 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag_z
1799 INTEGER :: handle, i, icol, idim, irow, &
1800 line_search_count, line_searches, lsl, &
1801 lsm, lsr, n, ncol_local, ndim, &
1802 nrow_local, output_unit
1803 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1804 LOGICAL :: new_direction
1805 REAL(kind=
dp) :: a, b, beta_pr, c, denom, ds, ds_min, fa, &
1806 fb, fc, nom, normg, normg_cross, &
1807 normg_old, npos, omega, tol, val, x0, &
1809 REAL(kind=
dp),
DIMENSION(150) :: energy, grad, pos
1810 REAL(kind=
dp),
DIMENSION(:),
POINTER :: evals, fval, fvald
1811 TYPE(
cp_cfm_type) :: cmat_a, cmat_b, cmat_m, cmat_r, cmat_t1, &
1813 TYPE(
cp_cfm_type),
ALLOCATABLE,
DIMENSION(:) :: c_zij
1814 TYPE(
cp_fm_type) :: matrix_a, matrix_g, matrix_g_old, &
1815 matrix_g_search, matrix_h, matrix_r, &
1818 NULLIFY (evals, evals_exp, diag_z, fval, fvald)
1820 CALL timeset(routinen, handle)
1823 n = zij(1, 1)%matrix_struct%nrow_global
1824 ndim = (
SIZE(zij, 2))
1826 IF (output_unit > 0)
THEN
1827 WRITE (output_unit,
'(T4,A )')
"Localization by direct minimization of the functional; "
1828 WRITE (output_unit,
'(T5,2A13,A20,A20,A10 )')
" Line search ",
" Iteration ",
" Functional ",
" Tolerance ",
" ds Min "
1831 ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n))
1832 ALLOCATE (c_zij(ndim))
1837 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
1838 zij(2, idim)%local_data,
dp)
1845 CALL cp_fm_create(matrix_g_search, zij(1, 1)%matrix_struct)
1846 CALL cp_fm_create(matrix_g_old, zij(1, 1)%matrix_struct)
1861 CALL cp_cfm_get_info(cmat_b, nrow_local=nrow_local, ncol_local=ncol_local, &
1862 row_indices=row_indices, col_indices=col_indices)
1866 normg_old = 1.0e30_dp
1868 new_direction = .true.
1871 line_search_count = 0
1873 iterations = iterations + 1
1875 cmat_a%local_data = cmplx(0.0_dp, matrix_a%local_data,
dp)
1877 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1880 CALL parallel_gemm(
'N',
'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_u)
1881 cmat_u%local_data = real(cmat_u%local_data, kind=
dp)
1883 IF (new_direction .AND. mod(line_searches, 20) .EQ. 5)
THEN
1885 CALL parallel_gemm(
'N',
'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1)
1886 CALL parallel_gemm(
'C',
'N', n, n, n, cone, cmat_u, cmat_t1, czero, c_zij(idim))
1889 matrix_h%local_data = real(cmat_u%local_data, kind=
dp)
1890 CALL parallel_gemm(
'N',
'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
1898 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1901 normg_old = 1.0e30_dp
1908 CALL parallel_gemm(
'N',
'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1)
1909 CALL parallel_gemm(
'C',
'N', n, n, n, cone, cmat_u, cmat_t1, czero, cmat_t2)
1914 fval(i) = -weights(idim)*log(abs(diag_z(i, idim))**2)
1915 fvald(i) = -weights(idim)/(abs(diag_z(i, idim))**2)
1917 fval(i) = weights(idim) - weights(idim)*abs(diag_z(i, idim))**2
1918 fvald(i) = -weights(idim)
1920 omega = omega + fval(i)
1922 DO icol = 1, ncol_local
1923 DO irow = 1, nrow_local
1924 tmp = cmat_t1%local_data(irow, icol)*conjg(diag_z(col_indices(icol), idim))
1925 cmat_m%local_data(irow, icol) = cmat_m%local_data(irow, icol) &
1926 + 4.0_dp*fvald(col_indices(icol))*real(tmp, kind=
dp)
1933 CALL gradsq_at_0(diag_z, weights, matrix_h, ndim)
1939 DO icol = 1, ncol_local
1940 DO irow = 1, nrow_local
1941 ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow))
1942 lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol))
1943 IF (abs(ll - lk) .LT. 0.5_dp)
THEN
1945 cmat_b%local_data(irow, icol) = 0.0_dp
1947 cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol) + tmp
1948 tmp = tmp*(ll - lk)/(i + 1)
1950 cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol)*exp(lk)
1952 cmat_b%local_data(irow, icol) = (exp(lk) - exp(ll))/(lk - ll)
1958 CALL parallel_gemm(
'C',
'N', n, n, n, cone, cmat_m, cmat_r, czero, cmat_t1)
1959 CALL parallel_gemm(
'C',
'N', n, n, n, cone, cmat_r, cmat_t1, czero, cmat_t2)
1961 CALL parallel_gemm(
'N',
'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_t2)
1962 CALL parallel_gemm(
'N',
'N', n, n, n, cone, cmat_r, cmat_t2, czero, cmat_t1)
1963 matrix_g%local_data = real(cmat_t1%local_data, kind=
dp)
1969 IF (new_direction)
THEN
1971 line_searches = line_searches + 1
1980 IF (output_unit > 0)
THEN
1981 WRITE (output_unit,
'(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, iterations, omega, tol, ds_min
1984 IF (tol < eps_localization .OR. iterations > max_iter)
EXIT
1987 CALL cp_fm_trace(matrix_g, matrix_g_old, normg_cross)
1988 normg_cross = normg_cross*0.5_dp
1990 DO icol = 1, ncol_local
1991 DO irow = 1, nrow_local
1992 matrix_g_old%local_data(irow, icol) = matrix_g%local_data(irow, icol)/matrix_h%local_data(irow, icol)
1996 normg = normg*0.5_dp
1997 beta_pr = (normg - normg_cross)/normg_old
1999 beta_pr = max(beta_pr, 0.0_dp)
2001 CALL cp_fm_trace(matrix_g_search, matrix_g_old, normg_cross)
2002 IF (normg_cross .GE. 0)
THEN
2003 IF (matrix_a%matrix_struct%para_env%is_source())
THEN
2013 line_search_count = 0
2015 line_search_count = line_search_count + 1
2016 energy(line_search_count) = omega
2021 SELECT CASE (line_search_count)
2025 CALL cp_fm_trace(matrix_g, matrix_g_search, grad(1))
2026 grad(1) = grad(1)/2.0_dp
2027 new_direction = .false.
2029 new_direction = .true.
2034 a = (energy(2) - b*x1 - c)/(x1**2)
2035 IF (a .LE. 0.0_dp) a = 1.0e-15_dp
2036 npos = -b/(2.0_dp*a)
2037 val = a*npos**2 + b*npos + c
2038 IF (val .LT. energy(1) .AND. val .LE. energy(2))
THEN
2041 pos(3) = min(npos, maxval(pos(1:2))*4.0_dp)
2043 pos(3) = maxval(pos(1:2))*2.0_dp
2047 SELECT CASE (line_search_count)
2049 new_direction = .false.
2051 pos(2) = ds_min*0.8_dp
2053 new_direction = .false.
2054 IF (energy(2) .GT. energy(1))
THEN
2055 pos(3) = ds_min*0.7_dp
2057 pos(3) = ds_min*1.4_dp
2060 new_direction = .true.
2067 nom = (xb - xa)**2*(fb - fc) - (xb -
xc)**2*(fb - fa)
2068 denom = (xb - xa)*(fb - fc) - (xb -
xc)*(fb - fa)
2069 IF (abs(denom) .LE. 1.0e-18_dp*max(abs(fb - fc), abs(fb - fa)))
THEN
2072 npos = xb - 0.5_dp*nom/denom
2074 val = (npos - xa)*(npos - xb)*fc/((
xc - xa)*(
xc - xb)) + &
2075 (npos - xb)*(npos -
xc)*fa/((xa - xb)*(xa -
xc)) + &
2076 (npos -
xc)*(npos - xa)*fb/((xb -
xc)*(xb - xa))
2077 IF (val .LT. fa .AND. val .LE. fb .AND. val .LE. fc)
THEN
2079 pos(4) = max(maxval(pos(1:3))*0.01_dp, &
2080 min(npos, maxval(pos(1:3))*4.0_dp))
2082 pos(4) = maxval(pos(1:3))*2.0_dp
2086 new_direction = .false.
2087 IF (line_search_count .EQ. 1)
THEN
2092 pos(2) = ds_min/gold_sec
2094 IF (line_search_count .EQ. 150) cpabort(
"Too many")
2095 IF (lsr .EQ. 0)
THEN
2096 IF (energy(line_search_count - 1) .LT. energy(line_search_count))
THEN
2097 lsr = line_search_count
2098 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
2101 lsm = line_search_count
2102 pos(line_search_count + 1) = pos(line_search_count)/gold_sec
2105 IF (pos(line_search_count) .LT. pos(lsm))
THEN
2106 IF (energy(line_search_count) .LT. energy(lsm))
THEN
2108 lsm = line_search_count
2110 lsl = line_search_count
2113 IF (energy(line_search_count) .LT. energy(lsm))
THEN
2115 lsm = line_search_count
2117 lsr = line_search_count
2120 IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl))
THEN
2121 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
2123 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
2125 IF ((pos(lsr) - pos(lsl)) .LT. 1.0e-3_dp*pos(lsr))
THEN
2126 new_direction = .true.
2132 ds_min = pos(line_search_count + 1)
2133 ds = pos(line_search_count + 1) - pos(line_search_count)
2138 matrix_h%local_data = real(cmat_u%local_data, kind=
dp)
2139 CALL parallel_gemm(
'N',
'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
2158 DEALLOCATE (evals, evals_exp, fval, fvald)
2160 DO idim = 1,
SIZE(c_zij)
2161 zij(1, idim)%local_data = real(c_zij(idim)%local_data,
dp)
2162 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
2168 CALL timestop(handle)
2189 SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
2190 sweeps, out_each, target_time, start_time, restricted)
2192 REAL(kind=
dp),
INTENT(IN) :: weights(:)
2193 TYPE(
cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
2195 INTEGER,
INTENT(IN) :: max_iter
2196 REAL(kind=
dp),
INTENT(IN) :: eps_localization
2198 INTEGER,
INTENT(IN) :: out_each
2199 REAL(
dp) :: target_time, start_time
2200 INTEGER :: restricted
2202 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_rot_para'
2204 INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2205 nblock, nblock_max, ns_me, nstate, &
2207 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: ns_bound
2208 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :) :: rotmat, z_ij_loc_im, z_ij_loc_re
2209 REAL(kind=
dp) :: xstate
2211 TYPE(set_c_2d_type),
DIMENSION(:),
POINTER :: cz_ij_loc
2213 CALL timeset(routinen, handle)
2226 IF (restricted > 0)
THEN
2227 IF (output_unit > 0)
THEN
2228 WRITE (output_unit,
'(T4,A,I2,A )')
"JACOBI: for the ROKS method, the last ", restricted,
" orbitals DO NOT ROTATE"
2230 nstate = nstate - restricted
2234 xstate = real(nstate,
dp)/real(para_env%num_pe,
dp)
2235 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2236 DO ip = 1, para_env%num_pe
2237 ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2238 ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2241 DO ip = 0, para_env%num_pe - 1
2242 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2243 nblock_max = max(nblock_max, nblock)
2247 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2248 ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2249 ALLOCATE (cz_ij_loc(dim2))
2251 DO ip = 0, para_env%num_pe - 1
2252 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2255 IF (para_env%mepos == ip)
THEN
2256 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, nblock))
2259 cz_ij_loc(idim)%c_array(j, i) = cmplx(z_ij_loc_re(j, i), z_ij_loc_im(j, i),
dp)
2265 DEALLOCATE (z_ij_loc_re)
2266 DEALLOCATE (z_ij_loc_im)
2268 ALLOCATE (rotmat(nstate, 2*nblock_max))
2270 CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, ns_bound, &
2271 cz_ij_loc, rotmat, output_unit, eps_localization=eps_localization, &
2272 target_time=target_time, start_time=start_time)
2274 ilow1 = ns_bound(para_env%mepos, 1)
2275 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2276 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2277 ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2279 DO ip = 0, para_env%num_pe - 1
2280 z_ij_loc_re = 0.0_dp
2281 z_ij_loc_im = 0.0_dp
2282 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2283 IF (ip == para_env%mepos)
THEN
2288 z_ij_loc_re(j, i) = real(cz_ij_loc(idim)%c_array(j, i),
dp)
2289 z_ij_loc_im(j, i) = aimag(cz_ij_loc(idim)%c_array(j, i))
2293 CALL para_env%bcast(z_ij_loc_re, ip)
2294 CALL para_env%bcast(z_ij_loc_im, ip)
2300 DO ip = 0, para_env%num_pe - 1
2301 z_ij_loc_re = 0.0_dp
2302 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2303 IF (ip == para_env%mepos)
THEN
2308 z_ij_loc_re(j, i) = rotmat(j, i)
2312 CALL para_env%bcast(z_ij_loc_re, ip)
2316 DEALLOCATE (z_ij_loc_re)
2317 DEALLOCATE (z_ij_loc_im)
2319 DEALLOCATE (cz_ij_loc(idim)%c_array)
2321 DEALLOCATE (cz_ij_loc)
2323 CALL para_env%sync()
2328 DEALLOCATE (ns_bound)
2330 CALL timestop(handle)
2332 END SUBROUTINE jacobi_rot_para
2344 SUBROUTINE jacobi_rot_para_1(weights, czij, para_env, max_iter, rmat, tol_out)
2346 REAL(kind=
dp),
INTENT(IN) :: weights(:)
2349 INTEGER,
INTENT(IN) :: max_iter
2351 REAL(
dp),
INTENT(OUT),
OPTIONAL :: tol_out
2353 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_rot_para_1'
2355 COMPLEX(KIND=dp),
ALLOCATABLE,
DIMENSION(:, :) :: czij_array
2356 INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2357 nblock, nblock_max, ns_me, nstate, &
2359 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: ns_bound
2360 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :) :: rotmat, z_ij_loc_re
2361 REAL(kind=
dp) :: xstate
2362 TYPE(set_c_2d_type),
DIMENSION(:),
POINTER :: cz_ij_loc
2364 CALL timeset(routinen, handle)
2373 xstate = real(nstate,
dp)/real(para_env%num_pe,
dp)
2374 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2375 DO ip = 1, para_env%num_pe
2376 ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2377 ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2380 DO ip = 0, para_env%num_pe - 1
2381 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2382 nblock_max = max(nblock_max, nblock)
2386 ALLOCATE (czij_array(nstate, nblock_max))
2387 ALLOCATE (cz_ij_loc(dim2))
2389 DO ip = 0, para_env%num_pe - 1
2390 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2393 IF (para_env%mepos == ip)
THEN
2395 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me))
2398 cz_ij_loc(idim)%c_array(j, i) = czij_array(j, i)
2404 DEALLOCATE (czij_array)
2406 ALLOCATE (rotmat(nstate, 2*nblock_max))
2408 CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, 1, dim2, nstate, nblock_max, ns_bound, &
2409 cz_ij_loc, rotmat, 0, tol_out=tol_out)
2411 ilow1 = ns_bound(para_env%mepos, 1)
2412 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2413 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2415 DO ip = 0, para_env%num_pe - 1
2416 z_ij_loc_re = 0.0_dp
2417 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2418 IF (ip == para_env%mepos)
THEN
2423 z_ij_loc_re(j, i) = rotmat(j, i)
2427 CALL para_env%bcast(z_ij_loc_re, ip)
2431 DEALLOCATE (z_ij_loc_re)
2433 DEALLOCATE (cz_ij_loc(idim)%c_array)
2435 DEALLOCATE (cz_ij_loc)
2437 CALL para_env%sync()
2440 DEALLOCATE (ns_bound)
2442 CALL timestop(handle)
2444 END SUBROUTINE jacobi_rot_para_1
2468 SUBROUTINE jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, &
2469 ns_bound, cz_ij_loc, rotmat, output_unit, tol_out, eps_localization, target_time, start_time)
2471 REAL(kind=
dp),
INTENT(IN) :: weights(:)
2473 INTEGER,
INTENT(IN) :: max_iter
2474 INTEGER,
INTENT(OUT) :: sweeps
2475 INTEGER,
INTENT(IN) :: out_each, dim2, nstate, nblock_max
2476 INTEGER,
DIMENSION(0:, :),
INTENT(IN) :: ns_bound
2477 TYPE(set_c_2d_type),
DIMENSION(:),
POINTER :: cz_ij_loc
2478 REAL(
dp),
DIMENSION(:, :),
INTENT(OUT) :: rotmat
2479 INTEGER,
INTENT(IN) :: output_unit
2480 REAL(
dp),
INTENT(OUT),
OPTIONAL :: tol_out
2481 REAL(kind=
dp),
INTENT(IN),
OPTIONAL :: eps_localization
2482 REAL(
dp),
OPTIONAL :: target_time, start_time
2484 COMPLEX(KIND=dp) :: zi, zj
2485 COMPLEX(KIND=dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: c_array_me, c_array_partner
2486 COMPLEX(KIND=dp),
POINTER :: mii(:), mij(:), mjj(:)
2487 INTEGER :: i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, ip, ip_has_i, &
2488 ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, iu1, iu2, iup1, &
2489 iup2, j, jj, jstate, k, kk, lsweep, n1, n2, npair, nperm, ns_me, ns_partner, &
2490 ns_recv_from, ns_recv_partner
2491 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: rcount, rdispl
2492 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: list_pair
2493 LOGICAL :: should_stop
2494 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :) :: gmat, rmat_loc, rmat_recv, rmat_send
2495 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: rmat_recv_all
2496 REAL(kind=
dp) :: ct, func, gmax, grad, ri, rj, st, t1, &
2497 t2, theta, tolerance, zc, zr
2498 TYPE(set_c_1d_type),
DIMENSION(:),
POINTER :: zdiag_all, zdiag_me
2499 TYPE(set_c_2d_type),
DIMENSION(:),
POINTER :: xyz_mix, xyz_mix_ns
2501 NULLIFY (zdiag_all, zdiag_me)
2502 NULLIFY (xyz_mix, xyz_mix_ns)
2503 NULLIFY (mii, mij, mjj)
2505 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
2507 ALLOCATE (rcount(para_env%num_pe), stat=istat)
2508 ALLOCATE (rdispl(para_env%num_pe), stat=istat)
2510 tolerance = 1.0e10_dp
2514 npair = (para_env%num_pe + 1)/2
2515 nperm = para_env%num_pe - mod(para_env%num_pe + 1, 2)
2516 ALLOCATE (list_pair(2, npair))
2520 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2521 ii = i - ns_bound(para_env%mepos, 1) + 1
2522 rotmat(i, ii) = 1.0_dp
2525 ALLOCATE (xyz_mix(dim2))
2526 ALLOCATE (xyz_mix_ns(dim2))
2527 ALLOCATE (zdiag_me(dim2))
2528 ALLOCATE (zdiag_all(dim2))
2530 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2531 IF (ns_me /= 0)
THEN
2532 ALLOCATE (c_array_me(nstate, ns_me, dim2))
2534 ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me))
2536 ALLOCATE (gmat(nstate, ns_me))
2540 ALLOCATE (zdiag_me(idim)%c_array(nblock_max))
2541 zdiag_me(idim)%c_array =
z_zero
2542 ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max))
2543 zdiag_all(idim)%c_array =
z_zero
2545 ALLOCATE (rmat_recv(nblock_max*2, nblock_max))
2546 ALLOCATE (rmat_send(nblock_max*2, nblock_max))
2549 ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1))
2551 IF (output_unit > 0)
THEN
2552 WRITE (output_unit,
'(T4,A )')
" Localization by iterative distributed Jacobi rotation"
2553 WRITE (output_unit,
'(T20,A12,T32, A22,T60, A12,A8 )')
"Iteration",
"Functional",
"Tolerance",
" Time "
2556 DO lsweep = 1, max_iter + 1
2558 IF (sweeps == max_iter + 1)
THEN
2559 IF (output_unit > 0)
THEN
2560 WRITE (output_unit, *)
' LOCALIZATION! loop did not converge within the maximum number of iterations.'
2561 WRITE (output_unit, *)
' Present Max. gradient = ', tolerance
2570 CALL eberlein(iperm, para_env, list_pair)
2574 IF (list_pair(1, ipair) == para_env%mepos)
THEN
2575 ip_partner = list_pair(2, ipair)
2577 ELSE IF (list_pair(2, ipair) == para_env%mepos)
THEN
2578 ip_partner = list_pair(1, ipair)
2582 IF (ip_partner >= 0)
THEN
2583 ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1
2589 IF (ns_partner*ns_me /= 0)
THEN
2591 ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner))
2593 DO i = 1, ns_me + ns_partner
2594 rmat_loc(i, i) = 1.0_dp
2597 ALLOCATE (c_array_partner(nstate, ns_partner, dim2))
2600 ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner))
2602 c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i)
2606 CALL para_env%sendrecv(msgin=c_array_me, dest=ip_partner, &
2607 msgout=c_array_partner, source=ip_partner)
2611 ilow1 = ns_bound(para_env%mepos, 1)
2612 iup1 = ns_bound(para_env%mepos, 1) + n1 - 1
2613 ilow2 = ns_bound(ip_partner, 1)
2614 iup2 = ns_bound(ip_partner, 1) + n2 - 1
2615 IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1))
THEN
2631 xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim)
2632 xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim)
2635 xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim)
2636 xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim)
2640 DO istate = 1, n1 + n2
2641 DO jstate = istate + 1, n1 + n2
2643 mii(idim) = xyz_mix(idim)%c_array(istate, istate)
2644 mij(idim) = xyz_mix(idim)%c_array(istate, jstate)
2645 mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate)
2647 CALL get_angle(mii, mjj, mij, weights, theta)
2652 zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate)
2653 zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate)
2654 xyz_mix(idim)%c_array(i, istate) = zi
2655 xyz_mix(idim)%c_array(i, jstate) = zj
2658 zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i)
2659 zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i)
2660 xyz_mix(idim)%c_array(istate, i) = zi
2661 xyz_mix(idim)%c_array(jstate, i) = zj
2666 ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate)
2667 rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate)
2668 rmat_loc(i, istate) = ri
2669 rmat_loc(i, jstate) = rj
2675 CALL para_env%sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, &
2676 rotmat(1:nstate, k:k + n2 - 1), ip_partner)
2678 IF (ilow1 < ilow2)
THEN
2682 CALL dgemm(
"N",
"N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, rmat_loc(1 + n1:, 1:n1), &
2683 n2, 0.0_dp, gmat(:, :), nstate)
2684 CALL dgemm(
"N",
"N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(1:, 1:), &
2685 n1 + n2, 1.0_dp, gmat(:, :), nstate)
2687 CALL dgemm(
"N",
"N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, &
2688 rmat_loc(1:, n2 + 1:), n1 + n2, 0.0_dp, gmat(:, :), nstate)
2692 CALL dgemm(
"N",
"N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(n2 + 1:, n2 + 1:), &
2693 n1, 1.0_dp, gmat(:, :), nstate)
2696 CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1)
2700 xyz_mix_ns(idim)%c_array(1:nstate, i) =
z_zero
2704 DO jstate = 1, nstate
2706 xyz_mix_ns(idim)%c_array(jstate, istate) = &
2707 xyz_mix_ns(idim)%c_array(jstate, istate) + &
2708 c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1)
2713 DO jstate = 1, nstate
2715 xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + &
2716 c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1)
2722 DEALLOCATE (c_array_partner)
2727 xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i)
2734 cz_ij_loc(idim)%c_array(1:nstate, i) =
z_zero
2738 IF (ns_partner*ns_me /= 0)
THEN
2740 DO i = 1, ns_me + ns_partner
2741 DO j = i + 1, ns_me + ns_partner
2743 rmat_loc(i, j) = rmat_loc(j, i)
2750 rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1)
2754 rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1)
2761 CALL para_env%allgather(rmat_send, rmat_recv_all)
2764 DO ip = 0, para_env%num_pe - 1
2766 ip_recv_from = mod(para_env%mepos - ip + para_env%num_pe, para_env%num_pe)
2767 rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from)
2769 ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1
2771 IF (ns_me /= 0)
THEN
2772 IF (ns_recv_from /= 0)
THEN
2774 ip_recv_partner = -1
2777 IF (list_pair(1, ipair) == ip_recv_from)
THEN
2778 ip_recv_partner = list_pair(2, ipair)
2780 ELSE IF (list_pair(2, ipair) == ip_recv_from)
THEN
2781 ip_recv_partner = list_pair(1, ipair)
2786 IF (ip_recv_partner >= 0)
THEN
2787 ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1
2789 IF (ns_recv_partner > 0)
THEN
2790 il1 = ns_bound(para_env%mepos, 1)
2791 il_recv = ns_bound(ip_recv_from, 1)
2792 il_recv_partner = ns_bound(ip_recv_partner, 1)
2796 DO i = 1, ns_recv_from
2797 ii = il_recv + i - 1
2800 DO k = 1, ns_recv_from
2801 kk = il_recv + k - 1
2802 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2803 rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2807 DO i = 1, ns_recv_from
2808 ii = il_recv + i - 1
2811 DO k = 1, ns_recv_partner
2812 kk = il_recv_partner + k - 1
2813 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2814 rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2820 il1 = ns_bound(para_env%mepos, 1)
2821 il_recv = ns_bound(ip_recv_from, 1)
2825 DO i = 1, ns_recv_from
2826 ii = il_recv + i - 1
2827 cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j)
2836 IF (ns_partner*ns_me /= 0)
THEN
2837 DEALLOCATE (rmat_loc)
2839 DEALLOCATE (xyz_mix(idim)%c_array)
2847 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2848 ii = i - ns_bound(para_env%mepos, 1) + 1
2849 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2850 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2852 rcount(:) =
SIZE(zdiag_me(idim)%c_array)
2854 DO ip = 2, para_env%num_pe
2855 rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
2858 CALL para_env%allgatherv(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl)
2862 DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2863 k = j - ns_bound(para_env%mepos, 1) + 1
2866 DO ip = 0, para_env%num_pe - 1
2867 IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2))
THEN
2872 ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1
2874 jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1
2877 zi = zdiag_all(idim)%c_array(ii)
2878 zj = zdiag_all(idim)%c_array(jj)
2879 grad = grad + weights(idim)*real(4.0_dp*conjg(cz_ij_loc(idim)%c_array(i, k))*(zj - zi),
dp)
2881 gmax = max(gmax, abs(grad))
2885 CALL para_env%max(gmax)
2887 IF (
PRESENT(tol_out)) tol_out = tolerance
2890 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2891 k = i - ns_bound(para_env%mepos, 1) + 1
2893 zr = real(cz_ij_loc(idim)%c_array(i, k),
dp)
2894 zc = aimag(cz_ij_loc(idim)%c_array(i, k))
2895 func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/
twopi/
twopi
2898 CALL para_env%sum(func)
2901 IF (output_unit > 0 .AND.
modulo(sweeps, out_each) == 0)
THEN
2902 WRITE (output_unit,
'(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1
2905 IF (
PRESENT(eps_localization))
THEN
2906 IF (tolerance < eps_localization)
EXIT
2908 IF (
PRESENT(target_time) .AND.
PRESENT(start_time))
THEN
2909 CALL external_control(should_stop,
"LOC", target_time=target_time, start_time=start_time)
2910 IF (should_stop)
EXIT
2916 DEALLOCATE (rmat_recv_all)
2918 DEALLOCATE (rmat_recv)
2919 DEALLOCATE (rmat_send)
2921 DEALLOCATE (c_array_me)
2924 DEALLOCATE (zdiag_me(idim)%c_array)
2925 DEALLOCATE (zdiag_all(idim)%c_array)
2927 DEALLOCATE (zdiag_me)
2928 DEALLOCATE (zdiag_all)
2929 DEALLOCATE (xyz_mix)
2931 IF (ns_me /= 0)
THEN
2932 DEALLOCATE (xyz_mix_ns(idim)%c_array)
2935 DEALLOCATE (xyz_mix_ns)
2936 IF (ns_me /= 0)
THEN
2942 DEALLOCATE (list_pair)
2944 END SUBROUTINE jacobi_rot_para_core
2952 SUBROUTINE eberlein(iperm, para_env, list_pair)
2953 INTEGER,
INTENT(IN) :: iperm
2955 INTEGER,
DIMENSION(:, :) :: list_pair
2957 INTEGER :: i, ii, jj, npair
2959 npair = (para_env%num_pe + 1)/2
2960 IF (iperm == 1)
THEN
2962 DO i = 0, para_env%num_pe - 1
2963 ii = ((i + 1) + 1)/2
2964 jj = mod((i + 1) + 1, 2) + 1
2965 list_pair(jj, ii) = i
2967 IF (mod(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1
2968 ELSEIF (mod(iperm, 2) == 0)
THEN
2970 jj = list_pair(1, npair)
2972 list_pair(1, i) = list_pair(1, i - 1)
2974 list_pair(1, 2) = list_pair(2, 1)
2975 list_pair(2, 1) = jj
2978 jj = list_pair(2, 1)
2980 list_pair(2, i) = list_pair(2, i + 1)
2982 list_pair(2, npair) = jj
2985 END SUBROUTINE eberlein
2996 TYPE(
dbcsr_p_type),
DIMENSION(:, :),
POINTER :: op_sm_set
2997 TYPE(
cp_fm_type),
DIMENSION(:, :),
INTENT(IN) :: zij_fm_set
2999 CHARACTER(len=*),
PARAMETER :: routinen =
'zij_matrix'
3001 INTEGER :: handle, i, j, nao, nmoloc
3004 CALL timeset(routinen, handle)
3007 CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc)
3012 DO i = 1,
SIZE(zij_fm_set, 2)
3013 DO j = 1,
SIZE(zij_fm_set, 1)
3016 CALL parallel_gemm(
"T",
"N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, &
3022 CALL timestop(handle)
3034 CHARACTER(len=*),
PARAMETER :: routinen =
'scdm_qrfact'
3036 INTEGER :: handle, ncolt, nrowt
3037 REAL(kind=
dp),
DIMENSION(:),
POINTER :: tau
3041 CALL timeset(routinen, handle)
3044 nrowt = vectors%matrix_struct%ncol_global
3045 ncolt = vectors%matrix_struct%nrow_global
3048 nrow_global=nrowt, ncol_global=ncolt)
3052 ALLOCATE (tau(nrowt))
3061 context=ctp%matrix_struct%context, nrow_global=ctp%matrix_struct%nrow_global, &
3062 ncol_global=ctp%matrix_struct%nrow_global)
3074 CALL parallel_gemm(
'N',
'N', ncolt, nrowt, nrowt, 1.0_dp, tmp, qf, 0.0_dp, vectors)
3082 CALL timestop(handle)
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
Handles all functions related to the CELL.
methods related to the blacs parallel environment
Basic linear algebra operations for complex full matrices.
subroutine, public cp_cfm_scale_and_add(alpha, matrix_a, beta, matrix_b)
Scale and add two BLACS matrices (a = alpha*a + beta*b).
subroutine, public cp_cfm_rot_rows(matrix, irow, jrow, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
subroutine, public cp_cfm_schur_product(matrix_a, matrix_b, matrix_c)
Computes the element-wise (Schur) product of two matrices: C = A \circ B .
subroutine, public cp_cfm_column_scale(matrix_a, scaling)
Scales columns of the full matrix by corresponding factors.
subroutine, public cp_cfm_rot_cols(matrix, icol, jcol, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
subroutine, public cp_cfm_trace(matrix_a, matrix_b, trace)
Returns the trace of matrix_a^T matrix_b, i.e sum_{i,j}(matrix_a(i,j)*matrix_b(i,j)) .
used for collecting diagonalization schemes available for cp_cfm_type
subroutine, public cp_cfm_heevd(matrix, eigenvectors, eigenvalues)
Perform a diagonalisation of a complex matrix.
Represents a complex full matrix distributed on many processors.
subroutine, public cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
Extract a sub-matrix from the full matrix: op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n...
subroutine, public cp_cfm_get_element(matrix, irow_global, icol_global, alpha)
Get the matrix element by its global index.
subroutine, public cp_cfm_create(matrix, matrix_struct, name)
Creates a new full matrix with the given structure.
subroutine, public cp_cfm_release(matrix)
Releases a full matrix.
subroutine, public cp_fm_to_cfm(msourcer, msourcei, mtarget)
Construct a complex full matrix by taking its real and imaginary parts from two separate real-value f...
subroutine, public cp_cfm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, matrix_struct, para_env)
Returns information about a full matrix.
subroutine, public cp_cfm_set_submatrix(matrix, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
Set a sub-matrix of the full matrix: matrix(start_row:start_row+n_rows,start_col:start_col+n_cols) = ...
subroutine, public cp_cfm_set_all(matrix, alpha, beta)
Set all elements of the full matrix to alpha. Besides, set all diagonal matrix elements to beta (if g...
subroutine, public cp_cfm_to_fm(msource, mtargetr, mtargeti)
Copy real and imaginary parts of a complex full matrix into separate real-value full matrices.
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_sm_fm_multiply(matrix, fm_in, fm_out, ncol, alpha, beta)
multiply a dbcsr with a fm matrix
Routines to handle the external control of CP2K.
subroutine, public external_control(should_stop, flag, globenv, target_time, start_time, force_check)
External manipulations during a run : when the <PROJECT_NAME>.EXIT_$runtype command is sent the progr...
Basic linear algebra operations for full matrices.
subroutine, public cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
compute a QR factorization with column pivoting of a M-by-N distributed matrix sub( A ) = A(IA:IA+M-1...
real(kind=dp) function, public cp_fm_frobenius_norm(matrix_a)
computes the Frobenius norm of matrix_a
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_scale(alpha, matrix_a)
scales a matrix matrix_a = alpha * matrix_b
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
subroutine, public cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1) with orthonormal column...
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_decompose(matrix, n, info_out)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
subroutine, public cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
Computes all eigenvalues and vectors of a real symmetric matrix significantly faster than syevx,...
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
returns an element of a fm this value is valid on every cpu using this call is expensive
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_maxabsrownorm(matrix, a_max)
find the maximum over the rows of the sum of the absolute values of the elements of a given row = || ...
subroutine, public cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
copy just a part ot the matrix
subroutine, public cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
find the maximum absolute value of the matrix element maxval(abs(matrix))
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_init_random(matrix, ncol, start_col)
fills a matrix with random numbers
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Defines the basic variable types.
integer, parameter, public dp
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition of mathematical constants and functions.
real(kind=dp), parameter, public pi
real(kind=dp), parameter, public twopi
complex(kind=dp), parameter, public z_zero
Routines for calculating a complex matrix exponential.
subroutine, public exp_pade_real(exp_h, matrix, nsquare, npade)
exponential of a real matrix, calculated using pade approximation together with scaling and squaring
subroutine, public get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
optimization function for pade/taylor order and number of squaring steps
Interface to the message passing library MPI.
basic linear algebra operations for full matrixes
Localization methods such as 2x2 Jacobi rotations Steepest Decents Conjugate Gradient.
subroutine, public approx_l1_norm_sd(c, iterations, eps, converged, sweeps)
...
subroutine, public rotate_orbitals(rmat, vectors)
...
subroutine, public jacobi_rotations(weights, zij, vectors, para_env, max_iter, eps_localization, sweeps, out_each, target_time, start_time, restricted)
wrapper for the jacobi routines, should be removed if jacobi_rot_para can deal with serial para_envs.
subroutine, public zij_matrix(vectors, op_sm_set, zij_fm_set)
...
subroutine, public direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
use the exponential parametrization as described in to perform a direct mini Gerd Berghold et al....
subroutine, public initialize_weights(cell, weights)
...
subroutine, public crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, eps_localization, iterations, converged)
yet another crazy try, computes the angles needed to rotate the orbitals first and rotates them all a...
subroutine, public scdm_qrfact(vectors)
...
subroutine, public jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
combine jacobi rotations (serial) and conjugate gradient with golden section line search for partiall...
Exchange and Correlation functional calculations.
Type defining parameters related to the simulation cell.
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
Represent a complex full matrix.
keeps the information about the structure of a full matrix
stores all the informations relevant to an mpi environment