66#include "./base/base_uses.f90"
73 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'qs_localization_methods'
78 COMPLEX(KIND=dp),
POINTER,
DIMENSION(:) :: c_array => null()
82 COMPLEX(KIND=dp),
POINTER,
DIMENSION(:, :) :: c_array => null()
96 INTEGER,
INTENT(IN) :: iterations
97 REAL(kind=
dp),
INTENT(IN) :: eps
98 LOGICAL,
INTENT(INOUT) :: converged
99 INTEGER,
INTENT(INOUT) :: sweeps
101 CHARACTER(len=*),
PARAMETER :: routinen =
'approx_l1_norm_sd'
102 INTEGER,
PARAMETER :: taylor_order = 100
103 REAL(kind=
dp),
PARAMETER :: alpha = 0.1_dp, f2_eps = 0.01_dp
105 INTEGER :: handle, i, istep, k, n, ncol_local, &
106 nrow_local, output_unit, p
107 REAL(kind=
dp) :: expfactor, f2, f2old, gnorm, tnorm
113 CALL timeset(routinen, handle)
115 NULLIFY (context, para_env, fm_struct_k_k)
120 nrow_local=nrow_local, ncol_local=ncol_local, &
121 para_env=para_env, context=context)
123 nrow_global=k, ncol_global=k)
131 IF (output_unit > 0)
THEN
132 WRITE (output_unit,
'(1X)')
133 WRITE (output_unit,
'(2X,A)')
'-----------------------------------------------------------------------------'
134 WRITE (output_unit,
'(A,I5)')
' Nbr iterations =', iterations
135 WRITE (output_unit,
'(A,E10.2)')
' eps convergence =', eps
136 WRITE (output_unit,
'(A,I5)')
' Max Taylor order =', taylor_order
137 WRITE (output_unit,
'(A,E10.2)')
' f2 eps =', f2_eps
138 WRITE (output_unit,
'(A,E10.2)')
' alpha =', alpha
139 WRITE (output_unit,
'(A)')
' iteration approx_l1_norm g_norm rel_err'
146 DO istep = 1, iterations
154 f2 = f2 + sqrt(c%local_data(i, p)**2 + f2_eps)
157 CALL c%matrix_struct%para_env%sum(f2)
164 ctmp%local_data(i, p) = c%local_data(i, p)/sqrt(c%local_data(i, p)**2 + f2_eps)
167 CALL parallel_gemm(
'T',
'N', k, k, n, 1.0_dp, ctmp, c, 0.0_dp, g)
188 IF (tnorm .GT. 1.0e-10_dp)
THEN
191 DO i = 2, taylor_order
193 CALL parallel_gemm(
'N',
'N', k, k, k, 1.0_dp, g, gp1, 0.0_dp, gp2)
196 expfactor = expfactor/real(i, kind=
dp)
200 IF (tnorm*expfactor .LT. 1.0e-10_dp)
EXIT
205 CALL parallel_gemm(
'N',
'N', n, k, k, 1.0_dp, c, u, 0.0_dp, ctmp)
209 IF (output_unit .GT. 0)
THEN
210 WRITE (output_unit,
'(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, abs((f2 - f2old)/f2)
216 IF (abs((f2 - f2old)/f2) .LE. eps .AND. istep .GT. 1)
THEN
226 IF (output_unit .GT. 0)
WRITE (output_unit,
'(A,E16.10)')
' sparseness function f2 = ', f2
252 CALL timestop(handle)
263 REAL(kind=
dp),
DIMENSION(:) :: weights
265 REAL(kind=
dp),
DIMENSION(3, 3) :: metric
267 cpassert(
ASSOCIATED(cell))
270 CALL dgemm(
'T',
'N', 3, 3, 3, 1._dp, cell%hmat(:, :), 3, cell%hmat(:, :), 3, 0.0_dp, metric(:, :), 3)
272 weights(1) = metric(1, 1) - metric(1, 2) - metric(1, 3)
273 weights(2) = metric(2, 2) - metric(1, 2) - metric(2, 3)
274 weights(3) = metric(3, 3) - metric(1, 3) - metric(2, 3)
275 weights(4) = metric(1, 2)
276 weights(5) = metric(1, 3)
277 weights(6) = metric(2, 3)
299 eps_localization, sweeps, out_each, target_time, start_time, restricted)
301 REAL(kind=
dp),
INTENT(IN) :: weights(:)
302 TYPE(
cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
304 INTEGER,
INTENT(IN) :: max_iter
305 REAL(kind=
dp),
INTENT(IN) :: eps_localization
307 INTEGER,
INTENT(IN) :: out_each
308 REAL(
dp) :: target_time, start_time
309 INTEGER :: restricted
311 IF (para_env%num_pe == 1)
THEN
312 CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, &
313 sweeps, out_each, restricted=restricted)
315 CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
316 sweeps, out_each, target_time, start_time, restricted=restricted)
333 SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, &
334 out_each, restricted)
335 REAL(kind=
dp),
INTENT(IN) :: weights(:)
336 TYPE(
cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
337 INTEGER,
INTENT(IN) :: max_iter
338 REAL(kind=
dp),
INTENT(IN) :: eps_localization
340 INTEGER,
INTENT(IN) :: out_each
341 INTEGER :: restricted
343 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_rotations_serial'
345 COMPLEX(KIND=dp),
POINTER :: mii(:), mij(:), mjj(:)
346 INTEGER :: dim2, handle, idim, istate, jstate, &
348 REAL(kind=
dp) :: ct, st, t1, t2, theta, tolerance
350 TYPE(
cp_cfm_type),
ALLOCATABLE,
DIMENSION(:) :: c_zij
353 CALL timeset(routinen, handle)
356 ALLOCATE (c_zij(dim2))
357 NULLIFY (mii, mij, mjj)
358 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
367 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
368 zij(2, idim)%local_data,
dp)
372 tolerance = 1.0e10_dp
376 IF (rmat%matrix_struct%para_env%is_source())
THEN
378 WRITE (unit_nr,
'(T4,A )')
" Localization by iterative Jacobi rotation"
381 IF (restricted > 0)
THEN
383 WRITE (unit_nr,
'(T4,A,I2,A )')
"JACOBI: for the ROKS method, the last ", restricted,
" orbitals DO NOT ROTATE"
384 nstate = nstate - restricted
388 DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
392 DO istate = 1, nstate
393 DO jstate = istate + 1, nstate
399 CALL get_angle(mii, mjj, mij, weights, theta)
402 CALL rotate_zij(istate, jstate, st, ct, c_zij)
404 CALL rotate_rmat(istate, jstate, st, ct, c_rmat)
408 CALL check_tolerance(c_zij, weights, tolerance)
411 IF (unit_nr > 0 .AND.
modulo(sweeps, out_each) == 0)
THEN
412 WRITE (unit_nr,
'(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') &
413 "Iteration:", sweeps,
"Tolerance:", tolerance,
"Time:", t2 - t1
420 zij(1, idim)%local_data = real(c_zij(idim)%local_data,
dp)
421 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
425 DEALLOCATE (mii, mij, mjj)
426 rmat%local_data = real(c_rmat%local_data,
dp)
431 CALL timestop(handle)
433 END SUBROUTINE jacobi_rotations_serial
447 SUBROUTINE jacobi_rotations_serial_1(weights, c_zij, max_iter, c_rmat, eps_localization, &
448 tol_out, jsweeps, out_each, c_zij_out, grad_final)
449 REAL(kind=
dp),
INTENT(IN) :: weights(:)
451 INTEGER,
INTENT(IN) :: max_iter
453 REAL(kind=
dp),
INTENT(IN),
OPTIONAL :: eps_localization
454 REAL(kind=
dp),
INTENT(OUT),
OPTIONAL :: tol_out
455 INTEGER,
INTENT(OUT),
OPTIONAL :: jsweeps
456 INTEGER,
INTENT(IN),
OPTIONAL :: out_each
457 TYPE(
cp_cfm_type),
INTENT(IN),
OPTIONAL :: c_zij_out(:)
458 TYPE(
cp_fm_type),
INTENT(OUT),
OPTIONAL,
POINTER :: grad_final
460 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_rotations_serial_1'
462 COMPLEX(KIND=dp) :: mzii
463 COMPLEX(KIND=dp),
POINTER :: mii(:), mij(:), mjj(:)
464 INTEGER :: dim2, handle, idim, istate, jstate, &
465 nstate, sweeps, unit_nr
466 REAL(kind=
dp) :: alpha, avg_spread_ii, ct, spread_ii, st, &
467 sum_spread_ii, t1, t2, theta, tolerance
471 CALL timeset(routinen, handle)
474 NULLIFY (mii, mij, mjj)
475 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
477 ALLOCATE (c_zij_local(dim2))
479 CALL cp_cfm_set_all(c_rmat_local, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
481 CALL cp_cfm_create(c_zij_local(idim), c_zij(idim)%matrix_struct)
482 c_zij_local(idim)%local_data = c_zij(idim)%local_data
486 tolerance = 1.0e10_dp
488 IF (
PRESENT(grad_final))
CALL cp_fm_set_all(grad_final, 0.0_dp)
491 IF (
PRESENT(out_each))
THEN
493 IF (c_rmat_local%matrix_struct%para_env%is_source())
THEN
498 alpha = alpha + weights(idim)
503 DO WHILE (sweeps < max_iter)
505 IF (
PRESENT(eps_localization))
THEN
506 IF (tolerance < eps_localization)
EXIT
510 DO istate = 1, nstate
511 DO jstate = istate + 1, nstate
517 CALL get_angle(mii, mjj, mij, weights, theta)
520 CALL rotate_zij(istate, jstate, st, ct, c_zij_local)
522 CALL rotate_rmat(istate, jstate, st, ct, c_rmat_local)
526 IF (
PRESENT(grad_final))
THEN
527 CALL check_tolerance(c_zij_local, weights, tolerance, grad=grad_final)
529 CALL check_tolerance(c_zij_local, weights, tolerance)
531 IF (
PRESENT(tol_out)) tol_out = tolerance
533 IF (
PRESENT(out_each))
THEN
535 IF (unit_nr > 0 .AND.
modulo(sweeps, out_each) == 0)
THEN
536 sum_spread_ii = 0.0_dp
537 DO istate = 1, nstate
541 spread_ii = spread_ii + weights(idim)* &
544 sum_spread_ii = sum_spread_ii + spread_ii
546 sum_spread_ii = alpha*nstate/
twopi/
twopi - sum_spread_ii
547 avg_spread_ii = sum_spread_ii/nstate
548 WRITE (unit_nr,
'(T4,A,T26,A,T48,A,T64,A)') &
549 "Iteration",
"Avg. Spread_ii",
"Tolerance",
"Time"
550 WRITE (unit_nr,
'(T4,I7,T20,F20.10,T45,E12.4,T60,F8.3)') &
551 sweeps, avg_spread_ii, tolerance, t2 - t1
554 IF (
PRESENT(jsweeps)) jsweeps = sweeps
559 IF (
PRESENT(c_zij_out))
THEN
566 DEALLOCATE (mii, mij, mjj)
570 DEALLOCATE (c_zij_local)
573 CALL timestop(handle)
575 END SUBROUTINE jacobi_rotations_serial_1
594 iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
596 REAL(kind=
dp),
INTENT(IN) :: weights(:)
597 TYPE(
cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
598 INTEGER,
INTENT(IN) :: max_iter
599 REAL(kind=
dp),
INTENT(IN) :: eps_localization
601 INTEGER,
INTENT(IN) :: out_each, nextra
602 LOGICAL,
INTENT(IN) :: do_cg
603 INTEGER,
INTENT(IN),
OPTIONAL :: nmo
604 TYPE(
cp_fm_type),
INTENT(IN),
OPTIONAL :: vectors_2, mos_guess
606 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_cg_edf_ls'
607 COMPLEX(KIND=dp),
PARAMETER :: cone = (1.0_dp, 0.0_dp), &
608 czero = (0.0_dp, 0.0_dp)
609 REAL(kind=
dp),
PARAMETER :: gold_sec = 0.3819_dp
611 COMPLEX(KIND=dp) :: cnorm2_gct, cnorm2_gct_cross, mzii
612 COMPLEX(KIND=dp),
ALLOCATABLE,
DIMENSION(:, :) :: tmp_cmat
613 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER :: arr_zii
614 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: matrix_zii
615 INTEGER :: dim2, handle, icinit, idim, istate, line_search_count, line_searches, lsl, lsm, &
616 lsr, miniter, nao, ndummy, nocc, norextra, northo, nstate, unit_nr
617 INTEGER,
DIMENSION(1) :: iloc
618 LOGICAL :: do_cinit_mo, do_cinit_random, &
619 do_u_guess_mo, new_direction
620 REAL(kind=
dp) :: alpha, avg_spread_ii, beta, beta_pr, ds, ds_min, mintol, norm, norm2_gct, &
621 norm2_gct_cross, norm2_old, spread_ii, spread_sum, sum_spread_ii, t1, tol, tolc, weight
622 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: sum_spread
623 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: tmp_mat, tmp_mat_1
624 REAL(kind=
dp),
DIMENSION(50) :: energy, pos
625 REAL(kind=
dp),
DIMENSION(:),
POINTER :: tmp_arr
627 TYPE(
cp_cfm_type) :: c_tilde, ctrans_lambda, gct_old, &
628 grad_ctilde, skc, tmp_cfm, tmp_cfm_1, &
629 tmp_cfm_2, u, ul, v, vl, zdiag
630 TYPE(
cp_cfm_type),
ALLOCATABLE,
DIMENSION(:) :: c_zij, zij_0
632 TYPE(
cp_fm_type) :: id_nextra, matrix_u, matrix_v, &
633 matrix_v_all, rmat, tmp_fm, vectors_all
635 CALL timeset(routinen, handle)
639 NULLIFY (matrix_zii, arr_zii)
640 NULLIFY (tmp_fm_struct)
643 ALLOCATE (c_zij(dim2))
647 ALLOCATE (sum_spread(nstate))
648 ALLOCATE (matrix_zii(nstate, dim2))
654 alpha = alpha + weights(idim)
656 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
657 zij(2, idim)%local_data,
dp)
660 ALLOCATE (zij_0(dim2))
670 IF (
PRESENT(mos_guess))
THEN
671 do_cinit_random = .false.
675 do_cinit_random = .true.
676 do_cinit_mo = .false.
680 IF (do_cinit_random)
THEN
682 do_u_guess_mo = .false.
683 ELSEIF (do_cinit_mo)
THEN
685 do_u_guess_mo = .true.
688 nocc = nstate - nextra
690 norextra = nmo - nstate
693 ALLOCATE (tmp_cmat(nstate, nstate))
695 para_env=para_env, context=context)
703 DEALLOCATE (tmp_cmat)
706 para_env=para_env, context=context)
716 para_env=para_env, context=context)
721 ALLOCATE (arr_zii(nstate))
724 para_env=para_env, context=context)
735 para_env=para_env, context=context)
741 para_env=para_env, context=context)
749 para_env=para_env, context=context)
755 para_env=para_env, context=context)
758 ALLOCATE (tmp_mat(nao, nstate))
762 ALLOCATE (tmp_mat(nao, norextra))
773 CALL ortho_vectors(tmp_fm)
774 c_tilde%local_data = tmp_fm%local_data
776 ALLOCATE (tmp_cmat(northo, nextra))
779 DEALLOCATE (tmp_cmat)
781 CALL parallel_gemm(
"T",
"N", nmo, ndummy, nao, 1.0_dp, vectors_all, mos_guess, 0.0_dp, matrix_v_all)
782 ALLOCATE (tmp_arr(nmo))
783 ALLOCATE (tmp_mat(nmo, ndummy))
784 ALLOCATE (tmp_mat_1(nmo, nstate))
787 DO istate = 1, ndummy
788 tmp_arr(:) = tmp_mat(:, istate)
789 norm = norm2(tmp_arr)
790 tmp_arr(:) = tmp_arr(:)/norm
791 tmp_mat(:, istate) = tmp_arr(:)
796 DEALLOCATE (tmp_arr, tmp_mat, tmp_mat_1)
798 ALLOCATE (tmp_mat(northo, ndummy))
799 ALLOCATE (tmp_mat_1(northo, nextra))
801 ALLOCATE (tmp_arr(ndummy))
803 DO istate = 1, ndummy
804 tmp_arr(istate) = norm2(tmp_mat(:, istate))
807 DO istate = 1, nextra
808 iloc = maxloc(tmp_arr)
809 tmp_mat_1(:, istate) = tmp_mat(:, iloc(1))
810 tmp_arr(iloc(1)) = 0.0_dp
813 DEALLOCATE (tmp_arr, tmp_mat)
816 para_env=para_env, context=context)
820 DEALLOCATE (tmp_mat_1)
821 CALL ortho_vectors(tmp_fm)
825 IF (do_u_guess_mo)
THEN
826 ALLOCATE (tmp_cmat(nocc, nstate))
829 DEALLOCATE (tmp_cmat)
830 ALLOCATE (tmp_cmat(northo, nstate))
833 DEALLOCATE (tmp_cmat)
834 CALL parallel_gemm(
"C",
"N", nextra, nstate, northo, cone, c_tilde, vl, czero, ul)
835 ALLOCATE (tmp_cmat(nextra, nstate))
838 DEALLOCATE (tmp_cmat)
840 tmp_fm%local_data = real(u%local_data, kind=
dp)
841 CALL ortho_vectors(tmp_fm)
847 ALLOCATE (tmp_cmat(nocc, nstate))
850 DEALLOCATE (tmp_cmat)
851 ALLOCATE (tmp_cmat(nextra, nstate))
854 DEALLOCATE (tmp_cmat)
855 CALL parallel_gemm(
"N",
"N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
856 ALLOCATE (tmp_cmat(northo, nstate))
859 DEALLOCATE (tmp_cmat)
871 IF (rmat%matrix_struct%para_env%is_source())
THEN
873 WRITE (unit_nr,
'(T4,A )')
" Localization by combined Jacobi rotations and Non-Linear Conjugate Gradient"
876 norm2_old = 1.0e30_dp
878 new_direction = .true.
881 line_search_count = 0
890 DO WHILE (iter < max_iter)
899 IF (para_env%num_pe == 1)
THEN
900 CALL jacobi_rotations_serial_1(weights, c_zij, 1, tmp_cfm_2, tol_out=tol)
902 CALL jacobi_rot_para_1(weights, c_zij, para_env, 1, tmp_cfm_2, tol_out=tol)
904 CALL parallel_gemm(
'N',
'N', nstate, nstate, nstate, cone, u, tmp_cfm_2, czero, tmp_cfm)
911 ALLOCATE (tmp_cmat(nextra, nstate))
914 DEALLOCATE (tmp_cmat)
918 tmp_fm%local_data = real(c_tilde%local_data, kind=
dp)
919 CALL ortho_vectors(tmp_fm)
923 ALLOCATE (tmp_cmat(nocc, nstate))
926 DEALLOCATE (tmp_cmat)
927 CALL parallel_gemm(
"N",
"N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
928 ALLOCATE (tmp_cmat(northo, nstate))
931 DEALLOCATE (tmp_cmat)
935 IF (new_direction .AND. mod(line_searches, 20) .EQ. 5)
THEN
938 norm2_old = 1.0e30_dp
954 CALL parallel_gemm(
"N",
"N", ndummy, nstate, ndummy, cone, zij_0(idim), &
955 tmp_cfm, czero, tmp_cfm_1)
957 CALL parallel_gemm(
"C",
"N", nstate, nstate, ndummy, cone, tmp_cfm, tmp_cfm_1, &
963 DO istate = 1, nstate
967 spread_ii = spread_ii + weights(idim)* &
969 matrix_zii(istate, idim) = mzii
972 sum_spread(istate) = spread_ii
974 CALL c_zij(1)%matrix_struct%para_env%sum(spread_ii)
985 ALLOCATE (tmp_cmat(northo, nstate))
987 weight = weights(idim)
988 arr_zii = matrix_zii(:, idim)
991 zij_0(idim), v, czero, tmp_cfm)
995 CALL parallel_gemm(
"N",
"C", nmo, nstate, nstate, cone, tmp_cfm, &
1003 zij_0(idim), v, czero, tmp_cfm)
1008 CALL parallel_gemm(
"N",
"C", nmo, nstate, nstate, cone, tmp_cfm, &
1009 u, czero, tmp_cfm_1)
1016 DEALLOCATE (tmp_cmat)
1017 ALLOCATE (tmp_cmat(northo, nextra))
1019 northo, nextra, .false.)
1022 DEALLOCATE (tmp_cmat)
1024 CALL parallel_gemm(
"C",
"N", nextra, nextra, northo, cone, c_tilde, grad_ctilde, czero, ctrans_lambda)
1027 CALL parallel_gemm(
"N",
"N", northo, nextra, nextra, -cone, c_tilde, ctrans_lambda, cone, grad_ctilde)
1031 IF (nextra > 0)
THEN
1042 IF (nextra > 0)
THEN
1044 IF (new_direction)
THEN
1045 line_searches = line_searches + 1
1046 IF (mintol > tol)
THEN
1051 IF (unit_nr > 0 .AND.
modulo(iter, out_each) == 0)
THEN
1052 sum_spread_ii = alpha*nstate/
twopi/
twopi - spread_sum
1053 avg_spread_ii = sum_spread_ii/nstate
1054 WRITE (unit_nr,
'(T4,A,T26,A,T48,A)') &
1055 "Iteration",
"Avg. Spread_ii",
"Tolerance"
1056 WRITE (unit_nr,
'(T4,I7,T20,F20.10,T45,E12.4)') &
1057 iter, avg_spread_ii, tol
1060 IF (tol < eps_localization)
EXIT
1064 cnorm2_gct_cross = czero
1065 CALL cp_cfm_trace(grad_ctilde, gct_old, cnorm2_gct_cross)
1066 norm2_gct_cross = real(cnorm2_gct_cross, kind=
dp)
1067 gct_old%local_data = grad_ctilde%local_data
1069 norm2_gct = real(cnorm2_gct, kind=
dp)
1071 beta_pr = (norm2_gct - norm2_gct_cross)/norm2_old
1072 norm2_old = norm2_gct
1073 beta = max(0.0_dp, beta_pr)
1079 norm2_gct_cross = real(cnorm2_gct_cross, kind=
dp)
1080 IF (norm2_gct_cross .LE. 0.0_dp)
THEN
1086 line_search_count = 0
1089 line_search_count = line_search_count + 1
1091 energy(line_search_count) = spread_sum
1094 new_direction = .false.
1095 IF (line_search_count .EQ. 1)
THEN
1100 pos(2) = ds_min/gold_sec
1103 IF (line_search_count .EQ. 50)
THEN
1104 IF (abs(energy(line_search_count) - energy(line_search_count - 1)) < 1.0e-4_dp)
THEN
1105 cpwarn(
"Line search failed to converge properly")
1107 new_direction = .true.
1108 ds = pos(line_search_count)
1109 line_search_count = 0
1111 cpabort(
"No. of line searches exceeds 50")
1114 IF (lsr .EQ. 0)
THEN
1115 IF (energy(line_search_count - 1) .GT. energy(line_search_count))
THEN
1116 lsr = line_search_count
1117 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
1120 lsm = line_search_count
1121 pos(line_search_count + 1) = pos(line_search_count)/gold_sec
1124 IF (pos(line_search_count) .LT. pos(lsm))
THEN
1125 IF (energy(line_search_count) .GT. energy(lsm))
THEN
1127 lsm = line_search_count
1129 lsl = line_search_count
1132 IF (energy(line_search_count) .GT. energy(lsm))
THEN
1134 lsm = line_search_count
1136 lsr = line_search_count
1139 IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl))
THEN
1140 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
1142 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
1144 IF ((pos(lsr) - pos(lsl)) .LT. 1.0e-3_dp*pos(lsr))
THEN
1145 new_direction = .true.
1150 ds = pos(line_search_count + 1) - pos(line_search_count)
1152 IF ((abs(ds) < 1.0e-10_dp) .AND. (lsl == 1))
THEN
1153 new_direction = .true.
1154 ds_min = 0.5_dp/alpha
1155 ELSEIF (abs(ds) > 10.0_dp)
THEN
1156 new_direction = .true.
1157 ds_min = 0.5_dp/alpha
1159 ds_min = pos(line_search_count + 1)
1166 IF (mintol > tol)
THEN
1170 IF (unit_nr > 0 .AND.
modulo(iter, out_each) == 0)
THEN
1171 sum_spread_ii = alpha*nstate/
twopi/
twopi - spread_sum
1172 avg_spread_ii = sum_spread_ii/nstate
1173 WRITE (unit_nr,
'(T4,A,T26,A,T48,A)') &
1174 "Iteration",
"Avg. Spread_ii",
"Tolerance"
1175 WRITE (unit_nr,
'(T4,I7,T20,F20.10,T45,E12.4)') &
1176 iter, avg_spread_ii, tol
1179 IF (tol < eps_localization)
EXIT
1184 IF ((unit_nr > 0) .AND. (iter == max_iter))
THEN
1185 WRITE (unit_nr,
'(T4,A,T4,A)')
"Min. Itr.",
"Min. Tol."
1186 WRITE (unit_nr,
'(T4,I7,T4,E12.4)') miniter, mintol
1192 IF (nextra > 0)
THEN
1193 rmat%local_data = real(v%local_data, kind=
dp)
1194 CALL rotate_orbitals_edf(rmat, vectors_all, vectors)
1209 DEALLOCATE (arr_zii)
1211 rmat%local_data = matrix_u%local_data
1220 zij(1, idim)%local_data = real(c_zij(idim)%local_data,
dp)
1221 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
1228 DEALLOCATE (matrix_zii, sum_spread)
1230 CALL timestop(handle)
1238 SUBROUTINE ortho_vectors(vmatrix)
1242 CHARACTER(LEN=*),
PARAMETER :: routinen =
'ortho_vectors'
1244 INTEGER :: handle, n, ncol
1248 CALL timeset(routinen, handle)
1250 NULLIFY (fm_struct_tmp)
1252 CALL cp_fm_get_info(matrix=vmatrix, nrow_global=n, ncol_global=ncol)
1255 para_env=vmatrix%matrix_struct%para_env, &
1256 context=vmatrix%matrix_struct%context)
1257 CALL cp_fm_create(overlap_vv, fm_struct_tmp,
"overlap_vv")
1260 CALL parallel_gemm(
'T',
'N', ncol, ncol, n, 1.0_dp, vmatrix, vmatrix, 0.0_dp, overlap_vv)
1266 CALL timestop(handle)
1268 END SUBROUTINE ortho_vectors
1278 SUBROUTINE rotate_zij(istate, jstate, st, ct, zij)
1279 INTEGER,
INTENT(IN) :: istate, jstate
1280 REAL(kind=
dp),
INTENT(IN) :: st, ct
1287 DO id = 1,
SIZE(zij, 1)
1292 END SUBROUTINE rotate_zij
1301 SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat)
1302 INTEGER,
INTENT(IN) :: istate, jstate
1303 REAL(kind=
dp),
INTENT(IN) :: st, ct
1308 END SUBROUTINE rotate_rmat
1319 SUBROUTINE get_angle(mii, mjj, mij, weights, theta, grad_ij, step)
1320 COMPLEX(KIND=dp),
POINTER :: mii(:), mjj(:), mij(:)
1321 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1322 REAL(kind=
dp),
INTENT(OUT) :: theta
1323 REAL(kind=
dp),
INTENT(IN),
OPTIONAL :: grad_ij, step
1325 COMPLEX(KIND=dp) :: z11, z12, z22
1326 INTEGER :: dim_m, idim
1327 REAL(kind=
dp) :: a12, b12, d2, ratio
1336 a12 = a12 + weights(idim)*real(conjg(z12)*(z11 - z22), kind=
dp)
1337 b12 = b12 + weights(idim)*real((z12*conjg(z12) - &
1338 0.25_dp*(z11 - z22)*(conjg(z11) - conjg(z22))), kind=
dp)
1340 IF (abs(b12) > 1.e-10_dp)
THEN
1342 theta = 0.25_dp*atan(ratio)
1343 ELSEIF (abs(b12) < 1.e-10_dp)
THEN
1349 IF (
PRESENT(grad_ij)) theta = theta + step*grad_ij
1351 d2 = a12*sin(4._dp*theta) - b12*cos(4._dp*theta)
1352 IF (d2 <= 0._dp)
THEN
1353 IF (theta > 0.0_dp)
THEN
1354 theta = theta - 0.25_dp*
pi
1356 theta = theta + 0.25_dp*
pi
1359 END SUBROUTINE get_angle
1367 SUBROUTINE check_tolerance(zij, weights, tolerance, grad)
1369 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1370 REAL(kind=
dp),
INTENT(OUT) :: tolerance
1371 TYPE(
cp_fm_type),
INTENT(OUT),
OPTIONAL :: grad
1373 CHARACTER(len=*),
PARAMETER :: routinen =
'check_tolerance'
1378 CALL timeset(routinen, handle)
1384 CALL grad_at_0(zij, weights, force)
1389 CALL timestop(handle)
1391 END SUBROUTINE check_tolerance
1398 TYPE(
cp_fm_type),
INTENT(IN) :: rmat, vectors
1405 CALL parallel_gemm(
"N",
"N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf)
1415 SUBROUTINE rotate_orbitals_edf(rmat, vec_all, vectors)
1416 TYPE(
cp_fm_type),
INTENT(IN) :: rmat, vec_all, vectors
1425 CALL parallel_gemm(
"N",
"N", n, l, k, 1.0_dp, vec_all, rmat, 0.0_dp, wf)
1428 END SUBROUTINE rotate_orbitals_edf
1436 SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim)
1437 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag
1438 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1440 INTEGER,
INTENT(IN) :: ndim
1442 COMPLEX(KIND=dp) :: zii, zjj
1443 INTEGER :: idim, istate, jstate, ncol_local, &
1444 nrow_global, nrow_local
1445 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1446 REAL(kind=
dp) :: gradsq_ij
1449 ncol_local=ncol_local, nrow_global=nrow_global, &
1450 row_indices=row_indices, col_indices=col_indices)
1452 DO istate = 1, nrow_local
1453 DO jstate = 1, ncol_local
1457 zii = diag(row_indices(istate), idim)
1458 zjj = diag(col_indices(jstate), idim)
1459 gradsq_ij = gradsq_ij + weights(idim)* &
1460 4.0_dp*real((conjg(zii)*zii + conjg(zjj)*zjj), kind=
dp)
1462 matrix%local_data(istate, jstate) = gradsq_ij
1465 END SUBROUTINE gradsq_at_0
1472 SUBROUTINE grad_at_0(matrix_p, weights, matrix)
1474 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1477 COMPLEX(KIND=dp) :: zii, zij, zjj
1478 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag
1479 INTEGER :: dim_m, idim, istate, jstate, ncol_local, &
1480 nrow_global, nrow_local
1481 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1482 REAL(kind=
dp) :: grad_ij
1486 ncol_local=ncol_local, nrow_global=nrow_global, &
1487 row_indices=row_indices, col_indices=col_indices)
1488 dim_m =
SIZE(matrix_p, 1)
1489 ALLOCATE (diag(nrow_global, dim_m))
1492 DO istate = 1, nrow_global
1497 DO istate = 1, nrow_local
1498 DO jstate = 1, ncol_local
1502 zii = diag(row_indices(istate), idim)
1503 zjj = diag(col_indices(jstate), idim)
1504 zij = matrix_p(idim)%local_data(istate, jstate)
1505 grad_ij = grad_ij + weights(idim)* &
1506 REAL(4.0_dp*conjg(zij)*(zjj - zii),
dp)
1508 matrix%local_data(istate, jstate) = grad_ij
1512 END SUBROUTINE grad_at_0
1522 SUBROUTINE check_tolerance_new(weights, zij, tolerance, value)
1523 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1525 REAL(kind=
dp) :: tolerance,
value
1527 COMPLEX(KIND=dp) :: kii, kij, kjj
1528 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag
1529 INTEGER :: idim, istate, jstate, ncol_local, &
1530 nrow_global, nrow_local
1531 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1532 REAL(kind=
dp) :: grad_ij, ra, rb
1536 ncol_local=ncol_local, nrow_global=nrow_global, &
1537 row_indices=row_indices, col_indices=col_indices)
1538 ALLOCATE (diag(nrow_global,
SIZE(zij, 2)))
1540 DO idim = 1,
SIZE(zij, 2)
1541 DO istate = 1, nrow_global
1544 diag(istate, idim) = cmplx(ra, rb,
dp)
1545 value =
value + weights(idim) - weights(idim)*abs(diag(istate, idim))**2
1549 DO istate = 1, nrow_local
1550 DO jstate = 1, ncol_local
1552 DO idim = 1,
SIZE(zij, 2)
1553 kii = diag(row_indices(istate), idim)
1554 kjj = diag(col_indices(jstate), idim)
1555 ra = zij(1, idim)%local_data(istate, jstate)
1556 rb = zij(2, idim)%local_data(istate, jstate)
1557 kij = cmplx(ra, rb,
dp)
1558 grad_ij = grad_ij + weights(idim)* &
1559 REAL(4.0_dp*conjg(kij)*(kjj - kii),
dp)
1561 tolerance = max(abs(grad_ij), tolerance)
1564 CALL zij(1, 1)%matrix_struct%para_env%max(tolerance)
1568 END SUBROUTINE check_tolerance_new
1584 SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, &
1585 eps_localization, iterations, converged)
1586 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1587 TYPE(
cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
1588 INTEGER,
INTENT(IN) :: max_iter
1589 REAL(kind=
dp),
INTENT(IN) :: max_crazy_angle
1590 REAL(kind=
dp) :: crazy_scale
1591 LOGICAL :: crazy_use_diag
1592 REAL(kind=
dp),
INTENT(IN) :: eps_localization
1593 INTEGER :: iterations
1594 LOGICAL,
INTENT(out),
OPTIONAL :: converged
1596 CHARACTER(len=*),
PARAMETER :: routinen =
'crazy_rotations'
1597 COMPLEX(KIND=dp),
PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1598 czero = (0.0_dp, 0.0_dp)
1600 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER :: evals_exp
1601 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag_z
1602 COMPLEX(KIND=dp),
POINTER :: mii(:), mij(:), mjj(:)
1603 INTEGER :: dim2, handle, i, icol, idim, irow, &
1604 method, ncol_global, ncol_local, &
1605 norder, nrow_global, nrow_local, &
1607 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1609 REAL(kind=
dp) :: eps_exp, limit_crazy_angle, maxeval, &
1610 norm, ra, rb, theta, tolerance,
value
1611 REAL(kind=
dp),
DIMENSION(:),
POINTER :: evals
1613 TYPE(
cp_fm_type) :: mat_r, mat_t, mat_theta, mat_u
1615 CALL timeset(routinen, handle)
1616 NULLIFY (row_indices, col_indices)
1618 ncol_global=ncol_global, &
1619 row_indices=row_indices, col_indices=col_indices, &
1620 nrow_local=nrow_local, ncol_local=ncol_local)
1622 limit_crazy_angle = max_crazy_angle
1624 NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj)
1626 ALLOCATE (diag_z(nrow_global, dim2))
1627 ALLOCATE (evals(nrow_global))
1628 ALLOCATE (evals_exp(nrow_global))
1642 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
1650 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1651 CALL parallel_gemm(
'T',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1652 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1653 CALL parallel_gemm(
'T',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1656 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1660 IF (cmat_a%matrix_struct%para_env%is_source())
THEN
1662 WRITE (unit_nr,
'(T2,A7,A6,1X,A20,A12,A12,A12)') &
1663 "CRAZY| ",
"Iter",
"value ",
"gradient",
"Max. eval",
"limit"
1670 iterations = iterations + 1
1672 DO i = 1, nrow_global
1675 diag_z(i, idim) = cmplx(ra, rb,
dp)
1678 DO irow = 1, nrow_local
1679 DO icol = 1, ncol_local
1681 ra = zij(1, idim)%local_data(irow, icol)
1682 rb = zij(2, idim)%local_data(irow, icol)
1683 mij(idim) = cmplx(ra, rb,
dp)
1684 mii(idim) = diag_z(row_indices(irow), idim)
1685 mjj(idim) = diag_z(col_indices(icol), idim)
1687 IF (row_indices(irow) .NE. col_indices(icol))
THEN
1688 CALL get_angle(mii, mjj, mij, weights, theta)
1689 theta = crazy_scale*theta
1690 IF (theta .GT. limit_crazy_angle) theta = limit_crazy_angle
1691 IF (theta .LT. -limit_crazy_angle) theta = -limit_crazy_angle
1692 IF (crazy_use_diag)
THEN
1693 cmat_a%local_data(irow, icol) = -cmplx(0.0_dp, theta,
dp)
1695 mat_theta%local_data(irow, icol) = -theta
1698 IF (crazy_use_diag)
THEN
1699 cmat_a%local_data(irow, icol) = czero
1701 mat_theta%local_data(irow, icol) = 0.0_dp
1709 IF (crazy_use_diag)
THEN
1711 maxeval = maxval(abs(evals))
1712 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1715 CALL parallel_gemm(
'N',
'C', nrow_global, nrow_global, nrow_global, cone, &
1716 cmat_t1, cmat_r, czero, cmat_a)
1717 mat_u%local_data = real(cmat_a%local_data, kind=
dp)
1721 eps_exp = 1.0_dp*epsilon(eps_exp)
1730 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1731 CALL parallel_gemm(
'T',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1732 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1733 CALL parallel_gemm(
'T',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1736 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1739 CALL check_tolerance_new(weights, zij, tolerance,
value)
1741 IF (unit_nr > 0)
THEN
1742 WRITE (unit_nr,
'(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') &
1743 "CRAZY| ", iterations,
value, tolerance, maxeval, limit_crazy_angle
1746 IF (tolerance .LT. eps_localization .OR. iterations .GE. max_iter)
EXIT
1749 IF (
PRESENT(converged)) converged = (tolerance .LT. eps_localization)
1762 DEALLOCATE (evals_exp, evals, diag_z)
1763 DEALLOCATE (mii, mij, mjj)
1765 CALL timestop(handle)
1783 SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
1784 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1785 TYPE(
cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
1786 INTEGER,
INTENT(IN) :: max_iter
1787 REAL(kind=
dp),
INTENT(IN) :: eps_localization
1788 INTEGER :: iterations
1790 CHARACTER(len=*),
PARAMETER :: routinen =
'direct_mini'
1791 COMPLEX(KIND=dp),
PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1792 czero = (0.0_dp, 0.0_dp)
1793 REAL(kind=
dp),
PARAMETER :: gold_sec = 0.3819_dp
1795 COMPLEX(KIND=dp) :: lk, ll, tmp
1796 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER :: evals_exp
1797 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag_z
1798 INTEGER :: handle, i, icol, idim, irow, &
1799 line_search_count, line_searches, lsl, &
1800 lsm, lsr, n, ncol_local, ndim, &
1801 nrow_local, output_unit
1802 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1803 LOGICAL :: new_direction
1804 REAL(kind=
dp) :: a, b, beta_pr, c, denom, ds, ds_min, fa, &
1805 fb, fc, nom, normg, normg_cross, &
1806 normg_old, npos, omega, tol, val, x0, &
1808 REAL(kind=
dp),
DIMENSION(150) :: energy, grad, pos
1809 REAL(kind=
dp),
DIMENSION(:),
POINTER :: evals, fval, fvald
1810 TYPE(
cp_cfm_type) :: cmat_a, cmat_b, cmat_m, cmat_r, cmat_t1, &
1812 TYPE(
cp_cfm_type),
ALLOCATABLE,
DIMENSION(:) :: c_zij
1813 TYPE(
cp_fm_type) :: matrix_a, matrix_g, matrix_g_old, &
1814 matrix_g_search, matrix_h, matrix_r, &
1817 NULLIFY (evals, evals_exp, diag_z, fval, fvald)
1819 CALL timeset(routinen, handle)
1822 n = zij(1, 1)%matrix_struct%nrow_global
1823 ndim = (
SIZE(zij, 2))
1825 IF (output_unit > 0)
THEN
1826 WRITE (output_unit,
'(T4,A )')
"Localization by direct minimization of the functional; "
1827 WRITE (output_unit,
'(T5,2A13,A20,A20,A10 )')
" Line search ",
" Iteration ",
" Functional ",
" Tolerance ",
" ds Min "
1830 ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n))
1831 ALLOCATE (c_zij(ndim))
1836 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
1837 zij(2, idim)%local_data,
dp)
1844 CALL cp_fm_create(matrix_g_search, zij(1, 1)%matrix_struct)
1845 CALL cp_fm_create(matrix_g_old, zij(1, 1)%matrix_struct)
1860 CALL cp_cfm_get_info(cmat_b, nrow_local=nrow_local, ncol_local=ncol_local, &
1861 row_indices=row_indices, col_indices=col_indices)
1865 normg_old = 1.0e30_dp
1867 new_direction = .true.
1870 line_search_count = 0
1872 iterations = iterations + 1
1874 cmat_a%local_data = cmplx(0.0_dp, matrix_a%local_data,
dp)
1876 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1879 CALL parallel_gemm(
'N',
'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_u)
1880 cmat_u%local_data = real(cmat_u%local_data, kind=
dp)
1882 IF (new_direction .AND. mod(line_searches, 20) .EQ. 5)
THEN
1884 CALL parallel_gemm(
'N',
'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1)
1885 CALL parallel_gemm(
'C',
'N', n, n, n, cone, cmat_u, cmat_t1, czero, c_zij(idim))
1888 matrix_h%local_data = real(cmat_u%local_data, kind=
dp)
1889 CALL parallel_gemm(
'N',
'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
1897 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1900 normg_old = 1.0e30_dp
1907 CALL parallel_gemm(
'N',
'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1)
1908 CALL parallel_gemm(
'C',
'N', n, n, n, cone, cmat_u, cmat_t1, czero, cmat_t2)
1913 fval(i) = -weights(idim)*log(abs(diag_z(i, idim))**2)
1914 fvald(i) = -weights(idim)/(abs(diag_z(i, idim))**2)
1916 fval(i) = weights(idim) - weights(idim)*abs(diag_z(i, idim))**2
1917 fvald(i) = -weights(idim)
1919 omega = omega + fval(i)
1921 DO icol = 1, ncol_local
1922 DO irow = 1, nrow_local
1923 tmp = cmat_t1%local_data(irow, icol)*conjg(diag_z(col_indices(icol), idim))
1924 cmat_m%local_data(irow, icol) = cmat_m%local_data(irow, icol) &
1925 + 4.0_dp*fvald(col_indices(icol))*real(tmp, kind=
dp)
1932 CALL gradsq_at_0(diag_z, weights, matrix_h, ndim)
1938 DO icol = 1, ncol_local
1939 DO irow = 1, nrow_local
1940 ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow))
1941 lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol))
1942 IF (abs(ll - lk) .LT. 0.5_dp)
THEN
1944 cmat_b%local_data(irow, icol) = 0.0_dp
1946 cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol) + tmp
1947 tmp = tmp*(ll - lk)/(i + 1)
1949 cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol)*exp(lk)
1951 cmat_b%local_data(irow, icol) = (exp(lk) - exp(ll))/(lk - ll)
1957 CALL parallel_gemm(
'C',
'N', n, n, n, cone, cmat_m, cmat_r, czero, cmat_t1)
1958 CALL parallel_gemm(
'C',
'N', n, n, n, cone, cmat_r, cmat_t1, czero, cmat_t2)
1960 CALL parallel_gemm(
'N',
'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_t2)
1961 CALL parallel_gemm(
'N',
'N', n, n, n, cone, cmat_r, cmat_t2, czero, cmat_t1)
1962 matrix_g%local_data = real(cmat_t1%local_data, kind=
dp)
1968 IF (new_direction)
THEN
1970 line_searches = line_searches + 1
1979 IF (output_unit > 0)
THEN
1980 WRITE (output_unit,
'(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, iterations, omega, tol, ds_min
1983 IF (tol < eps_localization .OR. iterations > max_iter)
EXIT
1986 CALL cp_fm_trace(matrix_g, matrix_g_old, normg_cross)
1987 normg_cross = normg_cross*0.5_dp
1989 DO icol = 1, ncol_local
1990 DO irow = 1, nrow_local
1991 matrix_g_old%local_data(irow, icol) = matrix_g%local_data(irow, icol)/matrix_h%local_data(irow, icol)
1995 normg = normg*0.5_dp
1996 beta_pr = (normg - normg_cross)/normg_old
1998 beta_pr = max(beta_pr, 0.0_dp)
2000 CALL cp_fm_trace(matrix_g_search, matrix_g_old, normg_cross)
2001 IF (normg_cross .GE. 0)
THEN
2002 IF (matrix_a%matrix_struct%para_env%is_source())
THEN
2012 line_search_count = 0
2014 line_search_count = line_search_count + 1
2015 energy(line_search_count) = omega
2020 SELECT CASE (line_search_count)
2024 CALL cp_fm_trace(matrix_g, matrix_g_search, grad(1))
2025 grad(1) = grad(1)/2.0_dp
2026 new_direction = .false.
2028 new_direction = .true.
2033 a = (energy(2) - b*x1 - c)/(x1**2)
2034 IF (a .LE. 0.0_dp) a = 1.0e-15_dp
2035 npos = -b/(2.0_dp*a)
2036 val = a*npos**2 + b*npos + c
2037 IF (val .LT. energy(1) .AND. val .LE. energy(2))
THEN
2040 pos(3) = min(npos, maxval(pos(1:2))*4.0_dp)
2042 pos(3) = maxval(pos(1:2))*2.0_dp
2046 SELECT CASE (line_search_count)
2048 new_direction = .false.
2050 pos(2) = ds_min*0.8_dp
2052 new_direction = .false.
2053 IF (energy(2) .GT. energy(1))
THEN
2054 pos(3) = ds_min*0.7_dp
2056 pos(3) = ds_min*1.4_dp
2059 new_direction = .true.
2066 nom = (xb - xa)**2*(fb - fc) - (xb -
xc)**2*(fb - fa)
2067 denom = (xb - xa)*(fb - fc) - (xb -
xc)*(fb - fa)
2068 IF (abs(denom) .LE. 1.0e-18_dp*max(abs(fb - fc), abs(fb - fa)))
THEN
2071 npos = xb - 0.5_dp*nom/denom
2073 val = (npos - xa)*(npos - xb)*fc/((
xc - xa)*(
xc - xb)) + &
2074 (npos - xb)*(npos -
xc)*fa/((xa - xb)*(xa -
xc)) + &
2075 (npos -
xc)*(npos - xa)*fb/((xb -
xc)*(xb - xa))
2076 IF (val .LT. fa .AND. val .LE. fb .AND. val .LE. fc)
THEN
2078 pos(4) = max(maxval(pos(1:3))*0.01_dp, &
2079 min(npos, maxval(pos(1:3))*4.0_dp))
2081 pos(4) = maxval(pos(1:3))*2.0_dp
2085 new_direction = .false.
2086 IF (line_search_count .EQ. 1)
THEN
2091 pos(2) = ds_min/gold_sec
2093 IF (line_search_count .EQ. 150) cpabort(
"Too many")
2094 IF (lsr .EQ. 0)
THEN
2095 IF (energy(line_search_count - 1) .LT. energy(line_search_count))
THEN
2096 lsr = line_search_count
2097 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
2100 lsm = line_search_count
2101 pos(line_search_count + 1) = pos(line_search_count)/gold_sec
2104 IF (pos(line_search_count) .LT. pos(lsm))
THEN
2105 IF (energy(line_search_count) .LT. energy(lsm))
THEN
2107 lsm = line_search_count
2109 lsl = line_search_count
2112 IF (energy(line_search_count) .LT. energy(lsm))
THEN
2114 lsm = line_search_count
2116 lsr = line_search_count
2119 IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl))
THEN
2120 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
2122 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
2124 IF ((pos(lsr) - pos(lsl)) .LT. 1.0e-3_dp*pos(lsr))
THEN
2125 new_direction = .true.
2131 ds_min = pos(line_search_count + 1)
2132 ds = pos(line_search_count + 1) - pos(line_search_count)
2137 matrix_h%local_data = real(cmat_u%local_data, kind=
dp)
2138 CALL parallel_gemm(
'N',
'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
2157 DEALLOCATE (evals, evals_exp, fval, fvald)
2159 DO idim = 1,
SIZE(c_zij)
2160 zij(1, idim)%local_data = real(c_zij(idim)%local_data,
dp)
2161 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
2167 CALL timestop(handle)
2188 SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
2189 sweeps, out_each, target_time, start_time, restricted)
2191 REAL(kind=
dp),
INTENT(IN) :: weights(:)
2192 TYPE(
cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
2194 INTEGER,
INTENT(IN) :: max_iter
2195 REAL(kind=
dp),
INTENT(IN) :: eps_localization
2197 INTEGER,
INTENT(IN) :: out_each
2198 REAL(
dp) :: target_time, start_time
2199 INTEGER :: restricted
2201 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_rot_para'
2203 INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2204 nblock, nblock_max, ns_me, nstate, &
2206 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: ns_bound
2207 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :) :: rotmat, z_ij_loc_im, z_ij_loc_re
2208 REAL(kind=
dp) :: xstate
2210 TYPE(set_c_2d_type),
DIMENSION(:),
POINTER :: cz_ij_loc
2212 CALL timeset(routinen, handle)
2225 IF (restricted > 0)
THEN
2226 IF (output_unit > 0)
THEN
2227 WRITE (output_unit,
'(T4,A,I2,A )')
"JACOBI: for the ROKS method, the last ", restricted,
" orbitals DO NOT ROTATE"
2229 nstate = nstate - restricted
2233 xstate = real(nstate,
dp)/real(para_env%num_pe,
dp)
2234 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2235 DO ip = 1, para_env%num_pe
2236 ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2237 ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2240 DO ip = 0, para_env%num_pe - 1
2241 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2242 nblock_max = max(nblock_max, nblock)
2246 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2247 ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2248 ALLOCATE (cz_ij_loc(dim2))
2250 DO ip = 0, para_env%num_pe - 1
2251 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2254 IF (para_env%mepos == ip)
THEN
2255 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, nblock))
2258 cz_ij_loc(idim)%c_array(j, i) = cmplx(z_ij_loc_re(j, i), z_ij_loc_im(j, i),
dp)
2264 DEALLOCATE (z_ij_loc_re)
2265 DEALLOCATE (z_ij_loc_im)
2267 ALLOCATE (rotmat(nstate, 2*nblock_max))
2269 CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, ns_bound, &
2270 cz_ij_loc, rotmat, output_unit, eps_localization=eps_localization, &
2271 target_time=target_time, start_time=start_time)
2273 ilow1 = ns_bound(para_env%mepos, 1)
2274 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2275 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2276 ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2278 DO ip = 0, para_env%num_pe - 1
2279 z_ij_loc_re = 0.0_dp
2280 z_ij_loc_im = 0.0_dp
2281 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2282 IF (ip == para_env%mepos)
THEN
2287 z_ij_loc_re(j, i) = real(cz_ij_loc(idim)%c_array(j, i),
dp)
2288 z_ij_loc_im(j, i) = aimag(cz_ij_loc(idim)%c_array(j, i))
2292 CALL para_env%bcast(z_ij_loc_re, ip)
2293 CALL para_env%bcast(z_ij_loc_im, ip)
2299 DO ip = 0, para_env%num_pe - 1
2300 z_ij_loc_re = 0.0_dp
2301 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2302 IF (ip == para_env%mepos)
THEN
2307 z_ij_loc_re(j, i) = rotmat(j, i)
2311 CALL para_env%bcast(z_ij_loc_re, ip)
2315 DEALLOCATE (z_ij_loc_re)
2316 DEALLOCATE (z_ij_loc_im)
2318 DEALLOCATE (cz_ij_loc(idim)%c_array)
2320 DEALLOCATE (cz_ij_loc)
2322 CALL para_env%sync()
2327 DEALLOCATE (ns_bound)
2329 CALL timestop(handle)
2331 END SUBROUTINE jacobi_rot_para
2343 SUBROUTINE jacobi_rot_para_1(weights, czij, para_env, max_iter, rmat, tol_out)
2345 REAL(kind=
dp),
INTENT(IN) :: weights(:)
2348 INTEGER,
INTENT(IN) :: max_iter
2350 REAL(
dp),
INTENT(OUT),
OPTIONAL :: tol_out
2352 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_rot_para_1'
2354 COMPLEX(KIND=dp),
ALLOCATABLE,
DIMENSION(:, :) :: czij_array
2355 INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2356 nblock, nblock_max, ns_me, nstate, &
2358 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: ns_bound
2359 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :) :: rotmat, z_ij_loc_re
2360 REAL(kind=
dp) :: xstate
2361 TYPE(set_c_2d_type),
DIMENSION(:),
POINTER :: cz_ij_loc
2363 CALL timeset(routinen, handle)
2372 xstate = real(nstate,
dp)/real(para_env%num_pe,
dp)
2373 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2374 DO ip = 1, para_env%num_pe
2375 ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2376 ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2379 DO ip = 0, para_env%num_pe - 1
2380 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2381 nblock_max = max(nblock_max, nblock)
2385 ALLOCATE (czij_array(nstate, nblock_max))
2386 ALLOCATE (cz_ij_loc(dim2))
2388 DO ip = 0, para_env%num_pe - 1
2389 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2392 IF (para_env%mepos == ip)
THEN
2394 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me))
2397 cz_ij_loc(idim)%c_array(j, i) = czij_array(j, i)
2403 DEALLOCATE (czij_array)
2405 ALLOCATE (rotmat(nstate, 2*nblock_max))
2407 CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, 1, dim2, nstate, nblock_max, ns_bound, &
2408 cz_ij_loc, rotmat, 0, tol_out=tol_out)
2410 ilow1 = ns_bound(para_env%mepos, 1)
2411 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2412 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2414 DO ip = 0, para_env%num_pe - 1
2415 z_ij_loc_re = 0.0_dp
2416 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2417 IF (ip == para_env%mepos)
THEN
2422 z_ij_loc_re(j, i) = rotmat(j, i)
2426 CALL para_env%bcast(z_ij_loc_re, ip)
2430 DEALLOCATE (z_ij_loc_re)
2432 DEALLOCATE (cz_ij_loc(idim)%c_array)
2434 DEALLOCATE (cz_ij_loc)
2436 CALL para_env%sync()
2439 DEALLOCATE (ns_bound)
2441 CALL timestop(handle)
2443 END SUBROUTINE jacobi_rot_para_1
2467 SUBROUTINE jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, &
2468 ns_bound, cz_ij_loc, rotmat, output_unit, tol_out, eps_localization, target_time, start_time)
2470 REAL(kind=
dp),
INTENT(IN) :: weights(:)
2472 INTEGER,
INTENT(IN) :: max_iter
2473 INTEGER,
INTENT(OUT) :: sweeps
2474 INTEGER,
INTENT(IN) :: out_each, dim2, nstate, nblock_max
2475 INTEGER,
DIMENSION(0:, :),
INTENT(IN) :: ns_bound
2476 TYPE(set_c_2d_type),
DIMENSION(:),
POINTER :: cz_ij_loc
2477 REAL(
dp),
DIMENSION(:, :),
INTENT(OUT) :: rotmat
2478 INTEGER,
INTENT(IN) :: output_unit
2479 REAL(
dp),
INTENT(OUT),
OPTIONAL :: tol_out
2480 REAL(kind=
dp),
INTENT(IN),
OPTIONAL :: eps_localization
2481 REAL(
dp),
OPTIONAL :: target_time, start_time
2483 COMPLEX(KIND=dp) :: zi, zj
2484 COMPLEX(KIND=dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: c_array_me, c_array_partner
2485 COMPLEX(KIND=dp),
POINTER :: mii(:), mij(:), mjj(:)
2486 INTEGER :: i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, ip, ip_has_i, &
2487 ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, iu1, iu2, iup1, &
2488 iup2, j, jj, jstate, k, kk, lsweep, n1, n2, npair, nperm, ns_me, ns_partner, &
2489 ns_recv_from, ns_recv_partner
2490 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: rcount, rdispl
2491 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: list_pair
2492 LOGICAL :: should_stop
2493 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :) :: gmat, rmat_loc, rmat_recv, rmat_send
2494 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: rmat_recv_all
2495 REAL(kind=
dp) :: ct, func, gmax, grad, ri, rj, st, t1, &
2496 t2, theta, tolerance, zc, zr
2497 TYPE(set_c_1d_type),
DIMENSION(:),
POINTER :: zdiag_all, zdiag_me
2498 TYPE(set_c_2d_type),
DIMENSION(:),
POINTER :: xyz_mix, xyz_mix_ns
2500 NULLIFY (zdiag_all, zdiag_me)
2501 NULLIFY (xyz_mix, xyz_mix_ns)
2502 NULLIFY (mii, mij, mjj)
2504 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
2506 ALLOCATE (rcount(para_env%num_pe), stat=istat)
2507 ALLOCATE (rdispl(para_env%num_pe), stat=istat)
2509 tolerance = 1.0e10_dp
2513 npair = (para_env%num_pe + 1)/2
2514 nperm = para_env%num_pe - mod(para_env%num_pe + 1, 2)
2515 ALLOCATE (list_pair(2, npair))
2519 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2520 ii = i - ns_bound(para_env%mepos, 1) + 1
2521 rotmat(i, ii) = 1.0_dp
2524 ALLOCATE (xyz_mix(dim2))
2525 ALLOCATE (xyz_mix_ns(dim2))
2526 ALLOCATE (zdiag_me(dim2))
2527 ALLOCATE (zdiag_all(dim2))
2529 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2530 IF (ns_me /= 0)
THEN
2531 ALLOCATE (c_array_me(nstate, ns_me, dim2))
2533 ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me))
2535 ALLOCATE (gmat(nstate, ns_me))
2539 ALLOCATE (zdiag_me(idim)%c_array(nblock_max))
2540 zdiag_me(idim)%c_array = cmplx(0.0_dp, 0.0_dp,
dp)
2541 ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max))
2542 zdiag_all(idim)%c_array = cmplx(0.0_dp, 0.0_dp,
dp)
2544 ALLOCATE (rmat_recv(nblock_max*2, nblock_max))
2545 ALLOCATE (rmat_send(nblock_max*2, nblock_max))
2548 ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1))
2550 IF (output_unit > 0)
THEN
2551 WRITE (output_unit,
'(T4,A )')
" Localization by iterative distributed Jacobi rotation"
2552 WRITE (output_unit,
'(T20,A12,T32, A22,T60, A12,A8 )')
"Iteration",
"Functional",
"Tolerance",
" Time "
2555 DO lsweep = 1, max_iter + 1
2557 IF (sweeps == max_iter + 1)
THEN
2558 IF (output_unit > 0)
THEN
2559 WRITE (output_unit, *)
' LOCALIZATION! loop did not converge within the maximum number of iterations.'
2560 WRITE (output_unit, *)
' Present Max. gradient = ', tolerance
2569 CALL eberlein(iperm, para_env, list_pair)
2573 IF (list_pair(1, ipair) == para_env%mepos)
THEN
2574 ip_partner = list_pair(2, ipair)
2576 ELSE IF (list_pair(2, ipair) == para_env%mepos)
THEN
2577 ip_partner = list_pair(1, ipair)
2581 IF (ip_partner >= 0)
THEN
2582 ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1
2588 IF (ns_partner*ns_me /= 0)
THEN
2590 ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner))
2592 DO i = 1, ns_me + ns_partner
2593 rmat_loc(i, i) = 1.0_dp
2596 ALLOCATE (c_array_partner(nstate, ns_partner, dim2))
2599 ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner))
2601 c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i)
2605 CALL para_env%sendrecv(msgin=c_array_me, dest=ip_partner, &
2606 msgout=c_array_partner, source=ip_partner)
2610 ilow1 = ns_bound(para_env%mepos, 1)
2611 iup1 = ns_bound(para_env%mepos, 1) + n1 - 1
2612 ilow2 = ns_bound(ip_partner, 1)
2613 iup2 = ns_bound(ip_partner, 1) + n2 - 1
2614 IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1))
THEN
2630 xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim)
2631 xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim)
2634 xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim)
2635 xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim)
2639 DO istate = 1, n1 + n2
2640 DO jstate = istate + 1, n1 + n2
2642 mii(idim) = xyz_mix(idim)%c_array(istate, istate)
2643 mij(idim) = xyz_mix(idim)%c_array(istate, jstate)
2644 mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate)
2646 CALL get_angle(mii, mjj, mij, weights, theta)
2651 zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate)
2652 zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate)
2653 xyz_mix(idim)%c_array(i, istate) = zi
2654 xyz_mix(idim)%c_array(i, jstate) = zj
2657 zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i)
2658 zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i)
2659 xyz_mix(idim)%c_array(istate, i) = zi
2660 xyz_mix(idim)%c_array(jstate, i) = zj
2665 ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate)
2666 rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate)
2667 rmat_loc(i, istate) = ri
2668 rmat_loc(i, jstate) = rj
2674 CALL para_env%sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, &
2675 rotmat(1:nstate, k:k + n2 - 1), ip_partner)
2677 IF (ilow1 < ilow2)
THEN
2681 CALL dgemm(
"N",
"N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, rmat_loc(1 + n1:, 1:n1), &
2682 n2, 0.0_dp, gmat(:, :), nstate)
2683 CALL dgemm(
"N",
"N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(1:, 1:), &
2684 n1 + n2, 1.0_dp, gmat(:, :), nstate)
2686 CALL dgemm(
"N",
"N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, &
2687 rmat_loc(1:, n2 + 1:), n1 + n2, 0.0_dp, gmat(:, :), nstate)
2691 CALL dgemm(
"N",
"N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(n2 + 1:, n2 + 1:), &
2692 n1, 1.0_dp, gmat(:, :), nstate)
2695 CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1)
2699 xyz_mix_ns(idim)%c_array(1:nstate, i) = cmplx(0.0_dp, 0.0_dp,
dp)
2703 DO jstate = 1, nstate
2705 xyz_mix_ns(idim)%c_array(jstate, istate) = &
2706 xyz_mix_ns(idim)%c_array(jstate, istate) + &
2707 c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1)
2712 DO jstate = 1, nstate
2714 xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + &
2715 c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1)
2721 DEALLOCATE (c_array_partner)
2726 xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i)
2733 cz_ij_loc(idim)%c_array(1:nstate, i) = cmplx(0.0_dp, 0.0_dp,
dp)
2737 IF (ns_partner*ns_me /= 0)
THEN
2739 DO i = 1, ns_me + ns_partner
2740 DO j = i + 1, ns_me + ns_partner
2742 rmat_loc(i, j) = rmat_loc(j, i)
2749 rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1)
2753 rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1)
2760 CALL para_env%allgather(rmat_send, rmat_recv_all)
2763 DO ip = 0, para_env%num_pe - 1
2765 ip_recv_from = mod(para_env%mepos - ip + para_env%num_pe, para_env%num_pe)
2766 rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from)
2768 ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1
2770 IF (ns_me /= 0)
THEN
2771 IF (ns_recv_from /= 0)
THEN
2773 ip_recv_partner = -1
2776 IF (list_pair(1, ipair) == ip_recv_from)
THEN
2777 ip_recv_partner = list_pair(2, ipair)
2779 ELSE IF (list_pair(2, ipair) == ip_recv_from)
THEN
2780 ip_recv_partner = list_pair(1, ipair)
2785 IF (ip_recv_partner >= 0)
THEN
2786 ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1
2788 IF (ns_recv_partner > 0)
THEN
2789 il1 = ns_bound(para_env%mepos, 1)
2790 il_recv = ns_bound(ip_recv_from, 1)
2791 il_recv_partner = ns_bound(ip_recv_partner, 1)
2795 DO i = 1, ns_recv_from
2796 ii = il_recv + i - 1
2799 DO k = 1, ns_recv_from
2800 kk = il_recv + k - 1
2801 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2802 rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2806 DO i = 1, ns_recv_from
2807 ii = il_recv + i - 1
2810 DO k = 1, ns_recv_partner
2811 kk = il_recv_partner + k - 1
2812 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2813 rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2819 il1 = ns_bound(para_env%mepos, 1)
2820 il_recv = ns_bound(ip_recv_from, 1)
2824 DO i = 1, ns_recv_from
2825 ii = il_recv + i - 1
2826 cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j)
2835 IF (ns_partner*ns_me /= 0)
THEN
2836 DEALLOCATE (rmat_loc)
2838 DEALLOCATE (xyz_mix(idim)%c_array)
2846 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2847 ii = i - ns_bound(para_env%mepos, 1) + 1
2848 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2849 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2851 rcount(:) =
SIZE(zdiag_me(idim)%c_array)
2853 DO ip = 2, para_env%num_pe
2854 rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
2857 CALL para_env%allgatherv(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl)
2861 DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2862 k = j - ns_bound(para_env%mepos, 1) + 1
2865 DO ip = 0, para_env%num_pe - 1
2866 IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2))
THEN
2871 ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1
2873 jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1
2876 zi = zdiag_all(idim)%c_array(ii)
2877 zj = zdiag_all(idim)%c_array(jj)
2878 grad = grad + weights(idim)*real(4.0_dp*conjg(cz_ij_loc(idim)%c_array(i, k))*(zj - zi),
dp)
2880 gmax = max(gmax, abs(grad))
2884 CALL para_env%max(gmax)
2886 IF (
PRESENT(tol_out)) tol_out = tolerance
2889 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2890 k = i - ns_bound(para_env%mepos, 1) + 1
2892 zr = real(cz_ij_loc(idim)%c_array(i, k),
dp)
2893 zc = aimag(cz_ij_loc(idim)%c_array(i, k))
2894 func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/
twopi/
twopi
2897 CALL para_env%sum(func)
2900 IF (output_unit > 0 .AND.
modulo(sweeps, out_each) == 0)
THEN
2901 WRITE (output_unit,
'(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1
2904 IF (
PRESENT(eps_localization))
THEN
2905 IF (tolerance < eps_localization)
EXIT
2907 IF (
PRESENT(target_time) .AND.
PRESENT(start_time))
THEN
2908 CALL external_control(should_stop,
"LOC", target_time=target_time, start_time=start_time)
2909 IF (should_stop)
EXIT
2915 DEALLOCATE (rmat_recv_all)
2917 DEALLOCATE (rmat_recv)
2918 DEALLOCATE (rmat_send)
2920 DEALLOCATE (c_array_me)
2923 DEALLOCATE (zdiag_me(idim)%c_array)
2924 DEALLOCATE (zdiag_all(idim)%c_array)
2926 DEALLOCATE (zdiag_me)
2927 DEALLOCATE (zdiag_all)
2928 DEALLOCATE (xyz_mix)
2930 IF (ns_me /= 0)
THEN
2931 DEALLOCATE (xyz_mix_ns(idim)%c_array)
2934 DEALLOCATE (xyz_mix_ns)
2935 IF (ns_me /= 0)
THEN
2941 DEALLOCATE (list_pair)
2943 END SUBROUTINE jacobi_rot_para_core
2951 SUBROUTINE eberlein(iperm, para_env, list_pair)
2952 INTEGER,
INTENT(IN) :: iperm
2954 INTEGER,
DIMENSION(:, :) :: list_pair
2956 INTEGER :: i, ii, jj, npair
2958 npair = (para_env%num_pe + 1)/2
2959 IF (iperm == 1)
THEN
2961 DO i = 0, para_env%num_pe - 1
2962 ii = ((i + 1) + 1)/2
2963 jj = mod((i + 1) + 1, 2) + 1
2964 list_pair(jj, ii) = i
2966 IF (mod(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1
2967 ELSEIF (mod(iperm, 2) == 0)
THEN
2969 jj = list_pair(1, npair)
2971 list_pair(1, i) = list_pair(1, i - 1)
2973 list_pair(1, 2) = list_pair(2, 1)
2974 list_pair(2, 1) = jj
2977 jj = list_pair(2, 1)
2979 list_pair(2, i) = list_pair(2, i + 1)
2981 list_pair(2, npair) = jj
2984 END SUBROUTINE eberlein
2995 TYPE(
dbcsr_p_type),
DIMENSION(:, :),
POINTER :: op_sm_set
2996 TYPE(
cp_fm_type),
DIMENSION(:, :),
INTENT(IN) :: zij_fm_set
2998 CHARACTER(len=*),
PARAMETER :: routinen =
'zij_matrix'
3000 INTEGER :: handle, i, j, nao, nmoloc
3003 CALL timeset(routinen, handle)
3006 CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc)
3011 DO i = 1,
SIZE(zij_fm_set, 2)
3012 DO j = 1,
SIZE(zij_fm_set, 1)
3015 CALL parallel_gemm(
"T",
"N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, &
3021 CALL timestop(handle)
3033 CHARACTER(len=*),
PARAMETER :: routinen =
'scdm_qrfact'
3035 INTEGER :: handle, ncolt, nrowt
3036 REAL(kind=
dp),
DIMENSION(:),
POINTER :: tau
3040 CALL timeset(routinen, handle)
3043 nrowt = vectors%matrix_struct%ncol_global
3044 ncolt = vectors%matrix_struct%nrow_global
3047 nrow_global=nrowt, ncol_global=ncolt)
3051 ALLOCATE (tau(nrowt))
3060 context=ctp%matrix_struct%context, nrow_global=ctp%matrix_struct%nrow_global, &
3061 ncol_global=ctp%matrix_struct%nrow_global)
3073 CALL parallel_gemm(
'N',
'N', ncolt, nrowt, nrowt, 1.0_dp, tmp, qf, 0.0_dp, vectors)
3081 CALL timestop(handle)
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
Handles all functions related to the CELL.
methods related to the blacs parallel environment
Basic linear algebra operations for complex full matrices.
subroutine, public cp_cfm_scale_and_add(alpha, matrix_a, beta, matrix_b)
Scale and add two BLACS matrices (a = alpha*a + beta*b).
subroutine, public cp_cfm_rot_rows(matrix, irow, jrow, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
subroutine, public cp_cfm_schur_product(matrix_a, matrix_b, matrix_c)
Computes the element-wise (Schur) product of two matrices: C = A \circ B .
subroutine, public cp_cfm_column_scale(matrix_a, scaling)
Scales columns of the full matrix by corresponding factors.
subroutine, public cp_cfm_rot_cols(matrix, icol, jcol, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
subroutine, public cp_cfm_trace(matrix_a, matrix_b, trace)
Returns the trace of matrix_a^T matrix_b, i.e sum_{i,j}(matrix_a(i,j)*matrix_b(i,j)) .
used for collecting diagonalization schemes available for cp_cfm_type
subroutine, public cp_cfm_heevd(matrix, eigenvectors, eigenvalues)
Perform a diagonalisation of a complex matrix.
Represents a complex full matrix distributed on many processors.
subroutine, public cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
Extract a sub-matrix from the full matrix: op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n...
subroutine, public cp_cfm_get_element(matrix, irow_global, icol_global, alpha)
Get the matrix element by its global index.
subroutine, public cp_cfm_create(matrix, matrix_struct, name)
Creates a new full matrix with the given structure.
subroutine, public cp_cfm_release(matrix)
Releases a full matrix.
subroutine, public cp_fm_to_cfm(msourcer, msourcei, mtarget)
Construct a complex full matrix by taking its real and imaginary parts from two separate real-value f...
subroutine, public cp_cfm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, matrix_struct, para_env)
Returns information about a full matrix.
subroutine, public cp_cfm_set_submatrix(matrix, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
Set a sub-matrix of the full matrix: matrix(start_row:start_row+n_rows,start_col:start_col+n_cols) = ...
subroutine, public cp_cfm_set_all(matrix, alpha, beta)
Set all elements of the full matrix to alpha. Besides, set all diagonal matrix elements to beta (if g...
subroutine, public cp_cfm_to_fm(msource, mtargetr, mtargeti)
Copy real and imaginary parts of a complex full matrix into separate real-value full matrices.
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_sm_fm_multiply(matrix, fm_in, fm_out, ncol, alpha, beta)
multiply a dbcsr with a fm matrix
Routines to handle the external control of CP2K.
subroutine, public external_control(should_stop, flag, globenv, target_time, start_time, force_check)
External manipulations during a run : when the <PROJECT_NAME>.EXIT_$runtype command is sent the progr...
basic linear algebra operations for full matrices
subroutine, public cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
compute a QR factorization with column pivoting of a M-by-N distributed matrix sub( A ) = A(IA:IA+M-1...
real(kind=dp) function, public cp_fm_frobenius_norm(matrix_a)
computes the Frobenius norm of matrix_a
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_scale(alpha, matrix_a)
scales a matrix matrix_a = alpha * matrix_b
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
subroutine, public cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1) with orthonormal column...
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_decompose(matrix, n, info_out)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
subroutine, public cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
Computes all eigenvalues and vectors of a real symmetric matrix significantly faster than syevx,...
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
returns an element of a fm this value is valid on every cpu using this call is expensive
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_maxabsrownorm(matrix, a_max)
find the maximum over the rows of the sum of the absolute values of the elements of a given row = || ...
subroutine, public cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
copy just a part ot the matrix
subroutine, public cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
find the maximum absolute value of the matrix element maxval(abs(matrix))
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_init_random(matrix, ncol, start_col)
fills a matrix with random numbers
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Defines the basic variable types.
integer, parameter, public dp
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition of mathematical constants and functions.
real(kind=dp), parameter, public pi
real(kind=dp), parameter, public twopi
Routines for calculating a complex matrix exponential.
subroutine, public exp_pade_real(exp_h, matrix, nsquare, npade)
exponential of a real matrix, calculated using pade approximation together with scaling and squaring
subroutine, public get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
optimization function for pade/taylor order and number of squaring steps
Interface to the message passing library MPI.
basic linear algebra operations for full matrixes
Localization methods such as 2x2 Jacobi rotations Steepest Decents Conjugate Gradient.
subroutine, public approx_l1_norm_sd(c, iterations, eps, converged, sweeps)
...
subroutine, public rotate_orbitals(rmat, vectors)
...
subroutine, public jacobi_rotations(weights, zij, vectors, para_env, max_iter, eps_localization, sweeps, out_each, target_time, start_time, restricted)
wrapper for the jacobi routines, should be removed if jacobi_rot_para can deal with serial para_envs.
subroutine, public zij_matrix(vectors, op_sm_set, zij_fm_set)
...
subroutine, public direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
use the exponential parametrization as described in to perform a direct mini Gerd Berghold et al....
subroutine, public initialize_weights(cell, weights)
...
subroutine, public crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, eps_localization, iterations, converged)
yet another crazy try, computes the angles needed to rotate the orbitals first and rotates them all a...
subroutine, public scdm_qrfact(vectors)
...
subroutine, public jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
combine jacobi rotations (serial) and conjugate gradient with golden section line search for partiall...
Exchange and Correlation functional calculations.
Type defining parameters related to the simulation cell.
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
Represent a complex full matrix.
keeps the information about the structure of a full matrix
stores all the informations relevant to an mpi environment