73#include "./base/base_uses.f90"
80 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'qs_localization_methods'
85 COMPLEX(KIND=dp),
POINTER,
DIMENSION(:) :: c_array => null()
86 END TYPE set_c_1d_type
89 COMPLEX(KIND=dp),
POINTER,
DIMENSION(:, :) :: c_array => null()
90 END TYPE set_c_2d_type
103 INTEGER,
INTENT(IN) :: iterations
104 REAL(kind=
dp),
INTENT(IN) :: eps
105 LOGICAL,
INTENT(INOUT) :: converged
106 INTEGER,
INTENT(INOUT) :: sweeps
108 CHARACTER(len=*),
PARAMETER :: routinen =
'approx_l1_norm_sd'
109 INTEGER,
PARAMETER :: taylor_order = 100
110 REAL(kind=
dp),
PARAMETER :: alpha = 0.1_dp, f2_eps = 0.01_dp
112 INTEGER :: handle, i, istep, k, n, ncol_local, &
113 nrow_local, output_unit, p
114 REAL(kind=
dp) :: expfactor, f2, f2old, gnorm, tnorm
120 CALL timeset(routinen, handle)
122 NULLIFY (context, para_env, fm_struct_k_k)
127 nrow_local=nrow_local, ncol_local=ncol_local, &
128 para_env=para_env, context=context)
130 nrow_global=k, ncol_global=k)
138 IF (output_unit > 0)
THEN
139 WRITE (output_unit,
'(1X)')
140 WRITE (output_unit,
'(2X,A)')
'-----------------------------------------------------------------------------'
141 WRITE (output_unit,
'(A,I5)')
' Nbr iterations =', iterations
142 WRITE (output_unit,
'(A,E10.2)')
' eps convergence =', eps
143 WRITE (output_unit,
'(A,I5)')
' Max Taylor order =', taylor_order
144 WRITE (output_unit,
'(A,E10.2)')
' f2 eps =', f2_eps
145 WRITE (output_unit,
'(A,E10.2)')
' alpha =', alpha
146 WRITE (output_unit,
'(A)')
' iteration approx_l1_norm g_norm rel_err'
153 DO istep = 1, iterations
161 f2 = f2 + sqrt(c%local_data(i, p)**2 + f2_eps)
164 CALL c%matrix_struct%para_env%sum(f2)
171 ctmp%local_data(i, p) = c%local_data(i, p)/sqrt(c%local_data(i, p)**2 + f2_eps)
174 CALL parallel_gemm(
'T',
'N', k, k, n, 1.0_dp, ctmp, c, 0.0_dp, g)
195 IF (tnorm > 1.0e-10_dp)
THEN
198 DO i = 2, taylor_order
200 CALL parallel_gemm(
'N',
'N', k, k, k, 1.0_dp, g, gp1, 0.0_dp, gp2)
203 expfactor = expfactor/real(i, kind=
dp)
207 IF (tnorm*expfactor < 1.0e-10_dp)
EXIT
212 CALL parallel_gemm(
'N',
'N', n, k, k, 1.0_dp, c, u, 0.0_dp, ctmp)
216 IF (output_unit > 0)
THEN
217 WRITE (output_unit,
'(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, abs((f2 - f2old)/f2)
223 IF (abs((f2 - f2old)/f2) <= eps .AND. istep > 1)
THEN
233 IF (output_unit > 0)
WRITE (output_unit,
'(A,E16.10)')
' sparseness function f2 = ', f2
259 CALL timestop(handle)
270 REAL(kind=
dp),
DIMENSION(:) :: weights
272 REAL(kind=
dp),
DIMENSION(3, 3) :: metric
274 cpassert(
ASSOCIATED(cell))
277 CALL dgemm(
'T',
'N', 3, 3, 3, 1._dp, cell%hmat(:, :), 3, cell%hmat(:, :), 3, 0.0_dp, metric(:, :), 3)
279 weights(1) = metric(1, 1) - metric(1, 2) - metric(1, 3)
280 weights(2) = metric(2, 2) - metric(1, 2) - metric(2, 3)
281 weights(3) = metric(3, 3) - metric(1, 3) - metric(2, 3)
282 weights(4) = metric(1, 2)
283 weights(5) = metric(1, 3)
284 weights(6) = metric(2, 3)
306 eps_localization, sweeps, out_each, target_time, start_time, restricted)
308 REAL(kind=
dp),
INTENT(IN) :: weights(:)
309 TYPE(
cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
311 INTEGER,
INTENT(IN) :: max_iter
312 REAL(kind=
dp),
INTENT(IN) :: eps_localization
314 INTEGER,
INTENT(IN) :: out_each
315 REAL(
dp) :: target_time, start_time
316 INTEGER :: restricted
318 IF (para_env%num_pe == 1)
THEN
319 CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, &
320 sweeps, out_each, restricted=restricted)
322 CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
323 sweeps, out_each, target_time, start_time, restricted=restricted)
340 SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, &
341 out_each, restricted)
342 REAL(kind=
dp),
INTENT(IN) :: weights(:)
343 TYPE(
cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
344 INTEGER,
INTENT(IN) :: max_iter
345 REAL(kind=
dp),
INTENT(IN) :: eps_localization
347 INTEGER,
INTENT(IN) :: out_each
348 INTEGER :: restricted
350 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_rotations_serial'
352 COMPLEX(KIND=dp),
POINTER :: mii(:), mij(:), mjj(:)
353 INTEGER :: dim2, handle, idim, istate, jstate, &
355 REAL(kind=
dp) :: ct, st, t1, t2, theta, tolerance
357 TYPE(
cp_cfm_type),
ALLOCATABLE,
DIMENSION(:) :: c_zij
360 CALL timeset(routinen, handle)
363 ALLOCATE (c_zij(dim2))
364 NULLIFY (mii, mij, mjj)
365 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
374 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
375 zij(2, idim)%local_data,
dp)
379 tolerance = 1.0e10_dp
383 IF (rmat%matrix_struct%para_env%is_source())
THEN
385 WRITE (unit_nr,
'(T4,A )')
" Localization by iterative Jacobi rotation"
388 IF (restricted > 0)
THEN
390 WRITE (unit_nr,
'(T4,A,I2,A )')
"JACOBI: for the ROKS method, the last ", restricted,
" orbitals DO NOT ROTATE"
391 nstate = nstate - restricted
395 DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
399 DO istate = 1, nstate
400 DO jstate = istate + 1, nstate
406 CALL get_angle(mii, mjj, mij, weights, theta)
409 CALL rotate_zij(istate, jstate, st, ct, c_zij)
411 CALL rotate_rmat(istate, jstate, st, ct, c_rmat)
415 CALL check_tolerance(c_zij, weights, tolerance)
418 IF (unit_nr > 0 .AND.
modulo(sweeps, out_each) == 0)
THEN
419 WRITE (unit_nr,
'(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') &
420 "Iteration:", sweeps,
"Tolerance:", tolerance,
"Time:", t2 - t1
427 zij(1, idim)%local_data = real(c_zij(idim)%local_data,
dp)
428 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
432 DEALLOCATE (mii, mij, mjj)
433 rmat%local_data = real(c_rmat%local_data,
dp)
440 CALL timestop(handle)
442 END SUBROUTINE jacobi_rotations_serial
456 SUBROUTINE jacobi_rotations_serial_1(weights, c_zij, max_iter, c_rmat, eps_localization, &
457 tol_out, jsweeps, out_each, c_zij_out, grad_final)
458 REAL(kind=
dp),
INTENT(IN) :: weights(:)
460 INTEGER,
INTENT(IN) :: max_iter
462 REAL(kind=
dp),
INTENT(IN),
OPTIONAL :: eps_localization
463 REAL(kind=
dp),
INTENT(OUT),
OPTIONAL :: tol_out
464 INTEGER,
INTENT(OUT),
OPTIONAL :: jsweeps
465 INTEGER,
INTENT(IN),
OPTIONAL :: out_each
466 TYPE(
cp_cfm_type),
INTENT(IN),
OPTIONAL :: c_zij_out(:)
467 TYPE(
cp_fm_type),
INTENT(OUT),
OPTIONAL,
POINTER :: grad_final
469 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_rotations_serial_1'
471 COMPLEX(KIND=dp) :: mzii
472 COMPLEX(KIND=dp),
POINTER :: mii(:), mij(:), mjj(:)
473 INTEGER :: dim2, handle, idim, istate, jstate, &
474 nstate, sweeps, unit_nr
475 REAL(kind=
dp) :: alpha, avg_spread_ii, ct, spread_ii, st, &
476 sum_spread_ii, t1, t2, theta, tolerance
480 CALL timeset(routinen, handle)
483 NULLIFY (mii, mij, mjj)
484 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
486 ALLOCATE (c_zij_local(dim2))
488 CALL cp_cfm_set_all(c_rmat_local, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
490 CALL cp_cfm_create(c_zij_local(idim), c_zij(idim)%matrix_struct)
491 c_zij_local(idim)%local_data = c_zij(idim)%local_data
495 tolerance = 1.0e10_dp
497 IF (
PRESENT(grad_final))
CALL cp_fm_set_all(grad_final, 0.0_dp)
500 IF (
PRESENT(out_each))
THEN
502 IF (c_rmat_local%matrix_struct%para_env%is_source())
THEN
507 alpha = alpha + weights(idim)
512 DO WHILE (sweeps < max_iter)
514 IF (
PRESENT(eps_localization))
THEN
515 IF (tolerance < eps_localization)
EXIT
519 DO istate = 1, nstate
520 DO jstate = istate + 1, nstate
526 CALL get_angle(mii, mjj, mij, weights, theta)
529 CALL rotate_zij(istate, jstate, st, ct, c_zij_local)
531 CALL rotate_rmat(istate, jstate, st, ct, c_rmat_local)
535 IF (
PRESENT(grad_final))
THEN
536 CALL check_tolerance(c_zij_local, weights, tolerance, grad=grad_final)
538 CALL check_tolerance(c_zij_local, weights, tolerance)
540 IF (
PRESENT(tol_out)) tol_out = tolerance
542 IF (
PRESENT(out_each))
THEN
544 IF (unit_nr > 0 .AND.
modulo(sweeps, out_each) == 0)
THEN
545 sum_spread_ii = 0.0_dp
546 DO istate = 1, nstate
550 spread_ii = spread_ii + weights(idim)* &
553 sum_spread_ii = sum_spread_ii + spread_ii
555 sum_spread_ii = alpha*nstate/
twopi/
twopi - sum_spread_ii
556 avg_spread_ii = sum_spread_ii/nstate
557 WRITE (unit_nr,
'(T4,A,T26,A,T48,A,T64,A)') &
558 "Iteration",
"Avg. Spread_ii",
"Tolerance",
"Time"
559 WRITE (unit_nr,
'(T4,I7,T20,F20.10,T45,E12.4,T60,F8.3)') &
560 sweeps, avg_spread_ii, tolerance, t2 - t1
563 IF (
PRESENT(jsweeps)) jsweeps = sweeps
568 IF (
PRESENT(c_zij_out))
THEN
575 DEALLOCATE (mii, mij, mjj)
579 DEALLOCATE (c_zij_local)
582 CALL timestop(handle)
584 END SUBROUTINE jacobi_rotations_serial_1
603 iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
605 REAL(kind=
dp),
INTENT(IN) :: weights(:)
606 TYPE(
cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
607 INTEGER,
INTENT(IN) :: max_iter
608 REAL(kind=
dp),
INTENT(IN) :: eps_localization
610 INTEGER,
INTENT(IN) :: out_each, nextra
611 LOGICAL,
INTENT(IN) :: do_cg
612 INTEGER,
INTENT(IN),
OPTIONAL :: nmo
613 TYPE(
cp_fm_type),
INTENT(IN),
OPTIONAL :: vectors_2, mos_guess
615 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_cg_edf_ls'
616 COMPLEX(KIND=dp),
PARAMETER :: cone = (1.0_dp, 0.0_dp), &
617 czero = (0.0_dp, 0.0_dp)
618 REAL(kind=
dp),
PARAMETER :: gold_sec = 0.3819_dp
620 COMPLEX(KIND=dp) :: cnorm2_gct, cnorm2_gct_cross, mzii
621 COMPLEX(KIND=dp),
ALLOCATABLE,
DIMENSION(:, :) :: tmp_cmat
622 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER :: arr_zii
623 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: matrix_zii
624 INTEGER :: dim2, handle, icinit, idim, istate, line_search_count, line_searches, lsl, lsm, &
625 lsr, miniter, nao, ndummy, nocc, norextra, northo, nstate, unit_nr
626 INTEGER,
DIMENSION(1) :: iloc
627 LOGICAL :: do_cinit_mo, do_cinit_random, &
628 do_u_guess_mo, new_direction
629 REAL(kind=
dp) :: alpha, avg_spread_ii, beta, beta_pr, ds, ds_min, mintol, norm, norm2_gct, &
630 norm2_gct_cross, norm2_old, spread_ii, spread_sum, sum_spread_ii, t1, tol, tolc, weight
631 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: sum_spread
632 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: tmp_mat, tmp_mat_1
633 REAL(kind=
dp),
DIMENSION(50) :: energy, pos
634 REAL(kind=
dp),
DIMENSION(:),
POINTER :: tmp_arr
636 TYPE(
cp_cfm_type) :: c_tilde, ctrans_lambda, gct_old, &
637 grad_ctilde, skc, tmp_cfm, tmp_cfm_1, &
638 tmp_cfm_2, u, ul, v, vl, zdiag
639 TYPE(
cp_cfm_type),
ALLOCATABLE,
DIMENSION(:) :: c_zij, zij_0
641 TYPE(
cp_fm_type) :: id_nextra, matrix_u, matrix_v, &
642 matrix_v_all, rmat, tmp_fm, vectors_all
644 CALL timeset(routinen, handle)
648 NULLIFY (matrix_zii, arr_zii)
649 NULLIFY (tmp_fm_struct)
652 ALLOCATE (c_zij(dim2))
656 ALLOCATE (sum_spread(nstate))
657 ALLOCATE (matrix_zii(nstate, dim2))
663 alpha = alpha + weights(idim)
665 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
666 zij(2, idim)%local_data,
dp)
669 ALLOCATE (zij_0(dim2))
679 IF (
PRESENT(mos_guess))
THEN
680 do_cinit_random = .false.
684 do_cinit_random = .true.
685 do_cinit_mo = .false.
689 IF (do_cinit_random)
THEN
691 do_u_guess_mo = .false.
692 ELSEIF (do_cinit_mo)
THEN
694 do_u_guess_mo = .true.
697 nocc = nstate - nextra
699 norextra = nmo - nstate
702 ALLOCATE (tmp_cmat(nstate, nstate))
704 para_env=para_env, context=context)
712 DEALLOCATE (tmp_cmat)
715 para_env=para_env, context=context)
725 para_env=para_env, context=context)
730 ALLOCATE (arr_zii(nstate))
733 para_env=para_env, context=context)
744 para_env=para_env, context=context)
750 para_env=para_env, context=context)
758 para_env=para_env, context=context)
764 para_env=para_env, context=context)
767 ALLOCATE (tmp_mat(nao, nstate))
771 ALLOCATE (tmp_mat(nao, norextra))
782 CALL ortho_vectors(tmp_fm)
783 c_tilde%local_data = tmp_fm%local_data
785 ALLOCATE (tmp_cmat(northo, nextra))
788 DEALLOCATE (tmp_cmat)
790 CALL parallel_gemm(
"T",
"N", nmo, ndummy, nao, 1.0_dp, vectors_all, mos_guess, 0.0_dp, matrix_v_all)
791 ALLOCATE (tmp_arr(nmo))
792 ALLOCATE (tmp_mat(nmo, ndummy))
793 ALLOCATE (tmp_mat_1(nmo, nstate))
796 DO istate = 1, ndummy
797 tmp_arr(:) = tmp_mat(:, istate)
798 norm = norm2(tmp_arr)
799 tmp_arr(:) = tmp_arr(:)/norm
800 tmp_mat(:, istate) = tmp_arr(:)
805 DEALLOCATE (tmp_arr, tmp_mat, tmp_mat_1)
807 ALLOCATE (tmp_mat(northo, ndummy))
808 ALLOCATE (tmp_mat_1(northo, nextra))
810 ALLOCATE (tmp_arr(ndummy))
812 DO istate = 1, ndummy
813 tmp_arr(istate) = norm2(tmp_mat(:, istate))
816 DO istate = 1, nextra
817 iloc = maxloc(tmp_arr)
818 tmp_mat_1(:, istate) = tmp_mat(:, iloc(1))
819 tmp_arr(iloc(1)) = 0.0_dp
822 DEALLOCATE (tmp_arr, tmp_mat)
825 para_env=para_env, context=context)
829 DEALLOCATE (tmp_mat_1)
830 CALL ortho_vectors(tmp_fm)
834 IF (do_u_guess_mo)
THEN
835 ALLOCATE (tmp_cmat(nocc, nstate))
838 DEALLOCATE (tmp_cmat)
839 ALLOCATE (tmp_cmat(northo, nstate))
842 DEALLOCATE (tmp_cmat)
843 CALL parallel_gemm(
"C",
"N", nextra, nstate, northo, cone, c_tilde, vl, czero, ul)
844 ALLOCATE (tmp_cmat(nextra, nstate))
847 DEALLOCATE (tmp_cmat)
849 tmp_fm%local_data = real(u%local_data, kind=
dp)
850 CALL ortho_vectors(tmp_fm)
856 ALLOCATE (tmp_cmat(nocc, nstate))
859 DEALLOCATE (tmp_cmat)
860 ALLOCATE (tmp_cmat(nextra, nstate))
863 DEALLOCATE (tmp_cmat)
864 CALL parallel_gemm(
"N",
"N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
865 ALLOCATE (tmp_cmat(northo, nstate))
868 DEALLOCATE (tmp_cmat)
880 IF (rmat%matrix_struct%para_env%is_source())
THEN
882 WRITE (unit_nr,
'(T4,A )')
" Localization by combined Jacobi rotations and Non-Linear Conjugate Gradient"
885 norm2_old = 1.0e30_dp
887 new_direction = .true.
890 line_search_count = 0
899 DO WHILE (iter < max_iter)
908 IF (para_env%num_pe == 1)
THEN
909 CALL jacobi_rotations_serial_1(weights, c_zij, 1, tmp_cfm_2, tol_out=tol)
911 CALL jacobi_rot_para_1(weights, c_zij, para_env, 1, tmp_cfm_2, tol_out=tol)
913 CALL parallel_gemm(
'N',
'N', nstate, nstate, nstate, cone, u, tmp_cfm_2, czero, tmp_cfm)
920 ALLOCATE (tmp_cmat(nextra, nstate))
923 DEALLOCATE (tmp_cmat)
927 tmp_fm%local_data = real(c_tilde%local_data, kind=
dp)
928 CALL ortho_vectors(tmp_fm)
932 ALLOCATE (tmp_cmat(nocc, nstate))
935 DEALLOCATE (tmp_cmat)
936 CALL parallel_gemm(
"N",
"N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
937 ALLOCATE (tmp_cmat(northo, nstate))
940 DEALLOCATE (tmp_cmat)
944 IF (new_direction .AND. mod(line_searches, 20) == 5)
THEN
947 norm2_old = 1.0e30_dp
963 CALL parallel_gemm(
"N",
"N", ndummy, nstate, ndummy, cone, zij_0(idim), &
964 tmp_cfm, czero, tmp_cfm_1)
966 CALL parallel_gemm(
"C",
"N", nstate, nstate, ndummy, cone, tmp_cfm, tmp_cfm_1, &
972 DO istate = 1, nstate
976 spread_ii = spread_ii + weights(idim)* &
978 matrix_zii(istate, idim) = mzii
981 sum_spread(istate) = spread_ii
983 CALL c_zij(1)%matrix_struct%para_env%sum(spread_ii)
994 ALLOCATE (tmp_cmat(northo, nstate))
996 weight = weights(idim)
997 arr_zii = matrix_zii(:, idim)
1000 zij_0(idim), v, czero, tmp_cfm)
1004 CALL parallel_gemm(
"N",
"C", nmo, nstate, nstate, cone, tmp_cfm, &
1005 u, czero, tmp_cfm_1)
1012 zij_0(idim), v, czero, tmp_cfm)
1017 CALL parallel_gemm(
"N",
"C", nmo, nstate, nstate, cone, tmp_cfm, &
1018 u, czero, tmp_cfm_1)
1025 DEALLOCATE (tmp_cmat)
1026 ALLOCATE (tmp_cmat(northo, nextra))
1028 northo, nextra, .false.)
1031 DEALLOCATE (tmp_cmat)
1033 CALL parallel_gemm(
"C",
"N", nextra, nextra, northo, cone, c_tilde, grad_ctilde, czero, ctrans_lambda)
1036 CALL parallel_gemm(
"N",
"N", northo, nextra, nextra, -cone, c_tilde, ctrans_lambda, cone, grad_ctilde)
1040 IF (nextra > 0)
THEN
1051 IF (nextra > 0)
THEN
1053 IF (new_direction)
THEN
1054 line_searches = line_searches + 1
1055 IF (mintol > tol)
THEN
1060 IF (unit_nr > 0 .AND.
modulo(iter, out_each) == 0)
THEN
1061 sum_spread_ii = alpha*nstate/
twopi/
twopi - spread_sum
1062 avg_spread_ii = sum_spread_ii/nstate
1063 WRITE (unit_nr,
'(T4,A,T26,A,T48,A)') &
1064 "Iteration",
"Avg. Spread_ii",
"Tolerance"
1065 WRITE (unit_nr,
'(T4,I7,T20,F20.10,T45,E12.4)') &
1066 iter, avg_spread_ii, tol
1069 IF (tol < eps_localization)
EXIT
1073 cnorm2_gct_cross = czero
1074 CALL cp_cfm_trace(grad_ctilde, gct_old, cnorm2_gct_cross)
1075 norm2_gct_cross = real(cnorm2_gct_cross, kind=
dp)
1076 gct_old%local_data = grad_ctilde%local_data
1078 norm2_gct = real(cnorm2_gct, kind=
dp)
1080 beta_pr = (norm2_gct - norm2_gct_cross)/norm2_old
1081 norm2_old = norm2_gct
1082 beta = max(0.0_dp, beta_pr)
1088 norm2_gct_cross = real(cnorm2_gct_cross, kind=
dp)
1089 IF (norm2_gct_cross <= 0.0_dp)
THEN
1095 line_search_count = 0
1098 line_search_count = line_search_count + 1
1100 energy(line_search_count) = spread_sum
1103 new_direction = .false.
1104 IF (line_search_count == 1)
THEN
1109 pos(2) = ds_min/gold_sec
1112 IF (line_search_count == 50)
THEN
1113 IF (abs(energy(line_search_count) - energy(line_search_count - 1)) < 1.0e-4_dp)
THEN
1114 cpwarn(
"Line search failed to converge properly")
1116 new_direction = .true.
1117 ds = pos(line_search_count)
1118 line_search_count = 0
1120 cpabort(
"No. of line searches exceeds 50")
1124 IF (energy(line_search_count - 1) > energy(line_search_count))
THEN
1125 lsr = line_search_count
1126 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
1129 lsm = line_search_count
1130 pos(line_search_count + 1) = pos(line_search_count)/gold_sec
1133 IF (pos(line_search_count) < pos(lsm))
THEN
1134 IF (energy(line_search_count) > energy(lsm))
THEN
1136 lsm = line_search_count
1138 lsl = line_search_count
1141 IF (energy(line_search_count) > energy(lsm))
THEN
1143 lsm = line_search_count
1145 lsr = line_search_count
1148 IF (pos(lsr) - pos(lsm) > pos(lsm) - pos(lsl))
THEN
1149 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
1151 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
1153 IF ((pos(lsr) - pos(lsl)) < 1.0e-3_dp*pos(lsr))
THEN
1154 new_direction = .true.
1159 ds = pos(line_search_count + 1) - pos(line_search_count)
1161 IF ((abs(ds) < 1.0e-10_dp) .AND. (lsl == 1))
THEN
1162 new_direction = .true.
1163 ds_min = 0.5_dp/alpha
1164 ELSEIF (abs(ds) > 10.0_dp)
THEN
1165 new_direction = .true.
1166 ds_min = 0.5_dp/alpha
1168 ds_min = pos(line_search_count + 1)
1175 IF (mintol > tol)
THEN
1179 IF (unit_nr > 0 .AND.
modulo(iter, out_each) == 0)
THEN
1180 sum_spread_ii = alpha*nstate/
twopi/
twopi - spread_sum
1181 avg_spread_ii = sum_spread_ii/nstate
1182 WRITE (unit_nr,
'(T4,A,T26,A,T48,A)') &
1183 "Iteration",
"Avg. Spread_ii",
"Tolerance"
1184 WRITE (unit_nr,
'(T4,I7,T20,F20.10,T45,E12.4)') &
1185 iter, avg_spread_ii, tol
1188 IF (tol < eps_localization)
EXIT
1193 IF ((unit_nr > 0) .AND. (iter == max_iter))
THEN
1194 WRITE (unit_nr,
'(T4,A,T4,A)')
"Min. Itr.",
"Min. Tol."
1195 WRITE (unit_nr,
'(T4,I7,T4,E12.4)') miniter, mintol
1201 IF (nextra > 0)
THEN
1202 rmat%local_data = real(v%local_data, kind=
dp)
1203 CALL rotate_orbitals_edf(rmat, vectors_all, vectors)
1218 DEALLOCATE (arr_zii)
1220 rmat%local_data = matrix_u%local_data
1229 zij(1, idim)%local_data = real(c_zij(idim)%local_data,
dp)
1230 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
1237 DEALLOCATE (matrix_zii, sum_spread)
1239 CALL timestop(handle)
1247 SUBROUTINE ortho_vectors(vmatrix)
1251 CHARACTER(LEN=*),
PARAMETER :: routinen =
'ortho_vectors'
1253 INTEGER :: handle, n, ncol
1257 CALL timeset(routinen, handle)
1259 NULLIFY (fm_struct_tmp)
1261 CALL cp_fm_get_info(matrix=vmatrix, nrow_global=n, ncol_global=ncol)
1264 para_env=vmatrix%matrix_struct%para_env, &
1265 context=vmatrix%matrix_struct%context)
1266 CALL cp_fm_create(overlap_vv, fm_struct_tmp,
"overlap_vv")
1269 CALL parallel_gemm(
'T',
'N', ncol, ncol, n, 1.0_dp, vmatrix, vmatrix, 0.0_dp, overlap_vv)
1275 CALL timestop(handle)
1277 END SUBROUTINE ortho_vectors
1287 SUBROUTINE rotate_zij(istate, jstate, st, ct, zij)
1288 INTEGER,
INTENT(IN) :: istate, jstate
1289 REAL(kind=
dp),
INTENT(IN) :: st, ct
1296 DO id = 1,
SIZE(zij, 1)
1301 END SUBROUTINE rotate_zij
1310 SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat)
1311 INTEGER,
INTENT(IN) :: istate, jstate
1312 REAL(kind=
dp),
INTENT(IN) :: st, ct
1317 END SUBROUTINE rotate_rmat
1328 SUBROUTINE get_angle(mii, mjj, mij, weights, theta, grad_ij, step)
1329 COMPLEX(KIND=dp),
POINTER :: mii(:), mjj(:), mij(:)
1330 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1331 REAL(kind=
dp),
INTENT(OUT) :: theta
1332 REAL(kind=
dp),
INTENT(IN),
OPTIONAL :: grad_ij, step
1334 COMPLEX(KIND=dp) :: z11, z12, z22
1335 INTEGER :: dim_m, idim
1336 REAL(kind=
dp) :: a12, b12, d2, ratio
1345 a12 = a12 + weights(idim)*real(conjg(z12)*(z11 - z22), kind=
dp)
1346 b12 = b12 + weights(idim)*real((z12*conjg(z12) - &
1347 0.25_dp*(z11 - z22)*(conjg(z11) - conjg(z22))), kind=
dp)
1349 IF (abs(b12) > 1.e-10_dp)
THEN
1351 theta = 0.25_dp*atan(ratio)
1352 ELSEIF (abs(b12) < 1.e-10_dp)
THEN
1358 IF (
PRESENT(grad_ij)) theta = theta + step*grad_ij
1360 d2 = a12*sin(4._dp*theta) - b12*cos(4._dp*theta)
1361 IF (d2 <= 0._dp)
THEN
1362 IF (theta > 0.0_dp)
THEN
1363 theta = theta - 0.25_dp*
pi
1365 theta = theta + 0.25_dp*
pi
1368 END SUBROUTINE get_angle
1376 SUBROUTINE check_tolerance(zij, weights, tolerance, grad)
1378 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1379 REAL(kind=
dp),
INTENT(OUT) :: tolerance
1380 TYPE(
cp_fm_type),
INTENT(OUT),
OPTIONAL :: grad
1382 CHARACTER(len=*),
PARAMETER :: routinen =
'check_tolerance'
1387 CALL timeset(routinen, handle)
1393 CALL grad_at_0(zij, weights, force)
1398 CALL timestop(handle)
1400 END SUBROUTINE check_tolerance
1408 TYPE(
cp_fm_type),
INTENT(IN) :: rmat, vectors
1415 CALL parallel_gemm(
"N",
"N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf)
1425 SUBROUTINE rotate_orbitals_cfm(rmat, vectors)
1436 END SUBROUTINE rotate_orbitals_cfm
1444 SUBROUTINE rotate_orbitals_edf(rmat, vec_all, vectors)
1445 TYPE(
cp_fm_type),
INTENT(IN) :: rmat, vec_all, vectors
1454 CALL parallel_gemm(
"N",
"N", n, l, k, 1.0_dp, vec_all, rmat, 0.0_dp, wf)
1457 END SUBROUTINE rotate_orbitals_edf
1465 SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim)
1466 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag
1467 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1469 INTEGER,
INTENT(IN) :: ndim
1471 COMPLEX(KIND=dp) :: zii, zjj
1472 INTEGER :: idim, istate, jstate, ncol_local, &
1473 nrow_global, nrow_local
1474 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1475 REAL(kind=
dp) :: gradsq_ij
1478 ncol_local=ncol_local, nrow_global=nrow_global, &
1479 row_indices=row_indices, col_indices=col_indices)
1481 DO istate = 1, nrow_local
1482 DO jstate = 1, ncol_local
1486 zii = diag(row_indices(istate), idim)
1487 zjj = diag(col_indices(jstate), idim)
1488 gradsq_ij = gradsq_ij + weights(idim)* &
1489 4.0_dp*real((conjg(zii)*zii + conjg(zjj)*zjj), kind=
dp)
1491 matrix%local_data(istate, jstate) = gradsq_ij
1494 END SUBROUTINE gradsq_at_0
1501 SUBROUTINE grad_at_0(matrix_p, weights, matrix)
1503 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1506 COMPLEX(KIND=dp) :: zii, zij, zjj
1507 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag
1508 INTEGER :: dim_m, idim, istate, jstate, ncol_local, &
1509 nrow_global, nrow_local
1510 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1511 REAL(kind=
dp) :: grad_ij
1515 ncol_local=ncol_local, nrow_global=nrow_global, &
1516 row_indices=row_indices, col_indices=col_indices)
1517 dim_m =
SIZE(matrix_p, 1)
1518 ALLOCATE (diag(nrow_global, dim_m))
1521 DO istate = 1, nrow_global
1526 DO istate = 1, nrow_local
1527 DO jstate = 1, ncol_local
1531 zii = diag(row_indices(istate), idim)
1532 zjj = diag(col_indices(jstate), idim)
1533 zij = matrix_p(idim)%local_data(istate, jstate)
1534 grad_ij = grad_ij + weights(idim)* &
1535 REAL(4.0_dp*conjg(zij)*(zjj - zii),
dp)
1537 matrix%local_data(istate, jstate) = grad_ij
1541 END SUBROUTINE grad_at_0
1551 SUBROUTINE check_tolerance_new(weights, zij, tolerance, value)
1552 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1554 REAL(kind=
dp) :: tolerance,
value
1556 COMPLEX(KIND=dp) :: kii, kij, kjj
1557 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag
1558 INTEGER :: idim, istate, jstate, ncol_local, &
1559 nrow_global, nrow_local
1560 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1561 REAL(kind=
dp) :: grad_ij, ra, rb
1565 ncol_local=ncol_local, nrow_global=nrow_global, &
1566 row_indices=row_indices, col_indices=col_indices)
1567 ALLOCATE (diag(nrow_global,
SIZE(zij, 2)))
1569 DO idim = 1,
SIZE(zij, 2)
1570 DO istate = 1, nrow_global
1573 diag(istate, idim) = cmplx(ra, rb,
dp)
1574 value =
value + weights(idim) - weights(idim)*abs(diag(istate, idim))**2
1578 DO istate = 1, nrow_local
1579 DO jstate = 1, ncol_local
1581 DO idim = 1,
SIZE(zij, 2)
1582 kii = diag(row_indices(istate), idim)
1583 kjj = diag(col_indices(jstate), idim)
1584 ra = zij(1, idim)%local_data(istate, jstate)
1585 rb = zij(2, idim)%local_data(istate, jstate)
1586 kij = cmplx(ra, rb,
dp)
1587 grad_ij = grad_ij + weights(idim)* &
1588 REAL(4.0_dp*conjg(kij)*(kjj - kii),
dp)
1590 tolerance = max(abs(grad_ij), tolerance)
1593 CALL zij(1, 1)%matrix_struct%para_env%max(tolerance)
1597 END SUBROUTINE check_tolerance_new
1613 SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, &
1614 eps_localization, iterations, converged)
1615 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1616 TYPE(
cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
1617 INTEGER,
INTENT(IN) :: max_iter
1618 REAL(kind=
dp),
INTENT(IN) :: max_crazy_angle
1619 REAL(kind=
dp) :: crazy_scale
1620 LOGICAL :: crazy_use_diag
1621 REAL(kind=
dp),
INTENT(IN) :: eps_localization
1622 INTEGER :: iterations
1623 LOGICAL,
INTENT(out),
OPTIONAL :: converged
1625 CHARACTER(len=*),
PARAMETER :: routinen =
'crazy_rotations'
1626 COMPLEX(KIND=dp),
PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1627 czero = (0.0_dp, 0.0_dp)
1629 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER :: evals_exp
1630 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag_z
1631 COMPLEX(KIND=dp),
POINTER :: mii(:), mij(:), mjj(:)
1632 INTEGER :: dim2, handle, i, icol, idim, irow, &
1633 method, ncol_global, ncol_local, &
1634 norder, nrow_global, nrow_local, &
1636 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1638 REAL(kind=
dp) :: eps_exp, limit_crazy_angle, maxeval, &
1639 norm, ra, rb, theta, tolerance,
value
1640 REAL(kind=
dp),
DIMENSION(:),
POINTER :: evals
1642 TYPE(
cp_fm_type) :: mat_r, mat_t, mat_theta, mat_u
1644 CALL timeset(routinen, handle)
1645 NULLIFY (row_indices, col_indices)
1647 ncol_global=ncol_global, &
1648 row_indices=row_indices, col_indices=col_indices, &
1649 nrow_local=nrow_local, ncol_local=ncol_local)
1651 limit_crazy_angle = max_crazy_angle
1653 NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj)
1655 ALLOCATE (diag_z(nrow_global, dim2))
1656 ALLOCATE (evals(nrow_global))
1657 ALLOCATE (evals_exp(nrow_global))
1671 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
1679 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1680 CALL parallel_gemm(
'T',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1681 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1682 CALL parallel_gemm(
'T',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1685 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1689 IF (cmat_a%matrix_struct%para_env%is_source())
THEN
1691 WRITE (unit_nr,
'(T2,A7,A6,1X,A20,A12,A12,A12)') &
1692 "CRAZY| ",
"Iter",
"value ",
"gradient",
"Max. eval",
"limit"
1699 iterations = iterations + 1
1701 DO i = 1, nrow_global
1704 diag_z(i, idim) = cmplx(ra, rb,
dp)
1707 DO irow = 1, nrow_local
1708 DO icol = 1, ncol_local
1710 ra = zij(1, idim)%local_data(irow, icol)
1711 rb = zij(2, idim)%local_data(irow, icol)
1712 mij(idim) = cmplx(ra, rb,
dp)
1713 mii(idim) = diag_z(row_indices(irow), idim)
1714 mjj(idim) = diag_z(col_indices(icol), idim)
1716 IF (row_indices(irow) /= col_indices(icol))
THEN
1717 CALL get_angle(mii, mjj, mij, weights, theta)
1718 theta = crazy_scale*theta
1719 IF (theta > limit_crazy_angle) theta = limit_crazy_angle
1720 IF (theta < -limit_crazy_angle) theta = -limit_crazy_angle
1721 IF (crazy_use_diag)
THEN
1722 cmat_a%local_data(irow, icol) = -cmplx(0.0_dp, theta,
dp)
1724 mat_theta%local_data(irow, icol) = -theta
1727 IF (crazy_use_diag)
THEN
1728 cmat_a%local_data(irow, icol) = czero
1730 mat_theta%local_data(irow, icol) = 0.0_dp
1738 IF (crazy_use_diag)
THEN
1740 maxeval = maxval(abs(evals))
1741 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1744 CALL parallel_gemm(
'N',
'C', nrow_global, nrow_global, nrow_global, cone, &
1745 cmat_t1, cmat_r, czero, cmat_a)
1746 mat_u%local_data = real(cmat_a%local_data, kind=
dp)
1750 eps_exp = 1.0_dp*epsilon(eps_exp)
1759 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1760 CALL parallel_gemm(
'T',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1761 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1762 CALL parallel_gemm(
'T',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1765 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1768 CALL check_tolerance_new(weights, zij, tolerance,
value)
1770 IF (unit_nr > 0)
THEN
1771 WRITE (unit_nr,
'(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') &
1772 "CRAZY| ", iterations,
value, tolerance, maxeval, limit_crazy_angle
1775 IF (tolerance < eps_localization .OR. iterations >= max_iter)
EXIT
1778 IF (
PRESENT(converged)) converged = (tolerance < eps_localization)
1791 DEALLOCATE (evals_exp, evals, diag_z)
1792 DEALLOCATE (mii, mij, mjj)
1794 CALL timestop(handle)
1812 SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
1813 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1814 TYPE(
cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
1815 INTEGER,
INTENT(IN) :: max_iter
1816 REAL(kind=
dp),
INTENT(IN) :: eps_localization
1817 INTEGER :: iterations
1819 CHARACTER(len=*),
PARAMETER :: routinen =
'direct_mini'
1820 COMPLEX(KIND=dp),
PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1821 czero = (0.0_dp, 0.0_dp)
1822 REAL(kind=
dp),
PARAMETER :: gold_sec = 0.3819_dp
1824 COMPLEX(KIND=dp) :: lk, ll, tmp
1825 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER :: evals_exp
1826 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag_z
1827 INTEGER :: handle, i, icol, idim, irow, &
1828 line_search_count, line_searches, lsl, &
1829 lsm, lsr, n, ncol_local, ndim, &
1830 nrow_local, output_unit
1831 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1832 LOGICAL :: new_direction
1833 REAL(kind=
dp) :: a, b, beta_pr, c, denom, ds, ds_min, fa, &
1834 fb, fc, nom, normg, normg_cross, &
1835 normg_old, npos, omega, tol, val, x0, &
1837 REAL(kind=
dp),
DIMENSION(150) :: energy, grad, pos
1838 REAL(kind=
dp),
DIMENSION(:),
POINTER :: evals, fval, fvald
1839 TYPE(
cp_cfm_type) :: cmat_a, cmat_b, cmat_m, cmat_r, cmat_t1, &
1841 TYPE(
cp_cfm_type),
ALLOCATABLE,
DIMENSION(:) :: c_zij
1842 TYPE(
cp_fm_type) :: matrix_a, matrix_g, matrix_g_old, &
1843 matrix_g_search, matrix_h, matrix_r, &
1846 NULLIFY (evals, evals_exp, diag_z, fval, fvald)
1848 CALL timeset(routinen, handle)
1851 n = zij(1, 1)%matrix_struct%nrow_global
1852 ndim = (
SIZE(zij, 2))
1854 IF (output_unit > 0)
THEN
1855 WRITE (output_unit,
'(T4,A )')
"Localization by direct minimization of the functional; "
1856 WRITE (output_unit,
'(T5,2A13,A20,A20,A10 )')
" Line search ",
" Iteration ",
" Functional ",
" Tolerance ",
" ds Min "
1859 ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n))
1860 ALLOCATE (c_zij(ndim))
1865 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
1866 zij(2, idim)%local_data,
dp)
1873 CALL cp_fm_create(matrix_g_search, zij(1, 1)%matrix_struct)
1874 CALL cp_fm_create(matrix_g_old, zij(1, 1)%matrix_struct)
1889 CALL cp_cfm_get_info(cmat_b, nrow_local=nrow_local, ncol_local=ncol_local, &
1890 row_indices=row_indices, col_indices=col_indices)
1894 normg_old = 1.0e30_dp
1896 new_direction = .true.
1899 line_search_count = 0
1901 iterations = iterations + 1
1903 cmat_a%local_data = cmplx(0.0_dp, matrix_a%local_data,
dp)
1905 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1908 CALL parallel_gemm(
'N',
'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_u)
1909 cmat_u%local_data = real(cmat_u%local_data, kind=
dp)
1911 IF (new_direction .AND. mod(line_searches, 20) == 5)
THEN
1913 CALL parallel_gemm(
'N',
'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1)
1914 CALL parallel_gemm(
'C',
'N', n, n, n, cone, cmat_u, cmat_t1, czero, c_zij(idim))
1917 matrix_h%local_data = real(cmat_u%local_data, kind=
dp)
1918 CALL parallel_gemm(
'N',
'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
1926 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1929 normg_old = 1.0e30_dp
1936 CALL parallel_gemm(
'N',
'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1)
1937 CALL parallel_gemm(
'C',
'N', n, n, n, cone, cmat_u, cmat_t1, czero, cmat_t2)
1942 fval(i) = -weights(idim)*log(abs(diag_z(i, idim))**2)
1943 fvald(i) = -weights(idim)/(abs(diag_z(i, idim))**2)
1945 fval(i) = weights(idim) - weights(idim)*abs(diag_z(i, idim))**2
1946 fvald(i) = -weights(idim)
1948 omega = omega + fval(i)
1950 DO icol = 1, ncol_local
1951 DO irow = 1, nrow_local
1952 tmp = cmat_t1%local_data(irow, icol)*conjg(diag_z(col_indices(icol), idim))
1953 cmat_m%local_data(irow, icol) = cmat_m%local_data(irow, icol) &
1954 + 4.0_dp*fvald(col_indices(icol))*real(tmp, kind=
dp)
1961 CALL gradsq_at_0(diag_z, weights, matrix_h, ndim)
1967 DO icol = 1, ncol_local
1968 DO irow = 1, nrow_local
1969 ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow))
1970 lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol))
1971 IF (abs(ll - lk) < 0.5_dp)
THEN
1973 cmat_b%local_data(irow, icol) = 0.0_dp
1975 cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol) + tmp
1976 tmp = tmp*(ll - lk)/(i + 1)
1978 cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol)*exp(lk)
1980 cmat_b%local_data(irow, icol) = (exp(lk) - exp(ll))/(lk - ll)
1986 CALL parallel_gemm(
'C',
'N', n, n, n, cone, cmat_m, cmat_r, czero, cmat_t1)
1987 CALL parallel_gemm(
'C',
'N', n, n, n, cone, cmat_r, cmat_t1, czero, cmat_t2)
1989 CALL parallel_gemm(
'N',
'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_t2)
1990 CALL parallel_gemm(
'N',
'N', n, n, n, cone, cmat_r, cmat_t2, czero, cmat_t1)
1991 matrix_g%local_data = real(cmat_t1%local_data, kind=
dp)
1997 IF (new_direction)
THEN
1999 line_searches = line_searches + 1
2008 IF (output_unit > 0)
THEN
2009 WRITE (output_unit,
'(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, iterations, omega, tol, ds_min
2012 IF (tol < eps_localization .OR. iterations > max_iter)
EXIT
2015 CALL cp_fm_trace(matrix_g, matrix_g_old, normg_cross)
2016 normg_cross = normg_cross*0.5_dp
2018 DO icol = 1, ncol_local
2019 DO irow = 1, nrow_local
2020 matrix_g_old%local_data(irow, icol) = matrix_g%local_data(irow, icol)/matrix_h%local_data(irow, icol)
2024 normg = normg*0.5_dp
2025 beta_pr = (normg - normg_cross)/normg_old
2027 beta_pr = max(beta_pr, 0.0_dp)
2029 CALL cp_fm_trace(matrix_g_search, matrix_g_old, normg_cross)
2030 IF (normg_cross >= 0)
THEN
2031 IF (matrix_a%matrix_struct%para_env%is_source())
THEN
2041 line_search_count = 0
2043 line_search_count = line_search_count + 1
2044 energy(line_search_count) = omega
2049 SELECT CASE (line_search_count)
2053 CALL cp_fm_trace(matrix_g, matrix_g_search, grad(1))
2054 grad(1) = grad(1)/2.0_dp
2055 new_direction = .false.
2057 new_direction = .true.
2062 a = (energy(2) - b*x1 - c)/(x1**2)
2063 IF (a <= 0.0_dp) a = 1.0e-15_dp
2064 npos = -b/(2.0_dp*a)
2065 val = a*npos**2 + b*npos + c
2066 IF (val < energy(1) .AND. val <= energy(2))
THEN
2069 pos(3) = min(npos, maxval(pos(1:2))*4.0_dp)
2071 pos(3) = maxval(pos(1:2))*2.0_dp
2075 SELECT CASE (line_search_count)
2077 new_direction = .false.
2079 pos(2) = ds_min*0.8_dp
2081 new_direction = .false.
2082 IF (energy(2) > energy(1))
THEN
2083 pos(3) = ds_min*0.7_dp
2085 pos(3) = ds_min*1.4_dp
2088 new_direction = .true.
2095 nom = (xb - xa)**2*(fb - fc) - (xb -
xc)**2*(fb - fa)
2096 denom = (xb - xa)*(fb - fc) - (xb -
xc)*(fb - fa)
2097 IF (abs(denom) <= 1.0e-18_dp*max(abs(fb - fc), abs(fb - fa)))
THEN
2100 npos = xb - 0.5_dp*nom/denom
2102 val = (npos - xa)*(npos - xb)*fc/((
xc - xa)*(
xc - xb)) + &
2103 (npos - xb)*(npos -
xc)*fa/((xa - xb)*(xa -
xc)) + &
2104 (npos -
xc)*(npos - xa)*fb/((xb -
xc)*(xb - xa))
2105 IF (val < fa .AND. val <= fb .AND. val <= fc)
THEN
2107 pos(4) = max(maxval(pos(1:3))*0.01_dp, &
2108 min(npos, maxval(pos(1:3))*4.0_dp))
2110 pos(4) = maxval(pos(1:3))*2.0_dp
2114 new_direction = .false.
2115 IF (line_search_count == 1)
THEN
2120 pos(2) = ds_min/gold_sec
2122 IF (line_search_count == 150) cpabort(
"Too many")
2124 IF (energy(line_search_count - 1) < energy(line_search_count))
THEN
2125 lsr = line_search_count
2126 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
2129 lsm = line_search_count
2130 pos(line_search_count + 1) = pos(line_search_count)/gold_sec
2133 IF (pos(line_search_count) < pos(lsm))
THEN
2134 IF (energy(line_search_count) < energy(lsm))
THEN
2136 lsm = line_search_count
2138 lsl = line_search_count
2141 IF (energy(line_search_count) < energy(lsm))
THEN
2143 lsm = line_search_count
2145 lsr = line_search_count
2148 IF (pos(lsr) - pos(lsm) > pos(lsm) - pos(lsl))
THEN
2149 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
2151 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
2153 IF ((pos(lsr) - pos(lsl)) < 1.0e-3_dp*pos(lsr))
THEN
2154 new_direction = .true.
2160 ds_min = pos(line_search_count + 1)
2161 ds = pos(line_search_count + 1) - pos(line_search_count)
2166 matrix_h%local_data = real(cmat_u%local_data, kind=
dp)
2167 CALL parallel_gemm(
'N',
'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
2186 DEALLOCATE (evals, evals_exp, fval, fvald)
2188 DO idim = 1,
SIZE(c_zij)
2189 zij(1, idim)%local_data = real(c_zij(idim)%local_data,
dp)
2190 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
2196 CALL timestop(handle)
2217 SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
2218 sweeps, out_each, target_time, start_time, restricted)
2220 REAL(kind=
dp),
INTENT(IN) :: weights(:)
2221 TYPE(
cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
2223 INTEGER,
INTENT(IN) :: max_iter
2224 REAL(kind=
dp),
INTENT(IN) :: eps_localization
2226 INTEGER,
INTENT(IN) :: out_each
2227 REAL(
dp) :: target_time, start_time
2228 INTEGER :: restricted
2230 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_rot_para'
2232 INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2233 nblock, nblock_max, ns_me, nstate, &
2235 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: ns_bound
2236 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :) :: rotmat, z_ij_loc_im, z_ij_loc_re
2237 REAL(kind=
dp) :: xstate
2239 TYPE(set_c_2d_type),
DIMENSION(:),
POINTER :: cz_ij_loc
2241 CALL timeset(routinen, handle)
2254 IF (restricted > 0)
THEN
2255 IF (output_unit > 0)
THEN
2256 WRITE (output_unit,
'(T4,A,I2,A )')
"JACOBI: for the ROKS method, the last ", restricted,
" orbitals DO NOT ROTATE"
2258 nstate = nstate - restricted
2262 xstate = real(nstate,
dp)/real(para_env%num_pe,
dp)
2263 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2264 DO ip = 1, para_env%num_pe
2265 ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2266 ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2269 DO ip = 0, para_env%num_pe - 1
2270 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2271 nblock_max = max(nblock_max, nblock)
2275 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2276 ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2277 ALLOCATE (cz_ij_loc(dim2))
2279 DO ip = 0, para_env%num_pe - 1
2280 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2283 IF (para_env%mepos == ip)
THEN
2284 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, nblock))
2287 cz_ij_loc(idim)%c_array(j, i) = cmplx(z_ij_loc_re(j, i), z_ij_loc_im(j, i),
dp)
2293 DEALLOCATE (z_ij_loc_re)
2294 DEALLOCATE (z_ij_loc_im)
2296 ALLOCATE (rotmat(nstate, 2*nblock_max))
2298 CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, ns_bound, &
2299 cz_ij_loc, rotmat, output_unit, eps_localization=eps_localization, &
2300 target_time=target_time, start_time=start_time)
2302 ilow1 = ns_bound(para_env%mepos, 1)
2303 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2304 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2305 ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2307 DO ip = 0, para_env%num_pe - 1
2308 z_ij_loc_re = 0.0_dp
2309 z_ij_loc_im = 0.0_dp
2310 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2311 IF (ip == para_env%mepos)
THEN
2316 z_ij_loc_re(j, i) = real(cz_ij_loc(idim)%c_array(j, i),
dp)
2317 z_ij_loc_im(j, i) = aimag(cz_ij_loc(idim)%c_array(j, i))
2321 CALL para_env%bcast(z_ij_loc_re, ip)
2322 CALL para_env%bcast(z_ij_loc_im, ip)
2328 DO ip = 0, para_env%num_pe - 1
2329 z_ij_loc_re = 0.0_dp
2330 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2331 IF (ip == para_env%mepos)
THEN
2336 z_ij_loc_re(j, i) = rotmat(j, i)
2340 CALL para_env%bcast(z_ij_loc_re, ip)
2344 DEALLOCATE (z_ij_loc_re)
2345 DEALLOCATE (z_ij_loc_im)
2347 DEALLOCATE (cz_ij_loc(idim)%c_array)
2349 DEALLOCATE (cz_ij_loc)
2351 CALL para_env%sync()
2356 DEALLOCATE (ns_bound)
2358 CALL timestop(handle)
2360 END SUBROUTINE jacobi_rot_para
2372 SUBROUTINE jacobi_rot_para_1(weights, czij, para_env, max_iter, rmat, tol_out)
2374 REAL(kind=
dp),
INTENT(IN) :: weights(:)
2377 INTEGER,
INTENT(IN) :: max_iter
2379 REAL(
dp),
INTENT(OUT),
OPTIONAL :: tol_out
2381 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_rot_para_1'
2383 COMPLEX(KIND=dp),
ALLOCATABLE,
DIMENSION(:, :) :: czij_array
2384 INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2385 nblock, nblock_max, ns_me, nstate, &
2387 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: ns_bound
2388 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :) :: rotmat, z_ij_loc_re
2389 REAL(kind=
dp) :: xstate
2390 TYPE(set_c_2d_type),
DIMENSION(:),
POINTER :: cz_ij_loc
2392 CALL timeset(routinen, handle)
2401 xstate = real(nstate,
dp)/real(para_env%num_pe,
dp)
2402 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2403 DO ip = 1, para_env%num_pe
2404 ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2405 ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2408 DO ip = 0, para_env%num_pe - 1
2409 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2410 nblock_max = max(nblock_max, nblock)
2414 ALLOCATE (czij_array(nstate, nblock_max))
2415 ALLOCATE (cz_ij_loc(dim2))
2417 DO ip = 0, para_env%num_pe - 1
2418 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2421 IF (para_env%mepos == ip)
THEN
2423 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me))
2426 cz_ij_loc(idim)%c_array(j, i) = czij_array(j, i)
2432 DEALLOCATE (czij_array)
2434 ALLOCATE (rotmat(nstate, 2*nblock_max))
2436 CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, 1, dim2, nstate, nblock_max, ns_bound, &
2437 cz_ij_loc, rotmat, 0, tol_out=tol_out)
2439 ilow1 = ns_bound(para_env%mepos, 1)
2440 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2441 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2443 DO ip = 0, para_env%num_pe - 1
2444 z_ij_loc_re = 0.0_dp
2445 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2446 IF (ip == para_env%mepos)
THEN
2451 z_ij_loc_re(j, i) = rotmat(j, i)
2455 CALL para_env%bcast(z_ij_loc_re, ip)
2459 DEALLOCATE (z_ij_loc_re)
2461 DEALLOCATE (cz_ij_loc(idim)%c_array)
2463 DEALLOCATE (cz_ij_loc)
2465 CALL para_env%sync()
2468 DEALLOCATE (ns_bound)
2470 CALL timestop(handle)
2472 END SUBROUTINE jacobi_rot_para_1
2496 SUBROUTINE jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, &
2497 ns_bound, cz_ij_loc, rotmat, output_unit, tol_out, eps_localization, target_time, start_time)
2499 REAL(kind=
dp),
INTENT(IN) :: weights(:)
2501 INTEGER,
INTENT(IN) :: max_iter
2502 INTEGER,
INTENT(OUT) :: sweeps
2503 INTEGER,
INTENT(IN) :: out_each, dim2, nstate, nblock_max
2504 INTEGER,
DIMENSION(0:, :),
INTENT(IN) :: ns_bound
2505 TYPE(set_c_2d_type),
DIMENSION(:),
POINTER :: cz_ij_loc
2506 REAL(
dp),
DIMENSION(:, :),
INTENT(OUT) :: rotmat
2507 INTEGER,
INTENT(IN) :: output_unit
2508 REAL(
dp),
INTENT(OUT),
OPTIONAL :: tol_out
2509 REAL(kind=
dp),
INTENT(IN),
OPTIONAL :: eps_localization
2510 REAL(
dp),
OPTIONAL :: target_time, start_time
2512 COMPLEX(KIND=dp) :: zi, zj
2513 COMPLEX(KIND=dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: c_array_me, c_array_partner
2514 COMPLEX(KIND=dp),
POINTER :: mii(:), mij(:), mjj(:)
2515 INTEGER :: i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, ip, ip_has_i, &
2516 ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, iu1, iu2, iup1, &
2517 iup2, j, jj, jstate, k, kk, lsweep, n1, n2, npair, nperm, ns_me, ns_partner, &
2518 ns_recv_from, ns_recv_partner
2519 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: rcount, rdispl
2520 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: list_pair
2521 LOGICAL :: should_stop
2522 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :) :: gmat, rmat_loc, rmat_recv, rmat_send
2523 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: rmat_recv_all
2524 REAL(kind=
dp) :: ct, func, gmax, grad, ri, rj, st, t1, &
2525 t2, theta, tolerance, zc, zr
2526 TYPE(set_c_1d_type),
DIMENSION(:),
POINTER :: zdiag_all, zdiag_me
2527 TYPE(set_c_2d_type),
DIMENSION(:),
POINTER :: xyz_mix, xyz_mix_ns
2529 NULLIFY (zdiag_all, zdiag_me)
2530 NULLIFY (xyz_mix, xyz_mix_ns)
2531 NULLIFY (mii, mij, mjj)
2533 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
2535 ALLOCATE (rcount(para_env%num_pe), stat=istat)
2536 ALLOCATE (rdispl(para_env%num_pe), stat=istat)
2538 tolerance = 1.0e10_dp
2542 npair = (para_env%num_pe + 1)/2
2543 nperm = para_env%num_pe - mod(para_env%num_pe + 1, 2)
2544 ALLOCATE (list_pair(2, npair))
2548 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2549 ii = i - ns_bound(para_env%mepos, 1) + 1
2550 rotmat(i, ii) = 1.0_dp
2553 ALLOCATE (xyz_mix(dim2))
2554 ALLOCATE (xyz_mix_ns(dim2))
2555 ALLOCATE (zdiag_me(dim2))
2556 ALLOCATE (zdiag_all(dim2))
2558 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2559 IF (ns_me /= 0)
THEN
2560 ALLOCATE (c_array_me(nstate, ns_me, dim2))
2562 ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me))
2564 ALLOCATE (gmat(nstate, ns_me))
2568 ALLOCATE (zdiag_me(idim)%c_array(nblock_max))
2569 zdiag_me(idim)%c_array =
z_zero
2570 ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max))
2571 zdiag_all(idim)%c_array =
z_zero
2573 ALLOCATE (rmat_recv(nblock_max*2, nblock_max))
2574 ALLOCATE (rmat_send(nblock_max*2, nblock_max))
2577 ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1))
2579 IF (output_unit > 0)
THEN
2580 WRITE (output_unit,
'(T4,A )')
" Localization by iterative distributed Jacobi rotation"
2581 WRITE (output_unit,
'(T20,A12,T32, A22,T60, A12,A8 )')
"Iteration",
"Functional",
"Tolerance",
" Time "
2584 DO lsweep = 1, max_iter + 1
2586 IF (sweeps == max_iter + 1)
THEN
2587 IF (output_unit > 0)
THEN
2588 WRITE (output_unit, *)
' LOCALIZATION! loop did not converge within the maximum number of iterations.'
2589 WRITE (output_unit, *)
' Present Max. gradient = ', tolerance
2598 CALL eberlein(iperm, para_env, list_pair)
2602 IF (list_pair(1, ipair) == para_env%mepos)
THEN
2603 ip_partner = list_pair(2, ipair)
2605 ELSE IF (list_pair(2, ipair) == para_env%mepos)
THEN
2606 ip_partner = list_pair(1, ipair)
2610 IF (ip_partner >= 0)
THEN
2611 ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1
2617 IF (ns_partner*ns_me /= 0)
THEN
2619 ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner))
2621 DO i = 1, ns_me + ns_partner
2622 rmat_loc(i, i) = 1.0_dp
2625 ALLOCATE (c_array_partner(nstate, ns_partner, dim2))
2628 ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner))
2630 c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i)
2634 CALL para_env%sendrecv(msgin=c_array_me, dest=ip_partner, &
2635 msgout=c_array_partner, source=ip_partner)
2639 ilow1 = ns_bound(para_env%mepos, 1)
2640 iup1 = ns_bound(para_env%mepos, 1) + n1 - 1
2641 ilow2 = ns_bound(ip_partner, 1)
2642 iup2 = ns_bound(ip_partner, 1) + n2 - 1
2643 IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1))
THEN
2659 xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim)
2660 xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim)
2663 xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim)
2664 xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim)
2668 DO istate = 1, n1 + n2
2669 DO jstate = istate + 1, n1 + n2
2671 mii(idim) = xyz_mix(idim)%c_array(istate, istate)
2672 mij(idim) = xyz_mix(idim)%c_array(istate, jstate)
2673 mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate)
2675 CALL get_angle(mii, mjj, mij, weights, theta)
2680 zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate)
2681 zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate)
2682 xyz_mix(idim)%c_array(i, istate) = zi
2683 xyz_mix(idim)%c_array(i, jstate) = zj
2686 zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i)
2687 zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i)
2688 xyz_mix(idim)%c_array(istate, i) = zi
2689 xyz_mix(idim)%c_array(jstate, i) = zj
2694 ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate)
2695 rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate)
2696 rmat_loc(i, istate) = ri
2697 rmat_loc(i, jstate) = rj
2703 CALL para_env%sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, &
2704 rotmat(1:nstate, k:k + n2 - 1), ip_partner)
2706 IF (ilow1 < ilow2)
THEN
2710 CALL dgemm(
"N",
"N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, rmat_loc(1 + n1:, 1:n1), &
2711 n2, 0.0_dp, gmat(:, :), nstate)
2712 CALL dgemm(
"N",
"N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(1:, 1:), &
2713 n1 + n2, 1.0_dp, gmat(:, :), nstate)
2715 CALL dgemm(
"N",
"N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, &
2716 rmat_loc(1:, n2 + 1:), n1 + n2, 0.0_dp, gmat(:, :), nstate)
2720 CALL dgemm(
"N",
"N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(n2 + 1:, n2 + 1:), &
2721 n1, 1.0_dp, gmat(:, :), nstate)
2724 CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1)
2728 xyz_mix_ns(idim)%c_array(1:nstate, i) =
z_zero
2732 DO jstate = 1, nstate
2734 xyz_mix_ns(idim)%c_array(jstate, istate) = &
2735 xyz_mix_ns(idim)%c_array(jstate, istate) + &
2736 c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1)
2741 DO jstate = 1, nstate
2743 xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + &
2744 c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1)
2750 DEALLOCATE (c_array_partner)
2755 xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i)
2762 cz_ij_loc(idim)%c_array(1:nstate, i) =
z_zero
2766 IF (ns_partner*ns_me /= 0)
THEN
2768 DO i = 1, ns_me + ns_partner
2769 DO j = i + 1, ns_me + ns_partner
2771 rmat_loc(i, j) = rmat_loc(j, i)
2778 rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1)
2782 rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1)
2789 CALL para_env%allgather(rmat_send, rmat_recv_all)
2792 DO ip = 0, para_env%num_pe - 1
2794 ip_recv_from = mod(para_env%mepos - ip + para_env%num_pe, para_env%num_pe)
2795 rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from)
2797 ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1
2799 IF (ns_me /= 0)
THEN
2800 IF (ns_recv_from /= 0)
THEN
2802 ip_recv_partner = -1
2805 IF (list_pair(1, ipair) == ip_recv_from)
THEN
2806 ip_recv_partner = list_pair(2, ipair)
2808 ELSE IF (list_pair(2, ipair) == ip_recv_from)
THEN
2809 ip_recv_partner = list_pair(1, ipair)
2814 IF (ip_recv_partner >= 0)
THEN
2815 ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1
2817 IF (ns_recv_partner > 0)
THEN
2818 il1 = ns_bound(para_env%mepos, 1)
2819 il_recv = ns_bound(ip_recv_from, 1)
2820 il_recv_partner = ns_bound(ip_recv_partner, 1)
2824 DO i = 1, ns_recv_from
2825 ii = il_recv + i - 1
2828 DO k = 1, ns_recv_from
2829 kk = il_recv + k - 1
2830 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2831 rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2835 DO i = 1, ns_recv_from
2836 ii = il_recv + i - 1
2839 DO k = 1, ns_recv_partner
2840 kk = il_recv_partner + k - 1
2841 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2842 rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2848 il1 = ns_bound(para_env%mepos, 1)
2849 il_recv = ns_bound(ip_recv_from, 1)
2853 DO i = 1, ns_recv_from
2854 ii = il_recv + i - 1
2855 cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j)
2864 IF (ns_partner*ns_me /= 0)
THEN
2865 DEALLOCATE (rmat_loc)
2867 DEALLOCATE (xyz_mix(idim)%c_array)
2875 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2876 ii = i - ns_bound(para_env%mepos, 1) + 1
2877 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2878 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2880 rcount(:) =
SIZE(zdiag_me(idim)%c_array)
2882 DO ip = 2, para_env%num_pe
2883 rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
2886 CALL para_env%allgatherv(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl)
2890 DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2891 k = j - ns_bound(para_env%mepos, 1) + 1
2894 DO ip = 0, para_env%num_pe - 1
2895 IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2))
THEN
2900 ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1
2902 jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1
2905 zi = zdiag_all(idim)%c_array(ii)
2906 zj = zdiag_all(idim)%c_array(jj)
2907 grad = grad + weights(idim)*real(4.0_dp*conjg(cz_ij_loc(idim)%c_array(i, k))*(zj - zi),
dp)
2909 gmax = max(gmax, abs(grad))
2913 CALL para_env%max(gmax)
2915 IF (
PRESENT(tol_out)) tol_out = tolerance
2918 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2919 k = i - ns_bound(para_env%mepos, 1) + 1
2921 zr = real(cz_ij_loc(idim)%c_array(i, k),
dp)
2922 zc = aimag(cz_ij_loc(idim)%c_array(i, k))
2923 func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/
twopi/
twopi
2926 CALL para_env%sum(func)
2929 IF (output_unit > 0 .AND.
modulo(sweeps, out_each) == 0)
THEN
2930 WRITE (output_unit,
'(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1
2933 IF (
PRESENT(eps_localization))
THEN
2934 IF (tolerance < eps_localization)
EXIT
2936 IF (
PRESENT(target_time) .AND.
PRESENT(start_time))
THEN
2937 CALL external_control(should_stop,
"LOC", target_time=target_time, start_time=start_time)
2938 IF (should_stop)
EXIT
2944 DEALLOCATE (rmat_recv_all)
2946 DEALLOCATE (rmat_recv)
2947 DEALLOCATE (rmat_send)
2949 DEALLOCATE (c_array_me)
2952 DEALLOCATE (zdiag_me(idim)%c_array)
2953 DEALLOCATE (zdiag_all(idim)%c_array)
2955 DEALLOCATE (zdiag_me)
2956 DEALLOCATE (zdiag_all)
2957 DEALLOCATE (xyz_mix)
2959 IF (ns_me /= 0)
THEN
2960 DEALLOCATE (xyz_mix_ns(idim)%c_array)
2963 DEALLOCATE (xyz_mix_ns)
2964 IF (ns_me /= 0)
THEN
2970 DEALLOCATE (list_pair)
2972 END SUBROUTINE jacobi_rot_para_core
2980 SUBROUTINE eberlein(iperm, para_env, list_pair)
2981 INTEGER,
INTENT(IN) :: iperm
2983 INTEGER,
DIMENSION(:, :) :: list_pair
2985 INTEGER :: i, ii, jj, npair
2987 npair = (para_env%num_pe + 1)/2
2988 IF (iperm == 1)
THEN
2990 DO i = 0, para_env%num_pe - 1
2991 ii = ((i + 1) + 1)/2
2992 jj = mod((i + 1) + 1, 2) + 1
2993 list_pair(jj, ii) = i
2995 IF (mod(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1
2996 ELSEIF (mod(iperm, 2) == 0)
THEN
2998 jj = list_pair(1, npair)
3000 list_pair(1, i) = list_pair(1, i - 1)
3002 list_pair(1, 2) = list_pair(2, 1)
3003 list_pair(2, 1) = jj
3006 jj = list_pair(2, 1)
3008 list_pair(2, i) = list_pair(2, i + 1)
3010 list_pair(2, npair) = jj
3013 END SUBROUTINE eberlein
3024 TYPE(
dbcsr_p_type),
DIMENSION(:, :),
POINTER :: op_sm_set
3025 TYPE(
cp_fm_type),
DIMENSION(:, :),
INTENT(IN) :: zij_fm_set
3027 CHARACTER(len=*),
PARAMETER :: routinen =
'zij_matrix'
3029 INTEGER :: handle, i, j, nao, nmoloc
3032 CALL timeset(routinen, handle)
3035 CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc)
3040 DO i = 1,
SIZE(zij_fm_set, 2)
3041 DO j = 1,
SIZE(zij_fm_set, 1)
3044 CALL parallel_gemm(
"T",
"N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, &
3050 CALL timestop(handle)
3062 CHARACTER(len=*),
PARAMETER :: routinen =
'scdm_qrfact'
3064 INTEGER :: handle, ncolt, nrowt
3065 REAL(kind=
dp),
DIMENSION(:),
POINTER :: tau
3069 CALL timeset(routinen, handle)
3072 nrowt = vectors%matrix_struct%ncol_global
3073 ncolt = vectors%matrix_struct%nrow_global
3076 nrow_global=nrowt, ncol_global=ncolt)
3080 ALLOCATE (tau(nrowt))
3089 context=ctp%matrix_struct%context, nrow_global=ctp%matrix_struct%nrow_global, &
3090 ncol_global=ctp%matrix_struct%nrow_global)
3102 CALL parallel_gemm(
'N',
'N', ncolt, nrowt, nrowt, 1.0_dp, tmp, qf, 0.0_dp, vectors)
3110 CALL timestop(handle)
3132 REAL(kind=
dp),
INTENT(IN) :: weights(:)
3134 INTEGER,
INTENT(IN) :: max_iter
3135 REAL(kind=
dp),
INTENT(IN) :: eps_localization
3137 INTEGER,
INTENT(IN) :: out_each
3140 CHARACTER(len=*),
PARAMETER :: routinen =
'cardoso_souloumiac'
3142 COMPLEX(KIND=dp) :: s
3143 COMPLEX(KIND=dp),
ALLOCATABLE :: mii(:), mij(:), mji(:), mjj(:)
3144 INTEGER :: dim1, dim2, handle, idim, istate, jdim, &
3145 jstate, nstate, unit_nr
3146 REAL(kind=
dp) :: c, old_spread, spread, t1, t2, tolerance
3149 CALL timeset(routinen, handle)
3154 NULLIFY (c_rmat, c_zij)
3155 ALLOCATE (c_rmat, c_zij(dim1*dim2), mii(dim1*dim2), mij(dim1*dim2), mji(dim1*dim2), mjj(dim1*dim2))
3160 CALL cp_cfm_create(c_zij((idim - 1)*dim1 + jdim), zij(jdim, idim)%matrix_struct)
3161 CALL cp_cfm_to_cfm(zij(jdim, idim), c_zij((idim - 1)*dim1 + jdim))
3166 tolerance = 1.0e10_dp
3167 old_spread = 1.0e10_dp
3170 IF (c_rmat%matrix_struct%para_env%is_source())
THEN
3172 WRITE (unit_nr,
"(T4,A )")
" Localization by iterative Jacobi rotation using "// &
3173 "Cardoso-Souloumiac angles"
3178 DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
3181 DO jstate = 1, nstate
3182 DO istate = jstate + 1, nstate
3183 DO idim = 1, dim1*dim2
3189 CALL get_cardoso_angles(mii, mij, mji, mjj, c, s, vectors%matrix_struct)
3190 DO idim = 1, dim1*dim2
3197 CALL check_tolerance(c_zij, weights, spread)
3198 CALL check_tolerance(c_zij, weights, tolerance)
3199 tolerance = abs(spread - old_spread)
3203 IF (unit_nr > 0 .AND.
modulo(sweeps, out_each) == 0)
THEN
3204 WRITE (unit_nr,
"(T4,A,I7,A,E12.4,A,E12.4,A,F8.3)") &
3205 "Iteration:", sweeps,
"Functional", spread,
"Tolerance:", tolerance,
"Time:", t2 - t1
3213 CALL cp_cfm_to_cfm(c_zij((idim - 1)*dim1 + jdim), zij(jdim, idim))
3218 CALL rotate_orbitals_cfm(c_rmat, vectors)
3220 DEALLOCATE (c_zij, mii, mij, mji, mjj)
3224 CALL timestop(handle)
3243 INTEGER :: sweeps, max_iter
3247 CHARACTER(*),
PARAMETER :: routinen =
'cardoso_souloumiac_pipek'
3249 COMPLEX(dp) :: c_spread, s
3250 COMPLEX(dp),
POINTER :: qii(:), qij(:), qji(:), qjj(:)
3251 INTEGER :: handle, i, j, k, n_dim, n_states, &
3253 REAL(
dp) :: c, old_spread, spread, t1, t2, tol
3256 CALL timeset(routinen, handle)
3266 ALLOCATE (c_zij(n_dim), qii(n_dim), qij(n_dim), qji(n_dim), qjj(n_dim))
3270 c_zij(k) = zij(k, 1)
3276 c_spread = (0.0_dp, 0.0_dp)
3280 c_spread = c_spread + s*s
3283 spread = real(c_spread)
3288 WRITE (output_unit,
"(T4,A )")
" Localization by iterative Jacobi rotation using "// &
3289 "Cardoso-Souloumiac angles"
3290 WRITE (output_unit,
"(T4,A )")
" and Pipek-Mezey spread functional"
3292 IF (output_unit > 0 .AND.
modulo(sweeps, out_each) == 0)
THEN
3293 WRITE (output_unit,
"(T4,A,I7,A,E12.4,A,E12.4,A,F8.3)") &
3294 "Iteration:", sweeps,
" Functional", spread,
" Tolerance:", tol,
" Time:", 0.0_dp
3297 DO WHILE (tol >= eps .AND. sweeps < max_iter)
3302 DO j = i + 1, n_states
3309 CALL get_cardoso_angles(qii, qij, qji, qjj, c, s, vec%matrix_struct)
3318 c_spread = (0.0_dp, 0.0_dp)
3322 c_spread = c_spread + s*s
3325 spread = real(c_spread)
3327 tol = abs(spread - old_spread)
3330 IF (output_unit > 0 .AND.
modulo(sweeps, out_each) == 0)
THEN
3331 WRITE (output_unit,
"(T4,A,I7,A,E12.4,A,E12.4,A,F8.3)") &
3332 "Iteration:", sweeps,
" Functional", spread,
" Tolerance:", tol,
" Time:", t2 - t1
3336 CALL rotate_orbitals_cfm(rmat, vec)
3339 DEALLOCATE (c_zij, qii, qij, qji, qjj)
3343 CALL timestop(handle)
3360 SUBROUTINE get_cardoso_angles(mii, mij, mji, mjj, c, s, tmp_fm_struct)
3362 COMPLEX(KIND=dp),
DIMENSION(:) :: mii, mij, mji, mjj
3363 REAL(kind=
dp),
INTENT(out) :: c
3364 COMPLEX(KIND=dp),
INTENT(out) :: s
3367 INTEGER :: dim_m, i, i_max
3368 REAL(kind=
dp) :: r, x, y, z
3369 REAL(kind=
dp),
DIMENSION(3) :: evals
3377 template_fmstruct=tmp_fm_struct)
3379 template_fmstruct=tmp_fm_struct)
3380 ALLOCATE (hmat, c_gmat, gmat, evects)
3391 CALL cp_cfm_gemm(
"C",
"N", 3, 3, 1, (1.0_dp, 0.0_dp), hmat, hmat, (1.0_dp, 0.0_dp), c_gmat)
3397 i_max = maxloc(evals, 1)
3408 r = sqrt(x**2 + y**2 + z**2)
3409 c = sqrt((x + r)/(2*r))
3410 s = (y -
gaussi*z)/sqrt(2*r*(x + r))
3418 DEALLOCATE (hmat, c_gmat, gmat, evects)
3420 END SUBROUTINE get_cardoso_angles
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
collects all references to literature in CP2K as new algorithms / method are included from literature...
integer, save, public schreder2024_2
Handles all functions related to the CELL.
methods related to the blacs parallel environment
Basic linear algebra operations for complex full matrices.
subroutine, public cp_cfm_scale_and_add(alpha, matrix_a, beta, matrix_b)
Scale and add two BLACS matrices (a = alpha*a + beta*b).
subroutine, public cp_cfm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
Performs one of the matrix-matrix operations: matrix_c = alpha * op1( matrix_a ) * op2( matrix_b ) + ...
subroutine, public cp_cfm_rot_rows(matrix, irow, jrow, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
subroutine, public cp_cfm_schur_product(matrix_a, matrix_b, matrix_c)
Computes the element-wise (Schur) product of two matrices: C = A \circ B .
subroutine, public cp_cfm_column_scale(matrix_a, scaling)
Scales columns of the full matrix by corresponding factors.
subroutine, public cp_cfm_rot_cols(matrix, icol, jcol, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
subroutine, public cp_cfm_trace(matrix_a, matrix_b, trace)
Returns the trace of matrix_a^T matrix_b, i.e sum_{i,j}(matrix_a(i,j)*matrix_b(i,j)) .
used for collecting diagonalization schemes available for cp_cfm_type
subroutine, public cp_cfm_heevd(matrix, eigenvectors, eigenvalues)
Perform a diagonalisation of a complex matrix.
Represents a complex full matrix distributed on many processors.
subroutine, public cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
Extract a sub-matrix from the full matrix: op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n...
subroutine, public cp_cfm_get_element(matrix, irow_global, icol_global, alpha)
Get the matrix element by its global index.
subroutine, public cp_cfm_release(matrix)
Releases a full matrix.
subroutine, public cp_cfm_set_element(matrix, irow_global, icol_global, alpha)
Set the matrix element (irow_global,icol_global) of the full matrix to alpha.
subroutine, public cp_fm_to_cfm(msourcer, msourcei, mtarget)
Construct a complex full matrix by taking its real and imaginary parts from two separate real-value f...
subroutine, public cp_cfm_create(matrix, matrix_struct, name, nrow, ncol, set_zero)
Creates a new full matrix with the given structure.
subroutine, public cp_cfm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, matrix_struct, para_env)
Returns information about a full matrix.
subroutine, public cp_cfm_set_submatrix(matrix, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
Set a sub-matrix of the full matrix: matrix(start_row:start_row+n_rows,start_col:start_col+n_cols) = ...
subroutine, public cp_cfm_set_all(matrix, alpha, beta)
Set all elements of the full matrix to alpha. Besides, set all diagonal matrix elements to beta (if g...
subroutine, public cp_cfm_to_fm(msource, mtargetr, mtargeti)
Copy real and imaginary parts of a complex full matrix into separate real-value full matrices.
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_sm_fm_multiply(matrix, fm_in, fm_out, ncol, alpha, beta)
multiply a dbcsr with a fm matrix
Routines to handle the external control of CP2K.
subroutine, public external_control(should_stop, flag, globenv, target_time, start_time, force_check)
External manipulations during a run : when the <PROJECT_NAME>.EXIT_$runtype command is sent the progr...
Basic linear algebra operations for full matrices.
subroutine, public cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
compute a QR factorization with column pivoting of a M-by-N distributed matrix sub( A ) = A(IA:IA+M-1...
real(kind=dp) function, public cp_fm_frobenius_norm(matrix_a)
computes the Frobenius norm of matrix_a
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_scale(alpha, matrix_a)
scales a matrix matrix_a = alpha * matrix_b
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
subroutine, public cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1) with orthonormal column...
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_decompose(matrix, n, info_out)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
subroutine, public cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
Computes all eigenvalues and vectors of a real symmetric matrix significantly faster than syevx,...
subroutine, public choose_eigv_solver(matrix, eigenvectors, eigenvalues, info)
Choose the Eigensolver depending on which library is available ELPA seems to be unstable for small sy...
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp, nrow, ncol, set_zero)
creates a new full matrix with the given structure
subroutine, public cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
returns an element of a fm this value is valid on every cpu using this call is expensive
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_maxabsrownorm(matrix, a_max)
find the maximum over the rows of the sum of the absolute values of the elements of a given row = || ...
subroutine, public cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
copy just a part ot the matrix
subroutine, public cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
find the maximum absolute value of the matrix element maxval(abs(matrix))
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_init_random(matrix, ncol, start_col)
fills a matrix with random numbers
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Defines the basic variable types.
integer, parameter, public dp
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition of mathematical constants and functions.
real(kind=dp), parameter, public pi
complex(kind=dp), parameter, public z_one
complex(kind=dp), parameter, public gaussi
real(kind=dp), parameter, public twopi
complex(kind=dp), parameter, public z_zero
Routines for calculating a complex matrix exponential.
subroutine, public exp_pade_real(exp_h, matrix, nsquare, npade)
exponential of a real matrix, calculated using pade approximation together with scaling and squaring
subroutine, public get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
optimization function for pade/taylor order and number of squaring steps
Interface to the message passing library MPI.
basic linear algebra operations for full matrixes
Localization methods such as 2x2 Jacobi rotations Steepest Decents Conjugate Gradient.
subroutine, public approx_l1_norm_sd(c, iterations, eps, converged, sweeps)
...
subroutine, public rotate_orbitals(rmat, vectors)
...
subroutine, public cardoso_souloumiac(weights, zij, max_iter, eps_localization, sweeps, out_each, vectors)
Achieves minimisation of the spread functional by simultaneous diagonalisation with Jacobi rotations ...
subroutine, public jacobi_rotations(weights, zij, vectors, para_env, max_iter, eps_localization, sweeps, out_each, target_time, start_time, restricted)
wrapper for the jacobi routines, should be removed if jacobi_rot_para can deal with serial para_envs.
subroutine, public zij_matrix(vectors, op_sm_set, zij_fm_set)
...
subroutine, public direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
use the exponential parametrization as described in to perform a direct mini Gerd Berghold et al....
subroutine, public initialize_weights(cell, weights)
...
subroutine, public crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, eps_localization, iterations, converged)
yet another crazy try, computes the angles needed to rotate the orbitals first and rotates them all a...
subroutine, public scdm_qrfact(vectors)
...
subroutine, public cardoso_souloumiac_pipek(zij, vec, sweeps, max_iter, eps, out_each)
Pipek-Mezey version of the Cardoso-Souloumiac PADE algorithm for complex-valued matrices.
subroutine, public jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
combine jacobi rotations (serial) and conjugate gradient with golden section line search for partiall...
Exchange and Correlation functional calculations.
Type defining parameters related to the simulation cell.
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
Represent a complex full matrix.
keeps the information about the structure of a full matrix
stores all the informations relevant to an mpi environment