55 USE dbcsr_api,
ONLY: dbcsr_p_type
66 #include "./base/base_uses.f90"
73 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'qs_localization_methods'
78 COMPLEX(KIND=dp),
POINTER,
DIMENSION(:) :: c_array
81 COMPLEX(KIND=dp),
POINTER,
DIMENSION(:, :) :: c_array
94 TYPE(cp_fm_type),
INTENT(IN) :: c
95 INTEGER,
INTENT(IN) :: iterations
96 REAL(kind=
dp),
INTENT(IN) :: eps
97 LOGICAL,
INTENT(INOUT) :: converged
98 INTEGER,
INTENT(INOUT) :: sweeps
100 CHARACTER(len=*),
PARAMETER :: routinen =
'approx_l1_norm_sd'
101 INTEGER,
PARAMETER :: taylor_order = 100
102 REAL(kind=
dp),
PARAMETER :: alpha = 0.1_dp, f2_eps = 0.01_dp
104 INTEGER :: handle, i, istep, k, n, ncol_local, &
105 nrow_local, output_unit, p
106 REAL(kind=
dp) :: expfactor, f2, f2old, gnorm, tnorm
107 TYPE(cp_blacs_env_type),
POINTER :: context
108 TYPE(cp_fm_struct_type),
POINTER :: fm_struct_k_k
109 TYPE(cp_fm_type) :: ctmp, g, gp1, gp2, u
110 TYPE(mp_para_env_type),
POINTER :: para_env
112 CALL timeset(routinen, handle)
114 NULLIFY (context, para_env, fm_struct_k_k)
119 nrow_local=nrow_local, ncol_local=ncol_local, &
120 para_env=para_env, context=context)
122 nrow_global=k, ncol_global=k)
130 IF (output_unit > 0)
THEN
131 WRITE (output_unit,
'(1X)')
132 WRITE (output_unit,
'(2X,A)')
'-----------------------------------------------------------------------------'
133 WRITE (output_unit,
'(A,I5)')
' Nbr iterations =', iterations
134 WRITE (output_unit,
'(A,E10.2)')
' eps convergence =', eps
135 WRITE (output_unit,
'(A,I5)')
' Max Taylor order =', taylor_order
136 WRITE (output_unit,
'(A,E10.2)')
' f2 eps =', f2_eps
137 WRITE (output_unit,
'(A,E10.2)')
' alpha =', alpha
138 WRITE (output_unit,
'(A)')
' iteration approx_l1_norm g_norm rel_err'
145 DO istep = 1, iterations
153 f2 = f2 + sqrt(c%local_data(i, p)**2 + f2_eps)
156 CALL c%matrix_struct%para_env%sum(f2)
163 ctmp%local_data(i, p) = c%local_data(i, p)/sqrt(c%local_data(i, p)**2 + f2_eps)
166 CALL parallel_gemm(
'T',
'N', k, k, n, 1.0_dp, ctmp, c, 0.0_dp, g)
187 IF (tnorm .GT. 1.0e-10_dp)
THEN
189 CALL cp_fm_to_fm(g, gp1)
190 DO i = 2, taylor_order
192 CALL parallel_gemm(
'N',
'N', k, k, k, 1.0_dp, g, gp1, 0.0_dp, gp2)
193 CALL cp_fm_to_fm(gp2, gp1)
195 expfactor = expfactor/real(i, kind=
dp)
199 IF (tnorm*expfactor .LT. 1.0e-10_dp)
EXIT
204 CALL parallel_gemm(
'N',
'N', n, k, k, 1.0_dp, c, u, 0.0_dp, ctmp)
205 CALL cp_fm_to_fm(ctmp, c)
208 IF (output_unit .GT. 0)
THEN
209 WRITE (output_unit,
'(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, abs((f2 - f2old)/f2)
215 IF (abs((f2 - f2old)/f2) .LE. eps .AND. istep .GT. 1)
THEN
225 IF (output_unit .GT. 0)
WRITE (output_unit,
'(A,E16.10)')
' sparseness function f2 = ', f2
245 CALL cp_fm_release(ctmp)
246 CALL cp_fm_release(u)
247 CALL cp_fm_release(g)
248 CALL cp_fm_release(gp1)
249 CALL cp_fm_release(gp2)
251 CALL timestop(handle)
261 TYPE(cell_type),
POINTER :: cell
262 REAL(kind=
dp),
DIMENSION(:) :: weights
264 REAL(kind=
dp),
DIMENSION(3, 3) :: metric
266 cpassert(
ASSOCIATED(cell))
269 CALL dgemm(
'T',
'N', 3, 3, 3, 1._dp, cell%hmat(:, :), 3, cell%hmat(:, :), 3, 0.0_dp, metric(:, :), 3)
271 weights(1) = metric(1, 1) - metric(1, 2) - metric(1, 3)
272 weights(2) = metric(2, 2) - metric(1, 2) - metric(2, 3)
273 weights(3) = metric(3, 3) - metric(1, 3) - metric(2, 3)
274 weights(4) = metric(1, 2)
275 weights(5) = metric(1, 3)
276 weights(6) = metric(2, 3)
298 eps_localization, sweeps, out_each, target_time, start_time, restricted)
300 REAL(kind=
dp),
INTENT(IN) :: weights(:)
301 TYPE(cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
302 TYPE(mp_para_env_type),
POINTER :: para_env
303 INTEGER,
INTENT(IN) :: max_iter
304 REAL(kind=
dp),
INTENT(IN) :: eps_localization
306 INTEGER,
INTENT(IN) :: out_each
307 REAL(
dp) :: target_time, start_time
308 INTEGER :: restricted
310 IF (para_env%num_pe == 1)
THEN
311 CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, &
312 sweeps, out_each, restricted=restricted)
314 CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
315 sweeps, out_each, target_time, start_time, restricted=restricted)
332 SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, &
333 out_each, restricted)
334 REAL(kind=
dp),
INTENT(IN) :: weights(:)
335 TYPE(cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
336 INTEGER,
INTENT(IN) :: max_iter
337 REAL(kind=
dp),
INTENT(IN) :: eps_localization
339 INTEGER,
INTENT(IN) :: out_each
340 INTEGER :: restricted
342 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_rotations_serial'
344 COMPLEX(KIND=dp),
POINTER :: mii(:), mij(:), mjj(:)
345 INTEGER :: dim2, handle, idim, istate, jstate, &
347 REAL(kind=
dp) :: ct, st, t1, t2, theta, tolerance
348 TYPE(cp_cfm_type) :: c_rmat
349 TYPE(cp_cfm_type),
ALLOCATABLE,
DIMENSION(:) :: c_zij
350 TYPE(cp_fm_type) :: rmat
352 CALL timeset(routinen, handle)
355 ALLOCATE (c_zij(dim2))
356 NULLIFY (mii, mij, mjj)
357 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
366 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
367 zij(2, idim)%local_data,
dp)
371 tolerance = 1.0e10_dp
375 IF (rmat%matrix_struct%para_env%is_source())
THEN
377 WRITE (unit_nr,
'(T4,A )')
" Localization by iterative Jacobi rotation"
380 IF (restricted > 0)
THEN
382 WRITE (unit_nr,
'(T4,A,I2,A )')
"JACOBI: for the ROKS method, the last ", restricted,
" orbitals DO NOT ROTATE"
383 nstate = nstate - restricted
387 DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
391 DO istate = 1, nstate
392 DO jstate = istate + 1, nstate
398 CALL get_angle(mii, mjj, mij, weights, theta)
401 CALL rotate_zij(istate, jstate, st, ct, c_zij)
403 CALL rotate_rmat(istate, jstate, st, ct, c_rmat)
407 CALL check_tolerance(c_zij, weights, tolerance)
410 IF (unit_nr > 0 .AND.
modulo(sweeps, out_each) == 0)
THEN
411 WRITE (unit_nr,
'(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') &
412 "Iteration:", sweeps,
"Tolerance:", tolerance,
"Time:", t2 - t1
419 zij(1, idim)%local_data = real(c_zij(idim)%local_data,
dp)
420 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
424 DEALLOCATE (mii, mij, mjj)
425 rmat%local_data = real(c_rmat%local_data,
dp)
428 CALL cp_fm_release(rmat)
430 CALL timestop(handle)
432 END SUBROUTINE jacobi_rotations_serial
446 SUBROUTINE jacobi_rotations_serial_1(weights, c_zij, max_iter, c_rmat, eps_localization, &
447 tol_out, jsweeps, out_each, c_zij_out, grad_final)
448 REAL(kind=
dp),
INTENT(IN) :: weights(:)
449 TYPE(cp_cfm_type),
INTENT(IN) :: c_zij(:)
450 INTEGER,
INTENT(IN) :: max_iter
451 TYPE(cp_cfm_type),
INTENT(IN) :: c_rmat
452 REAL(kind=
dp),
INTENT(IN),
OPTIONAL :: eps_localization
453 REAL(kind=
dp),
INTENT(OUT),
OPTIONAL :: tol_out
454 INTEGER,
INTENT(OUT),
OPTIONAL :: jsweeps
455 INTEGER,
INTENT(IN),
OPTIONAL :: out_each
456 TYPE(cp_cfm_type),
INTENT(IN),
OPTIONAL :: c_zij_out(:)
457 TYPE(cp_fm_type),
INTENT(OUT),
OPTIONAL,
POINTER :: grad_final
459 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_rotations_serial_1'
461 COMPLEX(KIND=dp) :: mzii
462 COMPLEX(KIND=dp),
POINTER :: mii(:), mij(:), mjj(:)
463 INTEGER :: dim2, handle, idim, istate, jstate, &
464 nstate, sweeps, unit_nr
465 REAL(kind=
dp) :: alpha, avg_spread_ii, ct, spread_ii, st, &
466 sum_spread_ii, t1, t2, theta, tolerance
467 TYPE(cp_cfm_type) :: c_rmat_local
468 TYPE(cp_cfm_type),
ALLOCATABLE :: c_zij_local(:)
470 CALL timeset(routinen, handle)
473 NULLIFY (mii, mij, mjj)
474 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
476 ALLOCATE (c_zij_local(dim2))
478 CALL cp_cfm_set_all(c_rmat_local, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
480 CALL cp_cfm_create(c_zij_local(idim), c_zij(idim)%matrix_struct)
481 c_zij_local(idim)%local_data = c_zij(idim)%local_data
485 tolerance = 1.0e10_dp
487 IF (
PRESENT(grad_final))
CALL cp_fm_set_all(grad_final, 0.0_dp)
490 IF (
PRESENT(out_each))
THEN
492 IF (c_rmat_local%matrix_struct%para_env%is_source())
THEN
497 alpha = alpha + weights(idim)
502 DO WHILE (sweeps < max_iter)
504 IF (
PRESENT(eps_localization))
THEN
505 IF (tolerance < eps_localization)
EXIT
509 DO istate = 1, nstate
510 DO jstate = istate + 1, nstate
516 CALL get_angle(mii, mjj, mij, weights, theta)
519 CALL rotate_zij(istate, jstate, st, ct, c_zij_local)
521 CALL rotate_rmat(istate, jstate, st, ct, c_rmat_local)
525 IF (
PRESENT(grad_final))
THEN
526 CALL check_tolerance(c_zij_local, weights, tolerance, grad=grad_final)
528 CALL check_tolerance(c_zij_local, weights, tolerance)
530 IF (
PRESENT(tol_out)) tol_out = tolerance
532 IF (
PRESENT(out_each))
THEN
534 IF (unit_nr > 0 .AND.
modulo(sweeps, out_each) == 0)
THEN
535 sum_spread_ii = 0.0_dp
536 DO istate = 1, nstate
540 spread_ii = spread_ii + weights(idim)* &
543 sum_spread_ii = sum_spread_ii + spread_ii
545 sum_spread_ii = alpha*nstate/
twopi/
twopi - sum_spread_ii
546 avg_spread_ii = sum_spread_ii/nstate
547 WRITE (unit_nr,
'(T4,A,T26,A,T48,A,T64,A)') &
548 "Iteration",
"Avg. Spread_ii",
"Tolerance",
"Time"
549 WRITE (unit_nr,
'(T4,I7,T20,F20.10,T45,E12.4,T60,F8.3)') &
550 sweeps, avg_spread_ii, tolerance, t2 - t1
553 IF (
PRESENT(jsweeps)) jsweeps = sweeps
558 IF (
PRESENT(c_zij_out))
THEN
560 CALL cp_cfm_to_cfm(c_zij_local(idim), c_zij_out(idim))
563 CALL cp_cfm_to_cfm(c_rmat_local, c_rmat)
565 DEALLOCATE (mii, mij, mjj)
569 DEALLOCATE (c_zij_local)
572 CALL timestop(handle)
574 END SUBROUTINE jacobi_rotations_serial_1
593 iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
594 TYPE(mp_para_env_type),
POINTER :: para_env
595 REAL(kind=
dp),
INTENT(IN) :: weights(:)
596 TYPE(cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
597 INTEGER,
INTENT(IN) :: max_iter
598 REAL(kind=
dp),
INTENT(IN) :: eps_localization
600 INTEGER,
INTENT(IN) :: out_each, nextra
601 LOGICAL,
INTENT(IN) :: do_cg
602 INTEGER,
INTENT(IN),
OPTIONAL :: nmo
603 TYPE(cp_fm_type),
INTENT(IN),
OPTIONAL :: vectors_2, mos_guess
605 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_cg_edf_ls'
606 COMPLEX(KIND=dp),
PARAMETER :: cone = (1.0_dp, 0.0_dp), &
607 czero = (0.0_dp, 0.0_dp)
608 REAL(kind=
dp),
PARAMETER :: gold_sec = 0.3819_dp
610 COMPLEX(KIND=dp) :: cnorm2_gct, cnorm2_gct_cross, mzii
611 COMPLEX(KIND=dp),
ALLOCATABLE,
DIMENSION(:, :) :: tmp_cmat
612 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER :: arr_zii
613 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: matrix_zii
614 INTEGER :: dim2, handle, icinit, idim, istate, line_search_count, line_searches, lsl, lsm, &
615 lsr, miniter, nao, ndummy, nocc, norextra, northo, nstate, unit_nr
616 INTEGER,
DIMENSION(1) :: iloc
617 LOGICAL :: do_cinit_mo, do_cinit_random, &
618 do_u_guess_mo, new_direction
619 REAL(kind=
dp) :: alpha, avg_spread_ii, beta, beta_pr, ds, ds_min, mintol, norm, norm2_gct, &
620 norm2_gct_cross, norm2_old, spread_ii, spread_sum, sum_spread_ii, t1, tol, tolc, weight
621 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: sum_spread
622 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: tmp_mat, tmp_mat_1
623 REAL(kind=
dp),
DIMENSION(50) :: energy, pos
624 REAL(kind=
dp),
DIMENSION(:),
POINTER :: tmp_arr
625 TYPE(cp_blacs_env_type),
POINTER :: context
626 TYPE(cp_cfm_type) :: c_tilde, ctrans_lambda, gct_old, &
627 grad_ctilde, skc, tmp_cfm, tmp_cfm_1, &
628 tmp_cfm_2, u, ul, v, vl, zdiag
629 TYPE(cp_cfm_type),
ALLOCATABLE,
DIMENSION(:) :: c_zij, zij_0
630 TYPE(cp_fm_struct_type),
POINTER :: tmp_fm_struct
631 TYPE(cp_fm_type) :: id_nextra, matrix_u, matrix_v, &
632 matrix_v_all, rmat, tmp_fm, vectors_all
634 CALL timeset(routinen, handle)
638 NULLIFY (matrix_zii, arr_zii)
639 NULLIFY (tmp_fm_struct)
642 ALLOCATE (c_zij(dim2))
646 ALLOCATE (sum_spread(nstate))
647 ALLOCATE (matrix_zii(nstate, dim2))
653 alpha = alpha + weights(idim)
655 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
656 zij(2, idim)%local_data,
dp)
659 ALLOCATE (zij_0(dim2))
669 IF (
PRESENT(mos_guess))
THEN
670 do_cinit_random = .false.
674 do_cinit_random = .true.
675 do_cinit_mo = .false.
679 IF (do_cinit_random)
THEN
681 do_u_guess_mo = .false.
682 ELSEIF (do_cinit_mo)
THEN
684 do_u_guess_mo = .true.
687 nocc = nstate - nextra
689 norextra = nmo - nstate
692 ALLOCATE (tmp_cmat(nstate, nstate))
694 para_env=para_env, context=context)
702 DEALLOCATE (tmp_cmat)
705 para_env=para_env, context=context)
715 para_env=para_env, context=context)
720 ALLOCATE (arr_zii(nstate))
723 para_env=para_env, context=context)
734 para_env=para_env, context=context)
740 para_env=para_env, context=context)
748 para_env=para_env, context=context)
754 para_env=para_env, context=context)
757 ALLOCATE (tmp_mat(nao, nstate))
761 ALLOCATE (tmp_mat(nao, norextra))
772 CALL ortho_vectors(tmp_fm)
773 c_tilde%local_data = tmp_fm%local_data
774 CALL cp_fm_release(tmp_fm)
775 ALLOCATE (tmp_cmat(northo, nextra))
778 DEALLOCATE (tmp_cmat)
780 CALL parallel_gemm(
"T",
"N", nmo, ndummy, nao, 1.0_dp, vectors_all, mos_guess, 0.0_dp, matrix_v_all)
781 ALLOCATE (tmp_arr(nmo))
782 ALLOCATE (tmp_mat(nmo, ndummy))
783 ALLOCATE (tmp_mat_1(nmo, nstate))
786 DO istate = 1, ndummy
787 tmp_arr(:) = tmp_mat(:, istate)
788 norm = norm2(tmp_arr)
789 tmp_arr(:) = tmp_arr(:)/norm
790 tmp_mat(:, istate) = tmp_arr(:)
795 DEALLOCATE (tmp_arr, tmp_mat, tmp_mat_1)
797 ALLOCATE (tmp_mat(northo, ndummy))
798 ALLOCATE (tmp_mat_1(northo, nextra))
800 ALLOCATE (tmp_arr(ndummy))
802 DO istate = 1, ndummy
803 tmp_arr(istate) = norm2(tmp_mat(:, istate))
806 DO istate = 1, nextra
807 iloc = maxloc(tmp_arr)
808 tmp_mat_1(:, istate) = tmp_mat(:, iloc(1))
809 tmp_arr(iloc(1)) = 0.0_dp
812 DEALLOCATE (tmp_arr, tmp_mat)
815 para_env=para_env, context=context)
819 DEALLOCATE (tmp_mat_1)
820 CALL ortho_vectors(tmp_fm)
822 CALL cp_fm_release(tmp_fm)
824 IF (do_u_guess_mo)
THEN
825 ALLOCATE (tmp_cmat(nocc, nstate))
828 DEALLOCATE (tmp_cmat)
829 ALLOCATE (tmp_cmat(northo, nstate))
832 DEALLOCATE (tmp_cmat)
833 CALL parallel_gemm(
"C",
"N", nextra, nstate, northo, cone, c_tilde, vl, czero, ul)
834 ALLOCATE (tmp_cmat(nextra, nstate))
837 DEALLOCATE (tmp_cmat)
839 tmp_fm%local_data = real(u%local_data, kind=
dp)
840 CALL ortho_vectors(tmp_fm)
842 CALL cp_fm_release(tmp_fm)
846 ALLOCATE (tmp_cmat(nocc, nstate))
849 DEALLOCATE (tmp_cmat)
850 ALLOCATE (tmp_cmat(nextra, nstate))
853 DEALLOCATE (tmp_cmat)
854 CALL parallel_gemm(
"N",
"N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
855 ALLOCATE (tmp_cmat(northo, nstate))
858 DEALLOCATE (tmp_cmat)
863 CALL cp_cfm_to_cfm(c_zij(idim), zij_0(idim))
870 IF (rmat%matrix_struct%para_env%is_source())
THEN
872 WRITE (unit_nr,
'(T4,A )')
" Localization by combined Jacobi rotations and Non-Linear Conjugate Gradient"
875 norm2_old = 1.0e30_dp
877 new_direction = .true.
880 line_search_count = 0
889 DO WHILE (iter < max_iter)
898 IF (para_env%num_pe == 1)
THEN
899 CALL jacobi_rotations_serial_1(weights, c_zij, 1, tmp_cfm_2, tol_out=tol)
901 CALL jacobi_rot_para_1(weights, c_zij, para_env, 1, tmp_cfm_2, tol_out=tol)
903 CALL parallel_gemm(
'N',
'N', nstate, nstate, nstate, cone, u, tmp_cfm_2, czero, tmp_cfm)
904 CALL cp_cfm_to_cfm(tmp_cfm, u)
910 ALLOCATE (tmp_cmat(nextra, nstate))
913 DEALLOCATE (tmp_cmat)
917 tmp_fm%local_data = real(c_tilde%local_data, kind=
dp)
918 CALL ortho_vectors(tmp_fm)
920 CALL cp_fm_release(tmp_fm)
922 ALLOCATE (tmp_cmat(nocc, nstate))
925 DEALLOCATE (tmp_cmat)
926 CALL parallel_gemm(
"N",
"N", northo, nstate, nextra, cone, c_tilde, ul, czero, vl)
927 ALLOCATE (tmp_cmat(northo, nstate))
930 DEALLOCATE (tmp_cmat)
934 IF (new_direction .AND. mod(line_searches, 20) .EQ. 5)
THEN
937 norm2_old = 1.0e30_dp
941 CALL cp_cfm_to_cfm(v, tmp_cfm)
946 CALL cp_cfm_to_cfm(u, tmp_cfm)
953 CALL parallel_gemm(
"N",
"N", ndummy, nstate, ndummy, cone, zij_0(idim), &
954 tmp_cfm, czero, tmp_cfm_1)
956 CALL parallel_gemm(
"C",
"N", nstate, nstate, ndummy, cone, tmp_cfm, tmp_cfm_1, &
962 DO istate = 1, nstate
966 spread_ii = spread_ii + weights(idim)* &
968 matrix_zii(istate, idim) = mzii
971 sum_spread(istate) = spread_ii
973 CALL c_zij(1)%matrix_struct%para_env%sum(spread_ii)
974 spread_sum = accurate_sum(sum_spread)
984 ALLOCATE (tmp_cmat(northo, nstate))
986 weight = weights(idim)
987 arr_zii = matrix_zii(:, idim)
989 CALL parallel_gemm(
"N",
"N", nmo, nstate, nmo, cone, &
990 zij_0(idim), v, czero, tmp_cfm)
994 CALL parallel_gemm(
"N",
"C", nmo, nstate, nstate, cone, tmp_cfm, &
996 CALL cp_cfm_scale(weight, tmp_cfm_1)
1001 CALL parallel_gemm(
"C",
"N", nmo, nstate, nmo, cone, &
1002 zij_0(idim), v, czero, tmp_cfm)
1007 CALL parallel_gemm(
"N",
"C", nmo, nstate, nstate, cone, tmp_cfm, &
1008 u, czero, tmp_cfm_1)
1009 CALL cp_cfm_scale(weight, tmp_cfm_1)
1015 DEALLOCATE (tmp_cmat)
1016 ALLOCATE (tmp_cmat(northo, nextra))
1018 northo, nextra, .false.)
1021 DEALLOCATE (tmp_cmat)
1023 CALL parallel_gemm(
"C",
"N", nextra, nextra, northo, cone, c_tilde, grad_ctilde, czero, ctrans_lambda)
1026 CALL parallel_gemm(
"N",
"N", northo, nextra, nextra, -cone, c_tilde, ctrans_lambda, cone, grad_ctilde)
1030 IF (nextra > 0)
THEN
1035 CALL cp_fm_release(tmp_fm)
1041 IF (nextra > 0)
THEN
1043 IF (new_direction)
THEN
1044 line_searches = line_searches + 1
1045 IF (mintol > tol)
THEN
1050 IF (unit_nr > 0 .AND.
modulo(iter, out_each) == 0)
THEN
1051 sum_spread_ii = alpha*nstate/
twopi/
twopi - spread_sum
1052 avg_spread_ii = sum_spread_ii/nstate
1053 WRITE (unit_nr,
'(T4,A,T26,A,T48,A)') &
1054 "Iteration",
"Avg. Spread_ii",
"Tolerance"
1055 WRITE (unit_nr,
'(T4,I7,T20,F20.10,T45,E12.4)') &
1056 iter, avg_spread_ii, tol
1059 IF (tol < eps_localization)
EXIT
1063 cnorm2_gct_cross = czero
1064 CALL cp_cfm_trace(grad_ctilde, gct_old, cnorm2_gct_cross)
1065 norm2_gct_cross = real(cnorm2_gct_cross, kind=
dp)
1066 gct_old%local_data = grad_ctilde%local_data
1068 norm2_gct = real(cnorm2_gct, kind=
dp)
1070 beta_pr = (norm2_gct - norm2_gct_cross)/norm2_old
1071 norm2_old = norm2_gct
1072 beta = max(0.0_dp, beta_pr)
1075 CALL cp_cfm_scale(beta, skc)
1078 norm2_gct_cross = real(cnorm2_gct_cross, kind=
dp)
1079 IF (norm2_gct_cross .LE. 0.0_dp)
THEN
1085 line_search_count = 0
1088 line_search_count = line_search_count + 1
1090 energy(line_search_count) = spread_sum
1093 new_direction = .false.
1094 IF (line_search_count .EQ. 1)
THEN
1099 pos(2) = ds_min/gold_sec
1102 IF (line_search_count .EQ. 50)
THEN
1103 IF (abs(energy(line_search_count) - energy(line_search_count - 1)) < 1.0e-4_dp)
THEN
1104 cpwarn(
"Line search failed to converge properly")
1106 new_direction = .true.
1107 ds = pos(line_search_count)
1108 line_search_count = 0
1110 cpabort(
"No. of line searches exceeds 50")
1113 IF (lsr .EQ. 0)
THEN
1114 IF (energy(line_search_count - 1) .GT. energy(line_search_count))
THEN
1115 lsr = line_search_count
1116 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
1119 lsm = line_search_count
1120 pos(line_search_count + 1) = pos(line_search_count)/gold_sec
1123 IF (pos(line_search_count) .LT. pos(lsm))
THEN
1124 IF (energy(line_search_count) .GT. energy(lsm))
THEN
1126 lsm = line_search_count
1128 lsl = line_search_count
1131 IF (energy(line_search_count) .GT. energy(lsm))
THEN
1133 lsm = line_search_count
1135 lsr = line_search_count
1138 IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl))
THEN
1139 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
1141 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
1143 IF ((pos(lsr) - pos(lsl)) .LT. 1.0e-3_dp*pos(lsr))
THEN
1144 new_direction = .true.
1149 ds = pos(line_search_count + 1) - pos(line_search_count)
1151 IF ((abs(ds) < 1.0e-10_dp) .AND. (lsl == 1))
THEN
1152 new_direction = .true.
1153 ds_min = 0.5_dp/alpha
1154 ELSEIF (abs(ds) > 10.0_dp)
THEN
1155 new_direction = .true.
1156 ds_min = 0.5_dp/alpha
1158 ds_min = pos(line_search_count + 1)
1162 CALL cp_cfm_scale(ds, skc)
1165 IF (mintol > tol)
THEN
1169 IF (unit_nr > 0 .AND.
modulo(iter, out_each) == 0)
THEN
1170 sum_spread_ii = alpha*nstate/
twopi/
twopi - spread_sum
1171 avg_spread_ii = sum_spread_ii/nstate
1172 WRITE (unit_nr,
'(T4,A,T26,A,T48,A)') &
1173 "Iteration",
"Avg. Spread_ii",
"Tolerance"
1174 WRITE (unit_nr,
'(T4,I7,T20,F20.10,T45,E12.4)') &
1175 iter, avg_spread_ii, tol
1178 IF (tol < eps_localization)
EXIT
1183 IF ((unit_nr > 0) .AND. (iter == max_iter))
THEN
1184 WRITE (unit_nr,
'(T4,A,T4,A)')
"Min. Itr.",
"Min. Tol."
1185 WRITE (unit_nr,
'(T4,I7,T4,E12.4)') miniter, mintol
1191 IF (nextra > 0)
THEN
1192 rmat%local_data = real(v%local_data, kind=
dp)
1193 CALL rotate_orbitals_edf(rmat, vectors_all, vectors)
1202 CALL cp_fm_release(id_nextra)
1203 CALL cp_fm_release(vectors_all)
1205 CALL cp_fm_release(matrix_v)
1206 CALL cp_fm_release(matrix_v_all)
1208 DEALLOCATE (arr_zii)
1210 rmat%local_data = matrix_u%local_data
1219 zij(1, idim)%local_data = real(c_zij(idim)%local_data,
dp)
1220 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
1224 CALL cp_fm_release(rmat)
1226 CALL cp_fm_release(matrix_u)
1227 DEALLOCATE (matrix_zii, sum_spread)
1229 CALL timestop(handle)
1237 SUBROUTINE ortho_vectors(vmatrix)
1239 TYPE(cp_fm_type),
INTENT(IN) :: vmatrix
1241 CHARACTER(LEN=*),
PARAMETER :: routinen =
'ortho_vectors'
1243 INTEGER :: handle, n, ncol
1244 TYPE(cp_fm_struct_type),
POINTER :: fm_struct_tmp
1245 TYPE(cp_fm_type) :: overlap_vv
1247 CALL timeset(routinen, handle)
1249 NULLIFY (fm_struct_tmp)
1251 CALL cp_fm_get_info(matrix=vmatrix, nrow_global=n, ncol_global=ncol)
1254 para_env=vmatrix%matrix_struct%para_env, &
1255 context=vmatrix%matrix_struct%context)
1256 CALL cp_fm_create(overlap_vv, fm_struct_tmp,
"overlap_vv")
1259 CALL parallel_gemm(
'T',
'N', ncol, ncol, n, 1.0_dp, vmatrix, vmatrix, 0.0_dp, overlap_vv)
1263 CALL cp_fm_release(overlap_vv)
1265 CALL timestop(handle)
1267 END SUBROUTINE ortho_vectors
1277 SUBROUTINE rotate_zij(istate, jstate, st, ct, zij)
1278 INTEGER,
INTENT(IN) :: istate, jstate
1279 REAL(kind=
dp),
INTENT(IN) :: st, ct
1280 TYPE(cp_cfm_type) :: zij(:)
1286 DO id = 1,
SIZE(zij, 1)
1291 END SUBROUTINE rotate_zij
1300 SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat)
1301 INTEGER,
INTENT(IN) :: istate, jstate
1302 REAL(kind=
dp),
INTENT(IN) :: st, ct
1303 TYPE(cp_cfm_type),
INTENT(IN) :: rmat
1307 END SUBROUTINE rotate_rmat
1318 SUBROUTINE get_angle(mii, mjj, mij, weights, theta, grad_ij, step)
1319 COMPLEX(KIND=dp),
POINTER :: mii(:), mjj(:), mij(:)
1320 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1321 REAL(kind=
dp),
INTENT(OUT) :: theta
1322 REAL(kind=
dp),
INTENT(IN),
OPTIONAL :: grad_ij, step
1324 COMPLEX(KIND=dp) :: z11, z12, z22
1325 INTEGER :: dim_m, idim
1326 REAL(kind=
dp) :: a12, b12, d2, ratio
1335 a12 = a12 + weights(idim)*real(conjg(z12)*(z11 - z22), kind=
dp)
1336 b12 = b12 + weights(idim)*real((z12*conjg(z12) - &
1337 0.25_dp*(z11 - z22)*(conjg(z11) - conjg(z22))), kind=
dp)
1339 IF (abs(b12) > 1.e-10_dp)
THEN
1341 theta = 0.25_dp*atan(ratio)
1342 ELSEIF (abs(b12) < 1.e-10_dp)
THEN
1348 IF (
PRESENT(grad_ij)) theta = theta + step*grad_ij
1350 d2 = a12*sin(4._dp*theta) - b12*cos(4._dp*theta)
1351 IF (d2 <= 0._dp)
THEN
1352 IF (theta > 0.0_dp)
THEN
1353 theta = theta - 0.25_dp*
pi
1355 theta = theta + 0.25_dp*
pi
1358 END SUBROUTINE get_angle
1366 SUBROUTINE check_tolerance(zij, weights, tolerance, grad)
1367 TYPE(cp_cfm_type) :: zij(:)
1368 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1369 REAL(kind=
dp),
INTENT(OUT) :: tolerance
1370 TYPE(cp_fm_type),
INTENT(OUT),
OPTIONAL :: grad
1372 CHARACTER(len=*),
PARAMETER :: routinen =
'check_tolerance'
1375 TYPE(cp_fm_type) :: force
1377 CALL timeset(routinen, handle)
1383 CALL grad_at_0(zij, weights, force)
1385 IF (
PRESENT(grad))
CALL cp_fm_to_fm(force, grad)
1386 CALL cp_fm_release(force)
1388 CALL timestop(handle)
1390 END SUBROUTINE check_tolerance
1397 TYPE(cp_fm_type),
INTENT(IN) :: rmat, vectors
1400 TYPE(cp_fm_type) :: wf
1404 CALL parallel_gemm(
"N",
"N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf)
1405 CALL cp_fm_to_fm(wf, vectors)
1406 CALL cp_fm_release(wf)
1414 SUBROUTINE rotate_orbitals_edf(rmat, vec_all, vectors)
1415 TYPE(cp_fm_type),
INTENT(IN) :: rmat, vec_all, vectors
1418 TYPE(cp_fm_type) :: wf
1424 CALL parallel_gemm(
"N",
"N", n, l, k, 1.0_dp, vec_all, rmat, 0.0_dp, wf)
1425 CALL cp_fm_to_fm(wf, vectors)
1426 CALL cp_fm_release(wf)
1427 END SUBROUTINE rotate_orbitals_edf
1435 SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim)
1436 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag
1437 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1438 TYPE(cp_fm_type),
INTENT(IN) :: matrix
1439 INTEGER,
INTENT(IN) :: ndim
1441 COMPLEX(KIND=dp) :: zii, zjj
1442 INTEGER :: idim, istate, jstate, ncol_local, &
1443 nrow_global, nrow_local
1444 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1445 REAL(kind=
dp) :: gradsq_ij
1448 ncol_local=ncol_local, nrow_global=nrow_global, &
1449 row_indices=row_indices, col_indices=col_indices)
1451 DO istate = 1, nrow_local
1452 DO jstate = 1, ncol_local
1456 zii = diag(row_indices(istate), idim)
1457 zjj = diag(col_indices(jstate), idim)
1458 gradsq_ij = gradsq_ij + weights(idim)* &
1459 4.0_dp*real((conjg(zii)*zii + conjg(zjj)*zjj), kind=
dp)
1461 matrix%local_data(istate, jstate) = gradsq_ij
1464 END SUBROUTINE gradsq_at_0
1471 SUBROUTINE grad_at_0(matrix_p, weights, matrix)
1472 TYPE(cp_cfm_type) :: matrix_p(:)
1473 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1474 TYPE(cp_fm_type),
INTENT(IN) :: matrix
1476 COMPLEX(KIND=dp) :: zii, zij, zjj
1477 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag
1478 INTEGER :: dim_m, idim, istate, jstate, ncol_local, &
1479 nrow_global, nrow_local
1480 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1481 REAL(kind=
dp) :: grad_ij
1485 ncol_local=ncol_local, nrow_global=nrow_global, &
1486 row_indices=row_indices, col_indices=col_indices)
1487 dim_m =
SIZE(matrix_p, 1)
1488 ALLOCATE (diag(nrow_global, dim_m))
1491 DO istate = 1, nrow_global
1496 DO istate = 1, nrow_local
1497 DO jstate = 1, ncol_local
1501 zii = diag(row_indices(istate), idim)
1502 zjj = diag(col_indices(jstate), idim)
1503 zij = matrix_p(idim)%local_data(istate, jstate)
1504 grad_ij = grad_ij + weights(idim)* &
1505 REAL(4.0_dp*conjg(zij)*(zjj - zii),
dp)
1507 matrix%local_data(istate, jstate) = grad_ij
1511 END SUBROUTINE grad_at_0
1521 SUBROUTINE check_tolerance_new(weights, zij, tolerance, value)
1522 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1523 TYPE(cp_fm_type),
INTENT(IN) :: zij(:, :)
1524 REAL(kind=
dp) :: tolerance,
value
1526 COMPLEX(KIND=dp) :: kii, kij, kjj
1527 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag
1528 INTEGER :: idim, istate, jstate, ncol_local, &
1529 nrow_global, nrow_local
1530 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1531 REAL(kind=
dp) :: grad_ij, ra, rb
1535 ncol_local=ncol_local, nrow_global=nrow_global, &
1536 row_indices=row_indices, col_indices=col_indices)
1537 ALLOCATE (diag(nrow_global,
SIZE(zij, 2)))
1539 DO idim = 1,
SIZE(zij, 2)
1540 DO istate = 1, nrow_global
1543 diag(istate, idim) = cmplx(ra, rb,
dp)
1544 value =
value + weights(idim) - weights(idim)*abs(diag(istate, idim))**2
1548 DO istate = 1, nrow_local
1549 DO jstate = 1, ncol_local
1551 DO idim = 1,
SIZE(zij, 2)
1552 kii = diag(row_indices(istate), idim)
1553 kjj = diag(col_indices(jstate), idim)
1554 ra = zij(1, idim)%local_data(istate, jstate)
1555 rb = zij(2, idim)%local_data(istate, jstate)
1556 kij = cmplx(ra, rb,
dp)
1557 grad_ij = grad_ij + weights(idim)* &
1558 REAL(4.0_dp*conjg(kij)*(kjj - kii),
dp)
1560 tolerance = max(abs(grad_ij), tolerance)
1563 CALL zij(1, 1)%matrix_struct%para_env%max(tolerance)
1567 END SUBROUTINE check_tolerance_new
1583 SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, &
1584 eps_localization, iterations, converged)
1585 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1586 TYPE(cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
1587 INTEGER,
INTENT(IN) :: max_iter
1588 REAL(kind=
dp),
INTENT(IN) :: max_crazy_angle
1589 REAL(kind=
dp) :: crazy_scale
1590 LOGICAL :: crazy_use_diag
1591 REAL(kind=
dp),
INTENT(IN) :: eps_localization
1592 INTEGER :: iterations
1593 LOGICAL,
INTENT(out),
OPTIONAL :: converged
1595 CHARACTER(len=*),
PARAMETER :: routinen =
'crazy_rotations'
1596 COMPLEX(KIND=dp),
PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1597 czero = (0.0_dp, 0.0_dp)
1599 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER :: evals_exp
1600 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag_z
1601 COMPLEX(KIND=dp),
POINTER :: mii(:), mij(:), mjj(:)
1602 INTEGER :: dim2, handle, i, icol, idim, irow, &
1603 method, ncol_global, ncol_local, &
1604 norder, nrow_global, nrow_local, &
1606 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1608 REAL(kind=
dp) :: eps_exp, limit_crazy_angle, maxeval, &
1609 norm, ra, rb, theta, tolerance,
value
1610 REAL(kind=
dp),
DIMENSION(:),
POINTER :: evals
1611 TYPE(cp_cfm_type) :: cmat_a, cmat_r, cmat_t1
1612 TYPE(cp_fm_type) :: mat_r, mat_t, mat_theta, mat_u
1614 CALL timeset(routinen, handle)
1615 NULLIFY (row_indices, col_indices)
1617 ncol_global=ncol_global, &
1618 row_indices=row_indices, col_indices=col_indices, &
1619 nrow_local=nrow_local, ncol_local=ncol_local)
1621 limit_crazy_angle = max_crazy_angle
1623 NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj)
1625 ALLOCATE (diag_z(nrow_global, dim2))
1626 ALLOCATE (evals(nrow_global))
1627 ALLOCATE (evals_exp(nrow_global))
1641 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
1649 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1650 CALL parallel_gemm(
'T',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1651 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1652 CALL parallel_gemm(
'T',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1655 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1656 CALL cp_fm_to_fm(mat_t, mat_r)
1659 IF (cmat_a%matrix_struct%para_env%is_source())
THEN
1661 WRITE (unit_nr,
'(T2,A7,A6,1X,A20,A12,A12,A12)') &
1662 "CRAZY| ",
"Iter",
"value ",
"gradient",
"Max. eval",
"limit"
1669 iterations = iterations + 1
1671 DO i = 1, nrow_global
1674 diag_z(i, idim) = cmplx(ra, rb,
dp)
1677 DO irow = 1, nrow_local
1678 DO icol = 1, ncol_local
1680 ra = zij(1, idim)%local_data(irow, icol)
1681 rb = zij(2, idim)%local_data(irow, icol)
1682 mij(idim) = cmplx(ra, rb,
dp)
1683 mii(idim) = diag_z(row_indices(irow), idim)
1684 mjj(idim) = diag_z(col_indices(icol), idim)
1686 IF (row_indices(irow) .NE. col_indices(icol))
THEN
1687 CALL get_angle(mii, mjj, mij, weights, theta)
1688 theta = crazy_scale*theta
1689 IF (theta .GT. limit_crazy_angle) theta = limit_crazy_angle
1690 IF (theta .LT. -limit_crazy_angle) theta = -limit_crazy_angle
1691 IF (crazy_use_diag)
THEN
1692 cmat_a%local_data(irow, icol) = -cmplx(0.0_dp, theta,
dp)
1694 mat_theta%local_data(irow, icol) = -theta
1697 IF (crazy_use_diag)
THEN
1698 cmat_a%local_data(irow, icol) = czero
1700 mat_theta%local_data(irow, icol) = 0.0_dp
1708 IF (crazy_use_diag)
THEN
1710 maxeval = maxval(abs(evals))
1711 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1712 CALL cp_cfm_to_cfm(cmat_r, cmat_t1)
1714 CALL parallel_gemm(
'N',
'C', nrow_global, nrow_global, nrow_global, cone, &
1715 cmat_t1, cmat_r, czero, cmat_a)
1716 mat_u%local_data = real(cmat_a%local_data, kind=
dp)
1720 eps_exp = 1.0_dp*epsilon(eps_exp)
1729 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_u, 0.0_dp, mat_t)
1730 CALL parallel_gemm(
'T',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(1, idim))
1731 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_u, 0.0_dp, mat_t)
1732 CALL parallel_gemm(
'T',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_u, mat_t, 0.0_dp, zij(2, idim))
1735 CALL parallel_gemm(
'N',
'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_r, mat_u, 0.0_dp, mat_t)
1736 CALL cp_fm_to_fm(mat_t, mat_r)
1738 CALL check_tolerance_new(weights, zij, tolerance,
value)
1740 IF (unit_nr > 0)
THEN
1741 WRITE (unit_nr,
'(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') &
1742 "CRAZY| ", iterations,
value, tolerance, maxeval, limit_crazy_angle
1745 IF (tolerance .LT. eps_localization .OR. iterations .GE. max_iter)
EXIT
1748 IF (
PRESENT(converged)) converged = (tolerance .LT. eps_localization)
1754 CALL cp_fm_release(mat_u)
1755 CALL cp_fm_release(mat_t)
1756 CALL cp_fm_release(mat_theta)
1760 CALL cp_fm_release(mat_r)
1761 DEALLOCATE (evals_exp, evals, diag_z)
1762 DEALLOCATE (mii, mij, mjj)
1764 CALL timestop(handle)
1782 SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
1783 REAL(kind=
dp),
INTENT(IN) :: weights(:)
1784 TYPE(cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
1785 INTEGER,
INTENT(IN) :: max_iter
1786 REAL(kind=
dp),
INTENT(IN) :: eps_localization
1787 INTEGER :: iterations
1789 CHARACTER(len=*),
PARAMETER :: routinen =
'direct_mini'
1790 COMPLEX(KIND=dp),
PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1791 czero = (0.0_dp, 0.0_dp)
1792 REAL(kind=
dp),
PARAMETER :: gold_sec = 0.3819_dp
1794 COMPLEX(KIND=dp) :: lk, ll, tmp
1795 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER :: evals_exp
1796 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: diag_z
1797 INTEGER :: handle, i, icol, idim, irow, &
1798 line_search_count, line_searches, lsl, &
1799 lsm, lsr, n, ncol_local, ndim, &
1800 nrow_local, output_unit
1801 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1802 LOGICAL :: new_direction
1803 REAL(kind=
dp) :: a, b, beta_pr, c, denom, ds, ds_min, fa, &
1804 fb, fc, nom, normg, normg_cross, &
1805 normg_old, npos, omega, tol, val, x0, &
1807 REAL(kind=
dp),
DIMENSION(150) :: energy, grad, pos
1808 REAL(kind=
dp),
DIMENSION(:),
POINTER :: evals, fval, fvald
1809 TYPE(cp_cfm_type) :: cmat_a, cmat_b, cmat_m, cmat_r, cmat_t1, &
1811 TYPE(cp_cfm_type),
ALLOCATABLE,
DIMENSION(:) :: c_zij
1812 TYPE(cp_fm_type) :: matrix_a, matrix_g, matrix_g_old, &
1813 matrix_g_search, matrix_h, matrix_r, &
1816 NULLIFY (evals, evals_exp, diag_z, fval, fvald)
1818 CALL timeset(routinen, handle)
1821 n = zij(1, 1)%matrix_struct%nrow_global
1822 ndim = (
SIZE(zij, 2))
1824 IF (output_unit > 0)
THEN
1825 WRITE (output_unit,
'(T4,A )')
"Localization by direct minimization of the functional; "
1826 WRITE (output_unit,
'(T5,2A13,A20,A20,A10 )')
" Line search ",
" Iteration ",
" Functional ",
" Tolerance ",
" ds Min "
1829 ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n))
1830 ALLOCATE (c_zij(ndim))
1835 c_zij(idim)%local_data = cmplx(zij(1, idim)%local_data, &
1836 zij(2, idim)%local_data,
dp)
1843 CALL cp_fm_create(matrix_g_search, zij(1, 1)%matrix_struct)
1844 CALL cp_fm_create(matrix_g_old, zij(1, 1)%matrix_struct)
1859 CALL cp_cfm_get_info(cmat_b, nrow_local=nrow_local, ncol_local=ncol_local, &
1860 row_indices=row_indices, col_indices=col_indices)
1864 normg_old = 1.0e30_dp
1866 new_direction = .true.
1869 line_search_count = 0
1871 iterations = iterations + 1
1873 cmat_a%local_data = cmplx(0.0_dp, matrix_a%local_data,
dp)
1875 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1876 CALL cp_cfm_to_cfm(cmat_r, cmat_t1)
1878 CALL parallel_gemm(
'N',
'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_u)
1879 cmat_u%local_data = real(cmat_u%local_data, kind=
dp)
1881 IF (new_direction .AND. mod(line_searches, 20) .EQ. 5)
THEN
1883 CALL parallel_gemm(
'N',
'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1)
1884 CALL parallel_gemm(
'C',
'N', n, n, n, cone, cmat_u, cmat_t1, czero, c_zij(idim))
1887 matrix_h%local_data = real(cmat_u%local_data, kind=
dp)
1888 CALL parallel_gemm(
'N',
'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
1889 CALL cp_fm_to_fm(matrix_t, matrix_r)
1896 evals_exp(:) = exp((0.0_dp, -1.0_dp)*evals(:))
1899 normg_old = 1.0e30_dp
1906 CALL parallel_gemm(
'N',
'N', n, n, n, cone, c_zij(idim), cmat_u, czero, cmat_t1)
1907 CALL parallel_gemm(
'C',
'N', n, n, n, cone, cmat_u, cmat_t1, czero, cmat_t2)
1912 fval(i) = -weights(idim)*log(abs(diag_z(i, idim))**2)
1913 fvald(i) = -weights(idim)/(abs(diag_z(i, idim))**2)
1915 fval(i) = weights(idim) - weights(idim)*abs(diag_z(i, idim))**2
1916 fvald(i) = -weights(idim)
1918 omega = omega + fval(i)
1920 DO icol = 1, ncol_local
1921 DO irow = 1, nrow_local
1922 tmp = cmat_t1%local_data(irow, icol)*conjg(diag_z(col_indices(icol), idim))
1923 cmat_m%local_data(irow, icol) = cmat_m%local_data(irow, icol) &
1924 + 4.0_dp*fvald(col_indices(icol))*real(tmp, kind=
dp)
1931 CALL gradsq_at_0(diag_z, weights, matrix_h, ndim)
1937 DO icol = 1, ncol_local
1938 DO irow = 1, nrow_local
1939 ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow))
1940 lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol))
1941 IF (abs(ll - lk) .LT. 0.5_dp)
THEN
1943 cmat_b%local_data(irow, icol) = 0.0_dp
1945 cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol) + tmp
1946 tmp = tmp*(ll - lk)/(i + 1)
1948 cmat_b%local_data(irow, icol) = cmat_b%local_data(irow, icol)*exp(lk)
1950 cmat_b%local_data(irow, icol) = (exp(lk) - exp(ll))/(lk - ll)
1956 CALL parallel_gemm(
'C',
'N', n, n, n, cone, cmat_m, cmat_r, czero, cmat_t1)
1957 CALL parallel_gemm(
'C',
'N', n, n, n, cone, cmat_r, cmat_t1, czero, cmat_t2)
1959 CALL parallel_gemm(
'N',
'C', n, n, n, cone, cmat_t1, cmat_r, czero, cmat_t2)
1960 CALL parallel_gemm(
'N',
'N', n, n, n, cone, cmat_r, cmat_t2, czero, cmat_t1)
1961 matrix_g%local_data = real(cmat_t1%local_data, kind=
dp)
1967 IF (new_direction)
THEN
1969 line_searches = line_searches + 1
1978 IF (output_unit > 0)
THEN
1979 WRITE (output_unit,
'(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, iterations, omega, tol, ds_min
1982 IF (tol < eps_localization .OR. iterations > max_iter)
EXIT
1985 CALL cp_fm_trace(matrix_g, matrix_g_old, normg_cross)
1986 normg_cross = normg_cross*0.5_dp
1988 DO icol = 1, ncol_local
1989 DO irow = 1, nrow_local
1990 matrix_g_old%local_data(irow, icol) = matrix_g%local_data(irow, icol)/matrix_h%local_data(irow, icol)
1993 CALL cp_fm_trace(matrix_g, matrix_g_old, normg)
1994 normg = normg*0.5_dp
1995 beta_pr = (normg - normg_cross)/normg_old
1997 beta_pr = max(beta_pr, 0.0_dp)
1999 CALL cp_fm_trace(matrix_g_search, matrix_g_old, normg_cross)
2000 IF (normg_cross .GE. 0)
THEN
2001 IF (matrix_a%matrix_struct%para_env%is_source())
THEN
2011 line_search_count = 0
2013 line_search_count = line_search_count + 1
2014 energy(line_search_count) = omega
2019 SELECT CASE (line_search_count)
2023 CALL cp_fm_trace(matrix_g, matrix_g_search, grad(1))
2024 grad(1) = grad(1)/2.0_dp
2025 new_direction = .false.
2027 new_direction = .true.
2032 a = (energy(2) - b*x1 - c)/(x1**2)
2033 IF (a .LE. 0.0_dp) a = 1.0e-15_dp
2034 npos = -b/(2.0_dp*a)
2035 val = a*npos**2 + b*npos + c
2036 IF (val .LT. energy(1) .AND. val .LE. energy(2))
THEN
2039 pos(3) = min(npos, maxval(pos(1:2))*4.0_dp)
2041 pos(3) = maxval(pos(1:2))*2.0_dp
2045 SELECT CASE (line_search_count)
2047 new_direction = .false.
2049 pos(2) = ds_min*0.8_dp
2051 new_direction = .false.
2052 IF (energy(2) .GT. energy(1))
THEN
2053 pos(3) = ds_min*0.7_dp
2055 pos(3) = ds_min*1.4_dp
2058 new_direction = .true.
2065 nom = (xb - xa)**2*(fb - fc) - (xb -
xc)**2*(fb - fa)
2066 denom = (xb - xa)*(fb - fc) - (xb -
xc)*(fb - fa)
2067 IF (abs(denom) .LE. 1.0e-18_dp*max(abs(fb - fc), abs(fb - fa)))
THEN
2070 npos = xb - 0.5_dp*nom/denom
2072 val = (npos - xa)*(npos - xb)*fc/((
xc - xa)*(
xc - xb)) + &
2073 (npos - xb)*(npos -
xc)*fa/((xa - xb)*(xa -
xc)) + &
2074 (npos -
xc)*(npos - xa)*fb/((xb -
xc)*(xb - xa))
2075 IF (val .LT. fa .AND. val .LE. fb .AND. val .LE. fc)
THEN
2077 pos(4) = max(maxval(pos(1:3))*0.01_dp, &
2078 min(npos, maxval(pos(1:3))*4.0_dp))
2080 pos(4) = maxval(pos(1:3))*2.0_dp
2084 new_direction = .false.
2085 IF (line_search_count .EQ. 1)
THEN
2090 pos(2) = ds_min/gold_sec
2092 IF (line_search_count .EQ. 150) cpabort(
"Too many")
2093 IF (lsr .EQ. 0)
THEN
2094 IF (energy(line_search_count - 1) .LT. energy(line_search_count))
THEN
2095 lsr = line_search_count
2096 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
2099 lsm = line_search_count
2100 pos(line_search_count + 1) = pos(line_search_count)/gold_sec
2103 IF (pos(line_search_count) .LT. pos(lsm))
THEN
2104 IF (energy(line_search_count) .LT. energy(lsm))
THEN
2106 lsm = line_search_count
2108 lsl = line_search_count
2111 IF (energy(line_search_count) .LT. energy(lsm))
THEN
2113 lsm = line_search_count
2115 lsr = line_search_count
2118 IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl))
THEN
2119 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
2121 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
2123 IF ((pos(lsr) - pos(lsl)) .LT. 1.0e-3_dp*pos(lsr))
THEN
2124 new_direction = .true.
2130 ds_min = pos(line_search_count + 1)
2131 ds = pos(line_search_count + 1) - pos(line_search_count)
2136 matrix_h%local_data = real(cmat_u%local_data, kind=
dp)
2137 CALL parallel_gemm(
'N',
'N', n, n, n, 1.0_dp, matrix_r, matrix_h, 0.0_dp, matrix_t)
2138 CALL cp_fm_to_fm(matrix_t, matrix_r)
2140 CALL cp_fm_release(matrix_r)
2142 CALL cp_fm_release(matrix_a)
2143 CALL cp_fm_release(matrix_g)
2144 CALL cp_fm_release(matrix_h)
2145 CALL cp_fm_release(matrix_t)
2146 CALL cp_fm_release(matrix_g_search)
2147 CALL cp_fm_release(matrix_g_old)
2156 DEALLOCATE (evals, evals_exp, fval, fvald)
2158 DO idim = 1,
SIZE(c_zij)
2159 zij(1, idim)%local_data = real(c_zij(idim)%local_data,
dp)
2160 zij(2, idim)%local_data = aimag(c_zij(idim)%local_data)
2166 CALL timestop(handle)
2187 SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
2188 sweeps, out_each, target_time, start_time, restricted)
2190 REAL(kind=
dp),
INTENT(IN) :: weights(:)
2191 TYPE(cp_fm_type),
INTENT(IN) :: zij(:, :), vectors
2192 TYPE(mp_para_env_type),
POINTER :: para_env
2193 INTEGER,
INTENT(IN) :: max_iter
2194 REAL(kind=
dp),
INTENT(IN) :: eps_localization
2196 INTEGER,
INTENT(IN) :: out_each
2197 REAL(
dp) :: target_time, start_time
2198 INTEGER :: restricted
2200 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_rot_para'
2202 INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2203 nblock, nblock_max, ns_me, nstate, &
2205 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: ns_bound
2206 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :) :: rotmat, z_ij_loc_im, z_ij_loc_re
2207 REAL(kind=
dp) :: xstate
2208 TYPE(cp_fm_type) :: rmat
2209 TYPE(set_c_2d_type),
DIMENSION(:),
POINTER :: cz_ij_loc
2211 CALL timeset(routinen, handle)
2224 IF (restricted > 0)
THEN
2225 IF (output_unit > 0)
THEN
2226 WRITE (output_unit,
'(T4,A,I2,A )')
"JACOBI: for the ROKS method, the last ", restricted,
" orbitals DO NOT ROTATE"
2228 nstate = nstate - restricted
2232 xstate = real(nstate,
dp)/real(para_env%num_pe,
dp)
2233 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2234 DO ip = 1, para_env%num_pe
2235 ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2236 ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2239 DO ip = 0, para_env%num_pe - 1
2240 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2241 nblock_max = max(nblock_max, nblock)
2245 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2246 ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2247 ALLOCATE (cz_ij_loc(dim2))
2249 DO ip = 0, para_env%num_pe - 1
2250 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2253 IF (para_env%mepos == ip)
THEN
2254 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, nblock))
2257 cz_ij_loc(idim)%c_array(j, i) = cmplx(z_ij_loc_re(j, i), z_ij_loc_im(j, i),
dp)
2263 DEALLOCATE (z_ij_loc_re)
2264 DEALLOCATE (z_ij_loc_im)
2266 ALLOCATE (rotmat(nstate, 2*nblock_max))
2268 CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, ns_bound, &
2269 cz_ij_loc, rotmat, output_unit, eps_localization=eps_localization, &
2270 target_time=target_time, start_time=start_time)
2272 ilow1 = ns_bound(para_env%mepos, 1)
2273 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2274 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2275 ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2277 DO ip = 0, para_env%num_pe - 1
2278 z_ij_loc_re = 0.0_dp
2279 z_ij_loc_im = 0.0_dp
2280 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2281 IF (ip == para_env%mepos)
THEN
2286 z_ij_loc_re(j, i) = real(cz_ij_loc(idim)%c_array(j, i),
dp)
2287 z_ij_loc_im(j, i) = aimag(cz_ij_loc(idim)%c_array(j, i))
2291 CALL para_env%bcast(z_ij_loc_re, ip)
2292 CALL para_env%bcast(z_ij_loc_im, ip)
2298 DO ip = 0, para_env%num_pe - 1
2299 z_ij_loc_re = 0.0_dp
2300 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2301 IF (ip == para_env%mepos)
THEN
2306 z_ij_loc_re(j, i) = rotmat(j, i)
2310 CALL para_env%bcast(z_ij_loc_re, ip)
2314 DEALLOCATE (z_ij_loc_re)
2315 DEALLOCATE (z_ij_loc_im)
2317 DEALLOCATE (cz_ij_loc(idim)%c_array)
2319 DEALLOCATE (cz_ij_loc)
2321 CALL para_env%sync()
2323 CALL cp_fm_release(rmat)
2326 DEALLOCATE (ns_bound)
2328 CALL timestop(handle)
2330 END SUBROUTINE jacobi_rot_para
2342 SUBROUTINE jacobi_rot_para_1(weights, czij, para_env, max_iter, rmat, tol_out)
2344 REAL(kind=
dp),
INTENT(IN) :: weights(:)
2345 TYPE(cp_cfm_type),
INTENT(IN) :: czij(:)
2346 TYPE(mp_para_env_type),
POINTER :: para_env
2347 INTEGER,
INTENT(IN) :: max_iter
2348 TYPE(cp_cfm_type),
INTENT(IN) :: rmat
2349 REAL(
dp),
INTENT(OUT),
OPTIONAL :: tol_out
2351 CHARACTER(len=*),
PARAMETER :: routinen =
'jacobi_rot_para_1'
2353 COMPLEX(KIND=dp),
ALLOCATABLE,
DIMENSION(:, :) :: czij_array
2354 INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2355 nblock, nblock_max, ns_me, nstate, &
2357 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: ns_bound
2358 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :) :: rotmat, z_ij_loc_re
2359 REAL(kind=
dp) :: xstate
2360 TYPE(set_c_2d_type),
DIMENSION(:),
POINTER :: cz_ij_loc
2362 CALL timeset(routinen, handle)
2371 xstate = real(nstate,
dp)/real(para_env%num_pe,
dp)
2372 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2373 DO ip = 1, para_env%num_pe
2374 ns_bound(ip - 1, 1) = min(nstate, nint(xstate*(ip - 1))) + 1
2375 ns_bound(ip - 1, 2) = min(nstate, nint(xstate*ip))
2378 DO ip = 0, para_env%num_pe - 1
2379 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2380 nblock_max = max(nblock_max, nblock)
2384 ALLOCATE (czij_array(nstate, nblock_max))
2385 ALLOCATE (cz_ij_loc(dim2))
2387 DO ip = 0, para_env%num_pe - 1
2388 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2391 IF (para_env%mepos == ip)
THEN
2393 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me))
2396 cz_ij_loc(idim)%c_array(j, i) = czij_array(j, i)
2402 DEALLOCATE (czij_array)
2404 ALLOCATE (rotmat(nstate, 2*nblock_max))
2406 CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, 1, dim2, nstate, nblock_max, ns_bound, &
2407 cz_ij_loc, rotmat, 0, tol_out=tol_out)
2409 ilow1 = ns_bound(para_env%mepos, 1)
2410 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2411 ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2413 DO ip = 0, para_env%num_pe - 1
2414 z_ij_loc_re = 0.0_dp
2415 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2416 IF (ip == para_env%mepos)
THEN
2421 z_ij_loc_re(j, i) = rotmat(j, i)
2425 CALL para_env%bcast(z_ij_loc_re, ip)
2429 DEALLOCATE (z_ij_loc_re)
2431 DEALLOCATE (cz_ij_loc(idim)%c_array)
2433 DEALLOCATE (cz_ij_loc)
2435 CALL para_env%sync()
2438 DEALLOCATE (ns_bound)
2440 CALL timestop(handle)
2442 END SUBROUTINE jacobi_rot_para_1
2466 SUBROUTINE jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, &
2467 ns_bound, cz_ij_loc, rotmat, output_unit, tol_out, eps_localization, target_time, start_time)
2469 REAL(kind=
dp),
INTENT(IN) :: weights(:)
2470 TYPE(mp_para_env_type),
POINTER :: para_env
2471 INTEGER,
INTENT(IN) :: max_iter
2472 INTEGER,
INTENT(OUT) :: sweeps
2473 INTEGER,
INTENT(IN) :: out_each, dim2, nstate, nblock_max
2474 INTEGER,
DIMENSION(0:, :),
INTENT(IN) :: ns_bound
2475 TYPE(set_c_2d_type),
DIMENSION(:),
POINTER :: cz_ij_loc
2476 REAL(
dp),
DIMENSION(:, :),
INTENT(OUT) :: rotmat
2477 INTEGER,
INTENT(IN) :: output_unit
2478 REAL(
dp),
INTENT(OUT),
OPTIONAL :: tol_out
2479 REAL(kind=
dp),
INTENT(IN),
OPTIONAL :: eps_localization
2480 REAL(
dp),
OPTIONAL :: target_time, start_time
2482 COMPLEX(KIND=dp) :: zi, zj
2483 COMPLEX(KIND=dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: c_array_me, c_array_partner
2484 COMPLEX(KIND=dp),
POINTER :: mii(:), mij(:), mjj(:)
2485 INTEGER :: i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, ip, ip_has_i, &
2486 ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, iu1, iu2, iup1, &
2487 iup2, j, jj, jstate, k, kk, lsweep, n1, n2, npair, nperm, ns_me, ns_partner, &
2488 ns_recv_from, ns_recv_partner
2489 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: rcount, rdispl
2490 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: list_pair
2491 LOGICAL :: should_stop
2492 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :) :: gmat, rmat_loc, rmat_recv, rmat_send
2493 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: rmat_recv_all
2494 REAL(kind=
dp) :: ct, func, gmax, grad, ri, rj, st, t1, &
2495 t2, theta, tolerance, zc, zr
2496 TYPE(set_c_1d_type),
DIMENSION(:),
POINTER :: zdiag_all, zdiag_me
2497 TYPE(set_c_2d_type),
DIMENSION(:),
POINTER :: xyz_mix, xyz_mix_ns
2499 NULLIFY (zdiag_all, zdiag_me)
2500 NULLIFY (xyz_mix, xyz_mix_ns)
2501 NULLIFY (mii, mij, mjj)
2503 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
2505 ALLOCATE (rcount(para_env%num_pe), stat=istat)
2506 ALLOCATE (rdispl(para_env%num_pe), stat=istat)
2508 tolerance = 1.0e10_dp
2512 npair = (para_env%num_pe + 1)/2
2513 nperm = para_env%num_pe - mod(para_env%num_pe + 1, 2)
2514 ALLOCATE (list_pair(2, npair))
2518 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2519 ii = i - ns_bound(para_env%mepos, 1) + 1
2520 rotmat(i, ii) = 1.0_dp
2523 ALLOCATE (xyz_mix(dim2))
2524 ALLOCATE (xyz_mix_ns(dim2))
2525 ALLOCATE (zdiag_me(dim2))
2526 ALLOCATE (zdiag_all(dim2))
2528 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2529 IF (ns_me /= 0)
THEN
2530 ALLOCATE (c_array_me(nstate, ns_me, dim2))
2532 ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me))
2534 ALLOCATE (gmat(nstate, ns_me))
2538 ALLOCATE (zdiag_me(idim)%c_array(nblock_max))
2539 zdiag_me(idim)%c_array = cmplx(0.0_dp, 0.0_dp,
dp)
2540 ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max))
2541 zdiag_all(idim)%c_array = cmplx(0.0_dp, 0.0_dp,
dp)
2543 ALLOCATE (rmat_recv(nblock_max*2, nblock_max))
2544 ALLOCATE (rmat_send(nblock_max*2, nblock_max))
2547 ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1))
2549 IF (output_unit > 0)
THEN
2550 WRITE (output_unit,
'(T4,A )')
" Localization by iterative distributed Jacobi rotation"
2551 WRITE (output_unit,
'(T20,A12,T32, A22,T60, A12,A8 )')
"Iteration",
"Functional",
"Tolerance",
" Time "
2554 DO lsweep = 1, max_iter + 1
2556 IF (sweeps == max_iter + 1)
THEN
2557 IF (output_unit > 0)
THEN
2558 WRITE (output_unit, *)
' LOCALIZATION! loop did not converge within the maximum number of iterations.'
2559 WRITE (output_unit, *)
' Present Max. gradient = ', tolerance
2568 CALL eberlein(iperm, para_env, list_pair)
2572 IF (list_pair(1, ipair) == para_env%mepos)
THEN
2573 ip_partner = list_pair(2, ipair)
2575 ELSE IF (list_pair(2, ipair) == para_env%mepos)
THEN
2576 ip_partner = list_pair(1, ipair)
2580 IF (ip_partner >= 0)
THEN
2581 ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1
2587 IF (ns_partner*ns_me /= 0)
THEN
2589 ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner))
2591 DO i = 1, ns_me + ns_partner
2592 rmat_loc(i, i) = 1.0_dp
2595 ALLOCATE (c_array_partner(nstate, ns_partner, dim2))
2598 ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner))
2600 c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i)
2604 CALL para_env%sendrecv(msgin=c_array_me, dest=ip_partner, &
2605 msgout=c_array_partner, source=ip_partner)
2609 ilow1 = ns_bound(para_env%mepos, 1)
2610 iup1 = ns_bound(para_env%mepos, 1) + n1 - 1
2611 ilow2 = ns_bound(ip_partner, 1)
2612 iup2 = ns_bound(ip_partner, 1) + n2 - 1
2613 IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1))
THEN
2629 xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim)
2630 xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim)
2633 xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim)
2634 xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim)
2638 DO istate = 1, n1 + n2
2639 DO jstate = istate + 1, n1 + n2
2641 mii(idim) = xyz_mix(idim)%c_array(istate, istate)
2642 mij(idim) = xyz_mix(idim)%c_array(istate, jstate)
2643 mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate)
2645 CALL get_angle(mii, mjj, mij, weights, theta)
2650 zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate)
2651 zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate)
2652 xyz_mix(idim)%c_array(i, istate) = zi
2653 xyz_mix(idim)%c_array(i, jstate) = zj
2656 zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i)
2657 zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i)
2658 xyz_mix(idim)%c_array(istate, i) = zi
2659 xyz_mix(idim)%c_array(jstate, i) = zj
2664 ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate)
2665 rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate)
2666 rmat_loc(i, istate) = ri
2667 rmat_loc(i, jstate) = rj
2673 CALL para_env%sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, &
2674 rotmat(1:nstate, k:k + n2 - 1), ip_partner)
2676 IF (ilow1 < ilow2)
THEN
2680 CALL dgemm(
"N",
"N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, rmat_loc(1 + n1:, 1:n1), &
2681 n2, 0.0_dp, gmat(:, :), nstate)
2682 CALL dgemm(
"N",
"N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(1:, 1:), &
2683 n1 + n2, 1.0_dp, gmat(:, :), nstate)
2685 CALL dgemm(
"N",
"N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, &
2686 rmat_loc(1:, n2 + 1:), n1 + n2, 0.0_dp, gmat(:, :), nstate)
2690 CALL dgemm(
"N",
"N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(n2 + 1:, n2 + 1:), &
2691 n1, 1.0_dp, gmat(:, :), nstate)
2694 CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1)
2698 xyz_mix_ns(idim)%c_array(1:nstate, i) = cmplx(0.0_dp, 0.0_dp,
dp)
2702 DO jstate = 1, nstate
2704 xyz_mix_ns(idim)%c_array(jstate, istate) = &
2705 xyz_mix_ns(idim)%c_array(jstate, istate) + &
2706 c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1)
2711 DO jstate = 1, nstate
2713 xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + &
2714 c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1)
2720 DEALLOCATE (c_array_partner)
2725 xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i)
2732 cz_ij_loc(idim)%c_array(1:nstate, i) = cmplx(0.0_dp, 0.0_dp,
dp)
2736 IF (ns_partner*ns_me /= 0)
THEN
2738 DO i = 1, ns_me + ns_partner
2739 DO j = i + 1, ns_me + ns_partner
2741 rmat_loc(i, j) = rmat_loc(j, i)
2748 rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1)
2752 rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1)
2759 CALL para_env%allgather(rmat_send, rmat_recv_all)
2762 DO ip = 0, para_env%num_pe - 1
2764 ip_recv_from = mod(para_env%mepos - ip + para_env%num_pe, para_env%num_pe)
2765 rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from)
2767 ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1
2769 IF (ns_me /= 0)
THEN
2770 IF (ns_recv_from /= 0)
THEN
2772 ip_recv_partner = -1
2775 IF (list_pair(1, ipair) == ip_recv_from)
THEN
2776 ip_recv_partner = list_pair(2, ipair)
2778 ELSE IF (list_pair(2, ipair) == ip_recv_from)
THEN
2779 ip_recv_partner = list_pair(1, ipair)
2784 IF (ip_recv_partner >= 0)
THEN
2785 ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1
2787 IF (ns_recv_partner > 0)
THEN
2788 il1 = ns_bound(para_env%mepos, 1)
2789 il_recv = ns_bound(ip_recv_from, 1)
2790 il_recv_partner = ns_bound(ip_recv_partner, 1)
2794 DO i = 1, ns_recv_from
2795 ii = il_recv + i - 1
2798 DO k = 1, ns_recv_from
2799 kk = il_recv + k - 1
2800 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2801 rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2805 DO i = 1, ns_recv_from
2806 ii = il_recv + i - 1
2809 DO k = 1, ns_recv_partner
2810 kk = il_recv_partner + k - 1
2811 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2812 rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2818 il1 = ns_bound(para_env%mepos, 1)
2819 il_recv = ns_bound(ip_recv_from, 1)
2823 DO i = 1, ns_recv_from
2824 ii = il_recv + i - 1
2825 cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j)
2834 IF (ns_partner*ns_me /= 0)
THEN
2835 DEALLOCATE (rmat_loc)
2837 DEALLOCATE (xyz_mix(idim)%c_array)
2845 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2846 ii = i - ns_bound(para_env%mepos, 1) + 1
2847 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2848 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2850 rcount(:) =
SIZE(zdiag_me(idim)%c_array)
2852 DO ip = 2, para_env%num_pe
2853 rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
2856 CALL para_env%allgatherv(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl)
2860 DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2861 k = j - ns_bound(para_env%mepos, 1) + 1
2864 DO ip = 0, para_env%num_pe - 1
2865 IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2))
THEN
2870 ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1
2872 jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1
2875 zi = zdiag_all(idim)%c_array(ii)
2876 zj = zdiag_all(idim)%c_array(jj)
2877 grad = grad + weights(idim)*real(4.0_dp*conjg(cz_ij_loc(idim)%c_array(i, k))*(zj - zi),
dp)
2879 gmax = max(gmax, abs(grad))
2883 CALL para_env%max(gmax)
2885 IF (
PRESENT(tol_out)) tol_out = tolerance
2888 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2889 k = i - ns_bound(para_env%mepos, 1) + 1
2891 zr = real(cz_ij_loc(idim)%c_array(i, k),
dp)
2892 zc = aimag(cz_ij_loc(idim)%c_array(i, k))
2893 func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/
twopi/
twopi
2896 CALL para_env%sum(func)
2899 IF (output_unit > 0 .AND.
modulo(sweeps, out_each) == 0)
THEN
2900 WRITE (output_unit,
'(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1
2903 IF (
PRESENT(eps_localization))
THEN
2904 IF (tolerance < eps_localization)
EXIT
2906 IF (
PRESENT(target_time) .AND.
PRESENT(start_time))
THEN
2907 CALL external_control(should_stop,
"LOC", target_time=target_time, start_time=start_time)
2908 IF (should_stop)
EXIT
2914 DEALLOCATE (rmat_recv_all)
2916 DEALLOCATE (rmat_recv)
2917 DEALLOCATE (rmat_send)
2919 DEALLOCATE (c_array_me)
2922 DEALLOCATE (zdiag_me(idim)%c_array)
2923 DEALLOCATE (zdiag_all(idim)%c_array)
2925 DEALLOCATE (zdiag_me)
2926 DEALLOCATE (zdiag_all)
2927 DEALLOCATE (xyz_mix)
2929 IF (ns_me /= 0)
THEN
2930 DEALLOCATE (xyz_mix_ns(idim)%c_array)
2933 DEALLOCATE (xyz_mix_ns)
2934 IF (ns_me /= 0)
THEN
2940 DEALLOCATE (list_pair)
2942 END SUBROUTINE jacobi_rot_para_core
2950 SUBROUTINE eberlein(iperm, para_env, list_pair)
2951 INTEGER,
INTENT(IN) :: iperm
2952 TYPE(mp_para_env_type),
POINTER :: para_env
2953 INTEGER,
DIMENSION(:, :) :: list_pair
2955 INTEGER :: i, ii, jj, npair
2957 npair = (para_env%num_pe + 1)/2
2958 IF (iperm == 1)
THEN
2960 DO i = 0, para_env%num_pe - 1
2961 ii = ((i + 1) + 1)/2
2962 jj = mod((i + 1) + 1, 2) + 1
2963 list_pair(jj, ii) = i
2965 IF (mod(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1
2966 ELSEIF (mod(iperm, 2) == 0)
THEN
2968 jj = list_pair(1, npair)
2970 list_pair(1, i) = list_pair(1, i - 1)
2972 list_pair(1, 2) = list_pair(2, 1)
2973 list_pair(2, 1) = jj
2976 jj = list_pair(2, 1)
2978 list_pair(2, i) = list_pair(2, i + 1)
2980 list_pair(2, npair) = jj
2983 END SUBROUTINE eberlein
2993 TYPE(cp_fm_type),
INTENT(IN) :: vectors
2994 TYPE(dbcsr_p_type),
DIMENSION(:, :),
POINTER :: op_sm_set
2995 TYPE(cp_fm_type),
DIMENSION(:, :),
INTENT(IN) :: zij_fm_set
2997 CHARACTER(len=*),
PARAMETER :: routinen =
'zij_matrix'
2999 INTEGER :: handle, i, j, nao, nmoloc
3000 TYPE(cp_fm_type) :: opvec
3002 CALL timeset(routinen, handle)
3005 CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc)
3010 DO i = 1,
SIZE(zij_fm_set, 2)
3011 DO j = 1,
SIZE(zij_fm_set, 1)
3014 CALL parallel_gemm(
"T",
"N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, &
3019 CALL cp_fm_release(opvec)
3020 CALL timestop(handle)
3030 TYPE(cp_fm_type),
INTENT(IN) :: vectors
3032 CHARACTER(len=*),
PARAMETER :: routinen =
'scdm_qrfact'
3034 INTEGER :: handle, ncolt, nrowt
3035 REAL(kind=
dp),
DIMENSION(:),
POINTER :: tau
3036 TYPE(cp_fm_struct_type),
POINTER :: cstruct
3037 TYPE(cp_fm_type) :: ctp, qf, tmp
3039 CALL timeset(routinen, handle)
3042 nrowt = vectors%matrix_struct%ncol_global
3043 ncolt = vectors%matrix_struct%nrow_global
3046 vectors%matrix_struct%context, nrowt, ncolt, vectors%matrix_struct%ncol_block, &
3047 vectors%matrix_struct%nrow_block, first_p_pos=vectors%matrix_struct%first_p_pos)
3051 ALLOCATE (tau(nrowt))
3060 context=ctp%matrix_struct%context, nrow_global=ctp%matrix_struct%nrow_global, &
3061 ncol_global=ctp%matrix_struct%nrow_global)
3072 CALL cp_fm_to_fm(vectors, tmp)
3073 CALL parallel_gemm(
'N',
'N', ncolt, nrowt, nrowt, 1.0_dp, tmp, qf, 0.0_dp, vectors)
3076 CALL cp_fm_release(ctp)
3077 CALL cp_fm_release(tmp)
3078 CALL cp_fm_release(qf)
3081 CALL timestop(handle)
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
Handles all functions related to the CELL.
methods related to the blacs parallel environment
Basic linear algebra operations for complex full matrices.
subroutine, public cp_cfm_scale_and_add(alpha, matrix_a, beta, matrix_b)
Scale and add two BLACS matrices (a = alpha*a + beta*b).
subroutine, public cp_cfm_rot_rows(matrix, irow, jrow, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
subroutine, public cp_cfm_schur_product(matrix_a, matrix_b, matrix_c)
Computes the element-wise (Schur) product of two matrices: C = A \circ B .
subroutine, public cp_cfm_column_scale(matrix_a, scaling)
Scales columns of the full matrix by corresponding factors.
subroutine, public cp_cfm_rot_cols(matrix, icol, jcol, cs, sn)
Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
subroutine, public cp_cfm_trace(matrix_a, matrix_b, trace)
Returns the trace of matrix_a^T matrix_b, i.e sum_{i,j}(matrix_a(i,j)*matrix_b(i,j)) .
used for collecting diagonalization schemes available for cp_cfm_type
subroutine, public cp_cfm_heevd(matrix, eigenvectors, eigenvalues)
Perform a diagonalisation of a complex matrix.
Represents a complex full matrix distributed on many processors.
subroutine, public cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
Extract a sub-matrix from the full matrix: op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n...
subroutine, public cp_cfm_get_element(matrix, irow_global, icol_global, alpha)
Get the matrix element by its global index.
subroutine, public cp_cfm_create(matrix, matrix_struct, name)
Creates a new full matrix with the given structure.
subroutine, public cp_cfm_release(matrix)
Releases a full matrix.
subroutine, public cp_fm_to_cfm(msourcer, msourcei, mtarget)
Construct a complex full matrix by taking its real and imaginary parts from two separate real-value f...
subroutine, public cp_cfm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, matrix_struct, para_env)
Returns information about a full matrix.
subroutine, public cp_cfm_set_submatrix(matrix, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
Set a sub-matrix of the full matrix: matrix(start_row:start_row+n_rows,start_col:start_col+n_cols) = ...
subroutine, public cp_cfm_set_all(matrix, alpha, beta)
Set all elements of the full matrix to alpha. Besides, set all diagonal matrix elements to beta (if g...
subroutine, public cp_cfm_to_fm(msource, mtargetr, mtargeti)
Copy real and imaginary parts of a complex full matrix into separate real-value full matrices.
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_sm_fm_multiply(matrix, fm_in, fm_out, ncol, alpha, beta)
multiply a dbcsr with a fm matrix
Routines to handle the external control of CP2K.
subroutine, public external_control(should_stop, flag, globenv, target_time, start_time, force_check)
External manipulations during a run : when the <PROJECT_NAME>.EXIT_$runtype command is sent the progr...
basic linear algebra operations for full matrices
subroutine, public cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
compute a QR factorization with column pivoting of a M-by-N distributed matrix sub( A ) = A(IA:IA+M-1...
real(kind=dp) function, public cp_fm_frobenius_norm(matrix_a)
computes the Frobenius norm of matrix_a
subroutine, public cp_fm_transpose(matrix, matrixt)
transposes a matrix matrixt = matrix ^ T
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_scale(alpha, matrix_a)
scales a matrix matrix_a = alpha * matrix_b
subroutine, public cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, alpha)
multiplies in place by a triangular matrix: matrix_b = alpha op(triangular_matrix) matrix_b or (if si...
subroutine, public cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1) with orthonormal column...
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_decompose(matrix, n, info_out)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
subroutine, public cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
Computes all eigenvalues and vectors of a real symmetric matrix significantly faster than syevx,...
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
returns an element of a fm this value is valid on every cpu using this call is expensive
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_maxabsrownorm(matrix, a_max)
find the maximum over the rows of the sum of the absolute values of the elements of a given row = || ...
subroutine, public cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
copy just a part ot the matrix
subroutine, public cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
find the maximum absolute value of the matrix element maxval(abs(matrix))
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_init_random(matrix, ncol, start_col)
fills a matrix with random numbers
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Defines the basic variable types.
integer, parameter, public dp
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition of mathematical constants and functions.
real(kind=dp), parameter, public pi
real(kind=dp), parameter, public twopi
Routines for calculating a complex matrix exponential.
subroutine, public get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
optimization function for pade/taylor order and number of squaring steps
subroutine, public exp_pade_real(exp_H, matrix, nsquare, npade)
exponential of a real matrix, calculated using pade approximation together with scaling and squaring
Interface to the message passing library MPI.
basic linear algebra operations for full matrixes
Localization methods such as 2x2 Jacobi rotations Steepest Decents Conjugate Gradient.
subroutine, public rotate_orbitals(rmat, vectors)
...
subroutine, public jacobi_rotations(weights, zij, vectors, para_env, max_iter, eps_localization, sweeps, out_each, target_time, start_time, restricted)
wrapper for the jacobi routines, should be removed if jacobi_rot_para can deal with serial para_envs.
subroutine, public zij_matrix(vectors, op_sm_set, zij_fm_set)
...
subroutine, public direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
use the exponential parametrization as described in to perform a direct mini Gerd Berghold et al....
subroutine, public initialize_weights(cell, weights)
...
subroutine, public crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, eps_localization, iterations, converged)
yet another crazy try, computes the angles needed to rotate the orbitals first and rotates them all a...
subroutine, public approx_l1_norm_sd(C, iterations, eps, converged, sweeps)
...
subroutine, public scdm_qrfact(vectors)
...
subroutine, public jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
combine jacobi rotations (serial) and conjugate gradient with golden section line search for partiall...
Exchange and Correlation functional calculations.