45#include "./base/base_uses.f90"
51 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'iterate_matrix'
54 REAL(KIND=
dp),
DIMENSION(:),
ALLOCATABLE :: eigvals
55 REAL(KIND=
dp),
DIMENSION(:, :),
ALLOCATABLE :: eigvecs
59 MODULE PROCEDURE purify_mcweeny_orth, purify_mcweeny_nonorth
87 REAL(kind=
dp),
INTENT(INOUT) :: det
88 REAL(kind=
dp),
INTENT(IN) :: threshold
90 CHARACTER(LEN=*),
PARAMETER :: routinen =
'determinant'
92 INTEGER :: handle, i, max_iter_lanczos, nsize, &
93 order_lanczos, sign_iter, unit_nr
94 INTEGER(KIND=int_8) :: flop1
95 INTEGER,
SAVE :: recursion_depth = 0
96 REAL(kind=
dp) :: det0, eps_lanczos, frobnorm, maxnorm, &
97 occ_matrix, t1, t2, trace
98 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: diagonal
102 CALL timeset(routinen, handle)
106 IF (logger%para_env%is_source())
THEN
117 matrix_type=dbcsr_type_no_symmetry)
119 matrix_type=dbcsr_type_no_symmetry)
121 matrix_type=dbcsr_type_no_symmetry)
127 ALLOCATE (diagonal(nsize))
129 CALL group%sum(diagonal)
130 det = product(diagonal)
134 diagonal(:) = 1.0_dp/(sqrt(diagonal(:)))
140 DEALLOCATE (diagonal)
148 filter_eps=threshold)
153 filter_eps=threshold)
158 IF (unit_nr > 0)
THEN
159 IF (recursion_depth == 0)
THEN
160 WRITE (unit_nr,
'()')
162 WRITE (unit_nr,
'(T6,A28,1X,I15)') &
163 "Recursive iteration:", recursion_depth
165 WRITE (unit_nr,
'(T6,A28,1X,F15.10)') &
166 "Frobenius norm:", frobnorm
170 IF (frobnorm >= 1.0_dp)
THEN
175 eps_lanczos = 1.0e-4_dp
176 max_iter_lanczos = 40
181 threshold=threshold, &
182 order=order_lanczos, &
183 eps_lanczos=eps_lanczos, &
184 max_iter_lanczos=max_iter_lanczos)
185 recursion_depth = recursion_depth + 1
187 recursion_depth = recursion_depth - 1
197 IF (unit_nr > 0)
WRITE (unit_nr, *)
210 filter_eps=threshold, &
216 trace = trace*sign_iter/(1.0_dp*(i + 1))
217 sign_iter = -sign_iter
227 IF (unit_nr > 0)
THEN
228 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F7.5,F16.10,F10.3,F11.3)') &
229 "Determinant iter", i, occ_matrix, &
231 flop1/(1.0e6_dp*max(0.001_dp, t2 - t1))
236 IF (maxnorm < threshold)
EXIT
240 IF (unit_nr > 0)
THEN
241 WRITE (unit_nr,
'()')
247 IF (unit_nr > 0)
THEN
248 IF (recursion_depth == 0)
THEN
249 WRITE (unit_nr,
'(T6,A28,1X,F15.10)') &
250 "Final determinant:", det
251 WRITE (unit_nr,
'()')
253 WRITE (unit_nr,
'(T6,A28,1X,F15.10)') &
254 "Recursive determinant:", det
263 CALL timestop(handle)
284 SUBROUTINE invert_taylor(matrix_inverse, matrix, threshold, use_inv_as_guess, &
285 norm_convergence, filter_eps, accelerator_order, &
286 max_iter_lanczos, eps_lanczos, silent)
288 TYPE(dbcsr_type),
INTENT(INOUT),
TARGET :: matrix_inverse, matrix
289 REAL(kind=dp),
INTENT(IN) :: threshold
290 LOGICAL,
INTENT(IN),
OPTIONAL :: use_inv_as_guess
291 REAL(kind=dp),
INTENT(IN),
OPTIONAL :: norm_convergence, filter_eps
292 INTEGER,
INTENT(IN),
OPTIONAL :: accelerator_order, max_iter_lanczos
293 REAL(kind=dp),
INTENT(IN),
OPTIONAL :: eps_lanczos
294 LOGICAL,
INTENT(IN),
OPTIONAL :: silent
296 CHARACTER(LEN=*),
PARAMETER :: routinen =
'invert_Taylor'
298 INTEGER :: accelerator_type, handle, i, &
299 my_max_iter_lanczos, nrows, unit_nr
300 INTEGER(KIND=int_8) :: flop2
301 LOGICAL :: converged, use_inv_guess
302 REAL(kind=dp) :: coeff, convergence, maxnorm_matrix, &
303 my_eps_lanczos, occ_matrix, t1, t2
304 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:) :: p_diagonal
305 TYPE(cp_logger_type),
POINTER :: logger
306 TYPE(dbcsr_type),
TARGET :: tmp1, tmp2, tmp3_sym
308 CALL timeset(routinen, handle)
310 logger => cp_get_default_logger()
311 IF (logger%para_env%is_source())
THEN
312 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
316 IF (
PRESENT(silent))
THEN
317 IF (silent) unit_nr = -1
320 convergence = threshold
321 IF (
PRESENT(norm_convergence)) convergence = norm_convergence
324 IF (
PRESENT(accelerator_order)) accelerator_type = accelerator_order
325 IF (accelerator_type > 1) accelerator_type = 1
327 use_inv_guess = .false.
328 IF (
PRESENT(use_inv_as_guess)) use_inv_guess = use_inv_as_guess
330 my_max_iter_lanczos = 64
331 my_eps_lanczos = 1.0e-3_dp
332 IF (
PRESENT(max_iter_lanczos)) my_max_iter_lanczos = max_iter_lanczos
333 IF (
PRESENT(eps_lanczos)) my_eps_lanczos = eps_lanczos
335 CALL dbcsr_create(tmp1, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
336 CALL dbcsr_create(tmp2, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
337 CALL dbcsr_create(tmp3_sym, template=matrix_inverse)
339 CALL dbcsr_get_info(matrix, nfullrows_total=nrows)
340 ALLOCATE (p_diagonal(nrows))
343 IF (.NOT. use_inv_guess)
THEN
345 SELECT CASE (accelerator_type)
348 CALL dbcsr_desymmetrize(matrix, tmp1)
349 p_diagonal(:) = 0.0_dp
350 CALL dbcsr_set_diag(tmp1, p_diagonal)
353 CALL dbcsr_get_diag(matrix, p_diagonal)
355 IF (p_diagonal(i) /= 0.0_dp)
THEN
356 p_diagonal(i) = 1.0_dp/p_diagonal(i)
359 CALL dbcsr_set(matrix_inverse, 0.0_dp)
360 CALL dbcsr_add_on_diag(matrix_inverse, 1.0_dp)
361 CALL dbcsr_set_diag(matrix_inverse, p_diagonal)
363 cpabort(
"Illegal accelerator order")
368 cpabort(
"Guess is NYI")
372 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, matrix_inverse, &
373 0.0_dp, tmp2, filter_eps=filter_eps)
375 IF (unit_nr > 0)
WRITE (unit_nr, *)
382 CALL dbcsr_desymmetrize(matrix_inverse, tmp1)
387 coeff = -1.0_dp*coeff
388 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, tmp2, 0.0_dp, &
390 flop=flop2, filter_eps=filter_eps)
392 CALL dbcsr_add(matrix_inverse, tmp3_sym, 1.0_dp, coeff)
393 CALL dbcsr_release(tmp1)
394 CALL dbcsr_create(tmp1, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
395 CALL dbcsr_desymmetrize(tmp3_sym, tmp1)
398 maxnorm_matrix = dbcsr_maxabs(tmp3_sym)
401 occ_matrix = dbcsr_get_occupation(matrix_inverse)
403 IF (unit_nr > 0)
THEN
404 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)')
"Taylor iter", i, occ_matrix, &
405 maxnorm_matrix, t2 - t1, &
406 flop2/(1.0e6_dp*max(0.001_dp, t2 - t1))
407 CALL m_flush(unit_nr)
410 IF (maxnorm_matrix < convergence)
THEN
420 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix, matrix_inverse, 0.0_dp, tmp1, &
421 filter_eps=filter_eps)
422 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
424 maxnorm_matrix = dbcsr_maxabs(tmp1)
425 IF (unit_nr > 0)
THEN
426 WRITE (unit_nr,
'(T6,A,E12.5)')
"Final Taylor error", maxnorm_matrix
427 WRITE (unit_nr,
'()')
428 CALL m_flush(unit_nr)
430 IF (maxnorm_matrix > convergence)
THEN
432 IF (unit_nr > 0)
THEN
433 WRITE (unit_nr, *)
'Final convergence check failed'
437 IF (.NOT. converged)
THEN
438 cpabort(
"Taylor inversion did not converge")
441 CALL dbcsr_release(tmp1)
442 CALL dbcsr_release(tmp2)
443 CALL dbcsr_release(tmp3_sym)
445 DEALLOCATE (p_diagonal)
447 CALL timestop(handle)
471 norm_convergence, filter_eps, accelerator_order, &
472 max_iter_lanczos, eps_lanczos, silent)
474 TYPE(dbcsr_type),
INTENT(INOUT),
TARGET :: matrix_inverse, matrix
475 REAL(kind=dp),
INTENT(IN) :: threshold
476 LOGICAL,
INTENT(IN),
OPTIONAL :: use_inv_as_guess
477 REAL(kind=dp),
INTENT(IN),
OPTIONAL :: norm_convergence, filter_eps
478 INTEGER,
INTENT(IN),
OPTIONAL :: accelerator_order, max_iter_lanczos
479 REAL(kind=dp),
INTENT(IN),
OPTIONAL :: eps_lanczos
480 LOGICAL,
INTENT(IN),
OPTIONAL :: silent
482 CHARACTER(LEN=*),
PARAMETER :: routinen =
'invert_Hotelling'
484 INTEGER :: accelerator_type, handle, i, &
485 my_max_iter_lanczos, unit_nr
486 INTEGER(KIND=int_8) :: flop1, flop2
487 LOGICAL :: arnoldi_converged, converged, &
489 REAL(kind=dp) :: convergence, frob_matrix, gershgorin_norm, max_ev, maxnorm_matrix, min_ev, &
490 my_eps_lanczos, my_filter_eps, occ_matrix, scalingf, t1, t2
491 TYPE(cp_logger_type),
POINTER :: logger
492 TYPE(dbcsr_type),
TARGET :: tmp1, tmp2
497 CALL timeset(routinen, handle)
499 logger => cp_get_default_logger()
500 IF (logger%para_env%is_source())
THEN
501 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
505 IF (
PRESENT(silent))
THEN
506 IF (silent) unit_nr = -1
509 convergence = threshold
510 IF (
PRESENT(norm_convergence)) convergence = norm_convergence
513 IF (
PRESENT(accelerator_order)) accelerator_type = accelerator_order
514 IF (accelerator_type > 1) accelerator_type = 1
516 use_inv_guess = .false.
517 IF (
PRESENT(use_inv_as_guess)) use_inv_guess = use_inv_as_guess
519 my_max_iter_lanczos = 64
520 my_eps_lanczos = 1.0e-3_dp
521 IF (
PRESENT(max_iter_lanczos)) my_max_iter_lanczos = max_iter_lanczos
522 IF (
PRESENT(eps_lanczos)) my_eps_lanczos = eps_lanczos
524 my_filter_eps = threshold
525 IF (
PRESENT(filter_eps)) my_filter_eps = filter_eps
528 IF (.NOT. use_inv_guess)
THEN
530 SELECT CASE (accelerator_type)
532 gershgorin_norm = dbcsr_gershgorin_norm(matrix)
533 frob_matrix = dbcsr_frobenius_norm(matrix)
534 CALL dbcsr_set(matrix_inverse, 0.0_dp)
535 CALL dbcsr_add_on_diag(matrix_inverse, 1/min(gershgorin_norm, frob_matrix))
538 CALL dbcsr_set(matrix_inverse, 0.0_dp)
539 CALL dbcsr_add_on_diag(matrix_inverse, 1.0_dp)
541 cpabort(
"Illegal accelerator order")
545 CALL dbcsr_create(tmp1, template=matrix_inverse)
551 CALL dbcsr_create(tmp1, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
555 CALL dbcsr_create(tmp2, template=matrix_inverse)
557 IF (unit_nr > 0)
WRITE (unit_nr, *)
562 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_inverse, matrix, &
563 0.0_dp, tmp1, flop=flop1, filter_eps=my_filter_eps)
565 IF (accelerator_type == 1)
THEN
568 CALL arnoldi_extremal(tmp1, max_ev, min_ev, threshold=my_eps_lanczos, &
569 max_iter=my_max_iter_lanczos, converged=arnoldi_converged)
578 IF (unit_nr > 0)
THEN
580 WRITE (unit_nr,
'(T6,A,1X,L1,A,E12.3)')
"Lanczos converged: ", arnoldi_converged,
" threshold:", my_eps_lanczos
581 WRITE (unit_nr,
'(T6,A,1X,E12.3,E12.3)')
"Est. extremal eigenvalues:", max_ev, min_ev
582 WRITE (unit_nr,
'(T6,A,1X,E12.3)')
"Est. condition number :", max_ev/max(min_ev, epsilon(min_ev))
586 scalingf = 1.9_dp/(max_ev + min_ev)
587 CALL dbcsr_scale(tmp1, scalingf)
588 CALL dbcsr_scale(matrix_inverse, scalingf)
589 min_ev = min_ev*scalingf
600 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
601 maxnorm_matrix = dbcsr_maxabs(tmp1)
602 CALL dbcsr_add_on_diag(tmp1, +1.0_dp)
605 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, matrix_inverse, 0.0_dp, tmp2, &
606 flop=flop2, filter_eps=my_filter_eps)
608 CALL dbcsr_add(matrix_inverse, tmp2, 2.0_dp, -1.0_dp)
610 CALL dbcsr_filter(matrix_inverse, my_filter_eps)
612 occ_matrix = dbcsr_get_occupation(matrix_inverse)
615 IF (accelerator_type == 1)
THEN
616 min_ev = min_ev*(2.0_dp - min_ev)
617 IF (
PRESENT(norm_convergence)) maxnorm_matrix = abs(min_ev - 1.0_dp)
620 IF (unit_nr > 0)
THEN
621 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)')
"Hotelling iter", i, occ_matrix, &
622 maxnorm_matrix, t2 - t1, &
623 (flop1 + flop2)/(1.0e6_dp*max(0.001_dp, t2 - t1))
624 CALL m_flush(unit_nr)
627 IF (maxnorm_matrix < convergence)
THEN
633 IF (accelerator_type == 1)
THEN
634 min_ev = min_ev*2.0_dp/(min_ev + 1.0_dp)
635 CALL dbcsr_scale(matrix_inverse, 2.0_dp/(min_ev + 1.0_dp))
639 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_inverse, matrix, &
640 0.0_dp, tmp1, flop=flop1, filter_eps=my_filter_eps)
644 IF (.NOT. converged)
THEN
645 cpabort(
"Hotelling inversion did not converge")
649 IF (dbcsr_get_matrix_type(matrix_inverse) == dbcsr_type_no_symmetry)
THEN
650 CALL dbcsr_transposed(tmp2, matrix_inverse)
651 CALL dbcsr_add(matrix_inverse, tmp2, 0.5_dp, 0.5_dp)
654 IF (unit_nr > 0)
THEN
658 WRITE (unit_nr,
'()')
659 CALL m_flush(unit_nr)
662 CALL dbcsr_release(tmp1)
663 CALL dbcsr_release(tmp2)
665 CALL timestop(handle)
683 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix_sign, matrix
684 REAL(kind=dp),
INTENT(IN) :: threshold
685 INTEGER,
INTENT(IN),
OPTIONAL :: sign_order, iounit
687 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_sign_Newton_Schulz'
689 INTEGER :: count, handle, i, order, unit_nr
690 INTEGER(KIND=int_8) :: flops
691 REAL(kind=dp) :: a0, a1, a2, a3, a4, a5, floptot, &
692 frob_matrix, frob_matrix_base, &
693 gersh_matrix, occ_matrix, prefactor, &
695 TYPE(cp_logger_type),
POINTER :: logger
696 TYPE(dbcsr_type) :: tmp1, tmp2, tmp3, tmp4
698 CALL timeset(routinen, handle)
700 IF (
PRESENT(iounit))
THEN
703 logger => cp_get_default_logger()
704 IF (logger%para_env%is_source())
THEN
705 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
711 IF (
PRESENT(sign_order))
THEN
717 CALL dbcsr_create(tmp1, template=matrix_sign)
719 CALL dbcsr_create(tmp2, template=matrix_sign)
720 IF (abs(order) >= 4)
THEN
721 CALL dbcsr_create(tmp3, template=matrix_sign)
723 IF (abs(order) > 4)
THEN
724 CALL dbcsr_create(tmp4, template=matrix_sign)
727 CALL dbcsr_copy(matrix_sign, matrix)
728 CALL dbcsr_filter(matrix_sign, threshold)
731 frob_matrix = dbcsr_frobenius_norm(matrix_sign)
732 gersh_matrix = dbcsr_gershgorin_norm(matrix_sign)
733 CALL dbcsr_scale(matrix_sign, 1/min(frob_matrix, gersh_matrix))
735 IF (unit_nr > 0)
WRITE (unit_nr, *)
742 CALL dbcsr_multiply(
"N",
"N", -1.0_dp, matrix_sign, matrix_sign, 0.0_dp, tmp1, &
743 filter_eps=threshold, flop=flops)
744 floptot = floptot + flops
747 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
748 CALL dbcsr_add_on_diag(tmp1, +1.0_dp)
749 frob_matrix = dbcsr_frobenius_norm(tmp1)
785 CALL dbcsr_add_on_diag(tmp1, +2.0_dp)
786 occ_matrix = dbcsr_get_occupation(matrix_sign)
787 ELSE IF (order == 3)
THEN
790 CALL dbcsr_copy(tmp2, tmp1)
791 CALL dbcsr_scale(tmp1, 4.0_dp/3.0_dp)
792 CALL dbcsr_add_on_diag(tmp1, 8.0_dp/3.0_dp)
795 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp2, 1.0_dp, tmp1, &
796 filter_eps=threshold, flop=flops)
797 floptot = floptot + flops
798 prefactor = 3.0_dp/8.0_dp
800 ELSE IF (order == 4)
THEN
803 CALL dbcsr_copy(tmp3, tmp1)
804 CALL dbcsr_scale(tmp1, 8.0_dp/5.0_dp)
805 CALL dbcsr_add_on_diag(tmp1, 16.0_dp/5.0_dp)
808 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, tmp3, 0.0_dp, tmp2, &
809 filter_eps=threshold, flop=flops)
810 floptot = floptot + flops
812 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 6.0_dp/5.0_dp)
814 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp3, 1.0_dp, tmp1, &
815 filter_eps=threshold, flop=flops)
816 floptot = floptot + flops
818 prefactor = 5.0_dp/16.0_dp
819 ELSE IF (order == -5)
THEN
822 CALL dbcsr_copy(tmp3, tmp1)
823 CALL dbcsr_scale(tmp1, 128.0_dp/70.0_dp)
824 CALL dbcsr_add_on_diag(tmp1, 128.0_dp/35.0_dp)
827 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, tmp3, 0.0_dp, tmp2, &
828 filter_eps=threshold, flop=flops)
829 floptot = floptot + flops
831 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 48.0_dp/35.0_dp)
833 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp4, &
834 filter_eps=threshold, flop=flops)
835 floptot = floptot + flops
837 CALL dbcsr_add(tmp1, tmp4, 1.0_dp, 8.0_dp/7.0_dp)
839 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp4, tmp3, 1.0_dp, tmp1, &
840 filter_eps=threshold, flop=flops)
841 floptot = floptot + flops
843 prefactor = 35.0_dp/128.0_dp
844 ELSE IF (order == 5)
THEN
854 a1 = 23819.0_dp/13720.0_dp
855 a2 = -3734_dp/1715.0_dp
856 a3 = 832591127_dp/188238400.0_dp
860 CALL dbcsr_copy(tmp3, tmp1)
861 CALL dbcsr_add_on_diag(tmp3, a0)
862 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, tmp1, 0.0_dp, tmp2, &
863 filter_eps=threshold, flop=flops)
864 floptot = floptot + flops
865 CALL dbcsr_add_on_diag(tmp2, a1)
867 CALL dbcsr_add_on_diag(tmp1, a2)
868 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 1.0_dp)
869 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, tmp2, 0.0_dp, tmp3, &
870 filter_eps=threshold, flop=flops)
871 floptot = floptot + flops
872 CALL dbcsr_add_on_diag(tmp3, a3)
873 CALL dbcsr_copy(tmp1, tmp3)
875 prefactor = 35.0_dp/128.0_dp
876 ELSE IF (order == 6)
THEN
880 CALL dbcsr_copy(tmp3, tmp1)
881 CALL dbcsr_scale(tmp1, 128.0_dp/63.0_dp)
882 CALL dbcsr_add_on_diag(tmp1, 256.0_dp/63.0_dp)
885 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, tmp3, 0.0_dp, tmp2, &
886 filter_eps=threshold, flop=flops)
887 floptot = floptot + flops
889 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 32.0_dp/21.0_dp)
891 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp4, &
892 filter_eps=threshold, flop=flops)
893 floptot = floptot + flops
895 CALL dbcsr_add(tmp1, tmp4, 1.0_dp, 80.0_dp/63.0_dp)
897 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp4, tmp3, 0.0_dp, tmp2, &
898 filter_eps=threshold, flop=flops)
899 floptot = floptot + flops
901 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 10.0_dp/9.0_dp)
903 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp3, 1.0_dp, tmp1, &
904 filter_eps=threshold, flop=flops)
905 floptot = floptot + flops
907 prefactor = 63.0_dp/256.0_dp
908 ELSE IF (order == 7)
THEN
911 a0 = 1.3686502058092053653287666647611728507211996691324048468010382350359929055186612505791532871573242422_dp
912 a1 = 1.7089671854477436685850554669524985556296280184497503489303331821456795715195510972774979091893741568_dp
913 a2 = -1.3231956603546599107833121193066273961757451236778593922555836895814474509732067051246078326118696968_dp
914 a3 = 3.9876642330847931291749479958277754186675336169578593000744380254770411483327581042259415937710270453_dp
915 a4 = -3.7273299006476825027065704937541279833880400042556351139273912137942678919776364526511485025132991667_dp
916 a5 = 4.9369932474103023792021351907971943220607580694533770325967170245194362399287150565595441897740173578_dp
924 CALL dbcsr_copy(tmp3, tmp1)
925 CALL dbcsr_add_on_diag(tmp3, a0)
926 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, tmp1, 0.0_dp, tmp2, &
927 filter_eps=threshold, flop=flops)
928 floptot = floptot + flops
929 CALL dbcsr_add_on_diag(tmp2, a1)
932 CALL dbcsr_copy(tmp4, tmp1)
933 CALL dbcsr_add_on_diag(tmp4, a2)
934 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp4, tmp2, 0.0_dp, tmp3, &
935 filter_eps=threshold, flop=flops)
936 floptot = floptot + flops
937 CALL dbcsr_add_on_diag(tmp3, a3)
939 CALL dbcsr_add(tmp2, tmp3, 1.0_dp, 1.0_dp)
940 CALL dbcsr_add_on_diag(tmp2, a4)
941 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp1, &
942 filter_eps=threshold, flop=flops)
943 floptot = floptot + flops
944 CALL dbcsr_add_on_diag(tmp1, a5)
946 prefactor = 231.0_dp/1024.0_dp
948 cpabort(
"requested order is not implemented.")
952 CALL dbcsr_multiply(
"N",
"N", prefactor, matrix_sign, tmp1, 0.0_dp, tmp2, &
953 filter_eps=threshold, flop=flops)
954 floptot = floptot + flops
958 CALL dbcsr_copy(matrix_sign, tmp2)
961 occ_matrix = dbcsr_get_occupation(matrix_sign)
963 IF (unit_nr > 0)
THEN
964 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)')
"NS sign iter ", i, occ_matrix, &
965 frob_matrix/frob_matrix_base, t2 - t1, &
966 floptot/(1.0e6_dp*max(0.001_dp, t2 - t1))
967 CALL m_flush(unit_nr)
971 IF (frob_matrix*frob_matrix < (threshold*frob_matrix_base*frob_matrix_base))
EXIT
976 CALL dbcsr_multiply(
"N",
"N", +1.0_dp, matrix_sign, matrix_sign, 0.0_dp, tmp1, &
977 filter_eps=threshold)
978 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
979 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
980 frob_matrix = dbcsr_frobenius_norm(tmp1)
981 occ_matrix = dbcsr_get_occupation(matrix_sign)
982 IF (unit_nr > 0)
THEN
983 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3)')
"Final NS sign iter", i, occ_matrix, &
984 frob_matrix/frob_matrix_base
985 WRITE (unit_nr,
'()')
986 CALL m_flush(unit_nr)
989 CALL dbcsr_release(tmp1)
990 CALL dbcsr_release(tmp2)
991 IF (abs(order) >= 4)
THEN
992 CALL dbcsr_release(tmp3)
994 IF (abs(order) > 4)
THEN
995 CALL dbcsr_release(tmp4)
998 CALL timestop(handle)
1015 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix_sign, matrix
1016 REAL(kind=dp),
INTENT(IN) :: threshold
1017 INTEGER,
INTENT(IN),
OPTIONAL :: sign_order
1019 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_sign_proot'
1021 INTEGER :: handle, order, unit_nr
1022 INTEGER(KIND=int_8) :: flop0, flop1, flop2
1023 LOGICAL :: converged, symmetrize
1024 REAL(kind=dp) :: frob_matrix, frob_matrix_base, occ_matrix
1025 TYPE(cp_logger_type),
POINTER :: logger
1026 TYPE(dbcsr_type) :: matrix2, matrix_sqrt, matrix_sqrt_inv, &
1029 CALL cite_reference(richters2018)
1031 CALL timeset(routinen, handle)
1033 logger => cp_get_default_logger()
1034 IF (logger%para_env%is_source())
THEN
1035 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1040 IF (
PRESENT(sign_order))
THEN
1046 CALL dbcsr_create(tmp1, template=matrix_sign)
1048 CALL dbcsr_create(tmp2, template=matrix_sign)
1050 CALL dbcsr_create(matrix2, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1051 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix, matrix, 0.0_dp, matrix2, &
1052 filter_eps=threshold, flop=flop0)
1058 IF (unit_nr > 0)
WRITE (unit_nr, *)
1060 CALL dbcsr_create(matrix_sqrt, template=matrix2)
1061 CALL dbcsr_create(matrix_sqrt_inv, template=matrix2)
1062 IF (unit_nr > 0)
WRITE (unit_nr, *)
"Threshold=", threshold
1064 symmetrize = .false.
1065 CALL matrix_sqrt_proot(matrix_sqrt, matrix_sqrt_inv, matrix2, threshold, order, &
1066 0.01_dp, 100, symmetrize, converged)
1068 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix, matrix_sqrt_inv, 0.0_dp, matrix_sign, &
1069 filter_eps=threshold, flop=flop1)
1072 CALL dbcsr_multiply(
"N",
"N", +1.0_dp, matrix_sign, matrix_sign, 0.0_dp, tmp1, &
1073 filter_eps=threshold, flop=flop2)
1074 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
1075 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
1076 frob_matrix = dbcsr_frobenius_norm(tmp1)
1077 occ_matrix = dbcsr_get_occupation(matrix_sign)
1078 IF (unit_nr > 0)
THEN
1079 WRITE (unit_nr,
'(T6,A,F10.8,E12.3)')
"Final proot sign iter", occ_matrix, &
1080 frob_matrix/frob_matrix_base
1081 WRITE (unit_nr,
'()')
1082 CALL m_flush(unit_nr)
1085 CALL dbcsr_release(tmp1)
1086 CALL dbcsr_release(tmp2)
1087 CALL dbcsr_release(matrix2)
1088 CALL dbcsr_release(matrix_sqrt)
1089 CALL dbcsr_release(matrix_sqrt_inv)
1091 CALL timestop(handle)
1104 SUBROUTINE dense_matrix_sign_newton_schulz(matrix_sign, matrix, matrix_id, threshold, sign_order)
1106 REAL(kind=dp),
DIMENSION(:, :),
INTENT(INOUT) :: matrix_sign
1107 REAL(kind=dp),
DIMENSION(:, :),
INTENT(IN) :: matrix
1108 INTEGER,
INTENT(IN),
OPTIONAL :: matrix_id
1109 REAL(kind=dp),
INTENT(IN) :: threshold
1110 INTEGER,
INTENT(IN),
OPTIONAL :: sign_order
1112 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dense_matrix_sign_Newton_Schulz'
1114 INTEGER :: handle, i, j, sz, unit_nr
1115 LOGICAL :: converged
1116 REAL(kind=dp) :: frob_matrix, frob_matrix_base, &
1117 gersh_matrix, prefactor, scaling_factor
1118 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: tmp1, tmp2
1119 REAL(kind=dp),
DIMENSION(1) :: work
1120 REAL(kind=dp),
EXTERNAL :: dlange
1121 TYPE(cp_logger_type),
POINTER :: logger
1123 CALL timeset(routinen, handle)
1126 logger => cp_get_default_logger()
1127 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1130 sz =
SIZE(matrix, 1)
1131 frob_matrix = dlange(
'F', sz, sz, matrix, sz, work)
1132 gersh_matrix = dlange(
'1', sz, sz, matrix, sz, work)
1133 scaling_factor = 1/min(frob_matrix, gersh_matrix)
1134 matrix_sign = matrix*scaling_factor
1135 ALLOCATE (tmp1(sz, sz))
1136 ALLOCATE (tmp2(sz, sz))
1140 CALL dgemm(
'N',
'N', sz, sz, sz, -1.0_dp, matrix_sign, sz, matrix_sign, sz, 0.0_dp, tmp1, sz)
1143 frob_matrix_base = dlange(
'F', sz, sz, tmp1, sz, work)
1145 tmp1(j, j) = tmp1(j, j) + 1.0_dp
1147 frob_matrix = dlange(
'F', sz, sz, tmp1, sz, work)
1149 IF (sign_order == 2)
THEN
1153 tmp1(j, j) = tmp1(j, j) + 2.0_dp
1155 ELSE IF (sign_order == 3)
THEN
1157 tmp1 = tmp1*4.0_dp/3.0_dp
1159 tmp1(j, j) = tmp1(j, j) + 8.0_dp/3.0_dp
1161 CALL dgemm(
'N',
'N', sz, sz, sz, 1.0_dp, tmp2, sz, tmp2, sz, 1.0_dp, tmp1, sz)
1162 prefactor = 3.0_dp/8.0_dp
1164 cpabort(
"requested order is not implemented.")
1167 CALL dgemm(
'N',
'N', sz, sz, sz, prefactor, matrix_sign, sz, tmp1, sz, 0.0_dp, tmp2, sz)
1171 IF (frob_matrix*frob_matrix < (threshold*frob_matrix_base*frob_matrix_base))
THEN
1172 WRITE (unit_nr,
'(T6,A,1X,I6,1X,A,1X,I3,E12.3)') &
1173 "Submatrix", matrix_id,
"final NS sign iter", i, frob_matrix/frob_matrix_base
1174 CALL m_flush(unit_nr)
1180 IF (.NOT. converged) &
1181 cpabort(
"dense_matrix_sign_Newton_Schulz did not converge within 100 iterations")
1186 CALL timestop(handle)
1188 END SUBROUTINE dense_matrix_sign_newton_schulz
1200 SUBROUTINE eigdecomp(sm, N, eigvals, eigvecs)
1201 INTEGER,
INTENT(IN) :: n
1202 REAL(kind=dp),
INTENT(IN) :: sm(n, n)
1203 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:), &
1204 INTENT(OUT) :: eigvals
1205 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :), &
1206 INTENT(OUT) :: eigvecs
1208 INTEGER :: info, liwork, lwork
1209 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: iwork
1210 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:) :: work
1211 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: tmp
1213 ALLOCATE (eigvecs(n, n), tmp(n, n))
1214 ALLOCATE (eigvals(n))
1217 eigvecs(:, :) = 0.5*(sm + transpose(sm))
1224 CALL dsyevd(
'V',
'U', n, eigvecs, n, eigvals, work, lwork, iwork, liwork, info)
1225 lwork = int(work(1))
1226 liwork = int(iwork(1))
1231 ALLOCATE (work(lwork))
1232 ALLOCATE (iwork(liwork))
1233 CALL dsyevd(
'V',
'U', n, eigvecs, n, eigvals, work, lwork, iwork, liwork, info)
1236 IF (info /= 0) cpabort(
"dsyevd did not succeed")
1239 END SUBROUTINE eigdecomp
1252 SUBROUTINE sign_from_eigdecomp(sm_sign, eigvals, eigvecs, N, mu_correction)
1254 REAL(kind=dp),
INTENT(IN) :: eigvecs(n, n), eigvals(n)
1255 REAL(kind=dp),
INTENT(INOUT) :: sm_sign(n, n)
1256 REAL(kind=dp),
INTENT(IN) :: mu_correction
1259 REAL(kind=dp) :: modified_eigval, tmp(n, n)
1263 modified_eigval = eigvals(i) - mu_correction
1264 IF (modified_eigval > 0)
THEN
1266 ELSE IF (modified_eigval < 0)
THEN
1267 sm_sign(i, i) = -1.0
1275 CALL dgemm(
'N',
'N', n, n, n, 1.0_dp, eigvecs, n, sm_sign, n, 0.0_dp, tmp, n)
1276 CALL dgemm(
'N',
'T', n, n, n, 1.0_dp, tmp, n, eigvecs, n, 0.0_dp, sm_sign, n)
1277 END SUBROUTINE sign_from_eigdecomp
1291 FUNCTION trace_from_eigdecomp(eigvals, eigvecs, firstcol, lastcol, mu_correction)
RESULT(trace)
1292 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:), &
1293 INTENT(IN) :: eigvals
1294 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :), &
1295 INTENT(IN) :: eigvecs
1296 INTEGER,
INTENT(IN) :: firstcol, lastcol
1297 REAL(kind=dp),
INTENT(IN) :: mu_correction
1298 REAL(kind=dp) :: trace
1300 INTEGER :: i, j, sm_size
1301 REAL(kind=dp) :: modified_eigval, tmpsum
1302 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:) :: mapped_eigvals
1304 sm_size =
SIZE(eigvals)
1305 ALLOCATE (mapped_eigvals(sm_size))
1308 modified_eigval = eigvals(i) - mu_correction
1309 IF (modified_eigval > 0)
THEN
1310 mapped_eigvals(i) = 1.0
1311 ELSE IF (modified_eigval < 0)
THEN
1312 mapped_eigvals(i) = -1.0
1314 mapped_eigvals(i) = 0.0
1319 DO i = firstcol, lastcol
1322 tmpsum = tmpsum + (eigvecs(i, j)*mapped_eigvals(j)*eigvecs(i, j))
1324 trace = trace - 0.5_dp*tmpsum + 0.5_dp
1326 END FUNCTION trace_from_eigdecomp
1338 SUBROUTINE dense_matrix_sign_direct(sm_sign, sm, N)
1339 INTEGER,
INTENT(IN) :: n
1340 REAL(kind=dp),
INTENT(IN) :: sm(n, n)
1341 REAL(kind=dp),
INTENT(INOUT) :: sm_sign(n, n)
1343 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:) :: eigvals
1344 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: eigvecs
1346 CALL eigdecomp(sm, n, eigvals, eigvecs)
1347 CALL sign_from_eigdecomp(sm_sign, eigvals, eigvecs, n, 0.0_dp)
1349 DEALLOCATE (eigvals, eigvecs)
1350 END SUBROUTINE dense_matrix_sign_direct
1366 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix_sign, matrix
1367 REAL(kind=dp),
INTENT(IN) :: threshold
1368 INTEGER,
INTENT(IN),
OPTIONAL :: sign_order
1369 INTEGER,
INTENT(IN) :: submatrix_sign_method
1371 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_sign_submatrix'
1373 INTEGER :: handle, i, myrank, nblkcols, order, &
1375 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: my_sms
1376 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: sm, sm_sign
1377 TYPE(cp_logger_type),
POINTER :: logger
1378 TYPE(dbcsr_distribution_type) :: dist
1379 TYPE(submatrix_dissection_type) :: dissection
1381 CALL timeset(routinen, handle)
1384 logger => cp_get_default_logger()
1385 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1387 IF (
PRESENT(sign_order))
THEN
1393 CALL dbcsr_get_info(matrix=matrix, nblkcols_total=nblkcols, distribution=dist)
1394 CALL dbcsr_distribution_get(dist=dist, mynode=myrank)
1396 CALL dissection%init(matrix)
1397 CALL dissection%get_sm_ids_for_rank(myrank, my_sms)
1403 DO i = 1,
SIZE(my_sms)
1404 WRITE (unit_nr,
'(T3,A,1X,I4,1X,A,1X,I6)')
"Rank", myrank,
"processing submatrix", my_sms(i)
1405 CALL dissection%generate_submatrix(my_sms(i), sm)
1406 sm_size =
SIZE(sm, 1)
1407 ALLOCATE (sm_sign(sm_size, sm_size))
1408 SELECT CASE (submatrix_sign_method)
1409 CASE (ls_scf_submatrix_sign_ns)
1410 CALL dense_matrix_sign_newton_schulz(sm_sign, sm, my_sms(i), threshold, order)
1411 CASE (ls_scf_submatrix_sign_direct, ls_scf_submatrix_sign_direct_muadj, ls_scf_submatrix_sign_direct_muadj_lowmem)
1412 CALL dense_matrix_sign_direct(sm_sign, sm, sm_size)
1414 cpabort(
"Unkown submatrix sign method.")
1416 CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1417 DEALLOCATE (sm, sm_sign)
1422 CALL dissection%communicate_results(matrix_sign)
1423 CALL dissection%final
1425 CALL timestop(handle)
1443 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix_sign, matrix
1444 REAL(kind=dp),
INTENT(INOUT) :: mu
1445 INTEGER,
INTENT(IN) :: nelectron
1446 REAL(kind=dp),
INTENT(IN) :: threshold
1447 INTEGER,
INTENT(IN) :: variant
1449 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_sign_submatrix_mu_adjust'
1450 REAL(kind=dp),
PARAMETER :: initial_increment = 0.01_dp
1452 INTEGER :: handle, i, j, myrank, nblkcols, &
1453 sm_firstcol, sm_lastcol, sm_size, &
1455 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: my_sms
1456 LOGICAL :: has_mu_high, has_mu_low
1457 REAL(kind=dp) :: increment, mu_high, mu_low, new_mu, trace
1458 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: sm, sm_sign, tmp
1459 TYPE(cp_logger_type),
POINTER :: logger
1460 TYPE(dbcsr_distribution_type) :: dist
1461 TYPE(eigbuf),
ALLOCATABLE,
DIMENSION(:) :: eigbufs
1462 TYPE(mp_comm_type) :: group
1463 TYPE(submatrix_dissection_type) :: dissection
1465 CALL timeset(routinen, handle)
1468 logger => cp_get_default_logger()
1469 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1471 CALL dbcsr_get_info(matrix=matrix, nblkcols_total=nblkcols, distribution=dist, group=group)
1472 CALL dbcsr_distribution_get(dist=dist, mynode=myrank)
1474 CALL dissection%init(matrix)
1475 CALL dissection%get_sm_ids_for_rank(myrank, my_sms)
1477 ALLOCATE (eigbufs(
SIZE(my_sms)))
1486 DO i = 1,
SIZE(my_sms)
1487 CALL dissection%generate_submatrix(my_sms(i), sm)
1488 sm_size =
SIZE(sm, 1)
1489 WRITE (unit_nr, *)
"Rank", myrank,
"processing submatrix", my_sms(i),
"size", sm_size
1491 CALL dissection%get_relevant_sm_columns(my_sms(i), sm_firstcol, sm_lastcol)
1493 IF (variant == ls_scf_submatrix_sign_direct_muadj)
THEN
1495 CALL eigdecomp(sm, sm_size, eigvals=eigbufs(i)%eigvals, eigvecs=eigbufs(i)%eigvecs)
1499 CALL eigdecomp(sm, sm_size, eigvals=eigbufs(i)%eigvals, eigvecs=tmp)
1500 ALLOCATE (eigbufs(i)%eigvecs(sm_firstcol:sm_lastcol, 1:sm_size))
1501 eigbufs(i)%eigvecs(:, :) = tmp(sm_firstcol:sm_lastcol, 1:sm_size)
1503 ALLOCATE (sm_sign(sm_size, sm_size))
1504 CALL sign_from_eigdecomp(sm_sign, eigbufs(i)%eigvals, tmp, sm_size, 0.0_dp)
1505 CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1506 DEALLOCATE (sm_sign, tmp)
1510 trace = trace + trace_from_eigdecomp(eigbufs(i)%eigvals, eigbufs(i)%eigvecs, sm_firstcol, sm_lastcol, 0.0_dp)
1515 has_mu_low = .false.
1516 has_mu_high = .false.
1517 increment = initial_increment
1520 CALL group%sum(trace)
1521 IF (unit_nr > 0)
WRITE (unit_nr,
'(T2,A,1X,F13.9,1X,F15.9)') &
1522 "Density matrix: mu, trace error: ", new_mu, trace - nelectron
1523 IF (abs(trace - nelectron) < 0.5_dp)
EXIT
1524 IF (trace < nelectron)
THEN
1526 new_mu = new_mu + increment
1528 increment = increment*2
1531 new_mu = new_mu - increment
1532 has_mu_high = .true.
1533 increment = increment*2
1536 IF (has_mu_low .AND. has_mu_high)
THEN
1537 new_mu = (mu_low + mu_high)/2
1538 IF (abs(mu_high - mu_low) < threshold)
EXIT
1547 DO j = 1,
SIZE(my_sms)
1548 sm_size =
SIZE(eigbufs(j)%eigvals)
1549 CALL dissection%get_relevant_sm_columns(my_sms(j), sm_firstcol, sm_lastcol)
1550 trace = trace + trace_from_eigdecomp(eigbufs(j)%eigvals, eigbufs(j)%eigvecs, sm_firstcol, sm_lastcol, new_mu - mu)
1557 IF (variant == ls_scf_submatrix_sign_direct_muadj)
THEN
1562 DO i = 1,
SIZE(my_sms)
1563 WRITE (unit_nr,
'(T3,A,1X,I4,1X,A,1X,I6)')
"Rank", myrank,
"finalizing submatrix", my_sms(i)
1564 sm_size =
SIZE(eigbufs(i)%eigvals)
1565 ALLOCATE (sm_sign(sm_size, sm_size))
1566 CALL sign_from_eigdecomp(sm_sign, eigbufs(i)%eigvals, eigbufs(i)%eigvecs, sm_size, new_mu - mu)
1567 CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1568 DEALLOCATE (sm_sign)
1574 DEALLOCATE (eigbufs)
1577 IF ((variant == ls_scf_submatrix_sign_direct_muadj_lowmem) .AND. (mu /= new_mu))
THEN
1582 DO i = 1,
SIZE(my_sms)
1583 WRITE (unit_nr,
'(T3,A,1X,I4,1X,A,1X,I6)')
"Rank", myrank,
"reprocessing submatrix", my_sms(i)
1584 CALL dissection%generate_submatrix(my_sms(i), sm)
1585 sm_size =
SIZE(sm, 1)
1587 sm(j, j) = sm(j, j) + mu - new_mu
1589 ALLOCATE (sm_sign(sm_size, sm_size))
1590 CALL dense_matrix_sign_direct(sm_sign, sm, sm_size)
1591 CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1592 DEALLOCATE (sm, sm_sign)
1600 CALL dissection%communicate_results(matrix_sign)
1601 CALL dissection%final
1603 CALL timestop(handle)
1625 eps_lanczos, max_iter_lanczos, symmetrize, converged, iounit)
1626 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix_sqrt, matrix_sqrt_inv, matrix
1627 REAL(kind=dp),
INTENT(IN) :: threshold
1628 INTEGER,
INTENT(IN) :: order
1629 REAL(kind=dp),
INTENT(IN) :: eps_lanczos
1630 INTEGER,
INTENT(IN) :: max_iter_lanczos
1631 LOGICAL,
OPTIONAL :: symmetrize, converged
1632 INTEGER,
INTENT(IN),
OPTIONAL :: iounit
1634 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_sqrt_Newton_Schulz'
1636 INTEGER :: handle, i, unit_nr
1637 INTEGER(KIND=int_8) :: flop1, flop2, flop3, flop4, flop5
1638 LOGICAL :: arnoldi_converged, tsym
1639 REAL(kind=dp) :: a, b, c, conv, d, frob_matrix, &
1640 frob_matrix_base, gershgorin_norm, &
1641 max_ev, min_ev, oa, ob, oc, &
1642 occ_matrix, od, scaling, t1, t2
1643 TYPE(cp_logger_type),
POINTER :: logger
1644 TYPE(dbcsr_type) :: tmp1, tmp2, tmp3
1646 CALL timeset(routinen, handle)
1648 IF (
PRESENT(iounit))
THEN
1651 logger => cp_get_default_logger()
1652 IF (logger%para_env%is_source())
THEN
1653 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1659 IF (
PRESENT(converged)) converged = .false.
1660 IF (
PRESENT(symmetrize))
THEN
1667 CALL dbcsr_create(tmp1, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1668 CALL dbcsr_create(tmp2, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1669 IF (order >= 4)
THEN
1670 CALL dbcsr_create(tmp3, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1673 CALL dbcsr_set(matrix_sqrt_inv, 0.0_dp)
1674 CALL dbcsr_add_on_diag(matrix_sqrt_inv, 1.0_dp)
1675 CALL dbcsr_filter(matrix_sqrt_inv, threshold)
1676 CALL dbcsr_copy(matrix_sqrt, matrix)
1679 IF (order == 0)
THEN
1681 gershgorin_norm = dbcsr_gershgorin_norm(matrix_sqrt)
1682 frob_matrix = dbcsr_frobenius_norm(matrix_sqrt)
1683 scaling = 1.0_dp/min(frob_matrix, gershgorin_norm)
1688 CALL arnoldi_extremal(matrix_sqrt, max_ev, min_ev, threshold=eps_lanczos, &
1689 max_iter=max_iter_lanczos, converged=arnoldi_converged)
1690 IF (unit_nr > 0)
THEN
1692 WRITE (unit_nr,
'(T6,A,1X,L1,A,E12.3)')
"Lanczos converged: ", arnoldi_converged,
" threshold:", eps_lanczos
1693 WRITE (unit_nr,
'(T6,A,1X,E12.3,E12.3)')
"Est. extremal eigenvalues:", max_ev, min_ev
1694 WRITE (unit_nr,
'(T6,A,1X,E12.3)')
"Est. condition number :", max_ev/max(min_ev, epsilon(min_ev))
1698 scaling = 2.0_dp/(max_ev + min_ev + 100*eps_lanczos)
1702 CALL dbcsr_scale(matrix_sqrt, scaling)
1703 CALL dbcsr_filter(matrix_sqrt, threshold)
1704 IF (unit_nr > 0)
THEN
1706 WRITE (unit_nr, *)
"Order=", order
1714 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_sqrt_inv, matrix_sqrt, 0.0_dp, tmp1, &
1715 filter_eps=threshold, flop=flop1)
1716 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
1717 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
1720 frob_matrix = dbcsr_frobenius_norm(tmp1)
1722 flop4 = 0; flop5 = 0
1726 CALL dbcsr_add_on_diag(tmp1, -2.0_dp)
1727 CALL dbcsr_scale(tmp1, -0.5_dp)
1730 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, tmp1, 0.0_dp, tmp2, &
1731 filter_eps=threshold, flop=flop4)
1733 CALL dbcsr_add(tmp1, tmp2, -4.0_dp, 3.0_dp)
1734 CALL dbcsr_add_on_diag(tmp1, 8.0_dp)
1735 CALL dbcsr_scale(tmp1, 0.125_dp)
1738 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, tmp1, 0.0_dp, tmp2, &
1739 filter_eps=threshold, flop=flop4)
1741 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp1, 0.0_dp, tmp3, &
1742 filter_eps=threshold, flop=flop5)
1743 CALL dbcsr_scale(tmp1, -8.0_dp)
1744 CALL dbcsr_add_on_diag(tmp1, 16.0_dp)
1745 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 6.0_dp)
1746 CALL dbcsr_add(tmp1, tmp3, 1.0_dp, -5.0_dp)
1747 CALL dbcsr_scale(tmp1, 1/16.0_dp)
1755 oa = -40.0_dp/35.0_dp
1756 ob = 48.0_dp/35.0_dp
1757 oc = -64.0_dp/35.0_dp
1758 od = 128.0_dp/35.0_dp
1760 b = ob*(a + 1) - oc - a*(a + 1)**2
1761 c = ob - b - a*(a + 1)
1764 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, tmp1, 0.0_dp, tmp2, &
1765 filter_eps=threshold, flop=flop4)
1766 CALL dbcsr_add(tmp2, tmp1, 1.0_dp, a)
1768 CALL dbcsr_copy(tmp3, tmp2)
1769 CALL dbcsr_add(tmp3, tmp1, 1.0_dp, 1.0_dp)
1770 CALL dbcsr_add_on_diag(tmp3, b)
1772 CALL dbcsr_add_on_diag(tmp2, c)
1774 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp1, &
1775 filter_eps=threshold, flop=flop5)
1777 CALL dbcsr_add_on_diag(tmp1, d)
1779 CALL dbcsr_scale(tmp1, 35.0_dp/128.0_dp)
1781 cpabort(
"Illegal order value")
1785 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_sqrt, tmp1, 0.0_dp, tmp2, &
1786 filter_eps=threshold, flop=flop2)
1788 CALL dbcsr_copy(matrix_sqrt, tmp2)
1791 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, matrix_sqrt_inv, 0.0_dp, tmp2, &
1792 filter_eps=threshold, flop=flop3)
1794 CALL dbcsr_copy(matrix_sqrt_inv, tmp2)
1796 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
1801 conv = frob_matrix/frob_matrix_base
1803 IF (unit_nr > 0)
THEN
1804 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)')
"NS sqrt iter ", i, occ_matrix, &
1806 (flop1 + flop2 + flop3 + flop4 + flop5)/(1.0e6_dp*max(0.001_dp, t2 - t1))
1807 CALL m_flush(unit_nr)
1810 IF (abnormal_value(conv)) &
1811 cpabort(
"conv is an abnormal value (NaN/Inf).")
1814 IF ((conv*conv) < threshold)
THEN
1815 IF (
PRESENT(converged)) converged = .true.
1823 IF (unit_nr > 0)
THEN
1824 WRITE (unit_nr,
'(T6,A20)')
"Symmetrizing Results"
1826 CALL dbcsr_transposed(tmp1, matrix_sqrt_inv)
1827 CALL dbcsr_add(matrix_sqrt_inv, tmp1, 0.5_dp, 0.5_dp)
1828 CALL dbcsr_transposed(tmp1, matrix_sqrt)
1829 CALL dbcsr_add(matrix_sqrt, tmp1, 0.5_dp, 0.5_dp)
1833 CALL dbcsr_multiply(
"N",
"N", +1.0_dp, matrix_sqrt_inv, matrix_sqrt, 0.0_dp, tmp1, &
1834 filter_eps=threshold)
1835 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
1836 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
1837 frob_matrix = dbcsr_frobenius_norm(tmp1)
1838 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
1839 IF (unit_nr > 0)
THEN
1840 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3)')
"Final NS sqrt iter ", i, occ_matrix, &
1841 frob_matrix/frob_matrix_base
1842 WRITE (unit_nr,
'()')
1843 CALL m_flush(unit_nr)
1847 CALL dbcsr_scale(matrix_sqrt, 1/sqrt(scaling))
1848 CALL dbcsr_scale(matrix_sqrt_inv, sqrt(scaling))
1850 CALL dbcsr_release(tmp1)
1851 CALL dbcsr_release(tmp2)
1852 IF (order >= 4)
THEN
1853 CALL dbcsr_release(tmp3)
1856 CALL timestop(handle)
1877 eps_lanczos, max_iter_lanczos, symmetrize, converged)
1878 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix_sqrt, matrix_sqrt_inv, matrix
1879 REAL(kind=dp),
INTENT(IN) :: threshold
1880 INTEGER,
INTENT(IN) :: order
1881 REAL(kind=dp),
INTENT(IN) :: eps_lanczos
1882 INTEGER,
INTENT(IN) :: max_iter_lanczos
1883 LOGICAL,
OPTIONAL :: symmetrize, converged
1885 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_sqrt_proot'
1887 INTEGER :: choose, handle, i, ii, j, unit_nr
1888 INTEGER(KIND=int_8) :: f, flop1, flop2, flop3, flop4, flop5
1889 LOGICAL :: arnoldi_converged, test, tsym
1890 REAL(kind=dp) :: conv, frob_matrix, frob_matrix_base, &
1891 max_ev, min_ev, occ_matrix, scaling, &
1893 TYPE(cp_logger_type),
POINTER :: logger
1894 TYPE(dbcsr_type) :: bk2a, matrixs, rmat, tmp1, tmp2, tmp3
1896 CALL cite_reference(richters2018)
1900 CALL timeset(routinen, handle)
1902 logger => cp_get_default_logger()
1903 IF (logger%para_env%is_source())
THEN
1904 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1909 IF (
PRESENT(converged)) converged = .false.
1910 IF (
PRESENT(symmetrize))
THEN
1917 CALL dbcsr_create(tmp1, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1918 CALL dbcsr_create(tmp2, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1919 CALL dbcsr_create(tmp3, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1920 CALL dbcsr_create(rmat, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1921 CALL dbcsr_create(matrixs, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1923 CALL dbcsr_copy(matrixs, matrix)
1926 CALL arnoldi_extremal(matrixs, max_ev, min_ev, threshold=eps_lanczos, &
1927 max_iter=max_iter_lanczos, converged=arnoldi_converged)
1928 IF (unit_nr > 0)
THEN
1930 WRITE (unit_nr,
'(T6,A,1X,L1,A,E12.3)')
"Lanczos converged: ", arnoldi_converged,
" threshold:", eps_lanczos
1931 WRITE (unit_nr,
'(T6,A,1X,E12.3,E12.3)')
"Est. extremal eigenvalues:", max_ev, min_ev
1932 WRITE (unit_nr,
'(T6,A,1X,E12.3)')
"Est. condition number :", max_ev/max(min_ev, epsilon(min_ev))
1936 scaling = 2.0_dp/(max_ev + min_ev + 100*eps_lanczos)
1937 CALL dbcsr_scale(matrixs, scaling)
1938 CALL dbcsr_filter(matrixs, threshold)
1943 CALL dbcsr_set(matrix_sqrt_inv, 0.0_dp)
1944 CALL dbcsr_add_on_diag(matrix_sqrt_inv, 1.0_dp)
1947 IF (unit_nr > 0)
THEN
1949 WRITE (unit_nr, *)
"Order=", order
1957 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_sqrt_inv, matrix_sqrt_inv, 0.0_dp, tmp1, &
1958 filter_eps=threshold, flop=flop1)
1959 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrixs, tmp1, 0.0_dp, rmat, &
1960 filter_eps=threshold, flop=flop2)
1961 CALL dbcsr_scale(rmat, -1.0_dp)
1962 CALL dbcsr_add_on_diag(rmat, 1.0_dp)
1964 flop4 = 0; flop5 = 0
1965 CALL dbcsr_set(tmp1, 0.0_dp)
1966 CALL dbcsr_add_on_diag(tmp1, 2.0_dp)
1972 CALL dbcsr_copy(tmp2, rmat)
1975 CALL dbcsr_copy(tmp3, tmp2)
1976 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, rmat, 0.0_dp, tmp2, &
1977 filter_eps=threshold, flop=f)
1980 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 1.0_dp)
1983 CALL dbcsr_create(bk2a, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1984 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_sqrt_inv, matrixs, 0.0_dp, tmp3, &
1985 filter_eps=threshold, flop=flop1)
1986 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_sqrt_inv, tmp3, 0.0_dp, bk2a, &
1987 filter_eps=threshold, flop=flop2)
1988 CALL dbcsr_copy(rmat, bk2a)
1989 CALL dbcsr_add_on_diag(rmat, -1.0_dp)
1991 CALL dbcsr_set(tmp1, 0.0_dp)
1992 CALL dbcsr_add_on_diag(tmp1, 1.0_dp)
1994 CALL dbcsr_set(tmp2, 0.0_dp)
1995 CALL dbcsr_add_on_diag(tmp2, 1.0_dp)
2000 choose = product([(ii, ii=1, order)])/(product([(ii, ii=1, j)])*product([(ii, ii=1, order - j)]))
2001 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, -1.0_dp*(-1)**j*choose)
2004 CALL dbcsr_copy(tmp3, tmp2)
2005 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, bk2a, 0.0_dp, tmp2, &
2006 filter_eps=threshold, flop=f)
2010 CALL dbcsr_release(bk2a)
2013 CALL dbcsr_copy(tmp3, matrix_sqrt_inv)
2014 CALL dbcsr_multiply(
"N",
"N", 0.5_dp, tmp3, tmp1, 0.0_dp, matrix_sqrt_inv, &
2015 filter_eps=threshold, flop=flop4)
2017 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
2022 conv = dbcsr_frobenius_norm(rmat)
2024 IF (unit_nr > 0)
THEN
2025 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)')
"PROOT sqrt iter ", i, occ_matrix, &
2027 (flop1 + flop2 + flop3 + flop4 + flop5)/(1.0e6_dp*max(0.001_dp, t2 - t1))
2028 CALL m_flush(unit_nr)
2031 IF (abnormal_value(conv)) &
2032 cpabort(
"conv is an abnormal value (NaN/Inf).")
2035 IF ((conv*conv) < threshold)
THEN
2036 IF (
PRESENT(converged)) converged = .true.
2043 CALL dbcsr_scale(matrix_sqrt_inv, sqrt(scaling))
2044 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_sqrt_inv, matrix, 0.0_dp, matrix_sqrt, &
2045 filter_eps=threshold, flop=flop5)
2049 IF (unit_nr > 0)
THEN
2050 WRITE (unit_nr,
'(A20)')
"SYMMETRIZING RESULTS"
2052 CALL dbcsr_transposed(tmp1, matrix_sqrt_inv)
2053 CALL dbcsr_add(matrix_sqrt_inv, tmp1, 0.5_dp, 0.5_dp)
2054 CALL dbcsr_transposed(tmp1, matrix_sqrt)
2055 CALL dbcsr_add(matrix_sqrt, tmp1, 0.5_dp, 0.5_dp)
2060 CALL dbcsr_multiply(
"N",
"N", +1.0_dp, matrix_sqrt_inv, matrix_sqrt, 0.0_dp, tmp1, &
2061 filter_eps=threshold)
2062 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
2063 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
2064 frob_matrix = dbcsr_frobenius_norm(tmp1)
2065 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
2066 IF (unit_nr > 0)
THEN
2067 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3)')
"Final PROOT S^{-1/2} S^{1/2}-Eins error ", i, occ_matrix, &
2068 frob_matrix/frob_matrix_base
2069 WRITE (unit_nr,
'()')
2070 CALL m_flush(unit_nr)
2074 CALL dbcsr_multiply(
"N",
"N", +1.0_dp, matrix_sqrt_inv, matrix_sqrt_inv, 0.0_dp, tmp2, &
2075 filter_eps=threshold)
2076 CALL dbcsr_multiply(
"N",
"N", +1.0_dp, tmp2, matrix, 0.0_dp, tmp1, &
2077 filter_eps=threshold)
2078 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
2079 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
2080 frob_matrix = dbcsr_frobenius_norm(tmp1)
2081 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
2082 IF (unit_nr > 0)
THEN
2083 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3)')
"Final PROOT S^{-1/2} S^{-1/2} S-Eins error ", i, occ_matrix, &
2084 frob_matrix/frob_matrix_base
2085 WRITE (unit_nr,
'()')
2086 CALL m_flush(unit_nr)
2090 CALL dbcsr_release(tmp1)
2091 CALL dbcsr_release(tmp2)
2092 CALL dbcsr_release(tmp3)
2093 CALL dbcsr_release(rmat)
2094 CALL dbcsr_release(matrixs)
2096 CALL timestop(handle)
2109 TYPE(dbcsr_type),
INTENT(INOUT) ::
matrix_exp, matrix
2110 REAL(kind=dp),
INTENT(IN) :: omega, alpha, threshold
2112 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_exponential'
2113 REAL(dp),
PARAMETER :: one = 1.0_dp, toll = 1.e-17_dp, &
2116 INTEGER :: handle, i, k, unit_nr
2117 REAL(dp) :: factorial, norm_c, norm_d, norm_scalar
2118 TYPE(cp_logger_type),
POINTER :: logger
2119 TYPE(dbcsr_type) :: b, b_square, c, d, d_product
2121 CALL timeset(routinen, handle)
2123 logger => cp_get_default_logger()
2124 IF (logger%para_env%is_source())
THEN
2125 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
2131 norm_scalar = abs(alpha)*dbcsr_frobenius_norm(matrix)
2136 IF ((norm_scalar/2.0_dp**k) <= one)
EXIT
2141 CALL dbcsr_create(c, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2142 CALL dbcsr_copy(c, matrix)
2143 CALL dbcsr_scale(c, alpha_scalar=alpha/2.0_dp**k)
2145 CALL dbcsr_create(d, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2146 CALL dbcsr_copy(d, c)
2153 CALL dbcsr_create(b, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2154 CALL dbcsr_copy(b, d)
2155 CALL dbcsr_add_on_diag(b, alpha=one)
2160 norm_c = toll*dbcsr_frobenius_norm(matrix)
2163 CALL dbcsr_create(d_product, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2168 CALL dbcsr_multiply(
"N",
"N", one, d, c, &
2169 zero, d_product, filter_eps=threshold)
2172 CALL dbcsr_copy(d, d_product)
2176 CALL dbcsr_add(b, d_product, one, factorial)
2179 norm_d = factorial*dbcsr_frobenius_norm(d)
2180 IF (norm_d < norm_c)
EXIT
2184 CALL dbcsr_create(b_square, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2187 CALL dbcsr_multiply(
"N",
"N", one, b, b, &
2188 zero, b_square, filter_eps=threshold)
2190 CALL dbcsr_copy(b, b_square)
2197 CALL dbcsr_scale(
matrix_exp, alpha_scalar=omega)
2200 CALL dbcsr_release(b)
2201 CALL dbcsr_release(c)
2202 CALL dbcsr_release(d)
2203 CALL dbcsr_release(d_product)
2204 CALL dbcsr_release(b_square)
2206 CALL timestop(handle)
2220 SUBROUTINE purify_mcweeny_orth(matrix_p, threshold, max_steps)
2221 TYPE(dbcsr_type),
DIMENSION(:) :: matrix_p
2222 REAL(KIND=dp) :: threshold
2223 INTEGER :: max_steps
2225 CHARACTER(LEN=*),
PARAMETER :: routineN =
'purify_mcweeny_orth'
2227 INTEGER :: handle, i, ispin, unit_nr
2228 REAL(KIND=dp) :: frob_norm, trace
2229 TYPE(cp_logger_type),
POINTER :: logger
2230 TYPE(dbcsr_type) :: matrix_pp, matrix_tmp
2232 CALL timeset(routinen, handle)
2233 logger => cp_get_default_logger()
2234 IF (logger%para_env%is_source())
THEN
2235 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
2240 CALL dbcsr_create(matrix_pp, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2241 CALL dbcsr_create(matrix_tmp, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2242 CALL dbcsr_trace(matrix_p(1), trace)
2244 DO ispin = 1,
SIZE(matrix_p)
2246 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_p(ispin), matrix_p(ispin), &
2247 0.0_dp, matrix_pp, filter_eps=threshold)
2250 CALL dbcsr_copy(matrix_tmp, matrix_pp)
2251 CALL dbcsr_add(matrix_tmp, matrix_p(ispin), 1.0_dp, -1.0_dp)
2252 frob_norm = dbcsr_frobenius_norm(matrix_tmp)
2253 IF (unit_nr > 0)
WRITE (unit_nr,
'(t3,a,f16.8)')
"McWeeny: Deviation of idempotency", frob_norm
2254 IF (unit_nr > 0)
CALL m_flush(unit_nr)
2257 CALL dbcsr_copy(matrix_tmp, matrix_pp)
2258 CALL dbcsr_multiply(
"N",
"N", -2.0_dp, matrix_pp, matrix_p(ispin), &
2259 3.0_dp, matrix_tmp, filter_eps=threshold)
2260 CALL dbcsr_copy(matrix_p(ispin), matrix_tmp)
2263 IF (frob_norm*frob_norm < trace*threshold)
EXIT
2267 CALL dbcsr_release(matrix_pp)
2268 CALL dbcsr_release(matrix_tmp)
2269 CALL timestop(handle)
2270 END SUBROUTINE purify_mcweeny_orth
2283 SUBROUTINE purify_mcweeny_nonorth(matrix_p, matrix_s, threshold, max_steps)
2284 TYPE(dbcsr_type),
DIMENSION(:) :: matrix_p
2285 TYPE(dbcsr_type) :: matrix_s
2286 REAL(KIND=dp) :: threshold
2287 INTEGER :: max_steps
2289 CHARACTER(LEN=*),
PARAMETER :: routineN =
'purify_mcweeny_nonorth'
2291 INTEGER :: handle, i, ispin, unit_nr
2292 REAL(KIND=dp) :: frob_norm, trace
2293 TYPE(cp_logger_type),
POINTER :: logger
2294 TYPE(dbcsr_type) :: matrix_ps, matrix_psp, matrix_test
2296 CALL timeset(routinen, handle)
2298 logger => cp_get_default_logger()
2299 IF (logger%para_env%is_source())
THEN
2300 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
2305 CALL dbcsr_create(matrix_ps, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2306 CALL dbcsr_create(matrix_psp, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2307 CALL dbcsr_create(matrix_test, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2309 DO ispin = 1,
SIZE(matrix_p)
2311 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_p(ispin), matrix_s, &
2312 0.0_dp, matrix_ps, filter_eps=threshold)
2313 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_ps, matrix_p(ispin), &
2314 0.0_dp, matrix_psp, filter_eps=threshold)
2315 IF (i == 1)
CALL dbcsr_trace(matrix_ps, trace)
2318 CALL dbcsr_copy(matrix_test, matrix_psp)
2319 CALL dbcsr_add(matrix_test, matrix_p(ispin), 1.0_dp, -1.0_dp)
2320 frob_norm = dbcsr_frobenius_norm(matrix_test)
2321 IF (unit_nr > 0)
WRITE (unit_nr,
'(t3,a,2f16.8)')
"McWeeny: Deviation of idempotency", frob_norm
2322 IF (unit_nr > 0)
CALL m_flush(unit_nr)
2325 CALL dbcsr_copy(matrix_p(ispin), matrix_psp)
2326 CALL dbcsr_multiply(
"N",
"N", -2.0_dp, matrix_ps, matrix_psp, &
2327 3.0_dp, matrix_p(ispin), filter_eps=threshold)
2330 IF (frob_norm*frob_norm < trace*threshold)
EXIT
2334 CALL dbcsr_release(matrix_ps)
2335 CALL dbcsr_release(matrix_psp)
2336 CALL dbcsr_release(matrix_test)
2337 CALL timestop(handle)
2338 END SUBROUTINE purify_mcweeny_nonorth
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
arnoldi iteration using dbcsr
subroutine, public arnoldi_extremal(matrix_a, max_ev, min_ev, converged, threshold, max_iter)
simple wrapper to estimate extremal eigenvalues with arnoldi, using the old lanczos interface this hi...
collects all references to literature in CP2K as new algorithms / method are included from literature...
integer, save, public richters2018
subroutine, public dbcsr_transposed(transposed, normal, shallow_data_copy, transpose_distribution, use_distribution)
...
subroutine, public dbcsr_scale(matrix, alpha_scalar)
...
character function, public dbcsr_get_matrix_type(matrix)
...
subroutine, public dbcsr_desymmetrize(matrix_a, matrix_b)
...
subroutine, public dbcsr_copy(matrix_b, matrix_a, name, keep_sparsity, keep_imaginary)
...
subroutine, public dbcsr_multiply(transa, transb, alpha, matrix_a, matrix_b, beta, matrix_c, first_row, last_row, first_column, last_column, first_k, last_k, retain_sparsity, filter_eps, flop)
...
subroutine, public dbcsr_get_info(matrix, nblkrows_total, nblkcols_total, nfullrows_total, nfullcols_total, nblkrows_local, nblkcols_local, nfullrows_local, nfullcols_local, my_prow, my_pcol, local_rows, local_cols, proc_row_dist, proc_col_dist, row_blk_size, col_blk_size, row_blk_offset, col_blk_offset, distribution, name, matrix_type, group)
...
subroutine, public dbcsr_filter(matrix, eps)
...
real(kind=dp) function, public dbcsr_get_occupation(matrix)
...
subroutine, public dbcsr_set(matrix, alpha)
...
subroutine, public dbcsr_release(matrix)
...
subroutine, public dbcsr_add(matrix_a, matrix_b, alpha_scalar, beta_scalar)
...
subroutine, public dbcsr_distribution_get(dist, row_dist, col_dist, nrows, ncols, has_threads, group, mynode, numnodes, nprows, npcols, myprow, mypcol, pgrid, subgroups_defined, prow_group, pcol_group)
...
subroutine, public dbcsr_set_diag(matrix, diag)
Copies the diagonal elements from the given array into the given matrix.
real(dp) function, public dbcsr_gershgorin_norm(matrix)
Compute the gershgorin norm of a dbcsr matrix.
subroutine, public dbcsr_get_diag(matrix, diag)
Copies the diagonal elements from the given matrix into the given array.
subroutine, public dbcsr_add_on_diag(matrix, alpha)
Adds the given scalar to the diagonal of the matrix. Reserves any missing diagonal blocks.
real(dp) function, public dbcsr_maxabs(matrix)
Compute the maxabs norm of a dbcsr matrix.
subroutine, public dbcsr_trace(matrix, trace)
Computes the trace of the given matrix, also known as the sum of its diagonal elements.
real(dp) function, public dbcsr_frobenius_norm(matrix)
Compute the frobenius norm of a dbcsr matrix.
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
Routines useful for iterative matrix calculations.
recursive subroutine, public determinant(matrix, det, threshold)
Computes the determinant of a symmetric positive definite matrix using the trace of the matrix logari...
subroutine, public matrix_sign_newton_schulz(matrix_sign, matrix, threshold, sign_order, iounit)
compute the sign a matrix using Newton-Schulz iterations
subroutine, public matrix_sign_submatrix(matrix_sign, matrix, threshold, sign_order, submatrix_sign_method)
Submatrix method.
subroutine, public invert_taylor(matrix_inverse, matrix, threshold, use_inv_as_guess, norm_convergence, filter_eps, accelerator_order, max_iter_lanczos, eps_lanczos, silent)
invert a symmetric positive definite diagonally dominant matrix
subroutine, public matrix_exponential(matrix_exp, matrix, omega, alpha, threshold)
...
subroutine, public matrix_sign_submatrix_mu_adjust(matrix_sign, matrix, mu, nelectron, threshold, variant)
Submatrix method with internal adjustment of chemical potential.
subroutine, public invert_hotelling(matrix_inverse, matrix, threshold, use_inv_as_guess, norm_convergence, filter_eps, accelerator_order, max_iter_lanczos, eps_lanczos, silent)
invert a symmetric positive definite matrix by Hotelling's method explicit symmetrization makes this ...
subroutine, public matrix_sign_proot(matrix_sign, matrix, threshold, sign_order)
compute the sign a matrix using the general algorithm for the p-th root of Richters et al....
subroutine, public matrix_sqrt_newton_schulz(matrix_sqrt, matrix_sqrt_inv, matrix, threshold, order, eps_lanczos, max_iter_lanczos, symmetrize, converged, iounit)
compute the sqrt of a matrix via the sign function and the corresponding Newton-Schulz iterations the...
subroutine, public matrix_sqrt_proot(matrix_sqrt, matrix_sqrt_inv, matrix, threshold, order, eps_lanczos, max_iter_lanczos, symmetrize, converged)
compute the sqrt of a matrix via the general algorithm for the p-th root of Richters et al....
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition of mathematical constants and functions.
real(kind=dp), dimension(0:maxfac), parameter, public ifac
Collection of simple mathematical functions and subroutines.
logical function, public abnormal_value(a)
determines if a value is not normal (e.g. for Inf and Nan) based on IO to work also under optimizatio...
Routines for calculating a complex matrix exponential.
Interface to the message passing library MPI.
type of a logger, at the moment it contains just a print level starting at which level it should be l...