45#include "./base/base_uses.f90"
51 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'iterate_matrix'
54 REAL(KIND=
dp),
DIMENSION(:),
ALLOCATABLE :: eigvals
55 REAL(KIND=
dp),
DIMENSION(:, :),
ALLOCATABLE :: eigvecs
59 MODULE PROCEDURE purify_mcweeny_orth, purify_mcweeny_nonorth
87 REAL(kind=
dp),
INTENT(INOUT) :: det
88 REAL(kind=
dp),
INTENT(IN) :: threshold
90 CHARACTER(LEN=*),
PARAMETER :: routinen =
'determinant'
92 INTEGER :: handle, i, max_iter_lanczos, nsize, &
93 order_lanczos, sign_iter, unit_nr
94 INTEGER(KIND=int_8) :: flop1
95 INTEGER,
SAVE :: recursion_depth = 0
96 REAL(kind=
dp) :: det0, eps_lanczos, frobnorm, maxnorm, &
97 occ_matrix, t1, t2, trace
98 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: diagonal
102 CALL timeset(routinen, handle)
106 IF (logger%para_env%is_source())
THEN
117 matrix_type=dbcsr_type_no_symmetry)
119 matrix_type=dbcsr_type_no_symmetry)
121 matrix_type=dbcsr_type_no_symmetry)
127 ALLOCATE (diagonal(nsize))
129 CALL group%sum(diagonal)
130 det = product(diagonal)
134 diagonal(:) = 1.0_dp/(sqrt(diagonal(:)))
140 DEALLOCATE (diagonal)
148 filter_eps=threshold)
153 filter_eps=threshold)
158 IF (unit_nr > 0)
THEN
159 IF (recursion_depth .EQ. 0)
THEN
160 WRITE (unit_nr,
'()')
162 WRITE (unit_nr,
'(T6,A28,1X,I15)') &
163 "Recursive iteration:", recursion_depth
165 WRITE (unit_nr,
'(T6,A28,1X,F15.10)') &
166 "Frobenius norm:", frobnorm
170 IF (frobnorm .GE. 1.0_dp)
THEN
175 eps_lanczos = 1.0e-4_dp
176 max_iter_lanczos = 40
181 threshold=threshold, &
182 order=order_lanczos, &
183 eps_lanczos=eps_lanczos, &
184 max_iter_lanczos=max_iter_lanczos)
185 recursion_depth = recursion_depth + 1
187 recursion_depth = recursion_depth - 1
197 IF (unit_nr > 0)
WRITE (unit_nr, *)
210 filter_eps=threshold, &
216 trace = trace*sign_iter/(1.0_dp*(i + 1))
217 sign_iter = -sign_iter
227 IF (unit_nr > 0)
THEN
228 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F7.5,F16.10,F10.3,F11.3)') &
229 "Determinant iter", i, occ_matrix, &
231 flop1/(1.0e6_dp*max(0.001_dp, t2 - t1))
236 IF (maxnorm < threshold)
EXIT
240 IF (unit_nr > 0)
THEN
241 WRITE (unit_nr,
'()')
247 IF (unit_nr > 0)
THEN
248 IF (recursion_depth .EQ. 0)
THEN
249 WRITE (unit_nr,
'(T6,A28,1X,F15.10)') &
250 "Final determinant:", det
251 WRITE (unit_nr,
'()')
253 WRITE (unit_nr,
'(T6,A28,1X,F15.10)') &
254 "Recursive determinant:", det
263 CALL timestop(handle)
284 SUBROUTINE invert_taylor(matrix_inverse, matrix, threshold, use_inv_as_guess, &
285 norm_convergence, filter_eps, accelerator_order, &
286 max_iter_lanczos, eps_lanczos, silent)
288 TYPE(dbcsr_type),
INTENT(INOUT),
TARGET :: matrix_inverse, matrix
289 REAL(kind=dp),
INTENT(IN) :: threshold
290 LOGICAL,
INTENT(IN),
OPTIONAL :: use_inv_as_guess
291 REAL(kind=dp),
INTENT(IN),
OPTIONAL :: norm_convergence, filter_eps
292 INTEGER,
INTENT(IN),
OPTIONAL :: accelerator_order, max_iter_lanczos
293 REAL(kind=dp),
INTENT(IN),
OPTIONAL :: eps_lanczos
294 LOGICAL,
INTENT(IN),
OPTIONAL :: silent
296 CHARACTER(LEN=*),
PARAMETER :: routinen =
'invert_Taylor'
298 INTEGER :: accelerator_type, handle, i, &
299 my_max_iter_lanczos, nrows, unit_nr
300 INTEGER(KIND=int_8) :: flop2
301 LOGICAL :: converged, use_inv_guess
302 REAL(kind=dp) :: coeff, convergence, maxnorm_matrix, &
303 my_eps_lanczos, occ_matrix, t1, t2
304 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:) :: p_diagonal
305 TYPE(cp_logger_type),
POINTER :: logger
306 TYPE(dbcsr_type),
TARGET :: tmp1, tmp2, tmp3_sym
308 CALL timeset(routinen, handle)
310 logger => cp_get_default_logger()
311 IF (logger%para_env%is_source())
THEN
312 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
316 IF (
PRESENT(silent))
THEN
317 IF (silent) unit_nr = -1
320 convergence = threshold
321 IF (
PRESENT(norm_convergence)) convergence = norm_convergence
324 IF (
PRESENT(accelerator_order)) accelerator_type = accelerator_order
325 IF (accelerator_type .GT. 1) accelerator_type = 1
327 use_inv_guess = .false.
328 IF (
PRESENT(use_inv_as_guess)) use_inv_guess = use_inv_as_guess
330 my_max_iter_lanczos = 64
331 my_eps_lanczos = 1.0e-3_dp
332 IF (
PRESENT(max_iter_lanczos)) my_max_iter_lanczos = max_iter_lanczos
333 IF (
PRESENT(eps_lanczos)) my_eps_lanczos = eps_lanczos
335 CALL dbcsr_create(tmp1, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
336 CALL dbcsr_create(tmp2, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
337 CALL dbcsr_create(tmp3_sym, template=matrix_inverse)
339 CALL dbcsr_get_info(matrix, nfullrows_total=nrows)
340 ALLOCATE (p_diagonal(nrows))
343 IF (.NOT. use_inv_guess)
THEN
345 SELECT CASE (accelerator_type)
348 CALL dbcsr_desymmetrize(matrix, tmp1)
349 p_diagonal(:) = 0.0_dp
350 CALL dbcsr_set_diag(tmp1, p_diagonal)
353 CALL dbcsr_get_diag(matrix, p_diagonal)
355 IF (p_diagonal(i) .NE. 0.0_dp)
THEN
356 p_diagonal(i) = 1.0_dp/p_diagonal(i)
359 CALL dbcsr_set(matrix_inverse, 0.0_dp)
360 CALL dbcsr_add_on_diag(matrix_inverse, 1.0_dp)
361 CALL dbcsr_set_diag(matrix_inverse, p_diagonal)
363 cpabort(
"Illegal accelerator order")
368 cpabort(
"Guess is NYI")
372 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, matrix_inverse, &
373 0.0_dp, tmp2, filter_eps=filter_eps)
375 IF (unit_nr > 0)
WRITE (unit_nr, *)
382 CALL dbcsr_desymmetrize(matrix_inverse, tmp1)
387 coeff = -1.0_dp*coeff
388 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, tmp2, 0.0_dp, &
390 flop=flop2, filter_eps=filter_eps)
392 CALL dbcsr_add(matrix_inverse, tmp3_sym, 1.0_dp, coeff)
393 CALL dbcsr_release(tmp1)
394 CALL dbcsr_create(tmp1, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
395 CALL dbcsr_desymmetrize(tmp3_sym, tmp1)
398 maxnorm_matrix = dbcsr_maxabs(tmp3_sym)
401 occ_matrix = dbcsr_get_occupation(matrix_inverse)
403 IF (unit_nr > 0)
THEN
404 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)')
"Taylor iter", i, occ_matrix, &
405 maxnorm_matrix, t2 - t1, &
406 flop2/(1.0e6_dp*max(0.001_dp, t2 - t1))
407 CALL m_flush(unit_nr)
410 IF (maxnorm_matrix < convergence)
THEN
420 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix, matrix_inverse, 0.0_dp, tmp1, &
421 filter_eps=filter_eps)
422 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
424 maxnorm_matrix = dbcsr_maxabs(tmp1)
425 IF (unit_nr > 0)
THEN
426 WRITE (unit_nr,
'(T6,A,E12.5)')
"Final Taylor error", maxnorm_matrix
427 WRITE (unit_nr,
'()')
428 CALL m_flush(unit_nr)
430 IF (maxnorm_matrix > convergence)
THEN
432 IF (unit_nr > 0)
THEN
433 WRITE (unit_nr, *)
'Final convergence check failed'
437 IF (.NOT. converged)
THEN
438 cpabort(
"Taylor inversion did not converge")
441 CALL dbcsr_release(tmp1)
442 CALL dbcsr_release(tmp2)
443 CALL dbcsr_release(tmp3_sym)
445 DEALLOCATE (p_diagonal)
447 CALL timestop(handle)
471 norm_convergence, filter_eps, accelerator_order, &
472 max_iter_lanczos, eps_lanczos, silent)
474 TYPE(dbcsr_type),
INTENT(INOUT),
TARGET :: matrix_inverse, matrix
475 REAL(kind=dp),
INTENT(IN) :: threshold
476 LOGICAL,
INTENT(IN),
OPTIONAL :: use_inv_as_guess
477 REAL(kind=dp),
INTENT(IN),
OPTIONAL :: norm_convergence, filter_eps
478 INTEGER,
INTENT(IN),
OPTIONAL :: accelerator_order, max_iter_lanczos
479 REAL(kind=dp),
INTENT(IN),
OPTIONAL :: eps_lanczos
480 LOGICAL,
INTENT(IN),
OPTIONAL :: silent
482 CHARACTER(LEN=*),
PARAMETER :: routinen =
'invert_Hotelling'
484 INTEGER :: accelerator_type, handle, i, &
485 my_max_iter_lanczos, unit_nr
486 INTEGER(KIND=int_8) :: flop1, flop2
487 LOGICAL :: arnoldi_converged, converged, &
489 REAL(kind=dp) :: convergence, frob_matrix, gershgorin_norm, max_ev, maxnorm_matrix, min_ev, &
490 my_eps_lanczos, my_filter_eps, occ_matrix, scalingf, t1, t2
491 TYPE(cp_logger_type),
POINTER :: logger
492 TYPE(dbcsr_type),
TARGET :: tmp1, tmp2
497 CALL timeset(routinen, handle)
499 logger => cp_get_default_logger()
500 IF (logger%para_env%is_source())
THEN
501 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
505 IF (
PRESENT(silent))
THEN
506 IF (silent) unit_nr = -1
509 convergence = threshold
510 IF (
PRESENT(norm_convergence)) convergence = norm_convergence
513 IF (
PRESENT(accelerator_order)) accelerator_type = accelerator_order
514 IF (accelerator_type .GT. 1) accelerator_type = 1
516 use_inv_guess = .false.
517 IF (
PRESENT(use_inv_as_guess)) use_inv_guess = use_inv_as_guess
519 my_max_iter_lanczos = 64
520 my_eps_lanczos = 1.0e-3_dp
521 IF (
PRESENT(max_iter_lanczos)) my_max_iter_lanczos = max_iter_lanczos
522 IF (
PRESENT(eps_lanczos)) my_eps_lanczos = eps_lanczos
524 my_filter_eps = threshold
525 IF (
PRESENT(filter_eps)) my_filter_eps = filter_eps
528 IF (.NOT. use_inv_guess)
THEN
530 SELECT CASE (accelerator_type)
532 gershgorin_norm = dbcsr_gershgorin_norm(matrix)
533 frob_matrix = dbcsr_frobenius_norm(matrix)
534 CALL dbcsr_set(matrix_inverse, 0.0_dp)
535 CALL dbcsr_add_on_diag(matrix_inverse, 1/min(gershgorin_norm, frob_matrix))
538 CALL dbcsr_set(matrix_inverse, 0.0_dp)
539 CALL dbcsr_add_on_diag(matrix_inverse, 1.0_dp)
541 cpabort(
"Illegal accelerator order")
545 CALL dbcsr_create(tmp1, template=matrix_inverse)
551 CALL dbcsr_create(tmp1, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
555 CALL dbcsr_create(tmp2, template=matrix_inverse)
557 IF (unit_nr > 0)
WRITE (unit_nr, *)
562 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_inverse, matrix, &
563 0.0_dp, tmp1, flop=flop1, filter_eps=my_filter_eps)
565 IF (accelerator_type == 1)
THEN
568 CALL arnoldi_extremal(tmp1, max_ev, min_ev, threshold=my_eps_lanczos, &
569 max_iter=my_max_iter_lanczos, converged=arnoldi_converged)
578 IF (unit_nr > 0)
THEN
580 WRITE (unit_nr,
'(T6,A,1X,L1,A,E12.3)')
"Lanczos converged: ", arnoldi_converged,
" threshold:", my_eps_lanczos
581 WRITE (unit_nr,
'(T6,A,1X,E12.3,E12.3)')
"Est. extremal eigenvalues:", max_ev, min_ev
582 WRITE (unit_nr,
'(T6,A,1X,E12.3)')
"Est. condition number :", max_ev/max(min_ev, epsilon(min_ev))
586 scalingf = 1.9_dp/(max_ev + min_ev)
587 CALL dbcsr_scale(tmp1, scalingf)
588 CALL dbcsr_scale(matrix_inverse, scalingf)
589 min_ev = min_ev*scalingf
600 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
601 maxnorm_matrix = dbcsr_maxabs(tmp1)
602 CALL dbcsr_add_on_diag(tmp1, +1.0_dp)
605 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, matrix_inverse, 0.0_dp, tmp2, &
606 flop=flop2, filter_eps=my_filter_eps)
608 CALL dbcsr_add(matrix_inverse, tmp2, 2.0_dp, -1.0_dp)
610 CALL dbcsr_filter(matrix_inverse, my_filter_eps)
612 occ_matrix = dbcsr_get_occupation(matrix_inverse)
615 IF (accelerator_type == 1)
THEN
616 min_ev = min_ev*(2.0_dp - min_ev)
617 IF (
PRESENT(norm_convergence)) maxnorm_matrix = abs(min_ev - 1.0_dp)
620 IF (unit_nr > 0)
THEN
621 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)')
"Hotelling iter", i, occ_matrix, &
622 maxnorm_matrix, t2 - t1, &
623 (flop1 + flop2)/(1.0e6_dp*max(0.001_dp, t2 - t1))
624 CALL m_flush(unit_nr)
627 IF (maxnorm_matrix < convergence)
THEN
633 IF (accelerator_type == 1)
THEN
634 min_ev = min_ev*2.0_dp/(min_ev + 1.0_dp)
635 CALL dbcsr_scale(matrix_inverse, 2.0_dp/(min_ev + 1.0_dp))
639 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_inverse, matrix, &
640 0.0_dp, tmp1, flop=flop1, filter_eps=my_filter_eps)
644 IF (.NOT. converged)
THEN
645 cpabort(
"Hotelling inversion did not converge")
649 IF (dbcsr_get_matrix_type(matrix_inverse) == dbcsr_type_no_symmetry)
THEN
650 CALL dbcsr_transposed(tmp2, matrix_inverse)
651 CALL dbcsr_add(matrix_inverse, tmp2, 0.5_dp, 0.5_dp)
654 IF (unit_nr > 0)
THEN
658 WRITE (unit_nr,
'()')
659 CALL m_flush(unit_nr)
662 CALL dbcsr_release(tmp1)
663 CALL dbcsr_release(tmp2)
665 CALL timestop(handle)
683 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix_sign, matrix
684 REAL(kind=dp),
INTENT(IN) :: threshold
685 INTEGER,
INTENT(IN),
OPTIONAL :: sign_order, iounit
687 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_sign_Newton_Schulz'
689 INTEGER :: count, handle, i, order, unit_nr
690 INTEGER(KIND=int_8) :: flops
691 REAL(kind=dp) :: a0, a1, a2, a3, a4, a5, floptot, &
692 frob_matrix, frob_matrix_base, &
693 gersh_matrix, occ_matrix, prefactor, &
695 TYPE(cp_logger_type),
POINTER :: logger
696 TYPE(dbcsr_type) :: tmp1, tmp2, tmp3, tmp4
698 CALL timeset(routinen, handle)
700 IF (
PRESENT(iounit))
THEN
703 logger => cp_get_default_logger()
704 IF (logger%para_env%is_source())
THEN
705 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
711 IF (
PRESENT(sign_order))
THEN
717 CALL dbcsr_create(tmp1, template=matrix_sign)
719 CALL dbcsr_create(tmp2, template=matrix_sign)
720 IF (abs(order) .GE. 4)
THEN
721 CALL dbcsr_create(tmp3, template=matrix_sign)
723 IF (abs(order) .GT. 4)
THEN
724 CALL dbcsr_create(tmp4, template=matrix_sign)
727 CALL dbcsr_copy(matrix_sign, matrix)
728 CALL dbcsr_filter(matrix_sign, threshold)
731 frob_matrix = dbcsr_frobenius_norm(matrix_sign)
732 gersh_matrix = dbcsr_gershgorin_norm(matrix_sign)
733 CALL dbcsr_scale(matrix_sign, 1/min(frob_matrix, gersh_matrix))
735 IF (unit_nr > 0)
WRITE (unit_nr, *)
742 CALL dbcsr_multiply(
"N",
"N", -1.0_dp, matrix_sign, matrix_sign, 0.0_dp, tmp1, &
743 filter_eps=threshold, flop=flops)
744 floptot = floptot + flops
747 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
748 CALL dbcsr_add_on_diag(tmp1, +1.0_dp)
749 frob_matrix = dbcsr_frobenius_norm(tmp1)
781 IF (order .EQ. 2)
THEN
785 CALL dbcsr_add_on_diag(tmp1, +2.0_dp)
786 occ_matrix = dbcsr_get_occupation(matrix_sign)
787 ELSE IF (order .EQ. 3)
THEN
790 CALL dbcsr_copy(tmp2, tmp1)
791 CALL dbcsr_scale(tmp1, 4.0_dp/3.0_dp)
792 CALL dbcsr_add_on_diag(tmp1, 8.0_dp/3.0_dp)
795 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp2, 1.0_dp, tmp1, &
796 filter_eps=threshold, flop=flops)
797 floptot = floptot + flops
798 prefactor = 3.0_dp/8.0_dp
800 ELSE IF (order .EQ. 4)
THEN
803 CALL dbcsr_copy(tmp3, tmp1)
804 CALL dbcsr_scale(tmp1, 8.0_dp/5.0_dp)
805 CALL dbcsr_add_on_diag(tmp1, 16.0_dp/5.0_dp)
808 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, tmp3, 0.0_dp, tmp2, &
809 filter_eps=threshold, flop=flops)
810 floptot = floptot + flops
812 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 6.0_dp/5.0_dp)
814 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp3, 1.0_dp, tmp1, &
815 filter_eps=threshold, flop=flops)
816 floptot = floptot + flops
818 prefactor = 5.0_dp/16.0_dp
819 ELSE IF (order .EQ. -5)
THEN
822 CALL dbcsr_copy(tmp3, tmp1)
823 CALL dbcsr_scale(tmp1, 128.0_dp/70.0_dp)
824 CALL dbcsr_add_on_diag(tmp1, 128.0_dp/35.0_dp)
827 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, tmp3, 0.0_dp, tmp2, &
828 filter_eps=threshold, flop=flops)
829 floptot = floptot + flops
831 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 48.0_dp/35.0_dp)
833 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp4, &
834 filter_eps=threshold, flop=flops)
835 floptot = floptot + flops
837 CALL dbcsr_add(tmp1, tmp4, 1.0_dp, 8.0_dp/7.0_dp)
839 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp4, tmp3, 1.0_dp, tmp1, &
840 filter_eps=threshold, flop=flops)
841 floptot = floptot + flops
843 prefactor = 35.0_dp/128.0_dp
844 ELSE IF (order .EQ. 5)
THEN
854 a1 = 23819.0_dp/13720.0_dp
855 a2 = -3734_dp/1715.0_dp
856 a3 = 832591127_dp/188238400.0_dp
860 CALL dbcsr_copy(tmp3, tmp1)
861 CALL dbcsr_add_on_diag(tmp3, a0)
862 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, tmp1, 0.0_dp, tmp2, &
863 filter_eps=threshold, flop=flops)
864 floptot = floptot + flops
865 CALL dbcsr_add_on_diag(tmp2, a1)
867 CALL dbcsr_add_on_diag(tmp1, a2)
868 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 1.0_dp)
869 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, tmp2, 0.0_dp, tmp3, &
870 filter_eps=threshold, flop=flops)
871 floptot = floptot + flops
872 CALL dbcsr_add_on_diag(tmp3, a3)
873 CALL dbcsr_copy(tmp1, tmp3)
875 prefactor = 35.0_dp/128.0_dp
876 ELSE IF (order .EQ. 6)
THEN
880 CALL dbcsr_copy(tmp3, tmp1)
881 CALL dbcsr_scale(tmp1, 128.0_dp/63.0_dp)
882 CALL dbcsr_add_on_diag(tmp1, 256.0_dp/63.0_dp)
885 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, tmp3, 0.0_dp, tmp2, &
886 filter_eps=threshold, flop=flops)
887 floptot = floptot + flops
889 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 32.0_dp/21.0_dp)
891 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp4, &
892 filter_eps=threshold, flop=flops)
893 floptot = floptot + flops
895 CALL dbcsr_add(tmp1, tmp4, 1.0_dp, 80.0_dp/63.0_dp)
897 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp4, tmp3, 0.0_dp, tmp2, &
898 filter_eps=threshold, flop=flops)
899 floptot = floptot + flops
901 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 10.0_dp/9.0_dp)
903 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp3, 1.0_dp, tmp1, &
904 filter_eps=threshold, flop=flops)
905 floptot = floptot + flops
907 prefactor = 63.0_dp/256.0_dp
908 ELSE IF (order .EQ. 7)
THEN
911 a0 = 1.3686502058092053653287666647611728507211996691324048468010382350359929055186612505791532871573242422_dp
912 a1 = 1.7089671854477436685850554669524985556296280184497503489303331821456795715195510972774979091893741568_dp
913 a2 = -1.3231956603546599107833121193066273961757451236778593922555836895814474509732067051246078326118696968_dp
914 a3 = 3.9876642330847931291749479958277754186675336169578593000744380254770411483327581042259415937710270453_dp
915 a4 = -3.7273299006476825027065704937541279833880400042556351139273912137942678919776364526511485025132991667_dp
916 a5 = 4.9369932474103023792021351907971943220607580694533770325967170245194362399287150565595441897740173578_dp
924 CALL dbcsr_copy(tmp3, tmp1)
925 CALL dbcsr_add_on_diag(tmp3, a0)
926 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, tmp1, 0.0_dp, tmp2, &
927 filter_eps=threshold, flop=flops)
928 floptot = floptot + flops
929 CALL dbcsr_add_on_diag(tmp2, a1)
932 CALL dbcsr_copy(tmp4, tmp1)
933 CALL dbcsr_add_on_diag(tmp4, a2)
934 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp4, tmp2, 0.0_dp, tmp3, &
935 filter_eps=threshold, flop=flops)
936 floptot = floptot + flops
937 CALL dbcsr_add_on_diag(tmp3, a3)
939 CALL dbcsr_add(tmp2, tmp3, 1.0_dp, 1.0_dp)
940 CALL dbcsr_add_on_diag(tmp2, a4)
941 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp1, &
942 filter_eps=threshold, flop=flops)
943 floptot = floptot + flops
944 CALL dbcsr_add_on_diag(tmp1, a5)
946 prefactor = 231.0_dp/1024.0_dp
948 cpabort(
"requested order is not implemented.")
952 CALL dbcsr_multiply(
"N",
"N", prefactor, matrix_sign, tmp1, 0.0_dp, tmp2, &
953 filter_eps=threshold, flop=flops)
954 floptot = floptot + flops
958 CALL dbcsr_copy(matrix_sign, tmp2)
961 occ_matrix = dbcsr_get_occupation(matrix_sign)
963 IF (unit_nr > 0)
THEN
964 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)')
"NS sign iter ", i, occ_matrix, &
965 frob_matrix/frob_matrix_base, t2 - t1, &
966 floptot/(1.0e6_dp*max(0.001_dp, t2 - t1))
967 CALL m_flush(unit_nr)
971 IF (frob_matrix*frob_matrix < (threshold*frob_matrix_base*frob_matrix_base))
EXIT
976 CALL dbcsr_multiply(
"N",
"N", +1.0_dp, matrix_sign, matrix_sign, 0.0_dp, tmp1, &
977 filter_eps=threshold)
978 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
979 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
980 frob_matrix = dbcsr_frobenius_norm(tmp1)
981 occ_matrix = dbcsr_get_occupation(matrix_sign)
982 IF (unit_nr > 0)
THEN
983 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3)')
"Final NS sign iter", i, occ_matrix, &
984 frob_matrix/frob_matrix_base
985 WRITE (unit_nr,
'()')
986 CALL m_flush(unit_nr)
989 CALL dbcsr_release(tmp1)
990 CALL dbcsr_release(tmp2)
991 IF (abs(order) .GE. 4)
THEN
992 CALL dbcsr_release(tmp3)
994 IF (abs(order) .GT. 4)
THEN
995 CALL dbcsr_release(tmp4)
998 CALL timestop(handle)
1015 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix_sign, matrix
1016 REAL(kind=dp),
INTENT(IN) :: threshold
1017 INTEGER,
INTENT(IN),
OPTIONAL :: sign_order
1019 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_sign_proot'
1021 INTEGER :: handle, order, unit_nr
1022 INTEGER(KIND=int_8) :: flop0, flop1, flop2
1023 LOGICAL :: converged, symmetrize
1024 REAL(kind=dp) :: frob_matrix, frob_matrix_base, occ_matrix
1025 TYPE(cp_logger_type),
POINTER :: logger
1026 TYPE(dbcsr_type) :: matrix2, matrix_sqrt, matrix_sqrt_inv, &
1029 CALL cite_reference(richters2018)
1031 CALL timeset(routinen, handle)
1033 logger => cp_get_default_logger()
1034 IF (logger%para_env%is_source())
THEN
1035 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1040 IF (
PRESENT(sign_order))
THEN
1046 CALL dbcsr_create(tmp1, template=matrix_sign)
1048 CALL dbcsr_create(tmp2, template=matrix_sign)
1050 CALL dbcsr_create(matrix2, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1051 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix, matrix, 0.0_dp, matrix2, &
1052 filter_eps=threshold, flop=flop0)
1058 IF (unit_nr > 0)
WRITE (unit_nr, *)
1060 CALL dbcsr_create(matrix_sqrt, template=matrix2)
1061 CALL dbcsr_create(matrix_sqrt_inv, template=matrix2)
1062 IF (unit_nr > 0)
WRITE (unit_nr, *)
"Threshold=", threshold
1064 symmetrize = .false.
1065 CALL matrix_sqrt_proot(matrix_sqrt, matrix_sqrt_inv, matrix2, threshold, order, &
1066 0.01_dp, 100, symmetrize, converged)
1068 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix, matrix_sqrt_inv, 0.0_dp, matrix_sign, &
1069 filter_eps=threshold, flop=flop1)
1072 CALL dbcsr_multiply(
"N",
"N", +1.0_dp, matrix_sign, matrix_sign, 0.0_dp, tmp1, &
1073 filter_eps=threshold, flop=flop2)
1074 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
1075 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
1076 frob_matrix = dbcsr_frobenius_norm(tmp1)
1077 occ_matrix = dbcsr_get_occupation(matrix_sign)
1078 IF (unit_nr > 0)
THEN
1079 WRITE (unit_nr,
'(T6,A,F10.8,E12.3)')
"Final proot sign iter", occ_matrix, &
1080 frob_matrix/frob_matrix_base
1081 WRITE (unit_nr,
'()')
1082 CALL m_flush(unit_nr)
1085 CALL dbcsr_release(tmp1)
1086 CALL dbcsr_release(tmp2)
1087 CALL dbcsr_release(matrix2)
1088 CALL dbcsr_release(matrix_sqrt)
1089 CALL dbcsr_release(matrix_sqrt_inv)
1091 CALL timestop(handle)
1104 SUBROUTINE dense_matrix_sign_newton_schulz(matrix_sign, matrix, matrix_id, threshold, sign_order)
1106 REAL(kind=dp),
DIMENSION(:, :),
INTENT(INOUT) :: matrix_sign
1107 REAL(kind=dp),
DIMENSION(:, :),
INTENT(IN) :: matrix
1108 INTEGER,
INTENT(IN),
OPTIONAL :: matrix_id
1109 REAL(kind=dp),
INTENT(IN) :: threshold
1110 INTEGER,
INTENT(IN),
OPTIONAL :: sign_order
1112 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dense_matrix_sign_Newton_Schulz'
1114 INTEGER :: handle, i, j, sz, unit_nr
1115 LOGICAL :: converged
1116 REAL(kind=dp) :: frob_matrix, frob_matrix_base, &
1117 gersh_matrix, prefactor, scaling_factor
1118 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: tmp1, tmp2
1119 REAL(kind=dp),
DIMENSION(1) :: work
1120 REAL(kind=dp),
EXTERNAL :: dlange
1121 TYPE(cp_logger_type),
POINTER :: logger
1123 CALL timeset(routinen, handle)
1126 logger => cp_get_default_logger()
1127 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1130 sz =
SIZE(matrix, 1)
1131 frob_matrix = dlange(
'F', sz, sz, matrix, sz, work)
1132 gersh_matrix = dlange(
'1', sz, sz, matrix, sz, work)
1133 scaling_factor = 1/min(frob_matrix, gersh_matrix)
1134 matrix_sign = matrix*scaling_factor
1135 ALLOCATE (tmp1(sz, sz))
1136 ALLOCATE (tmp2(sz, sz))
1140 CALL dgemm(
'N',
'N', sz, sz, sz, -1.0_dp, matrix_sign, sz, matrix_sign, sz, 0.0_dp, tmp1, sz)
1143 frob_matrix_base = dlange(
'F', sz, sz, tmp1, sz, work)
1145 tmp1(j, j) = tmp1(j, j) + 1.0_dp
1147 frob_matrix = dlange(
'F', sz, sz, tmp1, sz, work)
1149 IF (sign_order .EQ. 2)
THEN
1153 tmp1(j, j) = tmp1(j, j) + 2.0_dp
1155 ELSE IF (sign_order .EQ. 3)
THEN
1157 tmp1 = tmp1*4.0_dp/3.0_dp
1159 tmp1(j, j) = tmp1(j, j) + 8.0_dp/3.0_dp
1161 CALL dgemm(
'N',
'N', sz, sz, sz, 1.0_dp, tmp2, sz, tmp2, sz, 1.0_dp, tmp1, sz)
1162 prefactor = 3.0_dp/8.0_dp
1164 cpabort(
"requested order is not implemented.")
1167 CALL dgemm(
'N',
'N', sz, sz, sz, prefactor, matrix_sign, sz, tmp1, sz, 0.0_dp, tmp2, sz)
1171 IF (frob_matrix*frob_matrix < (threshold*frob_matrix_base*frob_matrix_base))
THEN
1172 WRITE (unit_nr,
'(T6,A,1X,I6,1X,A,1X,I3,E12.3)') &
1173 "Submatrix", matrix_id,
"final NS sign iter", i, frob_matrix/frob_matrix_base
1174 CALL m_flush(unit_nr)
1180 IF (.NOT. converged) &
1181 cpabort(
"dense_matrix_sign_Newton_Schulz did not converge within 100 iterations")
1186 CALL timestop(handle)
1188 END SUBROUTINE dense_matrix_sign_newton_schulz
1200 SUBROUTINE eigdecomp(sm, N, eigvals, eigvecs)
1201 INTEGER,
INTENT(IN) :: n
1202 REAL(kind=dp),
INTENT(IN) :: sm(n, n)
1203 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:), &
1204 INTENT(OUT) :: eigvals
1205 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :), &
1206 INTENT(OUT) :: eigvecs
1208 INTEGER :: info, liwork, lwork
1209 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: iwork
1210 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:) :: work
1211 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: tmp
1213 ALLOCATE (eigvecs(n, n), tmp(n, n))
1214 ALLOCATE (eigvals(n))
1217 eigvecs(:, :) = 0.5*(sm + transpose(sm))
1224 CALL dsyevd(
'V',
'U', n, eigvecs, n, eigvals, work, lwork, iwork, liwork, info)
1225 lwork = int(work(1))
1226 liwork = int(iwork(1))
1231 ALLOCATE (work(lwork))
1232 ALLOCATE (iwork(liwork))
1233 CALL dsyevd(
'V',
'U', n, eigvecs, n, eigvals, work, lwork, iwork, liwork, info)
1236 IF (info .NE. 0) cpabort(
"dsyevd did not succeed")
1239 END SUBROUTINE eigdecomp
1252 SUBROUTINE sign_from_eigdecomp(sm_sign, eigvals, eigvecs, N, mu_correction)
1254 REAL(kind=dp),
INTENT(IN) :: eigvecs(n, n), eigvals(n)
1255 REAL(kind=dp),
INTENT(INOUT) :: sm_sign(n, n)
1256 REAL(kind=dp),
INTENT(IN) :: mu_correction
1259 REAL(kind=dp) :: modified_eigval, tmp(n, n)
1263 modified_eigval = eigvals(i) - mu_correction
1264 IF (modified_eigval > 0)
THEN
1266 ELSE IF (modified_eigval < 0)
THEN
1267 sm_sign(i, i) = -1.0
1275 CALL dgemm(
'N',
'N', n, n, n, 1.0_dp, eigvecs, n, sm_sign, n, 0.0_dp, tmp, n)
1276 CALL dgemm(
'N',
'T', n, n, n, 1.0_dp, tmp, n, eigvecs, n, 0.0_dp, sm_sign, n)
1277 END SUBROUTINE sign_from_eigdecomp
1291 FUNCTION trace_from_eigdecomp(eigvals, eigvecs, firstcol, lastcol, mu_correction)
RESULT(trace)
1292 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:), &
1293 INTENT(IN) :: eigvals
1294 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :), &
1295 INTENT(IN) :: eigvecs
1296 INTEGER,
INTENT(IN) :: firstcol, lastcol
1297 REAL(kind=dp),
INTENT(IN) :: mu_correction
1298 REAL(kind=dp) :: trace
1300 INTEGER :: i, j, sm_size
1301 REAL(kind=dp) :: modified_eigval, tmpsum
1302 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:) :: mapped_eigvals
1304 sm_size =
SIZE(eigvals)
1305 ALLOCATE (mapped_eigvals(sm_size))
1308 modified_eigval = eigvals(i) - mu_correction
1309 IF (modified_eigval > 0)
THEN
1310 mapped_eigvals(i) = 1.0
1311 ELSE IF (modified_eigval < 0)
THEN
1312 mapped_eigvals(i) = -1.0
1314 mapped_eigvals(i) = 0.0
1319 DO i = firstcol, lastcol
1322 tmpsum = tmpsum + (eigvecs(i, j)*mapped_eigvals(j)*eigvecs(i, j))
1324 trace = trace - 0.5_dp*tmpsum + 0.5_dp
1326 END FUNCTION trace_from_eigdecomp
1338 SUBROUTINE dense_matrix_sign_direct(sm_sign, sm, N)
1339 INTEGER,
INTENT(IN) :: n
1340 REAL(kind=dp),
INTENT(IN) :: sm(n, n)
1341 REAL(kind=dp),
INTENT(INOUT) :: sm_sign(n, n)
1343 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:) :: eigvals
1344 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: eigvecs
1346 CALL eigdecomp(sm, n, eigvals, eigvecs)
1347 CALL sign_from_eigdecomp(sm_sign, eigvals, eigvecs, n, 0.0_dp)
1349 DEALLOCATE (eigvals, eigvecs)
1350 END SUBROUTINE dense_matrix_sign_direct
1366 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix_sign, matrix
1367 REAL(kind=dp),
INTENT(IN) :: threshold
1368 INTEGER,
INTENT(IN),
OPTIONAL :: sign_order
1369 INTEGER,
INTENT(IN) :: submatrix_sign_method
1371 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_sign_submatrix'
1373 INTEGER :: handle, i, myrank, nblkcols, order, &
1375 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: my_sms
1376 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: sm, sm_sign
1377 TYPE(cp_logger_type),
POINTER :: logger
1378 TYPE(dbcsr_distribution_type) :: dist
1379 TYPE(submatrix_dissection_type) :: dissection
1381 CALL timeset(routinen, handle)
1384 logger => cp_get_default_logger()
1385 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1387 IF (
PRESENT(sign_order))
THEN
1393 CALL dbcsr_get_info(matrix=matrix, nblkcols_total=nblkcols, distribution=dist)
1394 CALL dbcsr_distribution_get(dist=dist, mynode=myrank)
1396 CALL dissection%init(matrix)
1397 CALL dissection%get_sm_ids_for_rank(myrank, my_sms)
1403 DO i = 1,
SIZE(my_sms)
1404 WRITE (unit_nr,
'(T3,A,1X,I4,1X,A,1X,I6)')
"Rank", myrank,
"processing submatrix", my_sms(i)
1405 CALL dissection%generate_submatrix(my_sms(i), sm)
1406 sm_size =
SIZE(sm, 1)
1407 ALLOCATE (sm_sign(sm_size, sm_size))
1408 SELECT CASE (submatrix_sign_method)
1409 CASE (ls_scf_submatrix_sign_ns)
1410 CALL dense_matrix_sign_newton_schulz(sm_sign, sm, my_sms(i), threshold, order)
1411 CASE (ls_scf_submatrix_sign_direct, ls_scf_submatrix_sign_direct_muadj, ls_scf_submatrix_sign_direct_muadj_lowmem)
1412 CALL dense_matrix_sign_direct(sm_sign, sm, sm_size)
1414 cpabort(
"Unkown submatrix sign method.")
1416 CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1417 DEALLOCATE (sm, sm_sign)
1422 CALL dissection%communicate_results(matrix_sign)
1423 CALL dissection%final
1425 CALL timestop(handle)
1443 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix_sign, matrix
1444 REAL(kind=dp),
INTENT(INOUT) :: mu
1445 INTEGER,
INTENT(IN) :: nelectron
1446 REAL(kind=dp),
INTENT(IN) :: threshold
1447 INTEGER,
INTENT(IN) :: variant
1449 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_sign_submatrix_mu_adjust'
1450 REAL(kind=dp),
PARAMETER :: initial_increment = 0.01_dp
1452 INTEGER :: handle, i, j, myrank, nblkcols, &
1453 sm_firstcol, sm_lastcol, sm_size, &
1455 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: my_sms
1456 LOGICAL :: has_mu_high, has_mu_low
1457 REAL(kind=dp) :: increment, mu_high, mu_low, new_mu, trace
1458 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: sm, sm_sign, tmp
1459 TYPE(cp_logger_type),
POINTER :: logger
1460 TYPE(dbcsr_distribution_type) :: dist
1461 TYPE(eigbuf),
ALLOCATABLE,
DIMENSION(:) :: eigbufs
1462 TYPE(mp_comm_type) :: group
1463 TYPE(submatrix_dissection_type) :: dissection
1465 CALL timeset(routinen, handle)
1468 logger => cp_get_default_logger()
1469 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1471 CALL dbcsr_get_info(matrix=matrix, nblkcols_total=nblkcols, distribution=dist, group=group)
1472 CALL dbcsr_distribution_get(dist=dist, mynode=myrank)
1474 CALL dissection%init(matrix)
1475 CALL dissection%get_sm_ids_for_rank(myrank, my_sms)
1477 ALLOCATE (eigbufs(
SIZE(my_sms)))
1486 DO i = 1,
SIZE(my_sms)
1487 CALL dissection%generate_submatrix(my_sms(i), sm)
1488 sm_size =
SIZE(sm, 1)
1489 WRITE (unit_nr, *)
"Rank", myrank,
"processing submatrix", my_sms(i),
"size", sm_size
1491 CALL dissection%get_relevant_sm_columns(my_sms(i), sm_firstcol, sm_lastcol)
1493 IF (variant .EQ. ls_scf_submatrix_sign_direct_muadj)
THEN
1495 CALL eigdecomp(sm, sm_size, eigvals=eigbufs(i)%eigvals, eigvecs=eigbufs(i)%eigvecs)
1499 CALL eigdecomp(sm, sm_size, eigvals=eigbufs(i)%eigvals, eigvecs=tmp)
1500 ALLOCATE (eigbufs(i)%eigvecs(sm_firstcol:sm_lastcol, 1:sm_size))
1501 eigbufs(i)%eigvecs(:, :) = tmp(sm_firstcol:sm_lastcol, 1:sm_size)
1503 ALLOCATE (sm_sign(sm_size, sm_size))
1504 CALL sign_from_eigdecomp(sm_sign, eigbufs(i)%eigvals, tmp, sm_size, 0.0_dp)
1505 CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1506 DEALLOCATE (sm_sign, tmp)
1510 trace = trace + trace_from_eigdecomp(eigbufs(i)%eigvals, eigbufs(i)%eigvecs, sm_firstcol, sm_lastcol, 0.0_dp)
1515 has_mu_low = .false.
1516 has_mu_high = .false.
1517 increment = initial_increment
1520 CALL group%sum(trace)
1521 IF (unit_nr > 0)
WRITE (unit_nr,
'(T2,A,1X,F13.9,1X,F15.9)') &
1522 "Density matrix: mu, trace error: ", new_mu, trace - nelectron
1523 IF (abs(trace - nelectron) < 0.5_dp)
EXIT
1524 IF (trace < nelectron)
THEN
1526 new_mu = new_mu + increment
1528 increment = increment*2
1531 new_mu = new_mu - increment
1532 has_mu_high = .true.
1533 increment = increment*2
1536 IF (has_mu_low .AND. has_mu_high)
THEN
1537 new_mu = (mu_low + mu_high)/2
1538 IF (abs(mu_high - mu_low) < threshold)
EXIT
1547 DO j = 1,
SIZE(my_sms)
1548 sm_size =
SIZE(eigbufs(j)%eigvals)
1549 CALL dissection%get_relevant_sm_columns(my_sms(j), sm_firstcol, sm_lastcol)
1550 trace = trace + trace_from_eigdecomp(eigbufs(j)%eigvals, eigbufs(j)%eigvecs, sm_firstcol, sm_lastcol, new_mu - mu)
1557 IF (variant .EQ. ls_scf_submatrix_sign_direct_muadj)
THEN
1562 DO i = 1,
SIZE(my_sms)
1563 WRITE (unit_nr,
'(T3,A,1X,I4,1X,A,1X,I6)')
"Rank", myrank,
"finalizing submatrix", my_sms(i)
1564 sm_size =
SIZE(eigbufs(i)%eigvals)
1565 ALLOCATE (sm_sign(sm_size, sm_size))
1566 CALL sign_from_eigdecomp(sm_sign, eigbufs(i)%eigvals, eigbufs(i)%eigvecs, sm_size, new_mu - mu)
1567 CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1568 DEALLOCATE (sm_sign)
1574 DEALLOCATE (eigbufs)
1577 IF ((variant .EQ. ls_scf_submatrix_sign_direct_muadj_lowmem) .AND. (mu .NE. new_mu))
THEN
1582 DO i = 1,
SIZE(my_sms)
1583 WRITE (unit_nr,
'(T3,A,1X,I4,1X,A,1X,I6)')
"Rank", myrank,
"reprocessing submatrix", my_sms(i)
1584 CALL dissection%generate_submatrix(my_sms(i), sm)
1585 sm_size =
SIZE(sm, 1)
1587 sm(j, j) = sm(j, j) + mu - new_mu
1589 ALLOCATE (sm_sign(sm_size, sm_size))
1590 CALL dense_matrix_sign_direct(sm_sign, sm, sm_size)
1591 CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1592 DEALLOCATE (sm, sm_sign)
1600 CALL dissection%communicate_results(matrix_sign)
1601 CALL dissection%final
1603 CALL timestop(handle)
1625 eps_lanczos, max_iter_lanczos, symmetrize, converged, iounit)
1626 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix_sqrt, matrix_sqrt_inv, matrix
1627 REAL(kind=dp),
INTENT(IN) :: threshold
1628 INTEGER,
INTENT(IN) :: order
1629 REAL(kind=dp),
INTENT(IN) :: eps_lanczos
1630 INTEGER,
INTENT(IN) :: max_iter_lanczos
1631 LOGICAL,
OPTIONAL :: symmetrize, converged
1632 INTEGER,
INTENT(IN),
OPTIONAL :: iounit
1634 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_sqrt_Newton_Schulz'
1636 INTEGER :: handle, i, unit_nr
1637 INTEGER(KIND=int_8) :: flop1, flop2, flop3, flop4, flop5
1638 LOGICAL :: arnoldi_converged, tsym
1639 REAL(kind=dp) :: a, b, c, conv, d, frob_matrix, &
1640 frob_matrix_base, gershgorin_norm, &
1641 max_ev, min_ev, oa, ob, oc, &
1642 occ_matrix, od, scaling, t1, t2
1643 TYPE(cp_logger_type),
POINTER :: logger
1644 TYPE(dbcsr_type) :: tmp1, tmp2, tmp3
1646 CALL timeset(routinen, handle)
1648 IF (
PRESENT(iounit))
THEN
1651 logger => cp_get_default_logger()
1652 IF (logger%para_env%is_source())
THEN
1653 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1659 IF (
PRESENT(converged)) converged = .false.
1660 IF (
PRESENT(symmetrize))
THEN
1667 CALL dbcsr_create(tmp1, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1668 CALL dbcsr_create(tmp2, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1669 IF (order .GE. 4)
THEN
1670 CALL dbcsr_create(tmp3, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1673 CALL dbcsr_set(matrix_sqrt_inv, 0.0_dp)
1674 CALL dbcsr_add_on_diag(matrix_sqrt_inv, 1.0_dp)
1675 CALL dbcsr_filter(matrix_sqrt_inv, threshold)
1676 CALL dbcsr_copy(matrix_sqrt, matrix)
1679 IF (order == 0)
THEN
1681 gershgorin_norm = dbcsr_gershgorin_norm(matrix_sqrt)
1682 frob_matrix = dbcsr_frobenius_norm(matrix_sqrt)
1683 scaling = 1.0_dp/min(frob_matrix, gershgorin_norm)
1688 CALL arnoldi_extremal(matrix_sqrt, max_ev, min_ev, threshold=eps_lanczos, &
1689 max_iter=max_iter_lanczos, converged=arnoldi_converged)
1690 IF (unit_nr > 0)
THEN
1692 WRITE (unit_nr,
'(T6,A,1X,L1,A,E12.3)')
"Lanczos converged: ", arnoldi_converged,
" threshold:", eps_lanczos
1693 WRITE (unit_nr,
'(T6,A,1X,E12.3,E12.3)')
"Est. extremal eigenvalues:", max_ev, min_ev
1694 WRITE (unit_nr,
'(T6,A,1X,E12.3)')
"Est. condition number :", max_ev/max(min_ev, epsilon(min_ev))
1698 scaling = 2.0_dp/(max_ev + min_ev + 100*eps_lanczos)
1702 CALL dbcsr_scale(matrix_sqrt, scaling)
1703 CALL dbcsr_filter(matrix_sqrt, threshold)
1704 IF (unit_nr > 0)
THEN
1706 WRITE (unit_nr, *)
"Order=", order
1714 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_sqrt_inv, matrix_sqrt, 0.0_dp, tmp1, &
1715 filter_eps=threshold, flop=flop1)
1716 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
1717 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
1720 frob_matrix = dbcsr_frobenius_norm(tmp1)
1722 flop4 = 0; flop5 = 0
1726 CALL dbcsr_add_on_diag(tmp1, -2.0_dp)
1727 CALL dbcsr_scale(tmp1, -0.5_dp)
1730 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, tmp1, 0.0_dp, tmp2, &
1731 filter_eps=threshold, flop=flop4)
1733 CALL dbcsr_add(tmp1, tmp2, -4.0_dp, 3.0_dp)
1734 CALL dbcsr_add_on_diag(tmp1, 8.0_dp)
1735 CALL dbcsr_scale(tmp1, 0.125_dp)
1738 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, tmp1, 0.0_dp, tmp2, &
1739 filter_eps=threshold, flop=flop4)
1741 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp1, 0.0_dp, tmp3, &
1742 filter_eps=threshold, flop=flop5)
1743 CALL dbcsr_scale(tmp1, -8.0_dp)
1744 CALL dbcsr_add_on_diag(tmp1, 16.0_dp)
1745 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 6.0_dp)
1746 CALL dbcsr_add(tmp1, tmp3, 1.0_dp, -5.0_dp)
1747 CALL dbcsr_scale(tmp1, 1/16.0_dp)
1755 oa = -40.0_dp/35.0_dp
1756 ob = 48.0_dp/35.0_dp
1757 oc = -64.0_dp/35.0_dp
1758 od = 128.0_dp/35.0_dp
1760 b = ob*(a + 1) - oc - a*(a + 1)**2
1761 c = ob - b - a*(a + 1)
1764 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, tmp1, 0.0_dp, tmp2, &
1765 filter_eps=threshold, flop=flop4)
1766 CALL dbcsr_add(tmp2, tmp1, 1.0_dp, a)
1768 CALL dbcsr_copy(tmp3, tmp2)
1769 CALL dbcsr_add(tmp3, tmp1, 1.0_dp, 1.0_dp)
1770 CALL dbcsr_add_on_diag(tmp3, b)
1772 CALL dbcsr_add_on_diag(tmp2, c)
1774 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp1, &
1775 filter_eps=threshold, flop=flop5)
1777 CALL dbcsr_add_on_diag(tmp1, d)
1779 CALL dbcsr_scale(tmp1, 35.0_dp/128.0_dp)
1781 cpabort(
"Illegal order value")
1785 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_sqrt, tmp1, 0.0_dp, tmp2, &
1786 filter_eps=threshold, flop=flop2)
1788 CALL dbcsr_copy(matrix_sqrt, tmp2)
1791 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, matrix_sqrt_inv, 0.0_dp, tmp2, &
1792 filter_eps=threshold, flop=flop3)
1794 CALL dbcsr_copy(matrix_sqrt_inv, tmp2)
1796 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
1801 conv = frob_matrix/frob_matrix_base
1803 IF (unit_nr > 0)
THEN
1804 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)')
"NS sqrt iter ", i, occ_matrix, &
1806 (flop1 + flop2 + flop3 + flop4 + flop5)/(1.0e6_dp*max(0.001_dp, t2 - t1))
1807 CALL m_flush(unit_nr)
1810 IF (abnormal_value(conv)) &
1811 cpabort(
"conv is an abnormal value (NaN/Inf).")
1814 IF ((conv*conv) < threshold)
THEN
1815 IF (
PRESENT(converged)) converged = .true.
1823 IF (unit_nr > 0)
THEN
1824 WRITE (unit_nr,
'(T6,A20)')
"Symmetrizing Results"
1826 CALL dbcsr_transposed(tmp1, matrix_sqrt_inv)
1827 CALL dbcsr_add(matrix_sqrt_inv, tmp1, 0.5_dp, 0.5_dp)
1828 CALL dbcsr_transposed(tmp1, matrix_sqrt)
1829 CALL dbcsr_add(matrix_sqrt, tmp1, 0.5_dp, 0.5_dp)
1833 CALL dbcsr_multiply(
"N",
"N", +1.0_dp, matrix_sqrt_inv, matrix_sqrt, 0.0_dp, tmp1, &
1834 filter_eps=threshold)
1835 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
1836 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
1837 frob_matrix = dbcsr_frobenius_norm(tmp1)
1838 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
1839 IF (unit_nr > 0)
THEN
1840 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3)')
"Final NS sqrt iter ", i, occ_matrix, &
1841 frob_matrix/frob_matrix_base
1842 WRITE (unit_nr,
'()')
1843 CALL m_flush(unit_nr)
1847 CALL dbcsr_scale(matrix_sqrt, 1/sqrt(scaling))
1848 CALL dbcsr_scale(matrix_sqrt_inv, sqrt(scaling))
1850 CALL dbcsr_release(tmp1)
1851 CALL dbcsr_release(tmp2)
1852 IF (order .GE. 4)
THEN
1853 CALL dbcsr_release(tmp3)
1856 CALL timestop(handle)
1877 eps_lanczos, max_iter_lanczos, symmetrize, converged)
1878 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix_sqrt, matrix_sqrt_inv, matrix
1879 REAL(kind=dp),
INTENT(IN) :: threshold
1880 INTEGER,
INTENT(IN) :: order
1881 REAL(kind=dp),
INTENT(IN) :: eps_lanczos
1882 INTEGER,
INTENT(IN) :: max_iter_lanczos
1883 LOGICAL,
OPTIONAL :: symmetrize, converged
1885 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_sqrt_proot'
1887 INTEGER :: choose, handle, i, ii, j, unit_nr
1888 INTEGER(KIND=int_8) :: f, flop1, flop2, flop3, flop4, flop5
1889 LOGICAL :: arnoldi_converged, test, tsym
1890 REAL(kind=dp) :: conv, frob_matrix, frob_matrix_base, &
1891 max_ev, min_ev, occ_matrix, scaling, &
1893 TYPE(cp_logger_type),
POINTER :: logger
1894 TYPE(dbcsr_type) :: bk2a, matrixs, rmat, tmp1, tmp2, tmp3
1896 CALL cite_reference(richters2018)
1900 CALL timeset(routinen, handle)
1902 logger => cp_get_default_logger()
1903 IF (logger%para_env%is_source())
THEN
1904 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1909 IF (
PRESENT(converged)) converged = .false.
1910 IF (
PRESENT(symmetrize))
THEN
1917 CALL dbcsr_create(tmp1, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1918 CALL dbcsr_create(tmp2, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1919 CALL dbcsr_create(tmp3, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1920 CALL dbcsr_create(rmat, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1921 CALL dbcsr_create(matrixs, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1923 CALL dbcsr_copy(matrixs, matrix)
1926 CALL arnoldi_extremal(matrixs, max_ev, min_ev, threshold=eps_lanczos, &
1927 max_iter=max_iter_lanczos, converged=arnoldi_converged)
1928 IF (unit_nr > 0)
THEN
1930 WRITE (unit_nr,
'(T6,A,1X,L1,A,E12.3)')
"Lanczos converged: ", arnoldi_converged,
" threshold:", eps_lanczos
1931 WRITE (unit_nr,
'(T6,A,1X,E12.3,E12.3)')
"Est. extremal eigenvalues:", max_ev, min_ev
1932 WRITE (unit_nr,
'(T6,A,1X,E12.3)')
"Est. condition number :", max_ev/max(min_ev, epsilon(min_ev))
1936 scaling = 2.0_dp/(max_ev + min_ev + 100*eps_lanczos)
1937 CALL dbcsr_scale(matrixs, scaling)
1938 CALL dbcsr_filter(matrixs, threshold)
1943 CALL dbcsr_set(matrix_sqrt_inv, 0.0_dp)
1944 CALL dbcsr_add_on_diag(matrix_sqrt_inv, 1.0_dp)
1947 IF (unit_nr > 0)
THEN
1949 WRITE (unit_nr, *)
"Order=", order
1957 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_sqrt_inv, matrix_sqrt_inv, 0.0_dp, tmp1, &
1958 filter_eps=threshold, flop=flop1)
1959 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrixs, tmp1, 0.0_dp, rmat, &
1960 filter_eps=threshold, flop=flop2)
1961 CALL dbcsr_scale(rmat, -1.0_dp)
1962 CALL dbcsr_add_on_diag(rmat, 1.0_dp)
1964 flop4 = 0; flop5 = 0
1965 CALL dbcsr_set(tmp1, 0.0_dp)
1966 CALL dbcsr_add_on_diag(tmp1, 2.0_dp)
1972 CALL dbcsr_copy(tmp2, rmat)
1975 CALL dbcsr_copy(tmp3, tmp2)
1976 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, rmat, 0.0_dp, tmp2, &
1977 filter_eps=threshold, flop=f)
1980 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 1.0_dp)
1983 CALL dbcsr_create(bk2a, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1984 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_sqrt_inv, matrixs, 0.0_dp, tmp3, &
1985 filter_eps=threshold, flop=flop1)
1986 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_sqrt_inv, tmp3, 0.0_dp, bk2a, &
1987 filter_eps=threshold, flop=flop2)
1988 CALL dbcsr_copy(rmat, bk2a)
1989 CALL dbcsr_add_on_diag(rmat, -1.0_dp)
1991 CALL dbcsr_set(tmp1, 0.0_dp)
1992 CALL dbcsr_add_on_diag(tmp1, 1.0_dp)
1994 CALL dbcsr_set(tmp2, 0.0_dp)
1995 CALL dbcsr_add_on_diag(tmp2, 1.0_dp)
2000 choose = product((/(ii, ii=1, order)/))/(product((/(ii, ii=1, j)/))*product((/(ii, ii=1, order - j)/)))
2001 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, -1.0_dp*(-1)**j*choose)
2002 IF (j .LT. order)
THEN
2004 CALL dbcsr_copy(tmp3, tmp2)
2005 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, bk2a, 0.0_dp, tmp2, &
2006 filter_eps=threshold, flop=f)
2010 CALL dbcsr_release(bk2a)
2013 CALL dbcsr_copy(tmp3, matrix_sqrt_inv)
2014 CALL dbcsr_multiply(
"N",
"N", 0.5_dp, tmp3, tmp1, 0.0_dp, matrix_sqrt_inv, &
2015 filter_eps=threshold, flop=flop4)
2017 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
2022 conv = dbcsr_frobenius_norm(rmat)
2024 IF (unit_nr > 0)
THEN
2025 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)')
"PROOT sqrt iter ", i, occ_matrix, &
2027 (flop1 + flop2 + flop3 + flop4 + flop5)/(1.0e6_dp*max(0.001_dp, t2 - t1))
2028 CALL m_flush(unit_nr)
2031 IF (abnormal_value(conv)) &
2032 cpabort(
"conv is an abnormal value (NaN/Inf).")
2035 IF ((conv*conv) < threshold)
THEN
2036 IF (
PRESENT(converged)) converged = .true.
2043 CALL dbcsr_scale(matrix_sqrt_inv, sqrt(scaling))
2044 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_sqrt_inv, matrix, 0.0_dp, matrix_sqrt, &
2045 filter_eps=threshold, flop=flop5)
2049 IF (unit_nr > 0)
THEN
2050 WRITE (unit_nr,
'(A20)')
"SYMMETRIZING RESULTS"
2052 CALL dbcsr_transposed(tmp1, matrix_sqrt_inv)
2053 CALL dbcsr_add(matrix_sqrt_inv, tmp1, 0.5_dp, 0.5_dp)
2054 CALL dbcsr_transposed(tmp1, matrix_sqrt)
2055 CALL dbcsr_add(matrix_sqrt, tmp1, 0.5_dp, 0.5_dp)
2060 CALL dbcsr_multiply(
"N",
"N", +1.0_dp, matrix_sqrt_inv, matrix_sqrt, 0.0_dp, tmp1, &
2061 filter_eps=threshold)
2062 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
2063 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
2064 frob_matrix = dbcsr_frobenius_norm(tmp1)
2065 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
2066 IF (unit_nr > 0)
THEN
2067 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3)')
"Final PROOT S^{-1/2} S^{1/2}-Eins error ", i, occ_matrix, &
2068 frob_matrix/frob_matrix_base
2069 WRITE (unit_nr,
'()')
2070 CALL m_flush(unit_nr)
2074 CALL dbcsr_multiply(
"N",
"N", +1.0_dp, matrix_sqrt_inv, matrix_sqrt_inv, 0.0_dp, tmp2, &
2075 filter_eps=threshold)
2076 CALL dbcsr_multiply(
"N",
"N", +1.0_dp, tmp2, matrix, 0.0_dp, tmp1, &
2077 filter_eps=threshold)
2078 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
2079 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
2080 frob_matrix = dbcsr_frobenius_norm(tmp1)
2081 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
2082 IF (unit_nr > 0)
THEN
2083 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3)')
"Final PROOT S^{-1/2} S^{-1/2} S-Eins error ", i, occ_matrix, &
2084 frob_matrix/frob_matrix_base
2085 WRITE (unit_nr,
'()')
2086 CALL m_flush(unit_nr)
2090 CALL dbcsr_release(tmp1)
2091 CALL dbcsr_release(tmp2)
2092 CALL dbcsr_release(tmp3)
2093 CALL dbcsr_release(rmat)
2094 CALL dbcsr_release(matrixs)
2096 CALL timestop(handle)
2109 TYPE(dbcsr_type),
INTENT(INOUT) ::
matrix_exp, matrix
2110 REAL(kind=dp),
INTENT(IN) :: omega, alpha, threshold
2112 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_exponential'
2113 REAL(dp),
PARAMETER :: one = 1.0_dp, toll = 1.e-17_dp, &
2116 INTEGER :: handle, i, k, unit_nr
2117 REAL(dp) :: factorial, norm_c, norm_d, norm_scalar
2118 TYPE(cp_logger_type),
POINTER :: logger
2119 TYPE(dbcsr_type) :: b, b_square, c, d, d_product
2121 CALL timeset(routinen, handle)
2123 logger => cp_get_default_logger()
2124 IF (logger%para_env%is_source())
THEN
2125 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
2131 norm_scalar = abs(alpha)*dbcsr_frobenius_norm(matrix)
2136 IF ((norm_scalar/2.0_dp**k) <= one)
EXIT
2141 CALL dbcsr_create(c, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2142 CALL dbcsr_copy(c, matrix)
2143 CALL dbcsr_scale(c, alpha_scalar=alpha/2.0_dp**k)
2145 CALL dbcsr_create(d, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2146 CALL dbcsr_copy(d, c)
2153 CALL dbcsr_create(b, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2154 CALL dbcsr_copy(b, d)
2155 CALL dbcsr_add_on_diag(b, alpha=one)
2160 norm_c = toll*dbcsr_frobenius_norm(matrix)
2163 CALL dbcsr_create(d_product, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2168 CALL dbcsr_multiply(
"N",
"N", one, d, c, &
2169 zero, d_product, filter_eps=threshold)
2172 CALL dbcsr_copy(d, d_product)
2176 CALL dbcsr_add(b, d_product, one, factorial)
2179 norm_d = factorial*dbcsr_frobenius_norm(d)
2180 IF (norm_d < norm_c)
EXIT
2184 CALL dbcsr_create(b_square, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2187 CALL dbcsr_multiply(
"N",
"N", one, b, b, &
2188 zero, b_square, filter_eps=threshold)
2190 CALL dbcsr_copy(b, b_square)
2197 CALL dbcsr_scale(
matrix_exp, alpha_scalar=omega)
2200 CALL dbcsr_release(b)
2201 CALL dbcsr_release(c)
2202 CALL dbcsr_release(d)
2203 CALL dbcsr_release(d_product)
2204 CALL dbcsr_release(b_square)
2206 CALL timestop(handle)
2220 SUBROUTINE purify_mcweeny_orth(matrix_p, threshold, max_steps)
2221 TYPE(dbcsr_type),
DIMENSION(:) :: matrix_p
2222 REAL(KIND=dp) :: threshold
2223 INTEGER :: max_steps
2225 CHARACTER(LEN=*),
PARAMETER :: routineN =
'purify_mcweeny_orth'
2227 INTEGER :: handle, i, ispin, unit_nr
2228 REAL(KIND=dp) :: frob_norm, trace
2229 TYPE(cp_logger_type),
POINTER :: logger
2230 TYPE(dbcsr_type) :: matrix_pp, matrix_tmp
2232 CALL timeset(routinen, handle)
2233 logger => cp_get_default_logger()
2234 IF (logger%para_env%is_source())
THEN
2235 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
2240 CALL dbcsr_create(matrix_pp, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2241 CALL dbcsr_create(matrix_tmp, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2242 CALL dbcsr_trace(matrix_p(1), trace)
2244 DO ispin = 1,
SIZE(matrix_p)
2246 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_p(ispin), matrix_p(ispin), &
2247 0.0_dp, matrix_pp, filter_eps=threshold)
2250 CALL dbcsr_copy(matrix_tmp, matrix_pp)
2251 CALL dbcsr_add(matrix_tmp, matrix_p(ispin), 1.0_dp, -1.0_dp)
2252 frob_norm = dbcsr_frobenius_norm(matrix_tmp)
2253 IF (unit_nr > 0)
WRITE (unit_nr,
'(t3,a,f16.8)')
"McWeeny: Deviation of idempotency", frob_norm
2254 IF (unit_nr > 0)
CALL m_flush(unit_nr)
2257 CALL dbcsr_copy(matrix_tmp, matrix_pp)
2258 CALL dbcsr_multiply(
"N",
"N", -2.0_dp, matrix_pp, matrix_p(ispin), &
2259 3.0_dp, matrix_tmp, filter_eps=threshold)
2260 CALL dbcsr_copy(matrix_p(ispin), matrix_tmp)
2263 IF (frob_norm*frob_norm < trace*threshold)
EXIT
2267 CALL dbcsr_release(matrix_pp)
2268 CALL dbcsr_release(matrix_tmp)
2269 CALL timestop(handle)
2270 END SUBROUTINE purify_mcweeny_orth
2283 SUBROUTINE purify_mcweeny_nonorth(matrix_p, matrix_s, threshold, max_steps)
2284 TYPE(dbcsr_type),
DIMENSION(:) :: matrix_p
2285 TYPE(dbcsr_type) :: matrix_s
2286 REAL(KIND=dp) :: threshold
2287 INTEGER :: max_steps
2289 CHARACTER(LEN=*),
PARAMETER :: routineN =
'purify_mcweeny_nonorth'
2291 INTEGER :: handle, i, ispin, unit_nr
2292 REAL(KIND=dp) :: frob_norm, trace
2293 TYPE(cp_logger_type),
POINTER :: logger
2294 TYPE(dbcsr_type) :: matrix_ps, matrix_psp, matrix_test
2296 CALL timeset(routinen, handle)
2298 logger => cp_get_default_logger()
2299 IF (logger%para_env%is_source())
THEN
2300 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
2305 CALL dbcsr_create(matrix_ps, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2306 CALL dbcsr_create(matrix_psp, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2307 CALL dbcsr_create(matrix_test, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2309 DO ispin = 1,
SIZE(matrix_p)
2311 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_p(ispin), matrix_s, &
2312 0.0_dp, matrix_ps, filter_eps=threshold)
2313 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_ps, matrix_p(ispin), &
2314 0.0_dp, matrix_psp, filter_eps=threshold)
2315 IF (i == 1)
CALL dbcsr_trace(matrix_ps, trace)
2318 CALL dbcsr_copy(matrix_test, matrix_psp)
2319 CALL dbcsr_add(matrix_test, matrix_p(ispin), 1.0_dp, -1.0_dp)
2320 frob_norm = dbcsr_frobenius_norm(matrix_test)
2321 IF (unit_nr > 0)
WRITE (unit_nr,
'(t3,a,2f16.8)')
"McWeeny: Deviation of idempotency", frob_norm
2322 IF (unit_nr > 0)
CALL m_flush(unit_nr)
2325 CALL dbcsr_copy(matrix_p(ispin), matrix_psp)
2326 CALL dbcsr_multiply(
"N",
"N", -2.0_dp, matrix_ps, matrix_psp, &
2327 3.0_dp, matrix_p(ispin), filter_eps=threshold)
2330 IF (frob_norm*frob_norm < trace*threshold)
EXIT
2334 CALL dbcsr_release(matrix_ps)
2335 CALL dbcsr_release(matrix_psp)
2336 CALL dbcsr_release(matrix_test)
2337 CALL timestop(handle)
2338 END SUBROUTINE purify_mcweeny_nonorth
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
arnoldi iteration using dbcsr
subroutine, public arnoldi_extremal(matrix_a, max_ev, min_ev, converged, threshold, max_iter)
simple wrapper to estimate extremal eigenvalues with arnoldi, using the old lanczos interface this hi...
collects all references to literature in CP2K as new algorithms / method are included from literature...
integer, save, public richters2018
subroutine, public dbcsr_transposed(transposed, normal, shallow_data_copy, transpose_distribution, use_distribution)
...
subroutine, public dbcsr_scale(matrix, alpha_scalar)
...
character function, public dbcsr_get_matrix_type(matrix)
...
subroutine, public dbcsr_desymmetrize(matrix_a, matrix_b)
...
subroutine, public dbcsr_copy(matrix_b, matrix_a, name, keep_sparsity, keep_imaginary)
...
subroutine, public dbcsr_multiply(transa, transb, alpha, matrix_a, matrix_b, beta, matrix_c, first_row, last_row, first_column, last_column, first_k, last_k, retain_sparsity, filter_eps, flop)
...
subroutine, public dbcsr_get_info(matrix, nblkrows_total, nblkcols_total, nfullrows_total, nfullcols_total, nblkrows_local, nblkcols_local, nfullrows_local, nfullcols_local, my_prow, my_pcol, local_rows, local_cols, proc_row_dist, proc_col_dist, row_blk_size, col_blk_size, row_blk_offset, col_blk_offset, distribution, name, matrix_type, group)
...
subroutine, public dbcsr_filter(matrix, eps)
...
real(kind=dp) function, public dbcsr_get_occupation(matrix)
...
subroutine, public dbcsr_set(matrix, alpha)
...
subroutine, public dbcsr_release(matrix)
...
subroutine, public dbcsr_add(matrix_a, matrix_b, alpha_scalar, beta_scalar)
...
subroutine, public dbcsr_distribution_get(dist, row_dist, col_dist, nrows, ncols, has_threads, group, mynode, numnodes, nprows, npcols, myprow, mypcol, pgrid, subgroups_defined, prow_group, pcol_group)
...
subroutine, public dbcsr_set_diag(matrix, diag)
Copies the diagonal elements from the given array into the given matrix.
real(dp) function, public dbcsr_gershgorin_norm(matrix)
Compute the gershgorin norm of a dbcsr matrix.
subroutine, public dbcsr_get_diag(matrix, diag)
Copies the diagonal elements from the given matrix into the given array.
subroutine, public dbcsr_add_on_diag(matrix, alpha)
Adds the given scalar to the diagonal of the matrix. Reserves any missing diagonal blocks.
real(dp) function, public dbcsr_maxabs(matrix)
Compute the maxabs norm of a dbcsr matrix.
subroutine, public dbcsr_trace(matrix, trace)
Computes the trace of the given matrix, also known as the sum of its diagonal elements.
real(dp) function, public dbcsr_frobenius_norm(matrix)
Compute the frobenius norm of a dbcsr matrix.
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
Routines useful for iterative matrix calculations.
recursive subroutine, public determinant(matrix, det, threshold)
Computes the determinant of a symmetric positive definite matrix using the trace of the matrix logari...
subroutine, public matrix_sign_newton_schulz(matrix_sign, matrix, threshold, sign_order, iounit)
compute the sign a matrix using Newton-Schulz iterations
subroutine, public matrix_sign_submatrix(matrix_sign, matrix, threshold, sign_order, submatrix_sign_method)
Submatrix method.
subroutine, public invert_taylor(matrix_inverse, matrix, threshold, use_inv_as_guess, norm_convergence, filter_eps, accelerator_order, max_iter_lanczos, eps_lanczos, silent)
invert a symmetric positive definite diagonally dominant matrix
subroutine, public matrix_exponential(matrix_exp, matrix, omega, alpha, threshold)
...
subroutine, public matrix_sign_submatrix_mu_adjust(matrix_sign, matrix, mu, nelectron, threshold, variant)
Submatrix method with internal adjustment of chemical potential.
subroutine, public invert_hotelling(matrix_inverse, matrix, threshold, use_inv_as_guess, norm_convergence, filter_eps, accelerator_order, max_iter_lanczos, eps_lanczos, silent)
invert a symmetric positive definite matrix by Hotelling's method explicit symmetrization makes this ...
subroutine, public matrix_sign_proot(matrix_sign, matrix, threshold, sign_order)
compute the sign a matrix using the general algorithm for the p-th root of Richters et al....
subroutine, public matrix_sqrt_newton_schulz(matrix_sqrt, matrix_sqrt_inv, matrix, threshold, order, eps_lanczos, max_iter_lanczos, symmetrize, converged, iounit)
compute the sqrt of a matrix via the sign function and the corresponding Newton-Schulz iterations the...
subroutine, public matrix_sqrt_proot(matrix_sqrt, matrix_sqrt_inv, matrix, threshold, order, eps_lanczos, max_iter_lanczos, symmetrize, converged)
compute the sqrt of a matrix via the general algorithm for the p-th root of Richters et al....
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition of mathematical constants and functions.
real(kind=dp), dimension(0:maxfac), parameter, public ifac
Collection of simple mathematical functions and subroutines.
logical function, public abnormal_value(a)
determines if a value is not normal (e.g. for Inf and Nan) based on IO to work also under optimizatio...
Routines for calculating a complex matrix exponential.
Interface to the message passing library MPI.
type of a logger, at the moment it contains just a print level starting at which level it should be l...