21 USE dbcsr_api,
ONLY: &
22 dbcsr_add, dbcsr_add_on_diag, dbcsr_copy, dbcsr_create, dbcsr_desymmetrize, &
23 dbcsr_distribution_get, dbcsr_distribution_type, dbcsr_filter, dbcsr_frobenius_norm, &
24 dbcsr_gershgorin_norm, dbcsr_get_diag, dbcsr_get_info, dbcsr_get_matrix_type, &
25 dbcsr_get_occupation, dbcsr_multiply, dbcsr_norm, dbcsr_norm_maxabsnorm, dbcsr_p_type, &
26 dbcsr_release, dbcsr_scale, dbcsr_set, dbcsr_set_diag, dbcsr_trace, dbcsr_transposed, &
27 dbcsr_type, dbcsr_type_no_symmetry
40 #include "./base/base_uses.f90"
46 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'iterate_matrix'
49 REAL(KIND=
dp),
DIMENSION(:),
ALLOCATABLE :: eigvals
50 REAL(KIND=
dp),
DIMENSION(:, :),
ALLOCATABLE :: eigvecs
53 INTERFACE purify_mcweeny
54 MODULE PROCEDURE purify_mcweeny_orth, purify_mcweeny_nonorth
81 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix
82 REAL(kind=
dp),
INTENT(INOUT) :: det
83 REAL(kind=
dp),
INTENT(IN) :: threshold
85 CHARACTER(LEN=*),
PARAMETER :: routinen =
'determinant'
87 INTEGER :: handle, i, max_iter_lanczos, nsize, &
88 order_lanczos, sign_iter, unit_nr
89 INTEGER(KIND=int_8) :: flop1
90 INTEGER,
SAVE :: recursion_depth = 0
91 REAL(kind=
dp) :: det0, eps_lanczos, frobnorm, maxnorm, &
92 occ_matrix, t1, t2, trace
93 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: diagonal
94 TYPE(cp_logger_type),
POINTER :: logger
95 TYPE(dbcsr_type) :: tmp1, tmp2, tmp3
97 CALL timeset(routinen, handle)
101 IF (logger%para_env%is_source())
THEN
111 CALL dbcsr_create(tmp1, template=matrix, &
112 matrix_type=dbcsr_type_no_symmetry)
113 CALL dbcsr_create(tmp2, template=matrix, &
114 matrix_type=dbcsr_type_no_symmetry)
115 CALL dbcsr_create(tmp3, template=matrix, &
116 matrix_type=dbcsr_type_no_symmetry)
120 TYPE(mp_comm_type) :: group
121 INTEGER :: group_handle
122 CALL dbcsr_get_info(matrix, nfullrows_total=nsize, group=group_handle)
123 CALL group%set_handle(group_handle)
124 ALLOCATE (diagonal(nsize))
125 CALL dbcsr_get_diag(matrix, diagonal)
126 CALL group%sum(diagonal)
127 det = product(diagonal)
131 diagonal(:) = 1.0_dp/(sqrt(diagonal(:)))
133 CALL dbcsr_desymmetrize(matrix, tmp1)
134 CALL dbcsr_set(tmp1, 0.0_dp)
135 CALL dbcsr_set_diag(tmp1, diagonal)
136 CALL dbcsr_filter(tmp1, threshold)
137 DEALLOCATE (diagonal)
141 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, &
145 filter_eps=threshold)
146 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, &
150 filter_eps=threshold)
153 CALL dbcsr_add_on_diag(tmp2, -1.0_dp)
154 frobnorm = dbcsr_frobenius_norm(tmp2)
155 IF (unit_nr > 0)
THEN
156 IF (recursion_depth .EQ. 0)
THEN
157 WRITE (unit_nr,
'()')
159 WRITE (unit_nr,
'(T6,A28,1X,I15)') &
160 "Recursive iteration:", recursion_depth
162 WRITE (unit_nr,
'(T6,A28,1X,F15.10)') &
163 "Frobenius norm:", frobnorm
167 IF (frobnorm .GE. 1.0_dp)
THEN
169 CALL dbcsr_add_on_diag(tmp2, 1.0_dp)
172 eps_lanczos = 1.0e-4_dp
173 max_iter_lanczos = 40
178 threshold=threshold, &
179 order=order_lanczos, &
180 eps_lanczos=eps_lanczos, &
181 max_iter_lanczos=max_iter_lanczos)
182 recursion_depth = recursion_depth + 1
184 recursion_depth = recursion_depth - 1
190 CALL dbcsr_copy(tmp1, tmp2)
194 IF (unit_nr > 0)
WRITE (unit_nr, *)
205 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, tmp2, &
207 filter_eps=threshold, &
209 CALL dbcsr_copy(tmp1, tmp3)
212 CALL dbcsr_trace(tmp1, trace)
213 trace = trace*sign_iter/(1.0_dp*(i + 1))
214 sign_iter = -sign_iter
219 occ_matrix = dbcsr_get_occupation(tmp1)
220 CALL dbcsr_norm(tmp1, &
221 dbcsr_norm_maxabsnorm, norm_scalar=maxnorm)
225 IF (unit_nr > 0)
THEN
226 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F7.5,F16.10,F10.3,F11.3)') &
227 "Determinant iter", i, occ_matrix, &
229 flop1/(1.0e6_dp*max(0.001_dp, t2 - t1))
234 IF (maxnorm < threshold)
EXIT
238 IF (unit_nr > 0)
THEN
239 WRITE (unit_nr,
'()')
245 IF (unit_nr > 0)
THEN
246 IF (recursion_depth .EQ. 0)
THEN
247 WRITE (unit_nr,
'(T6,A28,1X,F15.10)') &
248 "Final determinant:", det
249 WRITE (unit_nr,
'()')
251 WRITE (unit_nr,
'(T6,A28,1X,F15.10)') &
252 "Recursive determinant:", det
257 CALL dbcsr_release(tmp1)
258 CALL dbcsr_release(tmp2)
259 CALL dbcsr_release(tmp3)
261 CALL timestop(handle)
282 SUBROUTINE invert_taylor(matrix_inverse, matrix, threshold, use_inv_as_guess, &
283 norm_convergence, filter_eps, accelerator_order, &
284 max_iter_lanczos, eps_lanczos, silent)
286 TYPE(dbcsr_type),
INTENT(INOUT),
TARGET :: matrix_inverse, matrix
287 REAL(kind=dp),
INTENT(IN) :: threshold
288 LOGICAL,
INTENT(IN),
OPTIONAL :: use_inv_as_guess
289 REAL(kind=dp),
INTENT(IN),
OPTIONAL :: norm_convergence, filter_eps
290 INTEGER,
INTENT(IN),
OPTIONAL :: accelerator_order, max_iter_lanczos
291 REAL(kind=dp),
INTENT(IN),
OPTIONAL :: eps_lanczos
292 LOGICAL,
INTENT(IN),
OPTIONAL :: silent
294 CHARACTER(LEN=*),
PARAMETER :: routinen =
'invert_Taylor'
296 INTEGER :: accelerator_type, handle, i, &
297 my_max_iter_lanczos, nrows, unit_nr
298 INTEGER(KIND=int_8) :: flop2
299 LOGICAL :: converged, use_inv_guess
300 REAL(kind=dp) :: coeff, convergence, maxnorm_matrix, &
301 my_eps_lanczos, occ_matrix, t1, t2
302 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:) :: p_diagonal
303 TYPE(cp_logger_type),
POINTER :: logger
304 TYPE(dbcsr_type),
TARGET :: tmp1, tmp2, tmp3_sym
306 CALL timeset(routinen, handle)
308 logger => cp_get_default_logger()
309 IF (logger%para_env%is_source())
THEN
310 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
314 IF (
PRESENT(silent))
THEN
315 IF (silent) unit_nr = -1
318 convergence = threshold
319 IF (
PRESENT(norm_convergence)) convergence = norm_convergence
322 IF (
PRESENT(accelerator_order)) accelerator_type = accelerator_order
323 IF (accelerator_type .GT. 1) accelerator_type = 1
325 use_inv_guess = .false.
326 IF (
PRESENT(use_inv_as_guess)) use_inv_guess = use_inv_as_guess
328 my_max_iter_lanczos = 64
329 my_eps_lanczos = 1.0e-3_dp
330 IF (
PRESENT(max_iter_lanczos)) my_max_iter_lanczos = max_iter_lanczos
331 IF (
PRESENT(eps_lanczos)) my_eps_lanczos = eps_lanczos
333 CALL dbcsr_create(tmp1, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
334 CALL dbcsr_create(tmp2, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
335 CALL dbcsr_create(tmp3_sym, template=matrix_inverse)
337 CALL dbcsr_get_info(matrix, nfullrows_total=nrows)
338 ALLOCATE (p_diagonal(nrows))
341 IF (.NOT. use_inv_guess)
THEN
343 SELECT CASE (accelerator_type)
346 CALL dbcsr_desymmetrize(matrix, tmp1)
347 p_diagonal(:) = 0.0_dp
348 CALL dbcsr_set_diag(tmp1, p_diagonal)
351 CALL dbcsr_get_diag(matrix, p_diagonal)
353 IF (p_diagonal(i) .NE. 0.0_dp)
THEN
354 p_diagonal(i) = 1.0_dp/p_diagonal(i)
357 CALL dbcsr_set(matrix_inverse, 0.0_dp)
358 CALL dbcsr_add_on_diag(matrix_inverse, 1.0_dp)
359 CALL dbcsr_set_diag(matrix_inverse, p_diagonal)
361 cpabort(
"Illegal accelerator order")
366 cpabort(
"Guess is NYI")
370 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, matrix_inverse, &
371 0.0_dp, tmp2, filter_eps=filter_eps)
373 IF (unit_nr > 0)
WRITE (unit_nr, *)
380 CALL dbcsr_desymmetrize(matrix_inverse, tmp1)
385 coeff = -1.0_dp*coeff
386 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, tmp2, 0.0_dp, &
388 flop=flop2, filter_eps=filter_eps)
390 CALL dbcsr_add(matrix_inverse, tmp3_sym, 1.0_dp, coeff)
391 CALL dbcsr_release(tmp1)
392 CALL dbcsr_create(tmp1, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
393 CALL dbcsr_desymmetrize(tmp3_sym, tmp1)
396 CALL dbcsr_norm(tmp3_sym, &
397 dbcsr_norm_maxabsnorm, norm_scalar=maxnorm_matrix)
400 occ_matrix = dbcsr_get_occupation(matrix_inverse)
402 IF (unit_nr > 0)
THEN
403 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)')
"Taylor iter", i, occ_matrix, &
404 maxnorm_matrix, t2 - t1, &
405 flop2/(1.0e6_dp*max(0.001_dp, t2 - t1))
406 CALL m_flush(unit_nr)
409 IF (maxnorm_matrix < convergence)
THEN
419 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix, matrix_inverse, 0.0_dp, tmp1, &
420 filter_eps=filter_eps)
421 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
423 CALL dbcsr_norm(tmp1, dbcsr_norm_maxabsnorm, norm_scalar=maxnorm_matrix)
424 IF (unit_nr > 0)
THEN
425 WRITE (unit_nr,
'(T6,A,E12.5)')
"Final Taylor error", maxnorm_matrix
426 WRITE (unit_nr,
'()')
427 CALL m_flush(unit_nr)
429 IF (maxnorm_matrix > convergence)
THEN
431 IF (unit_nr > 0)
THEN
432 WRITE (unit_nr, *)
'Final convergence check failed'
436 IF (.NOT. converged)
THEN
437 cpabort(
"Taylor inversion did not converge")
440 CALL dbcsr_release(tmp1)
441 CALL dbcsr_release(tmp2)
442 CALL dbcsr_release(tmp3_sym)
444 DEALLOCATE (p_diagonal)
446 CALL timestop(handle)
470 norm_convergence, filter_eps, accelerator_order, &
471 max_iter_lanczos, eps_lanczos, silent)
473 TYPE(dbcsr_type),
INTENT(INOUT),
TARGET :: matrix_inverse, matrix
474 REAL(kind=dp),
INTENT(IN) :: threshold
475 LOGICAL,
INTENT(IN),
OPTIONAL :: use_inv_as_guess
476 REAL(kind=dp),
INTENT(IN),
OPTIONAL :: norm_convergence, filter_eps
477 INTEGER,
INTENT(IN),
OPTIONAL :: accelerator_order, max_iter_lanczos
478 REAL(kind=dp),
INTENT(IN),
OPTIONAL :: eps_lanczos
479 LOGICAL,
INTENT(IN),
OPTIONAL :: silent
481 CHARACTER(LEN=*),
PARAMETER :: routinen =
'invert_Hotelling'
483 INTEGER :: accelerator_type, handle, i, &
484 my_max_iter_lanczos, unit_nr
485 INTEGER(KIND=int_8) :: flop1, flop2
486 LOGICAL :: arnoldi_converged, converged, &
488 REAL(kind=dp) :: convergence, frob_matrix, gershgorin_norm, max_ev, maxnorm_matrix, min_ev, &
489 my_eps_lanczos, my_filter_eps, occ_matrix, scalingf, t1, t2
490 TYPE(cp_logger_type),
POINTER :: logger
491 TYPE(dbcsr_type),
TARGET :: tmp1, tmp2
496 CALL timeset(routinen, handle)
498 logger => cp_get_default_logger()
499 IF (logger%para_env%is_source())
THEN
500 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
504 IF (
PRESENT(silent))
THEN
505 IF (silent) unit_nr = -1
508 convergence = threshold
509 IF (
PRESENT(norm_convergence)) convergence = norm_convergence
512 IF (
PRESENT(accelerator_order)) accelerator_type = accelerator_order
513 IF (accelerator_type .GT. 1) accelerator_type = 1
515 use_inv_guess = .false.
516 IF (
PRESENT(use_inv_as_guess)) use_inv_guess = use_inv_as_guess
518 my_max_iter_lanczos = 64
519 my_eps_lanczos = 1.0e-3_dp
520 IF (
PRESENT(max_iter_lanczos)) my_max_iter_lanczos = max_iter_lanczos
521 IF (
PRESENT(eps_lanczos)) my_eps_lanczos = eps_lanczos
523 my_filter_eps = threshold
524 IF (
PRESENT(filter_eps)) my_filter_eps = filter_eps
527 IF (.NOT. use_inv_guess)
THEN
529 SELECT CASE (accelerator_type)
531 gershgorin_norm = dbcsr_gershgorin_norm(matrix)
532 frob_matrix = dbcsr_frobenius_norm(matrix)
533 CALL dbcsr_set(matrix_inverse, 0.0_dp)
534 CALL dbcsr_add_on_diag(matrix_inverse, 1/min(gershgorin_norm, frob_matrix))
537 CALL dbcsr_set(matrix_inverse, 0.0_dp)
538 CALL dbcsr_add_on_diag(matrix_inverse, 1.0_dp)
540 cpabort(
"Illegal accelerator order")
544 CALL dbcsr_create(tmp1, template=matrix_inverse)
550 CALL dbcsr_create(tmp1, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
554 CALL dbcsr_create(tmp2, template=matrix_inverse)
556 IF (unit_nr > 0)
WRITE (unit_nr, *)
561 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_inverse, matrix, &
562 0.0_dp, tmp1, flop=flop1, filter_eps=my_filter_eps)
564 IF (accelerator_type == 1)
THEN
567 CALL arnoldi_extremal(tmp1, max_ev, min_ev, threshold=my_eps_lanczos, &
568 max_iter=my_max_iter_lanczos, converged=arnoldi_converged)
577 IF (unit_nr > 0)
THEN
579 WRITE (unit_nr,
'(T6,A,1X,L1,A,E12.3)')
"Lanczos converged: ", arnoldi_converged,
" threshold:", my_eps_lanczos
580 WRITE (unit_nr,
'(T6,A,1X,E12.3,E12.3)')
"Est. extremal eigenvalues:", max_ev, min_ev
581 WRITE (unit_nr,
'(T6,A,1X,E12.3)')
"Est. condition number :", max_ev/max(min_ev, epsilon(min_ev))
585 scalingf = 1.9_dp/(max_ev + min_ev)
586 CALL dbcsr_scale(tmp1, scalingf)
587 CALL dbcsr_scale(matrix_inverse, scalingf)
588 min_ev = min_ev*scalingf
599 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
600 CALL dbcsr_norm(tmp1, &
601 dbcsr_norm_maxabsnorm, norm_scalar=maxnorm_matrix)
602 CALL dbcsr_add_on_diag(tmp1, +1.0_dp)
605 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, matrix_inverse, 0.0_dp, tmp2, &
606 flop=flop2, filter_eps=my_filter_eps)
608 CALL dbcsr_add(matrix_inverse, tmp2, 2.0_dp, -1.0_dp)
610 CALL dbcsr_filter(matrix_inverse, my_filter_eps)
612 occ_matrix = dbcsr_get_occupation(matrix_inverse)
615 IF (accelerator_type == 1)
THEN
616 min_ev = min_ev*(2.0_dp - min_ev)
617 IF (
PRESENT(norm_convergence)) maxnorm_matrix = abs(min_ev - 1.0_dp)
620 IF (unit_nr > 0)
THEN
621 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)')
"Hotelling iter", i, occ_matrix, &
622 maxnorm_matrix, t2 - t1, &
623 (flop1 + flop2)/(1.0e6_dp*max(0.001_dp, t2 - t1))
624 CALL m_flush(unit_nr)
627 IF (maxnorm_matrix < convergence)
THEN
633 IF (accelerator_type == 1)
THEN
634 min_ev = min_ev*2.0_dp/(min_ev + 1.0_dp)
635 CALL dbcsr_scale(matrix_inverse, 2.0_dp/(min_ev + 1.0_dp))
639 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_inverse, matrix, &
640 0.0_dp, tmp1, flop=flop1, filter_eps=my_filter_eps)
644 IF (.NOT. converged)
THEN
645 cpabort(
"Hotelling inversion did not converge")
649 IF (dbcsr_get_matrix_type(matrix_inverse) == dbcsr_type_no_symmetry)
THEN
650 CALL dbcsr_transposed(tmp2, matrix_inverse)
651 CALL dbcsr_add(matrix_inverse, tmp2, 0.5_dp, 0.5_dp)
654 IF (unit_nr > 0)
THEN
658 WRITE (unit_nr,
'()')
659 CALL m_flush(unit_nr)
662 CALL dbcsr_release(tmp1)
663 CALL dbcsr_release(tmp2)
665 CALL timestop(handle)
682 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix_sign, matrix
683 REAL(kind=dp),
INTENT(IN) :: threshold
684 INTEGER,
INTENT(IN),
OPTIONAL :: sign_order
686 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_sign_Newton_Schulz'
688 INTEGER :: count, handle, i, order, unit_nr
689 INTEGER(KIND=int_8) :: flops
690 REAL(kind=dp) :: a0, a1, a2, a3, a4, a5, floptot, &
691 frob_matrix, frob_matrix_base, &
692 gersh_matrix, occ_matrix, prefactor, &
694 TYPE(cp_logger_type),
POINTER :: logger
695 TYPE(dbcsr_type) :: tmp1, tmp2, tmp3, tmp4
697 CALL timeset(routinen, handle)
699 logger => cp_get_default_logger()
700 IF (logger%para_env%is_source())
THEN
701 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
706 IF (
PRESENT(sign_order))
THEN
712 CALL dbcsr_create(tmp1, template=matrix_sign)
714 CALL dbcsr_create(tmp2, template=matrix_sign)
715 IF (abs(order) .GE. 4)
THEN
716 CALL dbcsr_create(tmp3, template=matrix_sign)
718 IF (abs(order) .GT. 4)
THEN
719 CALL dbcsr_create(tmp4, template=matrix_sign)
722 CALL dbcsr_copy(matrix_sign, matrix)
723 CALL dbcsr_filter(matrix_sign, threshold)
726 frob_matrix = dbcsr_frobenius_norm(matrix_sign)
727 gersh_matrix = dbcsr_gershgorin_norm(matrix_sign)
728 CALL dbcsr_scale(matrix_sign, 1/min(frob_matrix, gersh_matrix))
730 IF (unit_nr > 0)
WRITE (unit_nr, *)
737 CALL dbcsr_multiply(
"N",
"N", -1.0_dp, matrix_sign, matrix_sign, 0.0_dp, tmp1, &
738 filter_eps=threshold, flop=flops)
739 floptot = floptot + flops
742 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
743 CALL dbcsr_add_on_diag(tmp1, +1.0_dp)
744 frob_matrix = dbcsr_frobenius_norm(tmp1)
776 IF (order .EQ. 2)
THEN
780 CALL dbcsr_add_on_diag(tmp1, +2.0_dp)
781 occ_matrix = dbcsr_get_occupation(matrix_sign)
782 ELSE IF (order .EQ. 3)
THEN
785 CALL dbcsr_copy(tmp2, tmp1)
786 CALL dbcsr_scale(tmp1, 4.0_dp/3.0_dp)
787 CALL dbcsr_add_on_diag(tmp1, 8.0_dp/3.0_dp)
790 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp2, 1.0_dp, tmp1, &
791 filter_eps=threshold, flop=flops)
792 floptot = floptot + flops
793 prefactor = 3.0_dp/8.0_dp
795 ELSE IF (order .EQ. 4)
THEN
798 CALL dbcsr_copy(tmp3, tmp1)
799 CALL dbcsr_scale(tmp1, 8.0_dp/5.0_dp)
800 CALL dbcsr_add_on_diag(tmp1, 16.0_dp/5.0_dp)
803 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, tmp3, 0.0_dp, tmp2, &
804 filter_eps=threshold, flop=flops)
805 floptot = floptot + flops
807 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 6.0_dp/5.0_dp)
809 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp3, 1.0_dp, tmp1, &
810 filter_eps=threshold, flop=flops)
811 floptot = floptot + flops
813 prefactor = 5.0_dp/16.0_dp
814 ELSE IF (order .EQ. -5)
THEN
817 CALL dbcsr_copy(tmp3, tmp1)
818 CALL dbcsr_scale(tmp1, 128.0_dp/70.0_dp)
819 CALL dbcsr_add_on_diag(tmp1, 128.0_dp/35.0_dp)
822 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, tmp3, 0.0_dp, tmp2, &
823 filter_eps=threshold, flop=flops)
824 floptot = floptot + flops
826 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 48.0_dp/35.0_dp)
828 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp4, &
829 filter_eps=threshold, flop=flops)
830 floptot = floptot + flops
832 CALL dbcsr_add(tmp1, tmp4, 1.0_dp, 8.0_dp/7.0_dp)
834 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp4, tmp3, 1.0_dp, tmp1, &
835 filter_eps=threshold, flop=flops)
836 floptot = floptot + flops
838 prefactor = 35.0_dp/128.0_dp
839 ELSE IF (order .EQ. 5)
THEN
849 a1 = 23819.0_dp/13720.0_dp
850 a2 = -3734_dp/1715.0_dp
851 a3 = 832591127_dp/188238400.0_dp
855 CALL dbcsr_copy(tmp3, tmp1)
856 CALL dbcsr_add_on_diag(tmp3, a0)
857 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, tmp1, 0.0_dp, tmp2, &
858 filter_eps=threshold, flop=flops)
859 floptot = floptot + flops
860 CALL dbcsr_add_on_diag(tmp2, a1)
862 CALL dbcsr_add_on_diag(tmp1, a2)
863 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 1.0_dp)
864 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, tmp2, 0.0_dp, tmp3, &
865 filter_eps=threshold, flop=flops)
866 floptot = floptot + flops
867 CALL dbcsr_add_on_diag(tmp3, a3)
868 CALL dbcsr_copy(tmp1, tmp3)
870 prefactor = 35.0_dp/128.0_dp
871 ELSE IF (order .EQ. 6)
THEN
875 CALL dbcsr_copy(tmp3, tmp1)
876 CALL dbcsr_scale(tmp1, 128.0_dp/63.0_dp)
877 CALL dbcsr_add_on_diag(tmp1, 256.0_dp/63.0_dp)
880 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, tmp3, 0.0_dp, tmp2, &
881 filter_eps=threshold, flop=flops)
882 floptot = floptot + flops
884 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 32.0_dp/21.0_dp)
886 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp4, &
887 filter_eps=threshold, flop=flops)
888 floptot = floptot + flops
890 CALL dbcsr_add(tmp1, tmp4, 1.0_dp, 80.0_dp/63.0_dp)
892 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp4, tmp3, 0.0_dp, tmp2, &
893 filter_eps=threshold, flop=flops)
894 floptot = floptot + flops
896 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 10.0_dp/9.0_dp)
898 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp3, 1.0_dp, tmp1, &
899 filter_eps=threshold, flop=flops)
900 floptot = floptot + flops
902 prefactor = 63.0_dp/256.0_dp
903 ELSE IF (order .EQ. 7)
THEN
906 a0 = 1.3686502058092053653287666647611728507211996691324048468010382350359929055186612505791532871573242422_dp
907 a1 = 1.7089671854477436685850554669524985556296280184497503489303331821456795715195510972774979091893741568_dp
908 a2 = -1.3231956603546599107833121193066273961757451236778593922555836895814474509732067051246078326118696968_dp
909 a3 = 3.9876642330847931291749479958277754186675336169578593000744380254770411483327581042259415937710270453_dp
910 a4 = -3.7273299006476825027065704937541279833880400042556351139273912137942678919776364526511485025132991667_dp
911 a5 = 4.9369932474103023792021351907971943220607580694533770325967170245194362399287150565595441897740173578_dp
919 CALL dbcsr_copy(tmp3, tmp1)
920 CALL dbcsr_add_on_diag(tmp3, a0)
921 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, tmp1, 0.0_dp, tmp2, &
922 filter_eps=threshold, flop=flops)
923 floptot = floptot + flops
924 CALL dbcsr_add_on_diag(tmp2, a1)
927 CALL dbcsr_copy(tmp4, tmp1)
928 CALL dbcsr_add_on_diag(tmp4, a2)
929 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp4, tmp2, 0.0_dp, tmp3, &
930 filter_eps=threshold, flop=flops)
931 floptot = floptot + flops
932 CALL dbcsr_add_on_diag(tmp3, a3)
934 CALL dbcsr_add(tmp2, tmp3, 1.0_dp, 1.0_dp)
935 CALL dbcsr_add_on_diag(tmp2, a4)
936 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp1, &
937 filter_eps=threshold, flop=flops)
938 floptot = floptot + flops
939 CALL dbcsr_add_on_diag(tmp1, a5)
941 prefactor = 231.0_dp/1024.0_dp
943 cpabort(
"requested order is not implemented.")
947 CALL dbcsr_multiply(
"N",
"N", prefactor, matrix_sign, tmp1, 0.0_dp, tmp2, &
948 filter_eps=threshold, flop=flops)
949 floptot = floptot + flops
953 CALL dbcsr_copy(matrix_sign, tmp2)
956 occ_matrix = dbcsr_get_occupation(matrix_sign)
958 IF (unit_nr > 0)
THEN
959 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)')
"NS sign iter ", i, occ_matrix, &
960 frob_matrix/frob_matrix_base, t2 - t1, &
961 floptot/(1.0e6_dp*max(0.001_dp, t2 - t1))
962 CALL m_flush(unit_nr)
966 IF (frob_matrix*frob_matrix < (threshold*frob_matrix_base*frob_matrix_base))
EXIT
971 CALL dbcsr_multiply(
"N",
"N", +1.0_dp, matrix_sign, matrix_sign, 0.0_dp, tmp1, &
972 filter_eps=threshold)
973 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
974 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
975 frob_matrix = dbcsr_frobenius_norm(tmp1)
976 occ_matrix = dbcsr_get_occupation(matrix_sign)
977 IF (unit_nr > 0)
THEN
978 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3)')
"Final NS sign iter", i, occ_matrix, &
979 frob_matrix/frob_matrix_base
980 WRITE (unit_nr,
'()')
981 CALL m_flush(unit_nr)
984 CALL dbcsr_release(tmp1)
985 CALL dbcsr_release(tmp2)
986 IF (abs(order) .GE. 4)
THEN
987 CALL dbcsr_release(tmp3)
989 IF (abs(order) .GT. 4)
THEN
990 CALL dbcsr_release(tmp4)
993 CALL timestop(handle)
1010 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix_sign, matrix
1011 REAL(kind=dp),
INTENT(IN) :: threshold
1012 INTEGER,
INTENT(IN),
OPTIONAL :: sign_order
1014 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_sign_proot'
1016 INTEGER :: handle, order, unit_nr
1017 INTEGER(KIND=int_8) :: flop0, flop1, flop2
1018 LOGICAL :: converged, symmetrize
1019 REAL(kind=dp) :: frob_matrix, frob_matrix_base, occ_matrix
1020 TYPE(cp_logger_type),
POINTER :: logger
1021 TYPE(dbcsr_type) :: matrix2, matrix_sqrt, matrix_sqrt_inv, &
1024 CALL cite_reference(richters2018)
1026 CALL timeset(routinen, handle)
1028 logger => cp_get_default_logger()
1029 IF (logger%para_env%is_source())
THEN
1030 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1035 IF (
PRESENT(sign_order))
THEN
1041 CALL dbcsr_create(tmp1, template=matrix_sign)
1043 CALL dbcsr_create(tmp2, template=matrix_sign)
1045 CALL dbcsr_create(matrix2, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1046 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix, matrix, 0.0_dp, matrix2, &
1047 filter_eps=threshold, flop=flop0)
1053 IF (unit_nr > 0)
WRITE (unit_nr, *)
1055 CALL dbcsr_create(matrix_sqrt, template=matrix2)
1056 CALL dbcsr_create(matrix_sqrt_inv, template=matrix2)
1057 IF (unit_nr > 0)
WRITE (unit_nr, *)
"Threshold=", threshold
1059 symmetrize = .false.
1060 CALL matrix_sqrt_proot(matrix_sqrt, matrix_sqrt_inv, matrix2, threshold, order, &
1061 0.01_dp, 100, symmetrize, converged)
1065 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix, matrix_sqrt_inv, 0.0_dp, matrix_sign, &
1066 filter_eps=threshold, flop=flop1)
1069 CALL dbcsr_multiply(
"N",
"N", +1.0_dp, matrix_sign, matrix_sign, 0.0_dp, tmp1, &
1070 filter_eps=threshold, flop=flop2)
1071 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
1072 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
1073 frob_matrix = dbcsr_frobenius_norm(tmp1)
1074 occ_matrix = dbcsr_get_occupation(matrix_sign)
1075 IF (unit_nr > 0)
THEN
1076 WRITE (unit_nr,
'(T6,A,F10.8,E12.3)')
"Final proot sign iter", occ_matrix, &
1077 frob_matrix/frob_matrix_base
1078 WRITE (unit_nr,
'()')
1079 CALL m_flush(unit_nr)
1082 CALL dbcsr_release(tmp1)
1083 CALL dbcsr_release(tmp2)
1084 CALL dbcsr_release(matrix2)
1085 CALL dbcsr_release(matrix_sqrt)
1086 CALL dbcsr_release(matrix_sqrt_inv)
1088 CALL timestop(handle)
1101 SUBROUTINE dense_matrix_sign_newton_schulz(matrix_sign, matrix, matrix_id, threshold, sign_order)
1103 REAL(kind=dp),
DIMENSION(:, :),
INTENT(INOUT) :: matrix_sign
1104 REAL(kind=dp),
DIMENSION(:, :),
INTENT(IN) :: matrix
1105 INTEGER,
INTENT(IN),
OPTIONAL :: matrix_id
1106 REAL(kind=dp),
INTENT(IN) :: threshold
1107 INTEGER,
INTENT(IN),
OPTIONAL :: sign_order
1109 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dense_matrix_sign_Newton_Schulz'
1111 INTEGER :: handle, i, j, sz, unit_nr
1112 LOGICAL :: converged
1113 REAL(kind=dp) :: frob_matrix, frob_matrix_base, &
1114 gersh_matrix, prefactor, scaling_factor
1115 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: tmp1, tmp2
1116 REAL(kind=dp),
DIMENSION(1) :: work
1117 REAL(kind=dp),
EXTERNAL :: dlange
1118 TYPE(cp_logger_type),
POINTER :: logger
1120 CALL timeset(routinen, handle)
1123 logger => cp_get_default_logger()
1124 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1127 sz =
SIZE(matrix, 1)
1128 frob_matrix = dlange(
'F', sz, sz, matrix, sz, work)
1129 gersh_matrix = dlange(
'1', sz, sz, matrix, sz, work)
1130 scaling_factor = 1/min(frob_matrix, gersh_matrix)
1131 matrix_sign = matrix*scaling_factor
1132 ALLOCATE (tmp1(sz, sz))
1133 ALLOCATE (tmp2(sz, sz))
1137 CALL dgemm(
'N',
'N', sz, sz, sz, -1.0_dp, matrix_sign, sz, matrix_sign, sz, 0.0_dp, tmp1, sz)
1140 frob_matrix_base = dlange(
'F', sz, sz, tmp1, sz, work)
1142 tmp1(j, j) = tmp1(j, j) + 1.0_dp
1144 frob_matrix = dlange(
'F', sz, sz, tmp1, sz, work)
1146 IF (sign_order .EQ. 2)
THEN
1150 tmp1(j, j) = tmp1(j, j) + 2.0_dp
1152 ELSE IF (sign_order .EQ. 3)
THEN
1154 tmp1 = tmp1*4.0_dp/3.0_dp
1156 tmp1(j, j) = tmp1(j, j) + 8.0_dp/3.0_dp
1158 CALL dgemm(
'N',
'N', sz, sz, sz, 1.0_dp, tmp2, sz, tmp2, sz, 1.0_dp, tmp1, sz)
1159 prefactor = 3.0_dp/8.0_dp
1161 cpabort(
"requested order is not implemented.")
1164 CALL dgemm(
'N',
'N', sz, sz, sz, prefactor, matrix_sign, sz, tmp1, sz, 0.0_dp, tmp2, sz)
1168 IF (frob_matrix*frob_matrix < (threshold*frob_matrix_base*frob_matrix_base))
THEN
1169 WRITE (unit_nr,
'(T6,A,1X,I6,1X,A,1X,I3,E12.3)') &
1170 "Submatrix", matrix_id,
"final NS sign iter", i, frob_matrix/frob_matrix_base
1171 CALL m_flush(unit_nr)
1177 IF (.NOT. converged) &
1178 cpabort(
"dense_matrix_sign_Newton_Schulz did not converge within 100 iterations")
1183 CALL timestop(handle)
1185 END SUBROUTINE dense_matrix_sign_newton_schulz
1197 SUBROUTINE eigdecomp(sm, N, eigvals, eigvecs)
1198 INTEGER,
INTENT(IN) :: n
1199 REAL(kind=dp),
INTENT(IN) :: sm(n, n)
1200 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:), &
1201 INTENT(OUT) :: eigvals
1202 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :), &
1203 INTENT(OUT) :: eigvecs
1205 INTEGER :: info, liwork, lwork
1206 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: iwork
1207 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:) :: work
1208 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: tmp
1210 ALLOCATE (eigvecs(n, n), tmp(n, n))
1211 ALLOCATE (eigvals(n))
1214 eigvecs(:, :) = 0.5*(sm + transpose(sm))
1221 CALL dsyevd(
'V',
'U', n, eigvecs, n, eigvals, work, lwork, iwork, liwork, info)
1222 lwork = int(work(1))
1223 liwork = int(iwork(1))
1228 ALLOCATE (work(lwork))
1229 ALLOCATE (iwork(liwork))
1230 CALL dsyevd(
'V',
'U', n, eigvecs, n, eigvals, work, lwork, iwork, liwork, info)
1233 IF (info .NE. 0) cpabort(
"dsyevd did not succeed")
1236 END SUBROUTINE eigdecomp
1249 SUBROUTINE sign_from_eigdecomp(sm_sign, eigvals, eigvecs, N, mu_correction)
1251 REAL(kind=dp),
INTENT(IN) :: eigvecs(n, n), eigvals(n)
1252 REAL(kind=dp),
INTENT(INOUT) :: sm_sign(n, n)
1253 REAL(kind=dp),
INTENT(IN) :: mu_correction
1256 REAL(kind=dp) :: modified_eigval, tmp(n, n)
1260 modified_eigval = eigvals(i) - mu_correction
1261 IF (modified_eigval > 0)
THEN
1263 ELSE IF (modified_eigval < 0)
THEN
1264 sm_sign(i, i) = -1.0
1272 CALL dgemm(
'N',
'N', n, n, n, 1.0_dp, eigvecs, n, sm_sign, n, 0.0_dp, tmp, n)
1273 CALL dgemm(
'N',
'T', n, n, n, 1.0_dp, tmp, n, eigvecs, n, 0.0_dp, sm_sign, n)
1274 END SUBROUTINE sign_from_eigdecomp
1288 FUNCTION trace_from_eigdecomp(eigvals, eigvecs, firstcol, lastcol, mu_correction)
RESULT(trace)
1289 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:), &
1290 INTENT(IN) :: eigvals
1291 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :), &
1292 INTENT(IN) :: eigvecs
1293 INTEGER,
INTENT(IN) :: firstcol, lastcol
1294 REAL(kind=dp),
INTENT(IN) :: mu_correction
1295 REAL(kind=dp) :: trace
1297 INTEGER :: i, j, sm_size
1298 REAL(kind=dp) :: modified_eigval, tmpsum
1299 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:) :: mapped_eigvals
1301 sm_size =
SIZE(eigvals)
1302 ALLOCATE (mapped_eigvals(sm_size))
1305 modified_eigval = eigvals(i) - mu_correction
1306 IF (modified_eigval > 0)
THEN
1307 mapped_eigvals(i) = 1.0
1308 ELSE IF (modified_eigval < 0)
THEN
1309 mapped_eigvals(i) = -1.0
1311 mapped_eigvals(i) = 0.0
1316 DO i = firstcol, lastcol
1319 tmpsum = tmpsum + (eigvecs(i, j)*mapped_eigvals(j)*eigvecs(i, j))
1321 trace = trace - 0.5_dp*tmpsum + 0.5_dp
1323 END FUNCTION trace_from_eigdecomp
1335 SUBROUTINE dense_matrix_sign_direct(sm_sign, sm, N)
1336 INTEGER,
INTENT(IN) :: n
1337 REAL(kind=dp),
INTENT(IN) :: sm(n, n)
1338 REAL(kind=dp),
INTENT(INOUT) :: sm_sign(n, n)
1340 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:) :: eigvals
1341 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: eigvecs
1343 CALL eigdecomp(sm, n, eigvals, eigvecs)
1344 CALL sign_from_eigdecomp(sm_sign, eigvals, eigvecs, n, 0.0_dp)
1346 DEALLOCATE (eigvals, eigvecs)
1347 END SUBROUTINE dense_matrix_sign_direct
1363 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix_sign, matrix
1364 REAL(kind=dp),
INTENT(IN) :: threshold
1365 INTEGER,
INTENT(IN),
OPTIONAL :: sign_order
1366 INTEGER,
INTENT(IN) :: submatrix_sign_method
1368 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_sign_submatrix'
1370 INTEGER :: group, handle, i, myrank, nblkcols, &
1371 order, sm_size, unit_nr
1372 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: my_sms
1373 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: sm, sm_sign
1374 TYPE(cp_logger_type),
POINTER :: logger
1375 TYPE(dbcsr_distribution_type) :: dist
1376 TYPE(submatrix_dissection_type) :: dissection
1378 CALL timeset(routinen, handle)
1381 logger => cp_get_default_logger()
1382 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1384 IF (
PRESENT(sign_order))
THEN
1390 CALL dbcsr_get_info(matrix=matrix, nblkcols_total=nblkcols, distribution=dist, group=group)
1391 CALL dbcsr_distribution_get(dist=dist, mynode=myrank)
1393 CALL dissection%init(matrix)
1394 CALL dissection%get_sm_ids_for_rank(myrank, my_sms)
1400 DO i = 1,
SIZE(my_sms)
1401 WRITE (unit_nr,
'(T3,A,1X,I4,1X,A,1X,I6)')
"Rank", myrank,
"processing submatrix", my_sms(i)
1402 CALL dissection%generate_submatrix(my_sms(i), sm)
1403 sm_size =
SIZE(sm, 1)
1404 ALLOCATE (sm_sign(sm_size, sm_size))
1405 SELECT CASE (submatrix_sign_method)
1406 CASE (ls_scf_submatrix_sign_ns)
1407 CALL dense_matrix_sign_newton_schulz(sm_sign, sm, my_sms(i), threshold, order)
1408 CASE (ls_scf_submatrix_sign_direct, ls_scf_submatrix_sign_direct_muadj, ls_scf_submatrix_sign_direct_muadj_lowmem)
1409 CALL dense_matrix_sign_direct(sm_sign, sm, sm_size)
1411 cpabort(
"Unkown submatrix sign method.")
1413 CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1414 DEALLOCATE (sm, sm_sign)
1419 CALL dissection%communicate_results(matrix_sign)
1420 CALL dissection%final
1422 CALL timestop(handle)
1440 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix_sign, matrix
1441 REAL(kind=dp),
INTENT(INOUT) :: mu
1442 INTEGER,
INTENT(IN) :: nelectron
1443 REAL(kind=dp),
INTENT(IN) :: threshold
1444 INTEGER,
INTENT(IN) :: variant
1446 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_sign_submatrix_mu_adjust'
1447 REAL(kind=dp),
PARAMETER :: initial_increment = 0.01_dp
1449 INTEGER :: group_handle, handle, i, j, myrank, &
1450 nblkcols, sm_firstcol, sm_lastcol, &
1452 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: my_sms
1453 LOGICAL :: has_mu_high, has_mu_low
1454 REAL(kind=dp) :: increment, mu_high, mu_low, new_mu, trace
1455 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: sm, sm_sign, tmp
1456 TYPE(cp_logger_type),
POINTER :: logger
1457 TYPE(dbcsr_distribution_type) :: dist
1458 TYPE(eigbuf),
ALLOCATABLE,
DIMENSION(:) :: eigbufs
1459 TYPE(mp_comm_type) :: group
1460 TYPE(submatrix_dissection_type) :: dissection
1462 CALL timeset(routinen, handle)
1465 logger => cp_get_default_logger()
1466 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1468 CALL dbcsr_get_info(matrix=matrix, nblkcols_total=nblkcols, distribution=dist, group=group_handle)
1469 CALL dbcsr_distribution_get(dist=dist, mynode=myrank)
1471 CALL group%set_handle(group_handle)
1473 CALL dissection%init(matrix)
1474 CALL dissection%get_sm_ids_for_rank(myrank, my_sms)
1476 ALLOCATE (eigbufs(
SIZE(my_sms)))
1485 DO i = 1,
SIZE(my_sms)
1486 CALL dissection%generate_submatrix(my_sms(i), sm)
1487 sm_size =
SIZE(sm, 1)
1488 WRITE (unit_nr, *)
"Rank", myrank,
"processing submatrix", my_sms(i),
"size", sm_size
1490 CALL dissection%get_relevant_sm_columns(my_sms(i), sm_firstcol, sm_lastcol)
1492 IF (variant .EQ. ls_scf_submatrix_sign_direct_muadj)
THEN
1494 CALL eigdecomp(sm, sm_size, eigvals=eigbufs(i)%eigvals, eigvecs=eigbufs(i)%eigvecs)
1498 CALL eigdecomp(sm, sm_size, eigvals=eigbufs(i)%eigvals, eigvecs=tmp)
1499 ALLOCATE (eigbufs(i)%eigvecs(sm_firstcol:sm_lastcol, 1:sm_size))
1500 eigbufs(i)%eigvecs(:, :) = tmp(sm_firstcol:sm_lastcol, 1:sm_size)
1502 ALLOCATE (sm_sign(sm_size, sm_size))
1503 CALL sign_from_eigdecomp(sm_sign, eigbufs(i)%eigvals, tmp, sm_size, 0.0_dp)
1504 CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1505 DEALLOCATE (sm_sign, tmp)
1509 trace = trace + trace_from_eigdecomp(eigbufs(i)%eigvals, eigbufs(i)%eigvecs, sm_firstcol, sm_lastcol, 0.0_dp)
1514 has_mu_low = .false.
1515 has_mu_high = .false.
1516 increment = initial_increment
1519 CALL group%sum(trace)
1520 IF (unit_nr > 0)
WRITE (unit_nr,
'(T2,A,1X,F13.9,1X,F15.9)') &
1521 "Density matrix: mu, trace error: ", new_mu, trace - nelectron
1522 IF (abs(trace - nelectron) < 0.5_dp)
EXIT
1523 IF (trace < nelectron)
THEN
1525 new_mu = new_mu + increment
1527 increment = increment*2
1530 new_mu = new_mu - increment
1531 has_mu_high = .true.
1532 increment = increment*2
1535 IF (has_mu_low .AND. has_mu_high)
THEN
1536 new_mu = (mu_low + mu_high)/2
1537 IF (abs(mu_high - mu_low) < threshold)
EXIT
1546 DO j = 1,
SIZE(my_sms)
1547 sm_size =
SIZE(eigbufs(j)%eigvals)
1548 CALL dissection%get_relevant_sm_columns(my_sms(j), sm_firstcol, sm_lastcol)
1549 trace = trace + trace_from_eigdecomp(eigbufs(j)%eigvals, eigbufs(j)%eigvecs, sm_firstcol, sm_lastcol, new_mu - mu)
1556 IF (variant .EQ. ls_scf_submatrix_sign_direct_muadj)
THEN
1561 DO i = 1,
SIZE(my_sms)
1562 WRITE (unit_nr,
'(T3,A,1X,I4,1X,A,1X,I6)')
"Rank", myrank,
"finalizing submatrix", my_sms(i)
1563 sm_size =
SIZE(eigbufs(i)%eigvals)
1564 ALLOCATE (sm_sign(sm_size, sm_size))
1565 CALL sign_from_eigdecomp(sm_sign, eigbufs(i)%eigvals, eigbufs(i)%eigvecs, sm_size, new_mu - mu)
1566 CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1567 DEALLOCATE (sm_sign)
1573 DEALLOCATE (eigbufs)
1576 IF ((variant .EQ. ls_scf_submatrix_sign_direct_muadj_lowmem) .AND. (mu .NE. new_mu))
THEN
1581 DO i = 1,
SIZE(my_sms)
1582 WRITE (unit_nr,
'(T3,A,1X,I4,1X,A,1X,I6)')
"Rank", myrank,
"reprocessing submatrix", my_sms(i)
1583 CALL dissection%generate_submatrix(my_sms(i), sm)
1584 sm_size =
SIZE(sm, 1)
1586 sm(j, j) = sm(j, j) + mu - new_mu
1588 ALLOCATE (sm_sign(sm_size, sm_size))
1589 CALL dense_matrix_sign_direct(sm_sign, sm, sm_size)
1590 CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1591 DEALLOCATE (sm, sm_sign)
1599 CALL dissection%communicate_results(matrix_sign)
1600 CALL dissection%final
1602 CALL timestop(handle)
1623 eps_lanczos, max_iter_lanczos, symmetrize, converged)
1624 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix_sqrt, matrix_sqrt_inv, matrix
1625 REAL(kind=dp),
INTENT(IN) :: threshold
1626 INTEGER,
INTENT(IN) :: order
1627 REAL(kind=dp),
INTENT(IN) :: eps_lanczos
1628 INTEGER,
INTENT(IN) :: max_iter_lanczos
1629 LOGICAL,
OPTIONAL :: symmetrize, converged
1631 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_sqrt_Newton_Schulz'
1633 INTEGER :: handle, i, unit_nr
1634 INTEGER(KIND=int_8) :: flop1, flop2, flop3, flop4, flop5
1635 LOGICAL :: arnoldi_converged, tsym
1636 REAL(kind=dp) :: a, b, c, conv, d, frob_matrix, &
1637 frob_matrix_base, gershgorin_norm, &
1638 max_ev, min_ev, oa, ob, oc, &
1639 occ_matrix, od, scaling, t1, t2
1640 TYPE(cp_logger_type),
POINTER :: logger
1641 TYPE(dbcsr_type) :: tmp1, tmp2, tmp3
1643 CALL timeset(routinen, handle)
1645 logger => cp_get_default_logger()
1646 IF (logger%para_env%is_source())
THEN
1647 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1652 IF (
PRESENT(converged)) converged = .false.
1653 IF (
PRESENT(symmetrize))
THEN
1660 CALL dbcsr_create(tmp1, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1661 CALL dbcsr_create(tmp2, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1662 IF (order .GE. 4)
THEN
1663 CALL dbcsr_create(tmp3, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1666 CALL dbcsr_set(matrix_sqrt_inv, 0.0_dp)
1667 CALL dbcsr_add_on_diag(matrix_sqrt_inv, 1.0_dp)
1668 CALL dbcsr_filter(matrix_sqrt_inv, threshold)
1669 CALL dbcsr_copy(matrix_sqrt, matrix)
1672 IF (order == 0)
THEN
1674 gershgorin_norm = dbcsr_gershgorin_norm(matrix_sqrt)
1675 frob_matrix = dbcsr_frobenius_norm(matrix_sqrt)
1676 scaling = 1.0_dp/min(frob_matrix, gershgorin_norm)
1681 CALL arnoldi_extremal(matrix_sqrt, max_ev, min_ev, threshold=eps_lanczos, &
1682 max_iter=max_iter_lanczos, converged=arnoldi_converged)
1683 IF (unit_nr > 0)
THEN
1685 WRITE (unit_nr,
'(T6,A,1X,L1,A,E12.3)')
"Lanczos converged: ", arnoldi_converged,
" threshold:", eps_lanczos
1686 WRITE (unit_nr,
'(T6,A,1X,E12.3,E12.3)')
"Est. extremal eigenvalues:", max_ev, min_ev
1687 WRITE (unit_nr,
'(T6,A,1X,E12.3)')
"Est. condition number :", max_ev/max(min_ev, epsilon(min_ev))
1691 scaling = 2.0_dp/(max_ev + min_ev + 100*eps_lanczos)
1695 CALL dbcsr_scale(matrix_sqrt, scaling)
1696 CALL dbcsr_filter(matrix_sqrt, threshold)
1697 IF (unit_nr > 0)
THEN
1699 WRITE (unit_nr, *)
"Order=", order
1707 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_sqrt_inv, matrix_sqrt, 0.0_dp, tmp1, &
1708 filter_eps=threshold, flop=flop1)
1709 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
1710 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
1713 frob_matrix = dbcsr_frobenius_norm(tmp1)
1715 flop4 = 0; flop5 = 0
1719 CALL dbcsr_add_on_diag(tmp1, -2.0_dp)
1720 CALL dbcsr_scale(tmp1, -0.5_dp)
1723 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, tmp1, 0.0_dp, tmp2, &
1724 filter_eps=threshold, flop=flop4)
1726 CALL dbcsr_add(tmp1, tmp2, -4.0_dp, 3.0_dp)
1727 CALL dbcsr_add_on_diag(tmp1, 8.0_dp)
1728 CALL dbcsr_scale(tmp1, 0.125_dp)
1731 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, tmp1, 0.0_dp, tmp2, &
1732 filter_eps=threshold, flop=flop4)
1734 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp1, 0.0_dp, tmp3, &
1735 filter_eps=threshold, flop=flop5)
1736 CALL dbcsr_scale(tmp1, -8.0_dp)
1737 CALL dbcsr_add_on_diag(tmp1, 16.0_dp)
1738 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 6.0_dp)
1739 CALL dbcsr_add(tmp1, tmp3, 1.0_dp, -5.0_dp)
1740 CALL dbcsr_scale(tmp1, 1/16.0_dp)
1748 oa = -40.0_dp/35.0_dp
1749 ob = 48.0_dp/35.0_dp
1750 oc = -64.0_dp/35.0_dp
1751 od = 128.0_dp/35.0_dp
1753 b = ob*(a + 1) - oc - a*(a + 1)**2
1754 c = ob - b - a*(a + 1)
1757 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, tmp1, 0.0_dp, tmp2, &
1758 filter_eps=threshold, flop=flop4)
1759 CALL dbcsr_add(tmp2, tmp1, 1.0_dp, a)
1761 CALL dbcsr_copy(tmp3, tmp2)
1762 CALL dbcsr_add(tmp3, tmp1, 1.0_dp, 1.0_dp)
1763 CALL dbcsr_add_on_diag(tmp3, b)
1765 CALL dbcsr_add_on_diag(tmp2, c)
1767 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp1, &
1768 filter_eps=threshold, flop=flop5)
1770 CALL dbcsr_add_on_diag(tmp1, d)
1772 CALL dbcsr_scale(tmp1, 35.0_dp/128.0_dp)
1774 cpabort(
"Illegal order value")
1778 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_sqrt, tmp1, 0.0_dp, tmp2, &
1779 filter_eps=threshold, flop=flop2)
1781 CALL dbcsr_copy(matrix_sqrt, tmp2)
1784 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp1, matrix_sqrt_inv, 0.0_dp, tmp2, &
1785 filter_eps=threshold, flop=flop3)
1787 CALL dbcsr_copy(matrix_sqrt_inv, tmp2)
1789 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
1794 conv = frob_matrix/frob_matrix_base
1796 IF (unit_nr > 0)
THEN
1797 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)')
"NS sqrt iter ", i, occ_matrix, &
1799 (flop1 + flop2 + flop3 + flop4 + flop5)/(1.0e6_dp*max(0.001_dp, t2 - t1))
1800 CALL m_flush(unit_nr)
1803 IF (abnormal_value(conv)) &
1804 cpabort(
"conv is an abnormal value (NaN/Inf).")
1807 IF ((conv*conv) < threshold)
THEN
1808 IF (
PRESENT(converged)) converged = .true.
1816 IF (unit_nr > 0)
THEN
1817 WRITE (unit_nr,
'(T6,A20)')
"Symmetrizing Results"
1819 CALL dbcsr_transposed(tmp1, matrix_sqrt_inv)
1820 CALL dbcsr_add(matrix_sqrt_inv, tmp1, 0.5_dp, 0.5_dp)
1821 CALL dbcsr_transposed(tmp1, matrix_sqrt)
1822 CALL dbcsr_add(matrix_sqrt, tmp1, 0.5_dp, 0.5_dp)
1826 CALL dbcsr_multiply(
"N",
"N", +1.0_dp, matrix_sqrt_inv, matrix_sqrt, 0.0_dp, tmp1, &
1827 filter_eps=threshold)
1828 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
1829 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
1830 frob_matrix = dbcsr_frobenius_norm(tmp1)
1831 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
1832 IF (unit_nr > 0)
THEN
1833 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3)')
"Final NS sqrt iter ", i, occ_matrix, &
1834 frob_matrix/frob_matrix_base
1835 WRITE (unit_nr,
'()')
1836 CALL m_flush(unit_nr)
1840 CALL dbcsr_scale(matrix_sqrt, 1/sqrt(scaling))
1841 CALL dbcsr_scale(matrix_sqrt_inv, sqrt(scaling))
1843 CALL dbcsr_release(tmp1)
1844 CALL dbcsr_release(tmp2)
1845 IF (order .GE. 4)
THEN
1846 CALL dbcsr_release(tmp3)
1849 CALL timestop(handle)
1870 eps_lanczos, max_iter_lanczos, symmetrize, converged)
1871 TYPE(dbcsr_type),
INTENT(INOUT) :: matrix_sqrt, matrix_sqrt_inv, matrix
1872 REAL(kind=dp),
INTENT(IN) :: threshold
1873 INTEGER,
INTENT(IN) :: order
1874 REAL(kind=dp),
INTENT(IN) :: eps_lanczos
1875 INTEGER,
INTENT(IN) :: max_iter_lanczos
1876 LOGICAL,
OPTIONAL :: symmetrize, converged
1878 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_sqrt_proot'
1880 INTEGER :: choose, handle, i, ii, j, unit_nr
1881 INTEGER(KIND=int_8) :: f, flop1, flop2, flop3, flop4, flop5
1882 LOGICAL :: arnoldi_converged, test, tsym
1883 REAL(kind=dp) :: conv, frob_matrix, frob_matrix_base, &
1884 max_ev, min_ev, occ_matrix, scaling, &
1886 TYPE(cp_logger_type),
POINTER :: logger
1887 TYPE(dbcsr_type) :: bk2a, matrixs, rmat, tmp1, tmp2, tmp3
1889 CALL cite_reference(richters2018)
1893 CALL timeset(routinen, handle)
1895 logger => cp_get_default_logger()
1896 IF (logger%para_env%is_source())
THEN
1897 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1902 IF (
PRESENT(converged)) converged = .false.
1903 IF (
PRESENT(symmetrize))
THEN
1910 CALL dbcsr_create(tmp1, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1911 CALL dbcsr_create(tmp2, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1912 CALL dbcsr_create(tmp3, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1913 CALL dbcsr_create(rmat, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1914 CALL dbcsr_create(matrixs, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1916 CALL dbcsr_copy(matrixs, matrix)
1919 CALL arnoldi_extremal(matrixs, max_ev, min_ev, threshold=eps_lanczos, &
1920 max_iter=max_iter_lanczos, converged=arnoldi_converged)
1921 IF (unit_nr > 0)
THEN
1923 WRITE (unit_nr,
'(T6,A,1X,L1,A,E12.3)')
"Lanczos converged: ", arnoldi_converged,
" threshold:", eps_lanczos
1924 WRITE (unit_nr,
'(T6,A,1X,E12.3,E12.3)')
"Est. extremal eigenvalues:", max_ev, min_ev
1925 WRITE (unit_nr,
'(T6,A,1X,E12.3)')
"Est. condition number :", max_ev/max(min_ev, epsilon(min_ev))
1929 scaling = 2.0_dp/(max_ev + min_ev + 100*eps_lanczos)
1930 CALL dbcsr_scale(matrixs, scaling)
1931 CALL dbcsr_filter(matrixs, threshold)
1936 CALL dbcsr_set(matrix_sqrt_inv, 0.0_dp)
1937 CALL dbcsr_add_on_diag(matrix_sqrt_inv, 1.0_dp)
1940 IF (unit_nr > 0)
THEN
1942 WRITE (unit_nr, *)
"Order=", order
1950 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_sqrt_inv, matrix_sqrt_inv, 0.0_dp, tmp1, &
1951 filter_eps=threshold, flop=flop1)
1952 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrixs, tmp1, 0.0_dp, rmat, &
1953 filter_eps=threshold, flop=flop2)
1954 CALL dbcsr_scale(rmat, -1.0_dp)
1955 CALL dbcsr_add_on_diag(rmat, 1.0_dp)
1957 flop4 = 0; flop5 = 0
1958 CALL dbcsr_set(tmp1, 0.0_dp)
1959 CALL dbcsr_add_on_diag(tmp1, 2.0_dp)
1965 CALL dbcsr_copy(tmp2, rmat)
1968 CALL dbcsr_copy(tmp3, tmp2)
1969 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, rmat, 0.0_dp, tmp2, &
1970 filter_eps=threshold, flop=f)
1973 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 1.0_dp)
1976 CALL dbcsr_create(bk2a, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1977 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_sqrt_inv, matrixs, 0.0_dp, tmp3, &
1978 filter_eps=threshold, flop=flop1)
1979 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_sqrt_inv, tmp3, 0.0_dp, bk2a, &
1980 filter_eps=threshold, flop=flop2)
1981 CALL dbcsr_copy(rmat, bk2a)
1982 CALL dbcsr_add_on_diag(rmat, -1.0_dp)
1984 CALL dbcsr_set(tmp1, 0.0_dp)
1985 CALL dbcsr_add_on_diag(tmp1, 1.0_dp)
1987 CALL dbcsr_set(tmp2, 0.0_dp)
1988 CALL dbcsr_add_on_diag(tmp2, 1.0_dp)
1993 choose = product((/(ii, ii=1, order)/))/(product((/(ii, ii=1, j)/))*product((/(ii, ii=1, order - j)/)))
1994 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, -1.0_dp*(-1)**j*choose)
1995 IF (j .LT. order)
THEN
1997 CALL dbcsr_copy(tmp3, tmp2)
1998 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, tmp3, bk2a, 0.0_dp, tmp2, &
1999 filter_eps=threshold, flop=f)
2003 CALL dbcsr_release(bk2a)
2006 CALL dbcsr_copy(tmp3, matrix_sqrt_inv)
2007 CALL dbcsr_multiply(
"N",
"N", 0.5_dp, tmp3, tmp1, 0.0_dp, matrix_sqrt_inv, &
2008 filter_eps=threshold, flop=flop4)
2010 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
2015 conv = dbcsr_frobenius_norm(rmat)
2017 IF (unit_nr > 0)
THEN
2018 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)')
"PROOT sqrt iter ", i, occ_matrix, &
2020 (flop1 + flop2 + flop3 + flop4 + flop5)/(1.0e6_dp*max(0.001_dp, t2 - t1))
2021 CALL m_flush(unit_nr)
2024 IF (abnormal_value(conv)) &
2025 cpabort(
"conv is an abnormal value (NaN/Inf).")
2028 IF ((conv*conv) < threshold)
THEN
2029 IF (
PRESENT(converged)) converged = .true.
2036 CALL dbcsr_scale(matrix_sqrt_inv, sqrt(scaling))
2037 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_sqrt_inv, matrix, 0.0_dp, matrix_sqrt, &
2038 filter_eps=threshold, flop=flop5)
2042 IF (unit_nr > 0)
THEN
2043 WRITE (unit_nr,
'(A20)')
"SYMMETRIZING RESULTS"
2045 CALL dbcsr_transposed(tmp1, matrix_sqrt_inv)
2046 CALL dbcsr_add(matrix_sqrt_inv, tmp1, 0.5_dp, 0.5_dp)
2047 CALL dbcsr_transposed(tmp1, matrix_sqrt)
2048 CALL dbcsr_add(matrix_sqrt, tmp1, 0.5_dp, 0.5_dp)
2053 CALL dbcsr_multiply(
"N",
"N", +1.0_dp, matrix_sqrt_inv, matrix_sqrt, 0.0_dp, tmp1, &
2054 filter_eps=threshold)
2055 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
2056 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
2057 frob_matrix = dbcsr_frobenius_norm(tmp1)
2058 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
2059 IF (unit_nr > 0)
THEN
2060 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3)')
"Final PROOT S^{-1/2} S^{1/2}-Eins error ", i, occ_matrix, &
2061 frob_matrix/frob_matrix_base
2062 WRITE (unit_nr,
'()')
2063 CALL m_flush(unit_nr)
2067 CALL dbcsr_multiply(
"N",
"N", +1.0_dp, matrix_sqrt_inv, matrix_sqrt_inv, 0.0_dp, tmp2, &
2068 filter_eps=threshold)
2069 CALL dbcsr_multiply(
"N",
"N", +1.0_dp, tmp2, matrix, 0.0_dp, tmp1, &
2070 filter_eps=threshold)
2071 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
2072 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
2073 frob_matrix = dbcsr_frobenius_norm(tmp1)
2074 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
2075 IF (unit_nr > 0)
THEN
2076 WRITE (unit_nr,
'(T6,A,1X,I3,1X,F10.8,E12.3)')
"Final PROOT S^{-1/2} S^{-1/2} S-Eins error ", i, occ_matrix, &
2077 frob_matrix/frob_matrix_base
2078 WRITE (unit_nr,
'()')
2079 CALL m_flush(unit_nr)
2083 CALL dbcsr_release(tmp1)
2084 CALL dbcsr_release(tmp2)
2085 CALL dbcsr_release(tmp3)
2086 CALL dbcsr_release(rmat)
2087 CALL dbcsr_release(matrixs)
2089 CALL timestop(handle)
2102 TYPE(dbcsr_type),
INTENT(INOUT) ::
matrix_exp, matrix
2103 REAL(kind=dp),
INTENT(IN) :: omega, alpha, threshold
2105 CHARACTER(LEN=*),
PARAMETER :: routinen =
'matrix_exponential'
2106 REAL(dp),
PARAMETER :: one = 1.0_dp, toll = 1.e-17_dp, &
2109 INTEGER :: handle, i, k, unit_nr
2110 REAL(dp) :: factorial, norm_c, norm_d, norm_scalar
2111 TYPE(cp_logger_type),
POINTER :: logger
2112 TYPE(dbcsr_type) :: b, b_square, c, d, d_product
2114 CALL timeset(routinen, handle)
2116 logger => cp_get_default_logger()
2117 IF (logger%para_env%is_source())
THEN
2118 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
2124 norm_scalar = abs(alpha)*dbcsr_frobenius_norm(matrix)
2129 IF ((norm_scalar/2.0_dp**k) <= one)
EXIT
2134 CALL dbcsr_create(c, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2135 CALL dbcsr_copy(c, matrix)
2136 CALL dbcsr_scale(c, alpha_scalar=alpha/2.0_dp**k)
2138 CALL dbcsr_create(d, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2139 CALL dbcsr_copy(d, c)
2146 CALL dbcsr_create(b, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2147 CALL dbcsr_copy(b, d)
2148 CALL dbcsr_add_on_diag(b, alpha_scalar=one)
2153 norm_c = toll*dbcsr_frobenius_norm(matrix)
2156 CALL dbcsr_create(d_product, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2161 CALL dbcsr_multiply(
"N",
"N", one, d, c, &
2162 zero, d_product, filter_eps=threshold)
2165 CALL dbcsr_copy(d, d_product)
2169 CALL dbcsr_add(b, d_product, one, factorial)
2172 norm_d = factorial*dbcsr_frobenius_norm(d)
2173 IF (norm_d < norm_c)
EXIT
2177 CALL dbcsr_create(b_square, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2180 CALL dbcsr_multiply(
"N",
"N", one, b, b, &
2181 zero, b_square, filter_eps=threshold)
2183 CALL dbcsr_copy(b, b_square)
2190 CALL dbcsr_scale(
matrix_exp, alpha_scalar=omega)
2193 CALL dbcsr_release(b)
2194 CALL dbcsr_release(c)
2195 CALL dbcsr_release(d)
2196 CALL dbcsr_release(d_product)
2197 CALL dbcsr_release(b_square)
2199 CALL timestop(handle)
2213 SUBROUTINE purify_mcweeny_orth(matrix_p, threshold, max_steps)
2214 TYPE(dbcsr_type),
DIMENSION(:) :: matrix_p
2215 REAL(kind=dp) :: threshold
2216 INTEGER :: max_steps
2218 CHARACTER(LEN=*),
PARAMETER :: routinen =
'purify_mcweeny_orth'
2220 INTEGER :: handle, i, ispin, unit_nr
2221 REAL(kind=dp) :: frob_norm, trace
2222 TYPE(cp_logger_type),
POINTER :: logger
2223 TYPE(dbcsr_type) :: matrix_pp, matrix_tmp
2225 CALL timeset(routinen, handle)
2226 logger => cp_get_default_logger()
2227 IF (logger%para_env%is_source())
THEN
2228 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
2233 CALL dbcsr_create(matrix_pp, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2234 CALL dbcsr_create(matrix_tmp, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2235 CALL dbcsr_trace(matrix_p(1), trace)
2237 DO ispin = 1,
SIZE(matrix_p)
2239 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_p(ispin), matrix_p(ispin), &
2240 0.0_dp, matrix_pp, filter_eps=threshold)
2243 CALL dbcsr_copy(matrix_tmp, matrix_pp)
2244 CALL dbcsr_add(matrix_tmp, matrix_p(ispin), 1.0_dp, -1.0_dp)
2245 frob_norm = dbcsr_frobenius_norm(matrix_tmp)
2246 IF (unit_nr > 0)
WRITE (unit_nr,
'(t3,a,f16.8)')
"McWeeny: Deviation of idempotency", frob_norm
2247 IF (unit_nr > 0)
CALL m_flush(unit_nr)
2250 CALL dbcsr_copy(matrix_tmp, matrix_pp)
2251 CALL dbcsr_multiply(
"N",
"N", -2.0_dp, matrix_pp, matrix_p(ispin), &
2252 3.0_dp, matrix_tmp, filter_eps=threshold)
2253 CALL dbcsr_copy(matrix_p(ispin), matrix_tmp)
2256 IF (frob_norm*frob_norm < trace*threshold)
EXIT
2260 CALL dbcsr_release(matrix_pp)
2261 CALL dbcsr_release(matrix_tmp)
2262 CALL timestop(handle)
2263 END SUBROUTINE purify_mcweeny_orth
2276 SUBROUTINE purify_mcweeny_nonorth(matrix_p, matrix_s, threshold, max_steps)
2277 TYPE(dbcsr_type),
DIMENSION(:) :: matrix_p
2278 TYPE(dbcsr_type) :: matrix_s
2279 REAL(kind=dp) :: threshold
2280 INTEGER :: max_steps
2282 CHARACTER(LEN=*),
PARAMETER :: routinen =
'purify_mcweeny_nonorth'
2284 INTEGER :: handle, i, ispin, unit_nr
2285 REAL(kind=dp) :: frob_norm, trace
2286 TYPE(cp_logger_type),
POINTER :: logger
2287 TYPE(dbcsr_type) :: matrix_ps, matrix_psp, matrix_test
2289 CALL timeset(routinen, handle)
2291 logger => cp_get_default_logger()
2292 IF (logger%para_env%is_source())
THEN
2293 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
2298 CALL dbcsr_create(matrix_ps, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2299 CALL dbcsr_create(matrix_psp, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2300 CALL dbcsr_create(matrix_test, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2302 DO ispin = 1,
SIZE(matrix_p)
2304 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_p(ispin), matrix_s, &
2305 0.0_dp, matrix_ps, filter_eps=threshold)
2306 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, matrix_ps, matrix_p(ispin), &
2307 0.0_dp, matrix_psp, filter_eps=threshold)
2308 IF (i == 1)
CALL dbcsr_trace(matrix_ps, trace)
2311 CALL dbcsr_copy(matrix_test, matrix_psp)
2312 CALL dbcsr_add(matrix_test, matrix_p(ispin), 1.0_dp, -1.0_dp)
2313 frob_norm = dbcsr_frobenius_norm(matrix_test)
2314 IF (unit_nr > 0)
WRITE (unit_nr,
'(t3,a,2f16.8)')
"McWeeny: Deviation of idempotency", frob_norm
2315 IF (unit_nr > 0)
CALL m_flush(unit_nr)
2318 CALL dbcsr_copy(matrix_p(ispin), matrix_psp)
2319 CALL dbcsr_multiply(
"N",
"N", -2.0_dp, matrix_ps, matrix_psp, &
2320 3.0_dp, matrix_p(ispin), filter_eps=threshold)
2323 IF (frob_norm*frob_norm < trace*threshold)
EXIT
2327 CALL dbcsr_release(matrix_ps)
2328 CALL dbcsr_release(matrix_psp)
2329 CALL dbcsr_release(matrix_test)
2330 CALL timestop(handle)
2331 END SUBROUTINE purify_mcweeny_nonorth
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
arnoldi iteration using dbcsr
subroutine, public arnoldi_extremal(matrix_a, max_ev, min_ev, converged, threshold, max_iter)
simple wrapper to estimate extremal eigenvalues with arnoldi, using the old lanczos interface this hi...
collects all references to literature in CP2K as new algorithms / method are included from literature...
integer, save, public richters2018
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
Routines useful for iterative matrix calculations.
recursive subroutine, public determinant(matrix, det, threshold)
Computes the determinant of a symmetric positive definite matrix using the trace of the matrix logari...
subroutine, public matrix_sqrt_newton_schulz(matrix_sqrt, matrix_sqrt_inv, matrix, threshold, order, eps_lanczos, max_iter_lanczos, symmetrize, converged)
compute the sqrt of a matrix via the sign function and the corresponding Newton-Schulz iterations the...
subroutine, public matrix_sign_submatrix(matrix_sign, matrix, threshold, sign_order, submatrix_sign_method)
Submatrix method.
subroutine, public invert_taylor(matrix_inverse, matrix, threshold, use_inv_as_guess, norm_convergence, filter_eps, accelerator_order, max_iter_lanczos, eps_lanczos, silent)
invert a symmetric positive definite diagonally dominant matrix
subroutine, public matrix_exponential(matrix_exp, matrix, omega, alpha, threshold)
...
subroutine, public matrix_sign_submatrix_mu_adjust(matrix_sign, matrix, mu, nelectron, threshold, variant)
Submatrix method with internal adjustment of chemical potential.
subroutine, public invert_hotelling(matrix_inverse, matrix, threshold, use_inv_as_guess, norm_convergence, filter_eps, accelerator_order, max_iter_lanczos, eps_lanczos, silent)
invert a symmetric positive definite matrix by Hotelling's method explicit symmetrization makes this ...
subroutine, public matrix_sign_proot(matrix_sign, matrix, threshold, sign_order)
compute the sign a matrix using the general algorithm for the p-th root of Richters et al....
subroutine, public matrix_sign_newton_schulz(matrix_sign, matrix, threshold, sign_order)
compute the sign a matrix using Newton-Schulz iterations
subroutine, public matrix_sqrt_proot(matrix_sqrt, matrix_sqrt_inv, matrix, threshold, order, eps_lanczos, max_iter_lanczos, symmetrize, converged)
compute the sqrt of a matrix via the general algorithm for the p-th root of Richters et al....
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition of mathematical constants and functions.
real(kind=dp), dimension(0:maxfac), parameter, public ifac
Collection of simple mathematical functions and subroutines.
logical function, public abnormal_value(a)
determines if a value is not normal (e.g. for Inf and Nan) based on IO to work also under optimizatio...
Routines for calculating a complex matrix exponential.
Interface to the message passing library MPI.