(git:374b731)
Loading...
Searching...
No Matches
iterate_matrix.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7! **************************************************************************************************
8!> \brief Routines useful for iterative matrix calculations
9!> \par History
10!> 2010.10 created [Joost VandeVondele]
11!> \author Joost VandeVondele
12! **************************************************************************************************
14 USE arnoldi_api, ONLY: arnoldi_data_type,&
16 USE bibliography, ONLY: richters2018,&
17 cite_reference
21 USE dbcsr_api, ONLY: &
22 dbcsr_add, dbcsr_add_on_diag, dbcsr_copy, dbcsr_create, dbcsr_desymmetrize, &
23 dbcsr_distribution_get, dbcsr_distribution_type, dbcsr_filter, dbcsr_frobenius_norm, &
24 dbcsr_gershgorin_norm, dbcsr_get_diag, dbcsr_get_info, dbcsr_get_matrix_type, &
25 dbcsr_get_occupation, dbcsr_multiply, dbcsr_norm, dbcsr_norm_maxabsnorm, dbcsr_p_type, &
26 dbcsr_release, dbcsr_scale, dbcsr_set, dbcsr_set_diag, dbcsr_trace, dbcsr_transposed, &
27 dbcsr_type, dbcsr_type_no_symmetry
32 USE kinds, ONLY: dp,&
33 int_8
34 USE machine, ONLY: m_flush,&
36 USE mathconstants, ONLY: ifac
37 USE mathlib, ONLY: abnormal_value
40#include "./base/base_uses.f90"
41
42 IMPLICIT NONE
43
44 PRIVATE
45
46 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'iterate_matrix'
47
48 TYPE :: eigbuf
49 REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: eigvals
50 REAL(KIND=dp), DIMENSION(:, :), ALLOCATABLE :: eigvecs
51 END TYPE eigbuf
52
54 MODULE PROCEDURE purify_mcweeny_orth, purify_mcweeny_nonorth
55 END INTERFACE
56
60
61CONTAINS
62
63! *****************************************************************************
64!> \brief Computes the determinant of a symmetric positive definite matrix
65!> using the trace of the matrix logarithm via Mercator series:
66!> det(A) = det(S)det(I+X)det(S), where S=diag(sqrt(Aii),..,sqrt(Ann))
67!> det(I+X) = Exp(Trace(Ln(I+X)))
68!> Ln(I+X) = X - X^2/2 + X^3/3 - X^4/4 + ..
69!> The series converges only if the Frobenius norm of X is less than 1.
70!> If it is more than one we compute (recursevily) the determinant of
71!> the square root of (I+X).
72!> \param matrix ...
73!> \param det - determinant
74!> \param threshold ...
75!> \par History
76!> 2015.04 created [Rustam Z Khaliullin]
77!> \author Rustam Z. Khaliullin
78! **************************************************************************************************
79 RECURSIVE SUBROUTINE determinant(matrix, det, threshold)
80
81 TYPE(dbcsr_type), INTENT(INOUT) :: matrix
82 REAL(kind=dp), INTENT(INOUT) :: det
83 REAL(kind=dp), INTENT(IN) :: threshold
84
85 CHARACTER(LEN=*), PARAMETER :: routinen = 'determinant'
86
87 INTEGER :: handle, i, max_iter_lanczos, nsize, &
88 order_lanczos, sign_iter, unit_nr
89 INTEGER(KIND=int_8) :: flop1
90 INTEGER, SAVE :: recursion_depth = 0
91 REAL(kind=dp) :: det0, eps_lanczos, frobnorm, maxnorm, &
92 occ_matrix, t1, t2, trace
93 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: diagonal
94 TYPE(cp_logger_type), POINTER :: logger
95 TYPE(dbcsr_type) :: tmp1, tmp2, tmp3
96
97 CALL timeset(routinen, handle)
98
99 ! get a useful output_unit
100 logger => cp_get_default_logger()
101 IF (logger%para_env%is_source()) THEN
102 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
103 ELSE
104 unit_nr = -1
105 END IF
106
107 ! Note: tmp1 and tmp2 have the same matrix type as the
108 ! initial matrix (tmp3 does not have symmetry constraints)
109 ! this might lead to uninteded results with anti-symmetric
110 ! matrices
111 CALL dbcsr_create(tmp1, template=matrix, &
112 matrix_type=dbcsr_type_no_symmetry)
113 CALL dbcsr_create(tmp2, template=matrix, &
114 matrix_type=dbcsr_type_no_symmetry)
115 CALL dbcsr_create(tmp3, template=matrix, &
116 matrix_type=dbcsr_type_no_symmetry)
117
118 ! compute the product of the diagonal elements
119 block
120 TYPE(mp_comm_type) :: group
121 INTEGER :: group_handle
122 CALL dbcsr_get_info(matrix, nfullrows_total=nsize, group=group_handle)
123 CALL group%set_handle(group_handle)
124 ALLOCATE (diagonal(nsize))
125 CALL dbcsr_get_diag(matrix, diagonal)
126 CALL group%sum(diagonal)
127 det = product(diagonal)
128 END block
129
130 ! create diagonal SQRTI matrix
131 diagonal(:) = 1.0_dp/(sqrt(diagonal(:)))
132 !ROLL CALL dbcsr_copy(tmp1,matrix)
133 CALL dbcsr_desymmetrize(matrix, tmp1)
134 CALL dbcsr_set(tmp1, 0.0_dp)
135 CALL dbcsr_set_diag(tmp1, diagonal)
136 CALL dbcsr_filter(tmp1, threshold)
137 DEALLOCATE (diagonal)
138
139 ! normalize the main diagonal, off-diagonal elements are scaled to
140 ! make the norm of the matrix less than 1
141 CALL dbcsr_multiply("N", "N", 1.0_dp, &
142 matrix, &
143 tmp1, &
144 0.0_dp, tmp3, &
145 filter_eps=threshold)
146 CALL dbcsr_multiply("N", "N", 1.0_dp, &
147 tmp1, &
148 tmp3, &
149 0.0_dp, tmp2, &
150 filter_eps=threshold)
151
152 ! subtract the main diagonal to create matrix X
153 CALL dbcsr_add_on_diag(tmp2, -1.0_dp)
154 frobnorm = dbcsr_frobenius_norm(tmp2)
155 IF (unit_nr > 0) THEN
156 IF (recursion_depth .EQ. 0) THEN
157 WRITE (unit_nr, '()')
158 ELSE
159 WRITE (unit_nr, '(T6,A28,1X,I15)') &
160 "Recursive iteration:", recursion_depth
161 END IF
162 WRITE (unit_nr, '(T6,A28,1X,F15.10)') &
163 "Frobenius norm:", frobnorm
164 CALL m_flush(unit_nr)
165 END IF
166
167 IF (frobnorm .GE. 1.0_dp) THEN
168
169 CALL dbcsr_add_on_diag(tmp2, 1.0_dp)
170 ! these controls should be provided as input
171 order_lanczos = 3
172 eps_lanczos = 1.0e-4_dp
173 max_iter_lanczos = 40
175 tmp3, & ! output sqrt
176 tmp1, & ! output sqrti
177 tmp2, & ! input original
178 threshold=threshold, &
179 order=order_lanczos, &
180 eps_lanczos=eps_lanczos, &
181 max_iter_lanczos=max_iter_lanczos)
182 recursion_depth = recursion_depth + 1
183 CALL determinant(tmp3, det0, threshold)
184 recursion_depth = recursion_depth - 1
185 det = det*det0*det0
186
187 ELSE
188
189 ! create accumulator
190 CALL dbcsr_copy(tmp1, tmp2)
191 ! re-create to make use of symmetry
192 !ROLL CALL dbcsr_create(tmp3,template=matrix)
193
194 IF (unit_nr > 0) WRITE (unit_nr, *)
195
196 ! initialize the sign of the term
197 sign_iter = -1
198 DO i = 1, 100
199
200 t1 = m_walltime()
201
202 ! multiply X^i by X
203 ! note that the first iteration evaluates X^2
204 ! because the trace of X^1 is zero by construction
205 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, tmp2, &
206 0.0_dp, tmp3, &
207 filter_eps=threshold, &
208 flop=flop1)
209 CALL dbcsr_copy(tmp1, tmp3)
210
211 ! get trace
212 CALL dbcsr_trace(tmp1, trace)
213 trace = trace*sign_iter/(1.0_dp*(i + 1))
214 sign_iter = -sign_iter
215
216 ! update the determinant
217 det = det*exp(trace)
218
219 occ_matrix = dbcsr_get_occupation(tmp1)
220 CALL dbcsr_norm(tmp1, &
221 dbcsr_norm_maxabsnorm, norm_scalar=maxnorm)
222
223 t2 = m_walltime()
224
225 IF (unit_nr > 0) THEN
226 WRITE (unit_nr, '(T6,A,1X,I3,1X,F7.5,F16.10,F10.3,F11.3)') &
227 "Determinant iter", i, occ_matrix, &
228 det, t2 - t1, &
229 flop1/(1.0e6_dp*max(0.001_dp, t2 - t1))
230 CALL m_flush(unit_nr)
231 END IF
232
233 ! exit if the trace is close to zero
234 IF (maxnorm < threshold) EXIT
235
236 END DO ! end iterations
237
238 IF (unit_nr > 0) THEN
239 WRITE (unit_nr, '()')
240 CALL m_flush(unit_nr)
241 END IF
242
243 END IF ! decide to do sqrt or not
244
245 IF (unit_nr > 0) THEN
246 IF (recursion_depth .EQ. 0) THEN
247 WRITE (unit_nr, '(T6,A28,1X,F15.10)') &
248 "Final determinant:", det
249 WRITE (unit_nr, '()')
250 ELSE
251 WRITE (unit_nr, '(T6,A28,1X,F15.10)') &
252 "Recursive determinant:", det
253 END IF
254 CALL m_flush(unit_nr)
255 END IF
256
257 CALL dbcsr_release(tmp1)
258 CALL dbcsr_release(tmp2)
259 CALL dbcsr_release(tmp3)
260
261 CALL timestop(handle)
262
263 END SUBROUTINE determinant
264
265! **************************************************************************************************
266!> \brief invert a symmetric positive definite diagonally dominant matrix
267!> \param matrix_inverse ...
268!> \param matrix ...
269!> \param threshold convergence threshold nased on the max abs
270!> \param use_inv_as_guess logical whether input can be used as guess for inverse
271!> \param norm_convergence convergence threshold for the 2-norm, useful for approximate solutions
272!> \param filter_eps filter_eps for matrix multiplications, if not passed nothing is filteres
273!> \param accelerator_order ...
274!> \param max_iter_lanczos ...
275!> \param eps_lanczos ...
276!> \param silent ...
277!> \par History
278!> 2010.10 created [Joost VandeVondele]
279!> 2011.10 guess option added [Rustam Z Khaliullin]
280!> \author Joost VandeVondele
281! **************************************************************************************************
282 SUBROUTINE invert_taylor(matrix_inverse, matrix, threshold, use_inv_as_guess, &
283 norm_convergence, filter_eps, accelerator_order, &
284 max_iter_lanczos, eps_lanczos, silent)
285
286 TYPE(dbcsr_type), INTENT(INOUT), TARGET :: matrix_inverse, matrix
287 REAL(kind=dp), INTENT(IN) :: threshold
288 LOGICAL, INTENT(IN), OPTIONAL :: use_inv_as_guess
289 REAL(kind=dp), INTENT(IN), OPTIONAL :: norm_convergence, filter_eps
290 INTEGER, INTENT(IN), OPTIONAL :: accelerator_order, max_iter_lanczos
291 REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_lanczos
292 LOGICAL, INTENT(IN), OPTIONAL :: silent
293
294 CHARACTER(LEN=*), PARAMETER :: routinen = 'invert_Taylor'
295
296 INTEGER :: accelerator_type, handle, i, &
297 my_max_iter_lanczos, nrows, unit_nr
298 INTEGER(KIND=int_8) :: flop2
299 LOGICAL :: converged, use_inv_guess
300 REAL(kind=dp) :: coeff, convergence, maxnorm_matrix, &
301 my_eps_lanczos, occ_matrix, t1, t2
302 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: p_diagonal
303 TYPE(cp_logger_type), POINTER :: logger
304 TYPE(dbcsr_type), TARGET :: tmp1, tmp2, tmp3_sym
305
306 CALL timeset(routinen, handle)
307
308 logger => cp_get_default_logger()
309 IF (logger%para_env%is_source()) THEN
310 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
311 ELSE
312 unit_nr = -1
313 END IF
314 IF (PRESENT(silent)) THEN
315 IF (silent) unit_nr = -1
316 END IF
317
318 convergence = threshold
319 IF (PRESENT(norm_convergence)) convergence = norm_convergence
320
321 accelerator_type = 0
322 IF (PRESENT(accelerator_order)) accelerator_type = accelerator_order
323 IF (accelerator_type .GT. 1) accelerator_type = 1
324
325 use_inv_guess = .false.
326 IF (PRESENT(use_inv_as_guess)) use_inv_guess = use_inv_as_guess
327
328 my_max_iter_lanczos = 64
329 my_eps_lanczos = 1.0e-3_dp
330 IF (PRESENT(max_iter_lanczos)) my_max_iter_lanczos = max_iter_lanczos
331 IF (PRESENT(eps_lanczos)) my_eps_lanczos = eps_lanczos
332
333 CALL dbcsr_create(tmp1, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
334 CALL dbcsr_create(tmp2, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
335 CALL dbcsr_create(tmp3_sym, template=matrix_inverse)
336
337 CALL dbcsr_get_info(matrix, nfullrows_total=nrows)
338 ALLOCATE (p_diagonal(nrows))
339
340 ! generate the initial guess
341 IF (.NOT. use_inv_guess) THEN
342
343 SELECT CASE (accelerator_type)
344 CASE (0)
345 ! use tmp1 to hold off-diagonal elements
346 CALL dbcsr_desymmetrize(matrix, tmp1)
347 p_diagonal(:) = 0.0_dp
348 CALL dbcsr_set_diag(tmp1, p_diagonal)
349 !CALL dbcsr_print(tmp1)
350 ! invert the main diagonal
351 CALL dbcsr_get_diag(matrix, p_diagonal)
352 DO i = 1, nrows
353 IF (p_diagonal(i) .NE. 0.0_dp) THEN
354 p_diagonal(i) = 1.0_dp/p_diagonal(i)
355 END IF
356 END DO
357 CALL dbcsr_set(matrix_inverse, 0.0_dp)
358 CALL dbcsr_add_on_diag(matrix_inverse, 1.0_dp)
359 CALL dbcsr_set_diag(matrix_inverse, p_diagonal)
360 CASE DEFAULT
361 cpabort("Illegal accelerator order")
362 END SELECT
363
364 ELSE
365
366 cpabort("Guess is NYI")
367
368 END IF
369
370 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, matrix_inverse, &
371 0.0_dp, tmp2, filter_eps=filter_eps)
372
373 IF (unit_nr > 0) WRITE (unit_nr, *)
374
375 ! scale the approximate inverse to be within the convergence radius
376 t1 = m_walltime()
377
378 ! done with the initial guess, start iterations
379 converged = .false.
380 CALL dbcsr_desymmetrize(matrix_inverse, tmp1)
381 coeff = 1.0_dp
382 DO i = 1, 100
383
384 ! coeff = +/- 1
385 coeff = -1.0_dp*coeff
386 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, tmp2, 0.0_dp, &
387 tmp3_sym, &
388 flop=flop2, filter_eps=filter_eps)
389 !flop=flop2)
390 CALL dbcsr_add(matrix_inverse, tmp3_sym, 1.0_dp, coeff)
391 CALL dbcsr_release(tmp1)
392 CALL dbcsr_create(tmp1, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
393 CALL dbcsr_desymmetrize(tmp3_sym, tmp1)
394
395 ! for the convergence check
396 CALL dbcsr_norm(tmp3_sym, &
397 dbcsr_norm_maxabsnorm, norm_scalar=maxnorm_matrix)
398
399 t2 = m_walltime()
400 occ_matrix = dbcsr_get_occupation(matrix_inverse)
401
402 IF (unit_nr > 0) THEN
403 WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)') "Taylor iter", i, occ_matrix, &
404 maxnorm_matrix, t2 - t1, &
405 flop2/(1.0e6_dp*max(0.001_dp, t2 - t1))
406 CALL m_flush(unit_nr)
407 END IF
408
409 IF (maxnorm_matrix < convergence) THEN
410 converged = .true.
411 EXIT
412 END IF
413
414 t1 = m_walltime()
415
416 END DO
417
418 !last convergence check
419 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix, matrix_inverse, 0.0_dp, tmp1, &
420 filter_eps=filter_eps)
421 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
422 !frob_matrix = dbcsr_frobenius_norm(tmp1)
423 CALL dbcsr_norm(tmp1, dbcsr_norm_maxabsnorm, norm_scalar=maxnorm_matrix)
424 IF (unit_nr > 0) THEN
425 WRITE (unit_nr, '(T6,A,E12.5)') "Final Taylor error", maxnorm_matrix
426 WRITE (unit_nr, '()')
427 CALL m_flush(unit_nr)
428 END IF
429 IF (maxnorm_matrix > convergence) THEN
430 converged = .false.
431 IF (unit_nr > 0) THEN
432 WRITE (unit_nr, *) 'Final convergence check failed'
433 END IF
434 END IF
435
436 IF (.NOT. converged) THEN
437 cpabort("Taylor inversion did not converge")
438 END IF
439
440 CALL dbcsr_release(tmp1)
441 CALL dbcsr_release(tmp2)
442 CALL dbcsr_release(tmp3_sym)
443
444 DEALLOCATE (p_diagonal)
445
446 CALL timestop(handle)
447
448 END SUBROUTINE invert_taylor
449
450! **************************************************************************************************
451!> \brief invert a symmetric positive definite matrix by Hotelling's method
452!> explicit symmetrization makes this code not suitable for other matrix types
453!> Currently a bit messy with the options, to to be cleaned soon
454!> \param matrix_inverse ...
455!> \param matrix ...
456!> \param threshold convergence threshold nased on the max abs
457!> \param use_inv_as_guess logical whether input can be used as guess for inverse
458!> \param norm_convergence convergence threshold for the 2-norm, useful for approximate solutions
459!> \param filter_eps filter_eps for matrix multiplications, if not passed nothing is filteres
460!> \param accelerator_order ...
461!> \param max_iter_lanczos ...
462!> \param eps_lanczos ...
463!> \param silent ...
464!> \par History
465!> 2010.10 created [Joost VandeVondele]
466!> 2011.10 guess option added [Rustam Z Khaliullin]
467!> \author Joost VandeVondele
468! **************************************************************************************************
469 SUBROUTINE invert_hotelling(matrix_inverse, matrix, threshold, use_inv_as_guess, &
470 norm_convergence, filter_eps, accelerator_order, &
471 max_iter_lanczos, eps_lanczos, silent)
472
473 TYPE(dbcsr_type), INTENT(INOUT), TARGET :: matrix_inverse, matrix
474 REAL(kind=dp), INTENT(IN) :: threshold
475 LOGICAL, INTENT(IN), OPTIONAL :: use_inv_as_guess
476 REAL(kind=dp), INTENT(IN), OPTIONAL :: norm_convergence, filter_eps
477 INTEGER, INTENT(IN), OPTIONAL :: accelerator_order, max_iter_lanczos
478 REAL(kind=dp), INTENT(IN), OPTIONAL :: eps_lanczos
479 LOGICAL, INTENT(IN), OPTIONAL :: silent
480
481 CHARACTER(LEN=*), PARAMETER :: routinen = 'invert_Hotelling'
482
483 INTEGER :: accelerator_type, handle, i, &
484 my_max_iter_lanczos, unit_nr
485 INTEGER(KIND=int_8) :: flop1, flop2
486 LOGICAL :: arnoldi_converged, converged, &
487 use_inv_guess
488 REAL(kind=dp) :: convergence, frob_matrix, gershgorin_norm, max_ev, maxnorm_matrix, min_ev, &
489 my_eps_lanczos, my_filter_eps, occ_matrix, scalingf, t1, t2
490 TYPE(cp_logger_type), POINTER :: logger
491 TYPE(dbcsr_type), TARGET :: tmp1, tmp2
492
493 !TYPE(arnoldi_data_type) :: my_arnoldi
494 !TYPE(dbcsr_p_type), DIMENSION(1) :: mymat
495
496 CALL timeset(routinen, handle)
497
498 logger => cp_get_default_logger()
499 IF (logger%para_env%is_source()) THEN
500 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
501 ELSE
502 unit_nr = -1
503 END IF
504 IF (PRESENT(silent)) THEN
505 IF (silent) unit_nr = -1
506 END IF
507
508 convergence = threshold
509 IF (PRESENT(norm_convergence)) convergence = norm_convergence
510
511 accelerator_type = 1
512 IF (PRESENT(accelerator_order)) accelerator_type = accelerator_order
513 IF (accelerator_type .GT. 1) accelerator_type = 1
514
515 use_inv_guess = .false.
516 IF (PRESENT(use_inv_as_guess)) use_inv_guess = use_inv_as_guess
517
518 my_max_iter_lanczos = 64
519 my_eps_lanczos = 1.0e-3_dp
520 IF (PRESENT(max_iter_lanczos)) my_max_iter_lanczos = max_iter_lanczos
521 IF (PRESENT(eps_lanczos)) my_eps_lanczos = eps_lanczos
522
523 my_filter_eps = threshold
524 IF (PRESENT(filter_eps)) my_filter_eps = filter_eps
525
526 ! generate the initial guess
527 IF (.NOT. use_inv_guess) THEN
528
529 SELECT CASE (accelerator_type)
530 CASE (0)
531 gershgorin_norm = dbcsr_gershgorin_norm(matrix)
532 frob_matrix = dbcsr_frobenius_norm(matrix)
533 CALL dbcsr_set(matrix_inverse, 0.0_dp)
534 CALL dbcsr_add_on_diag(matrix_inverse, 1/min(gershgorin_norm, frob_matrix))
535 CASE (1)
536 ! initialize matrix to unity and use arnoldi (below) to scale it into the convergence range
537 CALL dbcsr_set(matrix_inverse, 0.0_dp)
538 CALL dbcsr_add_on_diag(matrix_inverse, 1.0_dp)
539 CASE DEFAULT
540 cpabort("Illegal accelerator order")
541 END SELECT
542
543 ! everything commutes, therefore our all products will be symmetric
544 CALL dbcsr_create(tmp1, template=matrix_inverse)
545
546 ELSE
547
548 ! It is unlikely that our guess will commute with the matrix, therefore the first product will
549 ! be non symmetric
550 CALL dbcsr_create(tmp1, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
551
552 END IF
553
554 CALL dbcsr_create(tmp2, template=matrix_inverse)
555
556 IF (unit_nr > 0) WRITE (unit_nr, *)
557
558 ! scale the approximate inverse to be within the convergence radius
559 t1 = m_walltime()
560
561 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_inverse, matrix, &
562 0.0_dp, tmp1, flop=flop1, filter_eps=my_filter_eps)
563
564 IF (accelerator_type == 1) THEN
565
566 ! scale the matrix to get into the convergence range
567 CALL arnoldi_extremal(tmp1, max_ev, min_ev, threshold=my_eps_lanczos, &
568 max_iter=my_max_iter_lanczos, converged=arnoldi_converged)
569 !mymat(1)%matrix => tmp1
570 !CALL setup_arnoldi_data(my_arnoldi, mymat, max_iter=30, threshold=1.0E-3_dp, selection_crit=1, &
571 ! nval_request=2, nrestarts=2, generalized_ev=.FALSE., iram=.TRUE.)
572 !CALL arnoldi_ev(mymat, my_arnoldi)
573 !max_eV = REAL(get_selected_ritz_val(my_arnoldi, 2), dp)
574 !min_eV = REAL(get_selected_ritz_val(my_arnoldi, 1), dp)
575 !CALL deallocate_arnoldi_data(my_arnoldi)
576
577 IF (unit_nr > 0) THEN
578 WRITE (unit_nr, *)
579 WRITE (unit_nr, '(T6,A,1X,L1,A,E12.3)') "Lanczos converged: ", arnoldi_converged, " threshold:", my_eps_lanczos
580 WRITE (unit_nr, '(T6,A,1X,E12.3,E12.3)') "Est. extremal eigenvalues:", max_ev, min_ev
581 WRITE (unit_nr, '(T6,A,1X,E12.3)') "Est. condition number :", max_ev/max(min_ev, epsilon(min_ev))
582 END IF
583
584 ! 2.0 would be the correct scaling however, we should make sure here, that we are in the convergence radius
585 scalingf = 1.9_dp/(max_ev + min_ev)
586 CALL dbcsr_scale(tmp1, scalingf)
587 CALL dbcsr_scale(matrix_inverse, scalingf)
588 min_ev = min_ev*scalingf
589
590 END IF
591
592 ! done with the initial guess, start iterations
593 converged = .false.
594 DO i = 1, 100
595
596 ! tmp1 = S^-1 S
597
598 ! for the convergence check
599 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
600 CALL dbcsr_norm(tmp1, &
601 dbcsr_norm_maxabsnorm, norm_scalar=maxnorm_matrix)
602 CALL dbcsr_add_on_diag(tmp1, +1.0_dp)
603
604 ! tmp2 = S^-1 S S^-1
605 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, matrix_inverse, 0.0_dp, tmp2, &
606 flop=flop2, filter_eps=my_filter_eps)
607 ! S^-1_{n+1} = 2 S^-1 - S^-1 S S^-1
608 CALL dbcsr_add(matrix_inverse, tmp2, 2.0_dp, -1.0_dp)
609
610 CALL dbcsr_filter(matrix_inverse, my_filter_eps)
611 t2 = m_walltime()
612 occ_matrix = dbcsr_get_occupation(matrix_inverse)
613
614 ! use the scalar form of the algorithm to trace the EV
615 IF (accelerator_type == 1) THEN
616 min_ev = min_ev*(2.0_dp - min_ev)
617 IF (PRESENT(norm_convergence)) maxnorm_matrix = abs(min_ev - 1.0_dp)
618 END IF
619
620 IF (unit_nr > 0) THEN
621 WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)') "Hotelling iter", i, occ_matrix, &
622 maxnorm_matrix, t2 - t1, &
623 (flop1 + flop2)/(1.0e6_dp*max(0.001_dp, t2 - t1))
624 CALL m_flush(unit_nr)
625 END IF
626
627 IF (maxnorm_matrix < convergence) THEN
628 converged = .true.
629 EXIT
630 END IF
631
632 ! scale the matrix for improved convergence
633 IF (accelerator_type == 1) THEN
634 min_ev = min_ev*2.0_dp/(min_ev + 1.0_dp)
635 CALL dbcsr_scale(matrix_inverse, 2.0_dp/(min_ev + 1.0_dp))
636 END IF
637
638 t1 = m_walltime()
639 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_inverse, matrix, &
640 0.0_dp, tmp1, flop=flop1, filter_eps=my_filter_eps)
641
642 END DO
643
644 IF (.NOT. converged) THEN
645 cpabort("Hotelling inversion did not converge")
646 END IF
647
648 ! try to symmetrize the output matrix
649 IF (dbcsr_get_matrix_type(matrix_inverse) == dbcsr_type_no_symmetry) THEN
650 CALL dbcsr_transposed(tmp2, matrix_inverse)
651 CALL dbcsr_add(matrix_inverse, tmp2, 0.5_dp, 0.5_dp)
652 END IF
653
654 IF (unit_nr > 0) THEN
655! WRITE(unit_nr,'(T6,A,1X,I3,1X,F10.8,E12.3)') "Final Hotelling ",i,occ_matrix,&
656! !frob_matrix/frob_matrix_base
657! maxnorm_matrix
658 WRITE (unit_nr, '()')
659 CALL m_flush(unit_nr)
660 END IF
661
662 CALL dbcsr_release(tmp1)
663 CALL dbcsr_release(tmp2)
664
665 CALL timestop(handle)
666
667 END SUBROUTINE invert_hotelling
668
669! **************************************************************************************************
670!> \brief compute the sign a matrix using Newton-Schulz iterations
671!> \param matrix_sign ...
672!> \param matrix ...
673!> \param threshold ...
674!> \param sign_order ...
675!> \par History
676!> 2010.10 created [Joost VandeVondele]
677!> 2019.05 extended to order byxond 2 [Robert Schade]
678!> \author Joost VandeVondele, Robert Schade
679! **************************************************************************************************
680 SUBROUTINE matrix_sign_newton_schulz(matrix_sign, matrix, threshold, sign_order)
681
682 TYPE(dbcsr_type), INTENT(INOUT) :: matrix_sign, matrix
683 REAL(kind=dp), INTENT(IN) :: threshold
684 INTEGER, INTENT(IN), OPTIONAL :: sign_order
685
686 CHARACTER(LEN=*), PARAMETER :: routinen = 'matrix_sign_Newton_Schulz'
687
688 INTEGER :: count, handle, i, order, unit_nr
689 INTEGER(KIND=int_8) :: flops
690 REAL(kind=dp) :: a0, a1, a2, a3, a4, a5, floptot, &
691 frob_matrix, frob_matrix_base, &
692 gersh_matrix, occ_matrix, prefactor, &
693 t1, t2
694 TYPE(cp_logger_type), POINTER :: logger
695 TYPE(dbcsr_type) :: tmp1, tmp2, tmp3, tmp4
696
697 CALL timeset(routinen, handle)
698
699 logger => cp_get_default_logger()
700 IF (logger%para_env%is_source()) THEN
701 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
702 ELSE
703 unit_nr = -1
704 END IF
705
706 IF (PRESENT(sign_order)) THEN
707 order = sign_order
708 ELSE
709 order = 2
710 END IF
711
712 CALL dbcsr_create(tmp1, template=matrix_sign)
713
714 CALL dbcsr_create(tmp2, template=matrix_sign)
715 IF (abs(order) .GE. 4) THEN
716 CALL dbcsr_create(tmp3, template=matrix_sign)
717 END IF
718 IF (abs(order) .GT. 4) THEN
719 CALL dbcsr_create(tmp4, template=matrix_sign)
720 END IF
721
722 CALL dbcsr_copy(matrix_sign, matrix)
723 CALL dbcsr_filter(matrix_sign, threshold)
724
725 ! scale the matrix to get into the convergence range
726 frob_matrix = dbcsr_frobenius_norm(matrix_sign)
727 gersh_matrix = dbcsr_gershgorin_norm(matrix_sign)
728 CALL dbcsr_scale(matrix_sign, 1/min(frob_matrix, gersh_matrix))
729
730 IF (unit_nr > 0) WRITE (unit_nr, *)
731
732 count = 0
733 DO i = 1, 100
734 floptot = 0_dp
735 t1 = m_walltime()
736 ! tmp1 = X * X
737 CALL dbcsr_multiply("N", "N", -1.0_dp, matrix_sign, matrix_sign, 0.0_dp, tmp1, &
738 filter_eps=threshold, flop=flops)
739 floptot = floptot + flops
740
741 ! check convergence (frob norm of what should be the identity matrix minus identity matrix)
742 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
743 CALL dbcsr_add_on_diag(tmp1, +1.0_dp)
744 frob_matrix = dbcsr_frobenius_norm(tmp1)
745
746 ! f(y) approx 1/sqrt(1-y)
747 ! f(y)=1+y/2+3/8*y^2+5/16*y^3+35/128*y^4+63/256*y^5+231/1024*y^6
748 ! f2(y)=1+y/2=1/2*(2+y)
749 ! f3(y)=1+y/2+3/8*y^2=3/8*(8/3+4/3*y+y^2)
750 ! f4(y)=1+y/2+3/8*y^2+5/16*y^3=5/16*(16/5+8/5*y+6/5*y^2+y^3)
751 ! f5(y)=1+y/2+3/8*y^2+5/16*y^3+35/128*y^4=35/128*(128/35+128/70*y+48/35*y^2+8/7*y^3+y^4)
752 ! z(y)=(y+a_0)*y+a_1
753 ! f5(y)=35/128*((z(y)+y+a_2)*z(y)+a_3)
754 ! =35/128*((a_1^2+a_1a_2+a_3)+(2*a_0a_1+a_1+a_0a_2)y+(a_0^2+a_0+2a_1+a_2)y^2+(2a_0+1)y^3+y^4)
755 ! a_0=1/14
756 ! a_1=23819/13720
757 ! a_2=1269/980-2a_1=-3734/1715
758 ! a_3=832591127/188238400
759 ! f6(y)=1+y/2+3/8*y^2+5/16*y^3+35/128*y^4+63/256*y^5
760 ! =63/256*(256/63 + (128 y)/63 + (32 y^2)/21 + (80 y^3)/63 + (10 y^4)/9 + y^5)
761 ! f7(y)=1+y/2+3/8*y^2+5/16*y^3+35/128*y^4+63/256*y^5+231/1024*y^6
762 ! =231/1024*(1024/231+512/231*y+128/77*y^2+320/231*y^3+40/33*y^4+12/11*y^5+y^6)
763 ! z(y)=(y+a_0)*y+a_1
764 ! w(y)=(y+a_2)*z(y)+a_3
765 ! f7(y)=(w(y)+z(y)+a_4)*w(y)+a_5
766 ! a_0= 1.3686502058092053653287666647611728507211996691324048468010382350359929055186612505791532871573242422
767 ! a_1= 1.7089671854477436685850554669524985556296280184497503489303331821456795715195510972774979091893741568
768 ! a_2=-1.3231956603546599107833121193066273961757451236778593922555836895814474509732067051246078326118696968
769 ! a_3= 3.9876642330847931291749479958277754186675336169578593000744380254770411483327581042259415937710270453
770 ! a_4=-3.7273299006476825027065704937541279833880400042556351139273912137942678919776364526511485025132991667
771 ! a_5= 4.9369932474103023792021351907971943220607580694533770325967170245194362399287150565595441897740173578
772 !
773 ! y=1-X*X
774
775 ! tmp1 = I-x*x
776 IF (order .EQ. 2) THEN
777 prefactor = 0.5_dp
778
779 ! update the above to 3*I-X*X
780 CALL dbcsr_add_on_diag(tmp1, +2.0_dp)
781 occ_matrix = dbcsr_get_occupation(matrix_sign)
782 ELSE IF (order .EQ. 3) THEN
783 ! with one multiplication
784 ! tmp1=y
785 CALL dbcsr_copy(tmp2, tmp1)
786 CALL dbcsr_scale(tmp1, 4.0_dp/3.0_dp)
787 CALL dbcsr_add_on_diag(tmp1, 8.0_dp/3.0_dp)
788
789 ! tmp2=y^2
790 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp2, 1.0_dp, tmp1, &
791 filter_eps=threshold, flop=flops)
792 floptot = floptot + flops
793 prefactor = 3.0_dp/8.0_dp
794
795 ELSE IF (order .EQ. 4) THEN
796 ! with two multiplications
797 ! tmp1=y
798 CALL dbcsr_copy(tmp3, tmp1)
799 CALL dbcsr_scale(tmp1, 8.0_dp/5.0_dp)
800 CALL dbcsr_add_on_diag(tmp1, 16.0_dp/5.0_dp)
801
802 !
803 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, tmp3, 0.0_dp, tmp2, &
804 filter_eps=threshold, flop=flops)
805 floptot = floptot + flops
806
807 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 6.0_dp/5.0_dp)
808
809 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 1.0_dp, tmp1, &
810 filter_eps=threshold, flop=flops)
811 floptot = floptot + flops
812
813 prefactor = 5.0_dp/16.0_dp
814 ELSE IF (order .EQ. -5) THEN
815 ! with three multiplications
816 ! tmp1=y
817 CALL dbcsr_copy(tmp3, tmp1)
818 CALL dbcsr_scale(tmp1, 128.0_dp/70.0_dp)
819 CALL dbcsr_add_on_diag(tmp1, 128.0_dp/35.0_dp)
820
821 !
822 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, tmp3, 0.0_dp, tmp2, &
823 filter_eps=threshold, flop=flops)
824 floptot = floptot + flops
825
826 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 48.0_dp/35.0_dp)
827
828 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp4, &
829 filter_eps=threshold, flop=flops)
830 floptot = floptot + flops
831
832 CALL dbcsr_add(tmp1, tmp4, 1.0_dp, 8.0_dp/7.0_dp)
833
834 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp4, tmp3, 1.0_dp, tmp1, &
835 filter_eps=threshold, flop=flops)
836 floptot = floptot + flops
837
838 prefactor = 35.0_dp/128.0_dp
839 ELSE IF (order .EQ. 5) THEN
840 ! with two multiplications
841 ! z(y)=(y+a_0)*y+a_1
842 ! f5(y)=35/128*((z(y)+y+a_2)*z(y)+a_3)
843 ! =35/128*((a_1^2+a_1a_2+a_3)+(2*a_0a_1+a_1+a_0a_2)y+(a_0^2+a_0+2a_1+a_2)y^2+(2a_0+1)y^3+y^4)
844 ! a_0=1/14
845 ! a_1=23819/13720
846 ! a_2=1269/980-2a_1=-3734/1715
847 ! a_3=832591127/188238400
848 a0 = 1.0_dp/14.0_dp
849 a1 = 23819.0_dp/13720.0_dp
850 a2 = -3734_dp/1715.0_dp
851 a3 = 832591127_dp/188238400.0_dp
852
853 ! tmp1=y
854 ! tmp3=z
855 CALL dbcsr_copy(tmp3, tmp1)
856 CALL dbcsr_add_on_diag(tmp3, a0)
857 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, tmp1, 0.0_dp, tmp2, &
858 filter_eps=threshold, flop=flops)
859 floptot = floptot + flops
860 CALL dbcsr_add_on_diag(tmp2, a1)
861
862 CALL dbcsr_add_on_diag(tmp1, a2)
863 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 1.0_dp)
864 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, tmp2, 0.0_dp, tmp3, &
865 filter_eps=threshold, flop=flops)
866 floptot = floptot + flops
867 CALL dbcsr_add_on_diag(tmp3, a3)
868 CALL dbcsr_copy(tmp1, tmp3)
869
870 prefactor = 35.0_dp/128.0_dp
871 ELSE IF (order .EQ. 6) THEN
872 ! with four multiplications
873 ! f6(y)=63/256*(256/63 + (128 y)/63 + (32 y^2)/21 + (80 y^3)/63 + (10 y^4)/9 + y^5)
874 ! tmp1=y
875 CALL dbcsr_copy(tmp3, tmp1)
876 CALL dbcsr_scale(tmp1, 128.0_dp/63.0_dp)
877 CALL dbcsr_add_on_diag(tmp1, 256.0_dp/63.0_dp)
878
879 !
880 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, tmp3, 0.0_dp, tmp2, &
881 filter_eps=threshold, flop=flops)
882 floptot = floptot + flops
883
884 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 32.0_dp/21.0_dp)
885
886 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp4, &
887 filter_eps=threshold, flop=flops)
888 floptot = floptot + flops
889
890 CALL dbcsr_add(tmp1, tmp4, 1.0_dp, 80.0_dp/63.0_dp)
891
892 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp4, tmp3, 0.0_dp, tmp2, &
893 filter_eps=threshold, flop=flops)
894 floptot = floptot + flops
895
896 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 10.0_dp/9.0_dp)
897
898 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 1.0_dp, tmp1, &
899 filter_eps=threshold, flop=flops)
900 floptot = floptot + flops
901
902 prefactor = 63.0_dp/256.0_dp
903 ELSE IF (order .EQ. 7) THEN
904 ! with three multiplications
905
906 a0 = 1.3686502058092053653287666647611728507211996691324048468010382350359929055186612505791532871573242422_dp
907 a1 = 1.7089671854477436685850554669524985556296280184497503489303331821456795715195510972774979091893741568_dp
908 a2 = -1.3231956603546599107833121193066273961757451236778593922555836895814474509732067051246078326118696968_dp
909 a3 = 3.9876642330847931291749479958277754186675336169578593000744380254770411483327581042259415937710270453_dp
910 a4 = -3.7273299006476825027065704937541279833880400042556351139273912137942678919776364526511485025132991667_dp
911 a5 = 4.9369932474103023792021351907971943220607580694533770325967170245194362399287150565595441897740173578_dp
912 ! =231/1024*(1024/231+512/231*y+128/77*y^2+320/231*y^3+40/33*y^4+12/11*y^5+y^6)
913 ! z(y)=(y+a_0)*y+a_1
914 ! w(y)=(y+a_2)*z(y)+a_3
915 ! f7(y)=(w(y)+z(y)+a_4)*w(y)+a_5
916
917 ! tmp1=y
918 ! tmp3=z
919 CALL dbcsr_copy(tmp3, tmp1)
920 CALL dbcsr_add_on_diag(tmp3, a0)
921 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, tmp1, 0.0_dp, tmp2, &
922 filter_eps=threshold, flop=flops)
923 floptot = floptot + flops
924 CALL dbcsr_add_on_diag(tmp2, a1)
925
926 ! tmp4=w
927 CALL dbcsr_copy(tmp4, tmp1)
928 CALL dbcsr_add_on_diag(tmp4, a2)
929 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp4, tmp2, 0.0_dp, tmp3, &
930 filter_eps=threshold, flop=flops)
931 floptot = floptot + flops
932 CALL dbcsr_add_on_diag(tmp3, a3)
933
934 CALL dbcsr_add(tmp2, tmp3, 1.0_dp, 1.0_dp)
935 CALL dbcsr_add_on_diag(tmp2, a4)
936 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp1, &
937 filter_eps=threshold, flop=flops)
938 floptot = floptot + flops
939 CALL dbcsr_add_on_diag(tmp1, a5)
940
941 prefactor = 231.0_dp/1024.0_dp
942 ELSE
943 cpabort("requested order is not implemented.")
944 END IF
945
946 ! tmp2 = X * prefactor *
947 CALL dbcsr_multiply("N", "N", prefactor, matrix_sign, tmp1, 0.0_dp, tmp2, &
948 filter_eps=threshold, flop=flops)
949 floptot = floptot + flops
950
951 ! done iterating
952 ! CALL dbcsr_filter(tmp2,threshold)
953 CALL dbcsr_copy(matrix_sign, tmp2)
954 t2 = m_walltime()
955
956 occ_matrix = dbcsr_get_occupation(matrix_sign)
957
958 IF (unit_nr > 0) THEN
959 WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)') "NS sign iter ", i, occ_matrix, &
960 frob_matrix/frob_matrix_base, t2 - t1, &
961 floptot/(1.0e6_dp*max(0.001_dp, t2 - t1))
962 CALL m_flush(unit_nr)
963 END IF
964
965 ! frob_matrix/frob_matrix_base < SQRT(threshold)
966 IF (frob_matrix*frob_matrix < (threshold*frob_matrix_base*frob_matrix_base)) EXIT
967
968 END DO
969
970 ! this check is not really needed
971 CALL dbcsr_multiply("N", "N", +1.0_dp, matrix_sign, matrix_sign, 0.0_dp, tmp1, &
972 filter_eps=threshold)
973 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
974 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
975 frob_matrix = dbcsr_frobenius_norm(tmp1)
976 occ_matrix = dbcsr_get_occupation(matrix_sign)
977 IF (unit_nr > 0) THEN
978 WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3)') "Final NS sign iter", i, occ_matrix, &
979 frob_matrix/frob_matrix_base
980 WRITE (unit_nr, '()')
981 CALL m_flush(unit_nr)
982 END IF
983
984 CALL dbcsr_release(tmp1)
985 CALL dbcsr_release(tmp2)
986 IF (abs(order) .GE. 4) THEN
987 CALL dbcsr_release(tmp3)
988 END IF
989 IF (abs(order) .GT. 4) THEN
990 CALL dbcsr_release(tmp4)
991 END IF
992
993 CALL timestop(handle)
994
995 END SUBROUTINE matrix_sign_newton_schulz
996
997 ! **************************************************************************************************
998!> \brief compute the sign a matrix using the general algorithm for the p-th root of Richters et al.
999!> Commun. Comput. Phys., 25 (2019), pp. 564-585.
1000!> \param matrix_sign ...
1001!> \param matrix ...
1002!> \param threshold ...
1003!> \param sign_order ...
1004!> \par History
1005!> 2019.03 created [Robert Schade]
1006!> \author Robert Schade
1007! **************************************************************************************************
1008 SUBROUTINE matrix_sign_proot(matrix_sign, matrix, threshold, sign_order)
1009
1010 TYPE(dbcsr_type), INTENT(INOUT) :: matrix_sign, matrix
1011 REAL(kind=dp), INTENT(IN) :: threshold
1012 INTEGER, INTENT(IN), OPTIONAL :: sign_order
1013
1014 CHARACTER(LEN=*), PARAMETER :: routinen = 'matrix_sign_proot'
1015
1016 INTEGER :: handle, order, unit_nr
1017 INTEGER(KIND=int_8) :: flop0, flop1, flop2
1018 LOGICAL :: converged, symmetrize
1019 REAL(kind=dp) :: frob_matrix, frob_matrix_base, occ_matrix
1020 TYPE(cp_logger_type), POINTER :: logger
1021 TYPE(dbcsr_type) :: matrix2, matrix_sqrt, matrix_sqrt_inv, &
1022 tmp1, tmp2
1023
1024 CALL cite_reference(richters2018)
1025
1026 CALL timeset(routinen, handle)
1027
1028 logger => cp_get_default_logger()
1029 IF (logger%para_env%is_source()) THEN
1030 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1031 ELSE
1032 unit_nr = -1
1033 END IF
1034
1035 IF (PRESENT(sign_order)) THEN
1036 order = sign_order
1037 ELSE
1038 order = 2
1039 END IF
1040
1041 CALL dbcsr_create(tmp1, template=matrix_sign)
1042
1043 CALL dbcsr_create(tmp2, template=matrix_sign)
1044
1045 CALL dbcsr_create(matrix2, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1046 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix, matrix, 0.0_dp, matrix2, &
1047 filter_eps=threshold, flop=flop0)
1048 !CALL dbcsr_filter(matrix2, threshold)
1049
1050 !CALL dbcsr_copy(matrix_sign, matrix)
1051 !CALL dbcsr_filter(matrix_sign, threshold)
1052
1053 IF (unit_nr > 0) WRITE (unit_nr, *)
1054
1055 CALL dbcsr_create(matrix_sqrt, template=matrix2)
1056 CALL dbcsr_create(matrix_sqrt_inv, template=matrix2)
1057 IF (unit_nr > 0) WRITE (unit_nr, *) "Threshold=", threshold
1058
1059 symmetrize = .false.
1060 CALL matrix_sqrt_proot(matrix_sqrt, matrix_sqrt_inv, matrix2, threshold, order, &
1061 0.01_dp, 100, symmetrize, converged)
1062! call matrix_sqrt_Newton_Schulz(matrix_sqrt, matrix_sqrt_inv, matrix2, threshold, order, &
1063! 0.01_dp,100, symmetrize,converged)
1064
1065 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix, matrix_sqrt_inv, 0.0_dp, matrix_sign, &
1066 filter_eps=threshold, flop=flop1)
1067
1068 ! this check is not really needed
1069 CALL dbcsr_multiply("N", "N", +1.0_dp, matrix_sign, matrix_sign, 0.0_dp, tmp1, &
1070 filter_eps=threshold, flop=flop2)
1071 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
1072 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
1073 frob_matrix = dbcsr_frobenius_norm(tmp1)
1074 occ_matrix = dbcsr_get_occupation(matrix_sign)
1075 IF (unit_nr > 0) THEN
1076 WRITE (unit_nr, '(T6,A,F10.8,E12.3)') "Final proot sign iter", occ_matrix, &
1077 frob_matrix/frob_matrix_base
1078 WRITE (unit_nr, '()')
1079 CALL m_flush(unit_nr)
1080 END IF
1081
1082 CALL dbcsr_release(tmp1)
1083 CALL dbcsr_release(tmp2)
1084 CALL dbcsr_release(matrix2)
1085 CALL dbcsr_release(matrix_sqrt)
1086 CALL dbcsr_release(matrix_sqrt_inv)
1087
1088 CALL timestop(handle)
1089
1090 END SUBROUTINE matrix_sign_proot
1091
1092! **************************************************************************************************
1093!> \brief compute the sign of a dense matrix using Newton-Schulz iterations
1094!> \param matrix_sign ...
1095!> \param matrix ...
1096!> \param matrix_id ...
1097!> \param threshold ...
1098!> \param sign_order ...
1099!> \author Michael Lass, Robert Schade
1100! **************************************************************************************************
1101 SUBROUTINE dense_matrix_sign_newton_schulz(matrix_sign, matrix, matrix_id, threshold, sign_order)
1102
1103 REAL(kind=dp), DIMENSION(:, :), INTENT(INOUT) :: matrix_sign
1104 REAL(kind=dp), DIMENSION(:, :), INTENT(IN) :: matrix
1105 INTEGER, INTENT(IN), OPTIONAL :: matrix_id
1106 REAL(kind=dp), INTENT(IN) :: threshold
1107 INTEGER, INTENT(IN), OPTIONAL :: sign_order
1108
1109 CHARACTER(LEN=*), PARAMETER :: routinen = 'dense_matrix_sign_Newton_Schulz'
1110
1111 INTEGER :: handle, i, j, sz, unit_nr
1112 LOGICAL :: converged
1113 REAL(kind=dp) :: frob_matrix, frob_matrix_base, &
1114 gersh_matrix, prefactor, scaling_factor
1115 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp1, tmp2
1116 REAL(kind=dp), DIMENSION(1) :: work
1117 REAL(kind=dp), EXTERNAL :: dlange
1118 TYPE(cp_logger_type), POINTER :: logger
1119
1120 CALL timeset(routinen, handle)
1121
1122 ! print output on all ranks
1123 logger => cp_get_default_logger()
1124 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1125
1126 ! scale the matrix to get into the convergence range
1127 sz = SIZE(matrix, 1)
1128 frob_matrix = dlange('F', sz, sz, matrix, sz, work) !dbcsr_frobenius_norm(matrix_sign)
1129 gersh_matrix = dlange('1', sz, sz, matrix, sz, work) !dbcsr_gershgorin_norm(matrix_sign)
1130 scaling_factor = 1/min(frob_matrix, gersh_matrix)
1131 matrix_sign = matrix*scaling_factor
1132 ALLOCATE (tmp1(sz, sz))
1133 ALLOCATE (tmp2(sz, sz))
1134
1135 converged = .false.
1136 DO i = 1, 100
1137 CALL dgemm('N', 'N', sz, sz, sz, -1.0_dp, matrix_sign, sz, matrix_sign, sz, 0.0_dp, tmp1, sz)
1138
1139 ! check convergence (frob norm of what should be the identity matrix minus identity matrix)
1140 frob_matrix_base = dlange('F', sz, sz, tmp1, sz, work)
1141 DO j = 1, sz
1142 tmp1(j, j) = tmp1(j, j) + 1.0_dp
1143 END DO
1144 frob_matrix = dlange('F', sz, sz, tmp1, sz, work)
1145
1146 IF (sign_order .EQ. 2) THEN
1147 prefactor = 0.5_dp
1148 ! update the above to 3*I-X*X
1149 DO j = 1, sz
1150 tmp1(j, j) = tmp1(j, j) + 2.0_dp
1151 END DO
1152 ELSE IF (sign_order .EQ. 3) THEN
1153 tmp2(:, :) = tmp1
1154 tmp1 = tmp1*4.0_dp/3.0_dp
1155 DO j = 1, sz
1156 tmp1(j, j) = tmp1(j, j) + 8.0_dp/3.0_dp
1157 END DO
1158 CALL dgemm('N', 'N', sz, sz, sz, 1.0_dp, tmp2, sz, tmp2, sz, 1.0_dp, tmp1, sz)
1159 prefactor = 3.0_dp/8.0_dp
1160 ELSE
1161 cpabort("requested order is not implemented.")
1162 END IF
1163
1164 CALL dgemm('N', 'N', sz, sz, sz, prefactor, matrix_sign, sz, tmp1, sz, 0.0_dp, tmp2, sz)
1165 matrix_sign = tmp2
1166
1167 ! frob_matrix/frob_matrix_base < SQRT(threshold)
1168 IF (frob_matrix*frob_matrix < (threshold*frob_matrix_base*frob_matrix_base)) THEN
1169 WRITE (unit_nr, '(T6,A,1X,I6,1X,A,1X,I3,E12.3)') &
1170 "Submatrix", matrix_id, "final NS sign iter", i, frob_matrix/frob_matrix_base
1171 CALL m_flush(unit_nr)
1172 converged = .true.
1173 EXIT
1174 END IF
1175 END DO
1176
1177 IF (.NOT. converged) &
1178 cpabort("dense_matrix_sign_Newton_Schulz did not converge within 100 iterations")
1179
1180 DEALLOCATE (tmp1)
1181 DEALLOCATE (tmp2)
1182
1183 CALL timestop(handle)
1184
1185 END SUBROUTINE dense_matrix_sign_newton_schulz
1186
1187! **************************************************************************************************
1188!> \brief Perform eigendecomposition of a dense matrix
1189!> \param sm ...
1190!> \param N ...
1191!> \param eigvals ...
1192!> \param eigvecs ...
1193!> \par History
1194!> 2020.05 Extracted from dense_matrix_sign_direct [Michael Lass]
1195!> \author Michael Lass, Robert Schade
1196! **************************************************************************************************
1197 SUBROUTINE eigdecomp(sm, N, eigvals, eigvecs)
1198 INTEGER, INTENT(IN) :: n
1199 REAL(kind=dp), INTENT(IN) :: sm(n, n)
1200 REAL(kind=dp), ALLOCATABLE, DIMENSION(:), &
1201 INTENT(OUT) :: eigvals
1202 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
1203 INTENT(OUT) :: eigvecs
1204
1205 INTEGER :: info, liwork, lwork
1206 INTEGER, ALLOCATABLE, DIMENSION(:) :: iwork
1207 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: work
1208 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp
1209
1210 ALLOCATE (eigvecs(n, n), tmp(n, n))
1211 ALLOCATE (eigvals(n))
1212
1213 ! symmetrize sm
1214 eigvecs(:, :) = 0.5*(sm + transpose(sm))
1215
1216 ! probe optimal sizes for WORK and IWORK
1217 lwork = -1
1218 liwork = -1
1219 ALLOCATE (work(1))
1220 ALLOCATE (iwork(1))
1221 CALL dsyevd('V', 'U', n, eigvecs, n, eigvals, work, lwork, iwork, liwork, info)
1222 lwork = int(work(1))
1223 liwork = int(iwork(1))
1224 DEALLOCATE (iwork)
1225 DEALLOCATE (work)
1226
1227 ! calculate eigenvalues and eigenvectors
1228 ALLOCATE (work(lwork))
1229 ALLOCATE (iwork(liwork))
1230 CALL dsyevd('V', 'U', n, eigvecs, n, eigvals, work, lwork, iwork, liwork, info)
1231 DEALLOCATE (iwork)
1232 DEALLOCATE (work)
1233 IF (info .NE. 0) cpabort("dsyevd did not succeed")
1234
1235 DEALLOCATE (tmp)
1236 END SUBROUTINE eigdecomp
1237
1238! **************************************************************************************************
1239!> \brief Calculate the sign matrix from eigenvalues and eigenvectors of a matrix
1240!> \param sm_sign ...
1241!> \param eigvals ...
1242!> \param eigvecs ...
1243!> \param N ...
1244!> \param mu_correction ...
1245!> \par History
1246!> 2020.05 Extracted from dense_matrix_sign_direct [Michael Lass]
1247!> \author Michael Lass, Robert Schade
1248! **************************************************************************************************
1249 SUBROUTINE sign_from_eigdecomp(sm_sign, eigvals, eigvecs, N, mu_correction)
1250 INTEGER :: n
1251 REAL(kind=dp), INTENT(IN) :: eigvecs(n, n), eigvals(n)
1252 REAL(kind=dp), INTENT(INOUT) :: sm_sign(n, n)
1253 REAL(kind=dp), INTENT(IN) :: mu_correction
1254
1255 INTEGER :: i
1256 REAL(kind=dp) :: modified_eigval, tmp(n, n)
1257
1258 sm_sign = 0
1259 DO i = 1, n
1260 modified_eigval = eigvals(i) - mu_correction
1261 IF (modified_eigval > 0) THEN
1262 sm_sign(i, i) = 1.0
1263 ELSE IF (modified_eigval < 0) THEN
1264 sm_sign(i, i) = -1.0
1265 ELSE
1266 sm_sign(i, i) = 0.0
1267 END IF
1268 END DO
1269
1270 ! Create matrix with eigenvalues in {-1,0,1} and eigenvectors of sm:
1271 ! sm_sign = eigvecs * sm_sign * eigvecs.T
1272 CALL dgemm('N', 'N', n, n, n, 1.0_dp, eigvecs, n, sm_sign, n, 0.0_dp, tmp, n)
1273 CALL dgemm('N', 'T', n, n, n, 1.0_dp, tmp, n, eigvecs, n, 0.0_dp, sm_sign, n)
1274 END SUBROUTINE sign_from_eigdecomp
1275
1276! **************************************************************************************************
1277!> \brief Compute partial trace of a matrix from its eigenvalues and eigenvectors
1278!> \param eigvals ...
1279!> \param eigvecs ...
1280!> \param firstcol ...
1281!> \param lastcol ...
1282!> \param mu_correction ...
1283!> \return ...
1284!> \par History
1285!> 2020.05 Created [Michael Lass]
1286!> \author Michael Lass
1287! **************************************************************************************************
1288 FUNCTION trace_from_eigdecomp(eigvals, eigvecs, firstcol, lastcol, mu_correction) RESULT(trace)
1289 REAL(kind=dp), ALLOCATABLE, DIMENSION(:), &
1290 INTENT(IN) :: eigvals
1291 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
1292 INTENT(IN) :: eigvecs
1293 INTEGER, INTENT(IN) :: firstcol, lastcol
1294 REAL(kind=dp), INTENT(IN) :: mu_correction
1295 REAL(kind=dp) :: trace
1296
1297 INTEGER :: i, j, sm_size
1298 REAL(kind=dp) :: modified_eigval, tmpsum
1299 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: mapped_eigvals
1300
1301 sm_size = SIZE(eigvals)
1302 ALLOCATE (mapped_eigvals(sm_size))
1303
1304 DO i = 1, sm_size
1305 modified_eigval = eigvals(i) - mu_correction
1306 IF (modified_eigval > 0) THEN
1307 mapped_eigvals(i) = 1.0
1308 ELSE IF (modified_eigval < 0) THEN
1309 mapped_eigvals(i) = -1.0
1310 ELSE
1311 mapped_eigvals(i) = 0.0
1312 END IF
1313 END DO
1314
1315 trace = 0.0_dp
1316 DO i = firstcol, lastcol
1317 tmpsum = 0.0_dp
1318 DO j = 1, sm_size
1319 tmpsum = tmpsum + (eigvecs(i, j)*mapped_eigvals(j)*eigvecs(i, j))
1320 END DO
1321 trace = trace - 0.5_dp*tmpsum + 0.5_dp
1322 END DO
1323 END FUNCTION trace_from_eigdecomp
1324
1325! **************************************************************************************************
1326!> \brief Calculate the sign matrix by direct calculation of all eigenvalues and eigenvectors
1327!> \param sm_sign ...
1328!> \param sm ...
1329!> \param N ...
1330!> \par History
1331!> 2020.02 Created [Michael Lass, Robert Schade]
1332!> 2020.05 Extracted eigdecomp and sign_from_eigdecomp [Michael Lass]
1333!> \author Michael Lass, Robert Schade
1334! **************************************************************************************************
1335 SUBROUTINE dense_matrix_sign_direct(sm_sign, sm, N)
1336 INTEGER, INTENT(IN) :: n
1337 REAL(kind=dp), INTENT(IN) :: sm(n, n)
1338 REAL(kind=dp), INTENT(INOUT) :: sm_sign(n, n)
1339
1340 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: eigvals
1341 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: eigvecs
1342
1343 CALL eigdecomp(sm, n, eigvals, eigvecs)
1344 CALL sign_from_eigdecomp(sm_sign, eigvals, eigvecs, n, 0.0_dp)
1345
1346 DEALLOCATE (eigvals, eigvecs)
1347 END SUBROUTINE dense_matrix_sign_direct
1348
1349! **************************************************************************************************
1350!> \brief Submatrix method
1351!> \param matrix_sign ...
1352!> \param matrix ...
1353!> \param threshold ...
1354!> \param sign_order ...
1355!> \param submatrix_sign_method ...
1356!> \par History
1357!> 2019.03 created [Robert Schade]
1358!> 2019.06 impl. submatrix method [Michael Lass]
1359!> \author Robert Schade, Michael Lass
1360! **************************************************************************************************
1361 SUBROUTINE matrix_sign_submatrix(matrix_sign, matrix, threshold, sign_order, submatrix_sign_method)
1362
1363 TYPE(dbcsr_type), INTENT(INOUT) :: matrix_sign, matrix
1364 REAL(kind=dp), INTENT(IN) :: threshold
1365 INTEGER, INTENT(IN), OPTIONAL :: sign_order
1366 INTEGER, INTENT(IN) :: submatrix_sign_method
1367
1368 CHARACTER(LEN=*), PARAMETER :: routinen = 'matrix_sign_submatrix'
1369
1370 INTEGER :: group, handle, i, myrank, nblkcols, &
1371 order, sm_size, unit_nr
1372 INTEGER, ALLOCATABLE, DIMENSION(:) :: my_sms
1373 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: sm, sm_sign
1374 TYPE(cp_logger_type), POINTER :: logger
1375 TYPE(dbcsr_distribution_type) :: dist
1376 TYPE(submatrix_dissection_type) :: dissection
1377
1378 CALL timeset(routinen, handle)
1379
1380 ! print output on all ranks
1381 logger => cp_get_default_logger()
1382 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1383
1384 IF (PRESENT(sign_order)) THEN
1385 order = sign_order
1386 ELSE
1387 order = 2
1388 END IF
1389
1390 CALL dbcsr_get_info(matrix=matrix, nblkcols_total=nblkcols, distribution=dist, group=group)
1391 CALL dbcsr_distribution_get(dist=dist, mynode=myrank)
1392
1393 CALL dissection%init(matrix)
1394 CALL dissection%get_sm_ids_for_rank(myrank, my_sms)
1395
1396 !$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
1397 !$OMP PRIVATE(sm, sm_sign, sm_size) &
1398 !$OMP SHARED(dissection, myrank, my_sms, order, submatrix_sign_method, threshold, unit_nr)
1399 !$OMP DO SCHEDULE(GUIDED)
1400 DO i = 1, SIZE(my_sms)
1401 WRITE (unit_nr, '(T3,A,1X,I4,1X,A,1X,I6)') "Rank", myrank, "processing submatrix", my_sms(i)
1402 CALL dissection%generate_submatrix(my_sms(i), sm)
1403 sm_size = SIZE(sm, 1)
1404 ALLOCATE (sm_sign(sm_size, sm_size))
1405 SELECT CASE (submatrix_sign_method)
1406 CASE (ls_scf_submatrix_sign_ns)
1407 CALL dense_matrix_sign_newton_schulz(sm_sign, sm, my_sms(i), threshold, order)
1408 CASE (ls_scf_submatrix_sign_direct, ls_scf_submatrix_sign_direct_muadj, ls_scf_submatrix_sign_direct_muadj_lowmem)
1409 CALL dense_matrix_sign_direct(sm_sign, sm, sm_size)
1410 CASE DEFAULT
1411 cpabort("Unkown submatrix sign method.")
1412 END SELECT
1413 CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1414 DEALLOCATE (sm, sm_sign)
1415 END DO
1416 !$OMP END DO
1417 !$OMP END PARALLEL
1418
1419 CALL dissection%communicate_results(matrix_sign)
1420 CALL dissection%final
1421
1422 CALL timestop(handle)
1423
1424 END SUBROUTINE matrix_sign_submatrix
1425
1426! **************************************************************************************************
1427!> \brief Submatrix method with internal adjustment of chemical potential
1428!> \param matrix_sign ...
1429!> \param matrix ...
1430!> \param mu ...
1431!> \param nelectron ...
1432!> \param threshold ...
1433!> \param variant ...
1434!> \par History
1435!> 2020.05 Created [Michael Lass]
1436!> \author Robert Schade, Michael Lass
1437! **************************************************************************************************
1438 SUBROUTINE matrix_sign_submatrix_mu_adjust(matrix_sign, matrix, mu, nelectron, threshold, variant)
1439
1440 TYPE(dbcsr_type), INTENT(INOUT) :: matrix_sign, matrix
1441 REAL(kind=dp), INTENT(INOUT) :: mu
1442 INTEGER, INTENT(IN) :: nelectron
1443 REAL(kind=dp), INTENT(IN) :: threshold
1444 INTEGER, INTENT(IN) :: variant
1445
1446 CHARACTER(LEN=*), PARAMETER :: routinen = 'matrix_sign_submatrix_mu_adjust'
1447 REAL(kind=dp), PARAMETER :: initial_increment = 0.01_dp
1448
1449 INTEGER :: group_handle, handle, i, j, myrank, &
1450 nblkcols, sm_firstcol, sm_lastcol, &
1451 sm_size, unit_nr
1452 INTEGER, ALLOCATABLE, DIMENSION(:) :: my_sms
1453 LOGICAL :: has_mu_high, has_mu_low
1454 REAL(kind=dp) :: increment, mu_high, mu_low, new_mu, trace
1455 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: sm, sm_sign, tmp
1456 TYPE(cp_logger_type), POINTER :: logger
1457 TYPE(dbcsr_distribution_type) :: dist
1458 TYPE(eigbuf), ALLOCATABLE, DIMENSION(:) :: eigbufs
1459 TYPE(mp_comm_type) :: group
1460 TYPE(submatrix_dissection_type) :: dissection
1461
1462 CALL timeset(routinen, handle)
1463
1464 ! print output on all ranks
1465 logger => cp_get_default_logger()
1466 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1467
1468 CALL dbcsr_get_info(matrix=matrix, nblkcols_total=nblkcols, distribution=dist, group=group_handle)
1469 CALL dbcsr_distribution_get(dist=dist, mynode=myrank)
1470
1471 CALL group%set_handle(group_handle)
1472
1473 CALL dissection%init(matrix)
1474 CALL dissection%get_sm_ids_for_rank(myrank, my_sms)
1475
1476 ALLOCATE (eigbufs(SIZE(my_sms)))
1477
1478 trace = 0.0_dp
1479
1480 !$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
1481 !$OMP PRIVATE(sm, sm_sign, sm_size, sm_firstcol, sm_lastcol, j, tmp) &
1482 !$OMP SHARED(dissection, myrank, my_sms, unit_nr, eigbufs, threshold, variant) &
1483 !$OMP REDUCTION(+:trace)
1484 !$OMP DO SCHEDULE(GUIDED)
1485 DO i = 1, SIZE(my_sms)
1486 CALL dissection%generate_submatrix(my_sms(i), sm)
1487 sm_size = SIZE(sm, 1)
1488 WRITE (unit_nr, *) "Rank", myrank, "processing submatrix", my_sms(i), "size", sm_size
1489
1490 CALL dissection%get_relevant_sm_columns(my_sms(i), sm_firstcol, sm_lastcol)
1491
1492 IF (variant .EQ. ls_scf_submatrix_sign_direct_muadj) THEN
1493 ! Store all eigenvectors in buffer. We will use it to compute sm_sign at the end.
1494 CALL eigdecomp(sm, sm_size, eigvals=eigbufs(i)%eigvals, eigvecs=eigbufs(i)%eigvecs)
1495 ELSE
1496 ! Only store eigenvectors that are required for mu adjustment.
1497 ! Calculate sm_sign right away in the hope that mu is already correct.
1498 CALL eigdecomp(sm, sm_size, eigvals=eigbufs(i)%eigvals, eigvecs=tmp)
1499 ALLOCATE (eigbufs(i)%eigvecs(sm_firstcol:sm_lastcol, 1:sm_size))
1500 eigbufs(i)%eigvecs(:, :) = tmp(sm_firstcol:sm_lastcol, 1:sm_size)
1501
1502 ALLOCATE (sm_sign(sm_size, sm_size))
1503 CALL sign_from_eigdecomp(sm_sign, eigbufs(i)%eigvals, tmp, sm_size, 0.0_dp)
1504 CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1505 DEALLOCATE (sm_sign, tmp)
1506 END IF
1507
1508 DEALLOCATE (sm)
1509 trace = trace + trace_from_eigdecomp(eigbufs(i)%eigvals, eigbufs(i)%eigvecs, sm_firstcol, sm_lastcol, 0.0_dp)
1510 END DO
1511 !$OMP END DO
1512 !$OMP END PARALLEL
1513
1514 has_mu_low = .false.
1515 has_mu_high = .false.
1516 increment = initial_increment
1517 new_mu = mu
1518 DO i = 1, 30
1519 CALL group%sum(trace)
1520 IF (unit_nr > 0) WRITE (unit_nr, '(T2,A,1X,F13.9,1X,F15.9)') &
1521 "Density matrix: mu, trace error: ", new_mu, trace - nelectron
1522 IF (abs(trace - nelectron) < 0.5_dp) EXIT
1523 IF (trace < nelectron) THEN
1524 mu_low = new_mu
1525 new_mu = new_mu + increment
1526 has_mu_low = .true.
1527 increment = increment*2
1528 ELSE
1529 mu_high = new_mu
1530 new_mu = new_mu - increment
1531 has_mu_high = .true.
1532 increment = increment*2
1533 END IF
1534
1535 IF (has_mu_low .AND. has_mu_high) THEN
1536 new_mu = (mu_low + mu_high)/2
1537 IF (abs(mu_high - mu_low) < threshold) EXIT
1538 END IF
1539
1540 trace = 0
1541 !$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
1542 !$OMP PRIVATE(i, sm_sign, tmp, sm_size, sm_firstcol, sm_lastcol) &
1543 !$OMP SHARED(dissection, my_sms, unit_nr, eigbufs, mu, new_mu, nelectron) &
1544 !$OMP REDUCTION(+:trace)
1545 !$OMP DO SCHEDULE(GUIDED)
1546 DO j = 1, SIZE(my_sms)
1547 sm_size = SIZE(eigbufs(j)%eigvals)
1548 CALL dissection%get_relevant_sm_columns(my_sms(j), sm_firstcol, sm_lastcol)
1549 trace = trace + trace_from_eigdecomp(eigbufs(j)%eigvals, eigbufs(j)%eigvecs, sm_firstcol, sm_lastcol, new_mu - mu)
1550 END DO
1551 !$OMP END DO
1552 !$OMP END PARALLEL
1553 END DO
1554
1555 ! Finalize sign matrix from eigendecompositions if we kept all eigenvectors
1556 IF (variant .EQ. ls_scf_submatrix_sign_direct_muadj) THEN
1557 !$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
1558 !$OMP PRIVATE(sm, sm_sign, sm_size, sm_firstcol, sm_lastcol, j) &
1559 !$OMP SHARED(dissection, myrank, my_sms, unit_nr, eigbufs, mu, new_mu)
1560 !$OMP DO SCHEDULE(GUIDED)
1561 DO i = 1, SIZE(my_sms)
1562 WRITE (unit_nr, '(T3,A,1X,I4,1X,A,1X,I6)') "Rank", myrank, "finalizing submatrix", my_sms(i)
1563 sm_size = SIZE(eigbufs(i)%eigvals)
1564 ALLOCATE (sm_sign(sm_size, sm_size))
1565 CALL sign_from_eigdecomp(sm_sign, eigbufs(i)%eigvals, eigbufs(i)%eigvecs, sm_size, new_mu - mu)
1566 CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1567 DEALLOCATE (sm_sign)
1568 END DO
1569 !$OMP END DO
1570 !$OMP END PARALLEL
1571 END IF
1572
1573 DEALLOCATE (eigbufs)
1574
1575 ! If we only stored parts of the eigenvectors and mu has changed, we need to recompute sm_sign
1576 IF ((variant .EQ. ls_scf_submatrix_sign_direct_muadj_lowmem) .AND. (mu .NE. new_mu)) THEN
1577 !$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
1578 !$OMP PRIVATE(sm, sm_sign, sm_size, sm_firstcol, sm_lastcol, j) &
1579 !$OMP SHARED(dissection, myrank, my_sms, unit_nr, eigbufs, mu, new_mu)
1580 !$OMP DO SCHEDULE(GUIDED)
1581 DO i = 1, SIZE(my_sms)
1582 WRITE (unit_nr, '(T3,A,1X,I4,1X,A,1X,I6)') "Rank", myrank, "reprocessing submatrix", my_sms(i)
1583 CALL dissection%generate_submatrix(my_sms(i), sm)
1584 sm_size = SIZE(sm, 1)
1585 DO j = 1, sm_size
1586 sm(j, j) = sm(j, j) + mu - new_mu
1587 END DO
1588 ALLOCATE (sm_sign(sm_size, sm_size))
1589 CALL dense_matrix_sign_direct(sm_sign, sm, sm_size)
1590 CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1591 DEALLOCATE (sm, sm_sign)
1592 END DO
1593 !$OMP END DO
1594 !$OMP END PARALLEL
1595 END IF
1596
1597 mu = new_mu
1598
1599 CALL dissection%communicate_results(matrix_sign)
1600 CALL dissection%final
1601
1602 CALL timestop(handle)
1603
1604 END SUBROUTINE matrix_sign_submatrix_mu_adjust
1605
1606! **************************************************************************************************
1607!> \brief compute the sqrt of a matrix via the sign function and the corresponding Newton-Schulz iterations
1608!> the order of the algorithm should be 2..5, 3 or 5 is recommended
1609!> \param matrix_sqrt ...
1610!> \param matrix_sqrt_inv ...
1611!> \param matrix ...
1612!> \param threshold ...
1613!> \param order ...
1614!> \param eps_lanczos ...
1615!> \param max_iter_lanczos ...
1616!> \param symmetrize ...
1617!> \param converged ...
1618!> \par History
1619!> 2010.10 created [Joost VandeVondele]
1620!> \author Joost VandeVondele
1621! **************************************************************************************************
1622 SUBROUTINE matrix_sqrt_newton_schulz(matrix_sqrt, matrix_sqrt_inv, matrix, threshold, order, &
1623 eps_lanczos, max_iter_lanczos, symmetrize, converged)
1624 TYPE(dbcsr_type), INTENT(INOUT) :: matrix_sqrt, matrix_sqrt_inv, matrix
1625 REAL(kind=dp), INTENT(IN) :: threshold
1626 INTEGER, INTENT(IN) :: order
1627 REAL(kind=dp), INTENT(IN) :: eps_lanczos
1628 INTEGER, INTENT(IN) :: max_iter_lanczos
1629 LOGICAL, OPTIONAL :: symmetrize, converged
1630
1631 CHARACTER(LEN=*), PARAMETER :: routinen = 'matrix_sqrt_Newton_Schulz'
1632
1633 INTEGER :: handle, i, unit_nr
1634 INTEGER(KIND=int_8) :: flop1, flop2, flop3, flop4, flop5
1635 LOGICAL :: arnoldi_converged, tsym
1636 REAL(kind=dp) :: a, b, c, conv, d, frob_matrix, &
1637 frob_matrix_base, gershgorin_norm, &
1638 max_ev, min_ev, oa, ob, oc, &
1639 occ_matrix, od, scaling, t1, t2
1640 TYPE(cp_logger_type), POINTER :: logger
1641 TYPE(dbcsr_type) :: tmp1, tmp2, tmp3
1642
1643 CALL timeset(routinen, handle)
1644
1645 logger => cp_get_default_logger()
1646 IF (logger%para_env%is_source()) THEN
1647 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1648 ELSE
1649 unit_nr = -1
1650 END IF
1651
1652 IF (PRESENT(converged)) converged = .false.
1653 IF (PRESENT(symmetrize)) THEN
1654 tsym = symmetrize
1655 ELSE
1656 tsym = .true.
1657 END IF
1658
1659 ! for stability symmetry can not be assumed
1660 CALL dbcsr_create(tmp1, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1661 CALL dbcsr_create(tmp2, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1662 IF (order .GE. 4) THEN
1663 CALL dbcsr_create(tmp3, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1664 END IF
1665
1666 CALL dbcsr_set(matrix_sqrt_inv, 0.0_dp)
1667 CALL dbcsr_add_on_diag(matrix_sqrt_inv, 1.0_dp)
1668 CALL dbcsr_filter(matrix_sqrt_inv, threshold)
1669 CALL dbcsr_copy(matrix_sqrt, matrix)
1670
1671 ! scale the matrix to get into the convergence range
1672 IF (order == 0) THEN
1673
1674 gershgorin_norm = dbcsr_gershgorin_norm(matrix_sqrt)
1675 frob_matrix = dbcsr_frobenius_norm(matrix_sqrt)
1676 scaling = 1.0_dp/min(frob_matrix, gershgorin_norm)
1677
1678 ELSE
1679
1680 ! scale the matrix to get into the convergence range
1681 CALL arnoldi_extremal(matrix_sqrt, max_ev, min_ev, threshold=eps_lanczos, &
1682 max_iter=max_iter_lanczos, converged=arnoldi_converged)
1683 IF (unit_nr > 0) THEN
1684 WRITE (unit_nr, *)
1685 WRITE (unit_nr, '(T6,A,1X,L1,A,E12.3)') "Lanczos converged: ", arnoldi_converged, " threshold:", eps_lanczos
1686 WRITE (unit_nr, '(T6,A,1X,E12.3,E12.3)') "Est. extremal eigenvalues:", max_ev, min_ev
1687 WRITE (unit_nr, '(T6,A,1X,E12.3)') "Est. condition number :", max_ev/max(min_ev, epsilon(min_ev))
1688 END IF
1689 ! conservatively assume we get a relatively large error (100*threshold_lanczos) in the estimates
1690 ! and adjust the scaling to be on the safe side
1691 scaling = 2.0_dp/(max_ev + min_ev + 100*eps_lanczos)
1692
1693 END IF
1694
1695 CALL dbcsr_scale(matrix_sqrt, scaling)
1696 CALL dbcsr_filter(matrix_sqrt, threshold)
1697 IF (unit_nr > 0) THEN
1698 WRITE (unit_nr, *)
1699 WRITE (unit_nr, *) "Order=", order
1700 END IF
1701
1702 DO i = 1, 100
1703
1704 t1 = m_walltime()
1705
1706 ! tmp1 = Zk * Yk - I
1707 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrix_sqrt, 0.0_dp, tmp1, &
1708 filter_eps=threshold, flop=flop1)
1709 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
1710 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
1711
1712 ! check convergence (frob norm of what should be the identity matrix minus identity matrix)
1713 frob_matrix = dbcsr_frobenius_norm(tmp1)
1714
1715 flop4 = 0; flop5 = 0
1716 SELECT CASE (order)
1717 CASE (0, 2)
1718 ! update the above to 0.5*(3*I-Zk*Yk)
1719 CALL dbcsr_add_on_diag(tmp1, -2.0_dp)
1720 CALL dbcsr_scale(tmp1, -0.5_dp)
1721 CASE (3)
1722 ! tmp2 = tmp1 ** 2
1723 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, tmp1, 0.0_dp, tmp2, &
1724 filter_eps=threshold, flop=flop4)
1725 ! tmp1 = 1/16 * (16*I-8*tmp1+6*tmp1**2-5*tmp1**3)
1726 CALL dbcsr_add(tmp1, tmp2, -4.0_dp, 3.0_dp)
1727 CALL dbcsr_add_on_diag(tmp1, 8.0_dp)
1728 CALL dbcsr_scale(tmp1, 0.125_dp)
1729 CASE (4) ! as expensive as case(5), so little need to use it
1730 ! tmp2 = tmp1 ** 2
1731 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, tmp1, 0.0_dp, tmp2, &
1732 filter_eps=threshold, flop=flop4)
1733 ! tmp3 = tmp2 * tmp1
1734 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp1, 0.0_dp, tmp3, &
1735 filter_eps=threshold, flop=flop5)
1736 CALL dbcsr_scale(tmp1, -8.0_dp)
1737 CALL dbcsr_add_on_diag(tmp1, 16.0_dp)
1738 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 6.0_dp)
1739 CALL dbcsr_add(tmp1, tmp3, 1.0_dp, -5.0_dp)
1740 CALL dbcsr_scale(tmp1, 1/16.0_dp)
1741 CASE (5)
1742 ! Knuth's reformulation to evaluate the polynomial of 4th degree in 2 multiplications
1743 ! p = y4+A*y3+B*y2+C*y+D
1744 ! z := y * (y+a); P := (z+y+b) * (z+c) + d.
1745 ! a=(A-1)/2 ; b=B*(a+1)-C-a*(a+1)*(a+1)
1746 ! c=B-b-a*(a+1)
1747 ! d=D-bc
1748 oa = -40.0_dp/35.0_dp
1749 ob = 48.0_dp/35.0_dp
1750 oc = -64.0_dp/35.0_dp
1751 od = 128.0_dp/35.0_dp
1752 a = (oa - 1)/2
1753 b = ob*(a + 1) - oc - a*(a + 1)**2
1754 c = ob - b - a*(a + 1)
1755 d = od - b*c
1756 ! tmp2 = tmp1 ** 2 + a * tmp1
1757 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, tmp1, 0.0_dp, tmp2, &
1758 filter_eps=threshold, flop=flop4)
1759 CALL dbcsr_add(tmp2, tmp1, 1.0_dp, a)
1760 ! tmp3 = tmp2 + tmp1 + b
1761 CALL dbcsr_copy(tmp3, tmp2)
1762 CALL dbcsr_add(tmp3, tmp1, 1.0_dp, 1.0_dp)
1763 CALL dbcsr_add_on_diag(tmp3, b)
1764 ! tmp2 = tmp2 + c
1765 CALL dbcsr_add_on_diag(tmp2, c)
1766 ! tmp1 = tmp2 * tmp3
1767 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp1, &
1768 filter_eps=threshold, flop=flop5)
1769 ! tmp1 = tmp1 + d
1770 CALL dbcsr_add_on_diag(tmp1, d)
1771 ! final scale
1772 CALL dbcsr_scale(tmp1, 35.0_dp/128.0_dp)
1773 CASE DEFAULT
1774 cpabort("Illegal order value")
1775 END SELECT
1776
1777 ! tmp2 = Yk * tmp1 = Y(k+1)
1778 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt, tmp1, 0.0_dp, tmp2, &
1779 filter_eps=threshold, flop=flop2)
1780 ! CALL dbcsr_filter(tmp2,threshold)
1781 CALL dbcsr_copy(matrix_sqrt, tmp2)
1782
1783 ! tmp2 = tmp1 * Zk = Z(k+1)
1784 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, matrix_sqrt_inv, 0.0_dp, tmp2, &
1785 filter_eps=threshold, flop=flop3)
1786 ! CALL dbcsr_filter(tmp2,threshold)
1787 CALL dbcsr_copy(matrix_sqrt_inv, tmp2)
1788
1789 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
1790
1791 ! done iterating
1792 t2 = m_walltime()
1793
1794 conv = frob_matrix/frob_matrix_base
1795
1796 IF (unit_nr > 0) THEN
1797 WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)') "NS sqrt iter ", i, occ_matrix, &
1798 conv, t2 - t1, &
1799 (flop1 + flop2 + flop3 + flop4 + flop5)/(1.0e6_dp*max(0.001_dp, t2 - t1))
1800 CALL m_flush(unit_nr)
1801 END IF
1802
1803 IF (abnormal_value(conv)) &
1804 cpabort("conv is an abnormal value (NaN/Inf).")
1805
1806 ! conv < SQRT(threshold)
1807 IF ((conv*conv) < threshold) THEN
1808 IF (PRESENT(converged)) converged = .true.
1809 EXIT
1810 END IF
1811
1812 END DO
1813
1814 ! symmetrize the matrices as this is not guaranteed by the algorithm
1815 IF (tsym) THEN
1816 IF (unit_nr > 0) THEN
1817 WRITE (unit_nr, '(T6,A20)') "Symmetrizing Results"
1818 END IF
1819 CALL dbcsr_transposed(tmp1, matrix_sqrt_inv)
1820 CALL dbcsr_add(matrix_sqrt_inv, tmp1, 0.5_dp, 0.5_dp)
1821 CALL dbcsr_transposed(tmp1, matrix_sqrt)
1822 CALL dbcsr_add(matrix_sqrt, tmp1, 0.5_dp, 0.5_dp)
1823 END IF
1824
1825 ! this check is not really needed
1826 CALL dbcsr_multiply("N", "N", +1.0_dp, matrix_sqrt_inv, matrix_sqrt, 0.0_dp, tmp1, &
1827 filter_eps=threshold)
1828 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
1829 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
1830 frob_matrix = dbcsr_frobenius_norm(tmp1)
1831 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
1832 IF (unit_nr > 0) THEN
1833 WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3)') "Final NS sqrt iter ", i, occ_matrix, &
1834 frob_matrix/frob_matrix_base
1835 WRITE (unit_nr, '()')
1836 CALL m_flush(unit_nr)
1837 END IF
1838
1839 ! scale to proper end results
1840 CALL dbcsr_scale(matrix_sqrt, 1/sqrt(scaling))
1841 CALL dbcsr_scale(matrix_sqrt_inv, sqrt(scaling))
1842
1843 CALL dbcsr_release(tmp1)
1844 CALL dbcsr_release(tmp2)
1845 IF (order .GE. 4) THEN
1846 CALL dbcsr_release(tmp3)
1847 END IF
1848
1849 CALL timestop(handle)
1850
1851 END SUBROUTINE matrix_sqrt_newton_schulz
1852
1853! **************************************************************************************************
1854!> \brief compute the sqrt of a matrix via the general algorithm for the p-th root of Richters et al.
1855!> Commun. Comput. Phys., 25 (2019), pp. 564-585.
1856!> \param matrix_sqrt ...
1857!> \param matrix_sqrt_inv ...
1858!> \param matrix ...
1859!> \param threshold ...
1860!> \param order ...
1861!> \param eps_lanczos ...
1862!> \param max_iter_lanczos ...
1863!> \param symmetrize ...
1864!> \param converged ...
1865!> \par History
1866!> 2019.04 created [Robert Schade]
1867!> \author Robert Schade
1868! **************************************************************************************************
1869 SUBROUTINE matrix_sqrt_proot(matrix_sqrt, matrix_sqrt_inv, matrix, threshold, order, &
1870 eps_lanczos, max_iter_lanczos, symmetrize, converged)
1871 TYPE(dbcsr_type), INTENT(INOUT) :: matrix_sqrt, matrix_sqrt_inv, matrix
1872 REAL(kind=dp), INTENT(IN) :: threshold
1873 INTEGER, INTENT(IN) :: order
1874 REAL(kind=dp), INTENT(IN) :: eps_lanczos
1875 INTEGER, INTENT(IN) :: max_iter_lanczos
1876 LOGICAL, OPTIONAL :: symmetrize, converged
1877
1878 CHARACTER(LEN=*), PARAMETER :: routinen = 'matrix_sqrt_proot'
1879
1880 INTEGER :: choose, handle, i, ii, j, unit_nr
1881 INTEGER(KIND=int_8) :: f, flop1, flop2, flop3, flop4, flop5
1882 LOGICAL :: arnoldi_converged, test, tsym
1883 REAL(kind=dp) :: conv, frob_matrix, frob_matrix_base, &
1884 max_ev, min_ev, occ_matrix, scaling, &
1885 t1, t2
1886 TYPE(cp_logger_type), POINTER :: logger
1887 TYPE(dbcsr_type) :: bk2a, matrixs, rmat, tmp1, tmp2, tmp3
1888
1889 CALL cite_reference(richters2018)
1890
1891 test = .false.
1892
1893 CALL timeset(routinen, handle)
1894
1895 logger => cp_get_default_logger()
1896 IF (logger%para_env%is_source()) THEN
1897 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
1898 ELSE
1899 unit_nr = -1
1900 END IF
1901
1902 IF (PRESENT(converged)) converged = .false.
1903 IF (PRESENT(symmetrize)) THEN
1904 tsym = symmetrize
1905 ELSE
1906 tsym = .true.
1907 END IF
1908
1909 ! for stability symmetry can not be assumed
1910 CALL dbcsr_create(tmp1, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1911 CALL dbcsr_create(tmp2, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1912 CALL dbcsr_create(tmp3, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1913 CALL dbcsr_create(rmat, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1914 CALL dbcsr_create(matrixs, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1915
1916 CALL dbcsr_copy(matrixs, matrix)
1917 IF (1 .EQ. 1) THEN
1918 ! scale the matrix to get into the convergence range
1919 CALL arnoldi_extremal(matrixs, max_ev, min_ev, threshold=eps_lanczos, &
1920 max_iter=max_iter_lanczos, converged=arnoldi_converged)
1921 IF (unit_nr > 0) THEN
1922 WRITE (unit_nr, *)
1923 WRITE (unit_nr, '(T6,A,1X,L1,A,E12.3)') "Lanczos converged: ", arnoldi_converged, " threshold:", eps_lanczos
1924 WRITE (unit_nr, '(T6,A,1X,E12.3,E12.3)') "Est. extremal eigenvalues:", max_ev, min_ev
1925 WRITE (unit_nr, '(T6,A,1X,E12.3)') "Est. condition number :", max_ev/max(min_ev, epsilon(min_ev))
1926 END IF
1927 ! conservatively assume we get a relatively large error (100*threshold_lanczos) in the estimates
1928 ! and adjust the scaling to be on the safe side
1929 scaling = 2.0_dp/(max_ev + min_ev + 100*eps_lanczos)
1930 CALL dbcsr_scale(matrixs, scaling)
1931 CALL dbcsr_filter(matrixs, threshold)
1932 ELSE
1933 scaling = 1.0_dp
1934 END IF
1935
1936 CALL dbcsr_set(matrix_sqrt_inv, 0.0_dp)
1937 CALL dbcsr_add_on_diag(matrix_sqrt_inv, 1.0_dp)
1938 !CALL dbcsr_filter(matrix_sqrt_inv, threshold)
1939
1940 IF (unit_nr > 0) THEN
1941 WRITE (unit_nr, *)
1942 WRITE (unit_nr, *) "Order=", order
1943 END IF
1944
1945 DO i = 1, 100
1946
1947 t1 = m_walltime()
1948 IF (1 .EQ. 1) THEN
1949 !build R=1-A B_K^2
1950 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrix_sqrt_inv, 0.0_dp, tmp1, &
1951 filter_eps=threshold, flop=flop1)
1952 CALL dbcsr_multiply("N", "N", 1.0_dp, matrixs, tmp1, 0.0_dp, rmat, &
1953 filter_eps=threshold, flop=flop2)
1954 CALL dbcsr_scale(rmat, -1.0_dp)
1955 CALL dbcsr_add_on_diag(rmat, 1.0_dp)
1956
1957 flop4 = 0; flop5 = 0
1958 CALL dbcsr_set(tmp1, 0.0_dp)
1959 CALL dbcsr_add_on_diag(tmp1, 2.0_dp)
1960
1961 flop3 = 0
1962
1963 DO j = 2, order
1964 IF (j .EQ. 2) THEN
1965 CALL dbcsr_copy(tmp2, rmat)
1966 ELSE
1967 f = 0
1968 CALL dbcsr_copy(tmp3, tmp2)
1969 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, rmat, 0.0_dp, tmp2, &
1970 filter_eps=threshold, flop=f)
1971 flop3 = flop3 + f
1972 END IF
1973 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 1.0_dp)
1974 END DO
1975 ELSE
1976 CALL dbcsr_create(bk2a, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1977 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrixs, 0.0_dp, tmp3, &
1978 filter_eps=threshold, flop=flop1)
1979 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, tmp3, 0.0_dp, bk2a, &
1980 filter_eps=threshold, flop=flop2)
1981 CALL dbcsr_copy(rmat, bk2a)
1982 CALL dbcsr_add_on_diag(rmat, -1.0_dp)
1983
1984 CALL dbcsr_set(tmp1, 0.0_dp)
1985 CALL dbcsr_add_on_diag(tmp1, 1.0_dp)
1986
1987 CALL dbcsr_set(tmp2, 0.0_dp)
1988 CALL dbcsr_add_on_diag(tmp2, 1.0_dp)
1989
1990 flop3 = 0
1991 DO j = 1, order
1992 !choose=factorial(order)/(factorial(j)*factorial(order-j))
1993 choose = product((/(ii, ii=1, order)/))/(product((/(ii, ii=1, j)/))*product((/(ii, ii=1, order - j)/)))
1994 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, -1.0_dp*(-1)**j*choose)
1995 IF (j .LT. order) THEN
1996 f = 0
1997 CALL dbcsr_copy(tmp3, tmp2)
1998 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, bk2a, 0.0_dp, tmp2, &
1999 filter_eps=threshold, flop=f)
2000 flop3 = flop3 + f
2001 END IF
2002 END DO
2003 CALL dbcsr_release(bk2a)
2004 END IF
2005
2006 CALL dbcsr_copy(tmp3, matrix_sqrt_inv)
2007 CALL dbcsr_multiply("N", "N", 0.5_dp, tmp3, tmp1, 0.0_dp, matrix_sqrt_inv, &
2008 filter_eps=threshold, flop=flop4)
2009
2010 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
2011
2012 ! done iterating
2013 t2 = m_walltime()
2014
2015 conv = dbcsr_frobenius_norm(rmat)
2016
2017 IF (unit_nr > 0) THEN
2018 WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)') "PROOT sqrt iter ", i, occ_matrix, &
2019 conv, t2 - t1, &
2020 (flop1 + flop2 + flop3 + flop4 + flop5)/(1.0e6_dp*max(0.001_dp, t2 - t1))
2021 CALL m_flush(unit_nr)
2022 END IF
2023
2024 IF (abnormal_value(conv)) &
2025 cpabort("conv is an abnormal value (NaN/Inf).")
2026
2027 ! conv < SQRT(threshold)
2028 IF ((conv*conv) < threshold) THEN
2029 IF (PRESENT(converged)) converged = .true.
2030 EXIT
2031 END IF
2032
2033 END DO
2034
2035 ! scale to proper end results
2036 CALL dbcsr_scale(matrix_sqrt_inv, sqrt(scaling))
2037 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrix, 0.0_dp, matrix_sqrt, &
2038 filter_eps=threshold, flop=flop5)
2039
2040 ! symmetrize the matrices as this is not guaranteed by the algorithm
2041 IF (tsym) THEN
2042 IF (unit_nr > 0) THEN
2043 WRITE (unit_nr, '(A20)') "SYMMETRIZING RESULTS"
2044 END IF
2045 CALL dbcsr_transposed(tmp1, matrix_sqrt_inv)
2046 CALL dbcsr_add(matrix_sqrt_inv, tmp1, 0.5_dp, 0.5_dp)
2047 CALL dbcsr_transposed(tmp1, matrix_sqrt)
2048 CALL dbcsr_add(matrix_sqrt, tmp1, 0.5_dp, 0.5_dp)
2049 END IF
2050
2051 ! this check is not really needed
2052 IF (test) THEN
2053 CALL dbcsr_multiply("N", "N", +1.0_dp, matrix_sqrt_inv, matrix_sqrt, 0.0_dp, tmp1, &
2054 filter_eps=threshold)
2055 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
2056 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
2057 frob_matrix = dbcsr_frobenius_norm(tmp1)
2058 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
2059 IF (unit_nr > 0) THEN
2060 WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3)') "Final PROOT S^{-1/2} S^{1/2}-Eins error ", i, occ_matrix, &
2061 frob_matrix/frob_matrix_base
2062 WRITE (unit_nr, '()')
2063 CALL m_flush(unit_nr)
2064 END IF
2065
2066 ! this check is not really needed
2067 CALL dbcsr_multiply("N", "N", +1.0_dp, matrix_sqrt_inv, matrix_sqrt_inv, 0.0_dp, tmp2, &
2068 filter_eps=threshold)
2069 CALL dbcsr_multiply("N", "N", +1.0_dp, tmp2, matrix, 0.0_dp, tmp1, &
2070 filter_eps=threshold)
2071 frob_matrix_base = dbcsr_frobenius_norm(tmp1)
2072 CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
2073 frob_matrix = dbcsr_frobenius_norm(tmp1)
2074 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
2075 IF (unit_nr > 0) THEN
2076 WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3)') "Final PROOT S^{-1/2} S^{-1/2} S-Eins error ", i, occ_matrix, &
2077 frob_matrix/frob_matrix_base
2078 WRITE (unit_nr, '()')
2079 CALL m_flush(unit_nr)
2080 END IF
2081 END IF
2082
2083 CALL dbcsr_release(tmp1)
2084 CALL dbcsr_release(tmp2)
2085 CALL dbcsr_release(tmp3)
2086 CALL dbcsr_release(rmat)
2087 CALL dbcsr_release(matrixs)
2088
2089 CALL timestop(handle)
2090 END SUBROUTINE matrix_sqrt_proot
2091
2092! **************************************************************************************************
2093!> \brief ...
2094!> \param matrix_exp ...
2095!> \param matrix ...
2096!> \param omega ...
2097!> \param alpha ...
2098!> \param threshold ...
2099! **************************************************************************************************
2100 SUBROUTINE matrix_exponential(matrix_exp, matrix, omega, alpha, threshold)
2101 ! compute matrix_exp=omega*exp(alpha*matrix)
2102 TYPE(dbcsr_type), INTENT(INOUT) :: matrix_exp, matrix
2103 REAL(kind=dp), INTENT(IN) :: omega, alpha, threshold
2104
2105 CHARACTER(LEN=*), PARAMETER :: routinen = 'matrix_exponential'
2106 REAL(dp), PARAMETER :: one = 1.0_dp, toll = 1.e-17_dp, &
2107 zero = 0.0_dp
2108
2109 INTEGER :: handle, i, k, unit_nr
2110 REAL(dp) :: factorial, norm_c, norm_d, norm_scalar
2111 TYPE(cp_logger_type), POINTER :: logger
2112 TYPE(dbcsr_type) :: b, b_square, c, d, d_product
2113
2114 CALL timeset(routinen, handle)
2115
2116 logger => cp_get_default_logger()
2117 IF (logger%para_env%is_source()) THEN
2118 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
2119 ELSE
2120 unit_nr = -1
2121 END IF
2122
2123 ! Calculate the norm of the matrix alpha*matrix, and scale it until it is less than 1.0
2124 norm_scalar = abs(alpha)*dbcsr_frobenius_norm(matrix)
2125
2126 ! k=scaling parameter
2127 k = 1
2128 DO
2129 IF ((norm_scalar/2.0_dp**k) <= one) EXIT
2130 k = k + 1
2131 END DO
2132
2133 ! copy and scale the input matrix in matrix C and in matrix D
2134 CALL dbcsr_create(c, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2135 CALL dbcsr_copy(c, matrix)
2136 CALL dbcsr_scale(c, alpha_scalar=alpha/2.0_dp**k)
2137
2138 CALL dbcsr_create(d, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2139 CALL dbcsr_copy(d, c)
2140
2141 ! write(*,*)
2142 ! write(*,*)
2143 ! CALL dbcsr_print(D, nodata=.FALSE., matlab_format=.TRUE., variable_name="D", unit_nr=6)
2144
2145 ! set the B matrix as B=Identity+D
2146 CALL dbcsr_create(b, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2147 CALL dbcsr_copy(b, d)
2148 CALL dbcsr_add_on_diag(b, alpha_scalar=one)
2149
2150 ! CALL dbcsr_print(B, nodata=.FALSE., matlab_format=.TRUE., variable_name="B", unit_nr=6)
2151
2152 ! Calculate the norm of C and moltiply by toll to be used as a threshold
2153 norm_c = toll*dbcsr_frobenius_norm(matrix)
2154
2155 ! iteration for the truncated taylor series expansion
2156 CALL dbcsr_create(d_product, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2157 i = 1
2158 DO
2159 i = i + 1
2160 ! compute D_product=D*C
2161 CALL dbcsr_multiply("N", "N", one, d, c, &
2162 zero, d_product, filter_eps=threshold)
2163
2164 ! copy D_product in D
2165 CALL dbcsr_copy(d, d_product)
2166
2167 ! calculate B=B+D_product/fat(i)
2168 factorial = ifac(i)
2169 CALL dbcsr_add(b, d_product, one, factorial)
2170
2171 ! check for convergence using the norm of D (copy of the matrix D_product) and C
2172 norm_d = factorial*dbcsr_frobenius_norm(d)
2173 IF (norm_d < norm_c) EXIT
2174 END DO
2175
2176 ! start the k iteration for the squaring of the matrix
2177 CALL dbcsr_create(b_square, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2178 DO i = 1, k
2179 !compute B_square=B*B
2180 CALL dbcsr_multiply("N", "N", one, b, b, &
2181 zero, b_square, filter_eps=threshold)
2182 ! copy Bsquare in B to iterate
2183 CALL dbcsr_copy(b, b_square)
2184 END DO
2185
2186 ! copy B_square in matrix_exp and
2187 CALL dbcsr_copy(matrix_exp, b_square)
2188
2189 ! scale matrix_exp by omega, matrix_exp=omega*B_square
2190 CALL dbcsr_scale(matrix_exp, alpha_scalar=omega)
2191 ! write(*,*) alpha,omega
2192
2193 CALL dbcsr_release(b)
2194 CALL dbcsr_release(c)
2195 CALL dbcsr_release(d)
2196 CALL dbcsr_release(d_product)
2197 CALL dbcsr_release(b_square)
2198
2199 CALL timestop(handle)
2200
2201 END SUBROUTINE matrix_exponential
2202
2203! **************************************************************************************************
2204!> \brief McWeeny purification of a matrix in the orthonormal basis
2205!> \param matrix_p Matrix to purify (needs to be almost idempotent already)
2206!> \param threshold Threshold used as filter_eps and convergence criteria
2207!> \param max_steps Max number of iterations
2208!> \par History
2209!> 2013.01 created [Florian Schiffmann]
2210!> 2014.07 slightly refactored [Ole Schuett]
2211!> \author Florian Schiffmann
2212! **************************************************************************************************
2213 SUBROUTINE purify_mcweeny_orth(matrix_p, threshold, max_steps)
2214 TYPE(dbcsr_type), DIMENSION(:) :: matrix_p
2215 REAL(KIND=dp) :: threshold
2216 INTEGER :: max_steps
2217
2218 CHARACTER(LEN=*), PARAMETER :: routineN = 'purify_mcweeny_orth'
2219
2220 INTEGER :: handle, i, ispin, unit_nr
2221 REAL(KIND=dp) :: frob_norm, trace
2222 TYPE(cp_logger_type), POINTER :: logger
2223 TYPE(dbcsr_type) :: matrix_pp, matrix_tmp
2224
2225 CALL timeset(routinen, handle)
2226 logger => cp_get_default_logger()
2227 IF (logger%para_env%is_source()) THEN
2228 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
2229 ELSE
2230 unit_nr = -1
2231 END IF
2232
2233 CALL dbcsr_create(matrix_pp, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2234 CALL dbcsr_create(matrix_tmp, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2235 CALL dbcsr_trace(matrix_p(1), trace)
2236
2237 DO ispin = 1, SIZE(matrix_p)
2238 DO i = 1, max_steps
2239 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_p(ispin), matrix_p(ispin), &
2240 0.0_dp, matrix_pp, filter_eps=threshold)
2241
2242 ! test convergence
2243 CALL dbcsr_copy(matrix_tmp, matrix_pp)
2244 CALL dbcsr_add(matrix_tmp, matrix_p(ispin), 1.0_dp, -1.0_dp)
2245 frob_norm = dbcsr_frobenius_norm(matrix_tmp) ! tmp = PP - P
2246 IF (unit_nr > 0) WRITE (unit_nr, '(t3,a,f16.8)') "McWeeny: Deviation of idempotency", frob_norm
2247 IF (unit_nr > 0) CALL m_flush(unit_nr)
2248
2249 ! construct new P
2250 CALL dbcsr_copy(matrix_tmp, matrix_pp)
2251 CALL dbcsr_multiply("N", "N", -2.0_dp, matrix_pp, matrix_p(ispin), &
2252 3.0_dp, matrix_tmp, filter_eps=threshold)
2253 CALL dbcsr_copy(matrix_p(ispin), matrix_tmp) ! tmp = 3PP - 2PPP
2254
2255 ! frob_norm < SQRT(trace*threshold)
2256 IF (frob_norm*frob_norm < trace*threshold) EXIT
2257 END DO
2258 END DO
2259
2260 CALL dbcsr_release(matrix_pp)
2261 CALL dbcsr_release(matrix_tmp)
2262 CALL timestop(handle)
2263 END SUBROUTINE purify_mcweeny_orth
2264
2265! **************************************************************************************************
2266!> \brief McWeeny purification of a matrix in the non-orthonormal basis
2267!> \param matrix_p Matrix to purify (needs to be almost idempotent already)
2268!> \param matrix_s Overlap-Matrix
2269!> \param threshold Threshold used as filter_eps and convergence criteria
2270!> \param max_steps Max number of iterations
2271!> \par History
2272!> 2013.01 created [Florian Schiffmann]
2273!> 2014.07 slightly refactored [Ole Schuett]
2274!> \author Florian Schiffmann
2275! **************************************************************************************************
2276 SUBROUTINE purify_mcweeny_nonorth(matrix_p, matrix_s, threshold, max_steps)
2277 TYPE(dbcsr_type), DIMENSION(:) :: matrix_p
2278 TYPE(dbcsr_type) :: matrix_s
2279 REAL(KIND=dp) :: threshold
2280 INTEGER :: max_steps
2281
2282 CHARACTER(LEN=*), PARAMETER :: routineN = 'purify_mcweeny_nonorth'
2283
2284 INTEGER :: handle, i, ispin, unit_nr
2285 REAL(KIND=dp) :: frob_norm, trace
2286 TYPE(cp_logger_type), POINTER :: logger
2287 TYPE(dbcsr_type) :: matrix_ps, matrix_psp, matrix_test
2288
2289 CALL timeset(routinen, handle)
2290
2291 logger => cp_get_default_logger()
2292 IF (logger%para_env%is_source()) THEN
2293 unit_nr = cp_logger_get_default_unit_nr(logger, local=.true.)
2294 ELSE
2295 unit_nr = -1
2296 END IF
2297
2298 CALL dbcsr_create(matrix_ps, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2299 CALL dbcsr_create(matrix_psp, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2300 CALL dbcsr_create(matrix_test, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2301
2302 DO ispin = 1, SIZE(matrix_p)
2303 DO i = 1, max_steps
2304 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_p(ispin), matrix_s, &
2305 0.0_dp, matrix_ps, filter_eps=threshold)
2306 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_ps, matrix_p(ispin), &
2307 0.0_dp, matrix_psp, filter_eps=threshold)
2308 IF (i == 1) CALL dbcsr_trace(matrix_ps, trace)
2309
2310 ! test convergence
2311 CALL dbcsr_copy(matrix_test, matrix_psp)
2312 CALL dbcsr_add(matrix_test, matrix_p(ispin), 1.0_dp, -1.0_dp)
2313 frob_norm = dbcsr_frobenius_norm(matrix_test) ! test = PSP - P
2314 IF (unit_nr > 0) WRITE (unit_nr, '(t3,a,2f16.8)') "McWeeny: Deviation of idempotency", frob_norm
2315 IF (unit_nr > 0) CALL m_flush(unit_nr)
2316
2317 ! construct new P
2318 CALL dbcsr_copy(matrix_p(ispin), matrix_psp)
2319 CALL dbcsr_multiply("N", "N", -2.0_dp, matrix_ps, matrix_psp, &
2320 3.0_dp, matrix_p(ispin), filter_eps=threshold)
2321
2322 ! frob_norm < SQRT(trace*threshold)
2323 IF (frob_norm*frob_norm < trace*threshold) EXIT
2324 END DO
2325 END DO
2326
2327 CALL dbcsr_release(matrix_ps)
2328 CALL dbcsr_release(matrix_psp)
2329 CALL dbcsr_release(matrix_test)
2330 CALL timestop(handle)
2331 END SUBROUTINE purify_mcweeny_nonorth
2332
2333END MODULE iterate_matrix
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
arnoldi iteration using dbcsr
Definition arnoldi_api.F:15
subroutine, public arnoldi_extremal(matrix_a, max_ev, min_ev, converged, threshold, max_iter)
simple wrapper to estimate extremal eigenvalues with arnoldi, using the old lanczos interface this hi...
collects all references to literature in CP2K as new algorithms / method are included from literature...
integer, save, public richters2018
various routines to log and control the output. The idea is that decisions about where to log should ...
recursive integer function, public cp_logger_get_default_unit_nr(logger, local, skip_not_ionode)
asks the default unit number of the given logger. try to use cp_logger_get_unit_nr
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
collects all constants needed in input so that they can be used without circular dependencies
integer, parameter, public ls_scf_submatrix_sign_direct_muadj
integer, parameter, public ls_scf_submatrix_sign_ns
integer, parameter, public ls_scf_submatrix_sign_direct_muadj_lowmem
integer, parameter, public ls_scf_submatrix_sign_direct
Routines useful for iterative matrix calculations.
recursive subroutine, public determinant(matrix, det, threshold)
Computes the determinant of a symmetric positive definite matrix using the trace of the matrix logari...
subroutine, public matrix_sqrt_newton_schulz(matrix_sqrt, matrix_sqrt_inv, matrix, threshold, order, eps_lanczos, max_iter_lanczos, symmetrize, converged)
compute the sqrt of a matrix via the sign function and the corresponding Newton-Schulz iterations the...
subroutine, public matrix_sign_submatrix(matrix_sign, matrix, threshold, sign_order, submatrix_sign_method)
Submatrix method.
subroutine, public invert_taylor(matrix_inverse, matrix, threshold, use_inv_as_guess, norm_convergence, filter_eps, accelerator_order, max_iter_lanczos, eps_lanczos, silent)
invert a symmetric positive definite diagonally dominant matrix
subroutine, public matrix_exponential(matrix_exp, matrix, omega, alpha, threshold)
...
subroutine, public matrix_sign_submatrix_mu_adjust(matrix_sign, matrix, mu, nelectron, threshold, variant)
Submatrix method with internal adjustment of chemical potential.
subroutine, public invert_hotelling(matrix_inverse, matrix, threshold, use_inv_as_guess, norm_convergence, filter_eps, accelerator_order, max_iter_lanczos, eps_lanczos, silent)
invert a symmetric positive definite matrix by Hotelling's method explicit symmetrization makes this ...
subroutine, public matrix_sign_proot(matrix_sign, matrix, threshold, sign_order)
compute the sign a matrix using the general algorithm for the p-th root of Richters et al....
subroutine, public matrix_sign_newton_schulz(matrix_sign, matrix, threshold, sign_order)
compute the sign a matrix using Newton-Schulz iterations
subroutine, public matrix_sqrt_proot(matrix_sqrt, matrix_sqrt_inv, matrix, threshold, order, eps_lanczos, max_iter_lanczos, symmetrize, converged)
compute the sqrt of a matrix via the general algorithm for the p-th root of Richters et al....
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public int_8
Definition kinds.F:54
integer, parameter, public dp
Definition kinds.F:34
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition machine.F:106
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition machine.F:123
Definition of mathematical constants and functions.
real(kind=dp), dimension(0:maxfac), parameter, public ifac
Collection of simple mathematical functions and subroutines.
Definition mathlib.F:15
logical function, public abnormal_value(a)
determines if a value is not normal (e.g. for Inf and Nan) based on IO to work also under optimizatio...
Definition mathlib.F:150
Routines for calculating a complex matrix exponential.
Definition matrix_exp.F:13
Interface to the message passing library MPI.
type of a logger, at the moment it contains just a print level starting at which level it should be l...