71 CHARACTER(LEN=*),
PARAMETER :: routinen =
'krylov_space_allocate'
73 INTEGER :: handle, ik, ispin, max_nmo, nao, nblock, &
78 CALL timeset(routinen, handle)
80 IF (.NOT.
ASSOCIATED(krylov_space%mo_conv))
THEN
81 NULLIFY (fm_struct_tmp, mo_coeff)
83 krylov_space%nkrylov = scf_control%diagonalization%nkrylov
84 krylov_space%nblock = scf_control%diagonalization%nblock_krylov
86 nk = krylov_space%nkrylov
87 nblock = krylov_space%nblock
90 ALLOCATE (krylov_space%mo_conv(nspin))
91 ALLOCATE (krylov_space%mo_refine(nspin))
92 ALLOCATE (krylov_space%chc_mat(nspin))
93 ALLOCATE (krylov_space%c_vec(nspin))
96 CALL get_mo_set(mos(ispin), mo_coeff=mo_coeff, nao=nao, nmo=nmo)
97 CALL cp_fm_create(krylov_space%mo_conv(ispin), mo_coeff%matrix_struct)
98 CALL cp_fm_create(krylov_space%mo_refine(ispin), mo_coeff%matrix_struct)
99 NULLIFY (fm_struct_tmp)
101 para_env=mo_coeff%matrix_struct%para_env, &
102 context=mo_coeff%matrix_struct%context)
103 CALL cp_fm_create(krylov_space%chc_mat(ispin), fm_struct_tmp,
"chc")
104 CALL cp_fm_create(krylov_space%c_vec(ispin), fm_struct_tmp,
"vec")
106 max_nmo = max(max_nmo, nmo)
110 ALLOCATE (krylov_space%c_eval(max_nmo))
112 ALLOCATE (krylov_space%v_mat(nk))
114 NULLIFY (fm_struct_tmp)
116 para_env=mo_coeff%matrix_struct%para_env, &
117 context=mo_coeff%matrix_struct%context)
119 CALL cp_fm_create(krylov_space%v_mat(ik), matrix_struct=fm_struct_tmp, &
122 ALLOCATE (krylov_space%tmp_mat)
123 CALL cp_fm_create(krylov_space%tmp_mat, matrix_struct=fm_struct_tmp, &
129 NULLIFY (fm_struct_tmp)
131 para_env=mo_coeff%matrix_struct%para_env, &
132 context=mo_coeff%matrix_struct%context)
133 ALLOCATE (krylov_space%block1_mat)
134 CALL cp_fm_create(krylov_space%block1_mat, matrix_struct=fm_struct_tmp, &
136 ALLOCATE (krylov_space%block2_mat)
137 CALL cp_fm_create(krylov_space%block2_mat, matrix_struct=fm_struct_tmp, &
139 ALLOCATE (krylov_space%block3_mat)
140 CALL cp_fm_create(krylov_space%block3_mat, matrix_struct=fm_struct_tmp, &
145 NULLIFY (fm_struct_tmp)
147 para_env=mo_coeff%matrix_struct%para_env, &
148 context=mo_coeff%matrix_struct%context)
149 ALLOCATE (krylov_space%block4_mat)
150 CALL cp_fm_create(krylov_space%block4_mat, matrix_struct=fm_struct_tmp, &
152 ALLOCATE (krylov_space%block5_mat)
153 CALL cp_fm_create(krylov_space%block5_mat, matrix_struct=fm_struct_tmp, &
156 ALLOCATE (krylov_space%t_eval(ndim))
162 CALL timestop(handle)
183 eps_iter, ispin, check_moconv_only)
187 REAL(
dp),
DIMENSION(:),
POINTER :: eval
188 INTEGER,
INTENT(IN) :: nao
189 REAL(
dp),
INTENT(IN) :: eps_iter
190 INTEGER,
INTENT(IN) :: ispin
191 LOGICAL,
INTENT(IN),
OPTIONAL :: check_moconv_only
193 CHARACTER(LEN=*),
PARAMETER :: routinen =
'lanczos_refinement'
194 REAL(kind=
dp),
PARAMETER :: rmone = -1.0_dp, rone = 1.0_dp, &
197 INTEGER :: hand1, hand2, hand3, hand4, hand5, handle, ib, ik, imo, imo_low, imo_up, it, jt, &
198 nblock, ndim, nmo, nmo_converged, nmo_nc, nmob, num_blocks
199 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: itaken
200 LOGICAL :: my_check_moconv_only
201 REAL(
dp) :: max_norm, min_norm, vmax
202 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :) :: q_mat, tblock, tvblock
203 REAL(
dp),
DIMENSION(:),
POINTER :: c_res, t_eval
206 TYPE(
cp_fm_type),
DIMENSION(:),
POINTER :: v_mat
207 TYPE(
cp_fm_type),
POINTER :: a_mat, b2_mat, b_mat, chc, evec, t_mat, &
210 CALL timeset(routinen, handle)
212 NULLIFY (fm_struct_tmp)
214 NULLIFY (c_res, t_eval)
215 NULLIFY (t_mat, t_vec)
216 NULLIFY (a_mat, b_mat, b2_mat, v_mat)
219 my_check_moconv_only = .false.
220 IF (
PRESENT(check_moconv_only)) my_check_moconv_only = check_moconv_only
222 chc => krylov_space%chc_mat(ispin)
223 evec => krylov_space%c_vec(ispin)
224 c_res => krylov_space%c_eval
225 t_eval => krylov_space%t_eval
227 NULLIFY (fm_struct_tmp)
229 para_env=c0%matrix_struct%para_env, &
230 context=c0%matrix_struct%context)
238 CALL parallel_gemm(
'N',
'N', nao, nmo, nao, rone, ks, c0, rzero, hc)
239 CALL parallel_gemm(
'T',
'N', nmo, nmo, nao, rone, c0, hc, rzero, chc)
242 CALL timeset(routinen//
"diag_chc", hand1)
247 CALL parallel_gemm(
'N',
'N', nao, nmo, nmo, rone, c0, evec, rzero, c1)
250 CALL parallel_gemm(
'N',
'N', nao, nmo, nmo, rone, hc, evec, rzero, c_tmp)
262 max_norm = max(max_norm, c_res(imo))
263 min_norm = min(min_norm, c_res(imo))
266 IF (c_res(imo) <= eps_iter)
THEN
267 nmo_converged = nmo_converged + 1
269 nmo_nc = nmo - nmo_converged
274 nblock = krylov_space%nblock
275 num_blocks = nmo_nc/nblock
277 krylov_space%nmo_nc = nmo_nc
278 krylov_space%nmo_conv = nmo_converged
279 krylov_space%max_res_norm = max_norm
280 krylov_space%min_res_norm = min_norm
282 IF (my_check_moconv_only)
THEN
285 CALL timestop(handle)
287 ELSE IF (krylov_space%nmo_nc > 0)
THEN
289 CALL cp_fm_to_fm(c0, c1, nmo_nc, nmo_converged + 1, 1)
291 nblock = krylov_space%nblock
292 IF (
modulo(nmo_nc, nblock) > 0.0_dp)
THEN
293 num_blocks = nmo_nc/nblock + 1
295 num_blocks = nmo_nc/nblock
298 DO ib = 1, num_blocks
300 imo_low = (ib - 1)*nblock + 1
301 imo_up = min(ib*nblock, nmo_nc)
302 nmob = imo_up - imo_low + 1
303 ndim = krylov_space%nkrylov*nmob
305 NULLIFY (fm_struct_tmp)
307 para_env=c0%matrix_struct%para_env, &
308 context=c0%matrix_struct%context)
309 CALL cp_fm_create(c2_tmp, matrix_struct=fm_struct_tmp, &
312 NULLIFY (fm_struct_tmp)
314 para_env=c0%matrix_struct%para_env, &
315 context=c0%matrix_struct%context)
316 CALL cp_fm_create(c3_tmp, matrix_struct=fm_struct_tmp, &
321 IF (nmob /= nblock)
THEN
322 NULLIFY (a_mat, b_mat, b2_mat, t_mat, t_vec, v_mat)
323 ALLOCATE (a_mat, b_mat, b2_mat, t_mat, t_vec)
324 NULLIFY (fm_struct_tmp)
326 para_env=chc%matrix_struct%para_env, &
327 context=chc%matrix_struct%context)
332 CALL cp_fm_create(b2_mat, matrix_struct=fm_struct_tmp, &
335 NULLIFY (fm_struct_tmp)
337 para_env=chc%matrix_struct%para_env, &
338 context=chc%matrix_struct%context)
344 NULLIFY (fm_struct_tmp)
346 para_env=c0%matrix_struct%para_env, &
347 context=c0%matrix_struct%context)
348 ALLOCATE (v_mat(krylov_space%nkrylov))
349 DO ik = 1, krylov_space%nkrylov
350 CALL cp_fm_create(v_mat(ik), matrix_struct=fm_struct_tmp, &
355 a_mat => krylov_space%block1_mat
356 b_mat => krylov_space%block2_mat
357 b2_mat => krylov_space%block3_mat
358 t_mat => krylov_space%block4_mat
359 t_vec => krylov_space%block5_mat
360 v_mat => krylov_space%v_mat
363 ALLOCATE (tblock(nmob, nmob))
364 ALLOCATE (tvblock(nmob, ndim))
366 CALL timeset(routinen//
"_kry_loop", hand2)
372 CALL parallel_gemm(
'N',
'N', nao, nmob, nao, rone, ks, v_mat(1), rzero, hc)
373 CALL parallel_gemm(
'T',
'N', nmob, nmob, nao, rone, v_mat(1), hc, &
378 CALL parallel_gemm(
'N',
'N', nao, nmob, nmob, rone, v_mat(1), a_mat, &
386 DO ik = 2, krylov_space%nkrylov
394 n_rows=nao, n_cols=nmob)
397 CALL parallel_gemm(
'N',
'N', nao, nmob, nao, rone, ks, v_mat(ik), rzero, hc)
398 CALL parallel_gemm(
'T',
'N', nmob, nmob, nao, rone, v_mat(ik), hc, rzero, a_mat)
401 CALL parallel_gemm(
'N',
'N', nao, nmob, nmob, rone, v_mat(ik), a_mat, &
406 n_rows=nao, n_cols=nmob, alpha=rmone)
410 it = (ik - 2)*nmob + 1
411 jt = (ik - 1)*nmob + 1
424 CALL timeset(routinen//
"_diag_tri", hand3)
430 CALL timeset(routinen//
"_build_cnew", hand4)
433 DO ik = 1, krylov_space%nkrylov
435 CALL parallel_gemm(
'N',
'N', nao, ndim, nmob, rone, v_mat(ik), t_vec, rone, c2_tmp, &
436 b_first_row=(jt + 1))
441 CALL parallel_gemm(
'T',
'N', nmob, ndim, nao, rone, v_mat(1), c2_tmp, rzero, c3_tmp)
444 ALLOCATE (q_mat(nmob, ndim))
448 ALLOCATE (itaken(ndim))
454 IF (itaken(jt) == 0 .AND. abs(q_mat(it, jt)) > vmax)
THEN
455 vmax = abs(q_mat(it, jt))
467 CALL cp_fm_to_fm(v_mat(1), c0, nmob, 1, (nmo_converged + imo_low))
470 IF (nmob < nblock)
THEN
476 DEALLOCATE (a_mat, b_mat, b2_mat, t_mat, t_vec)
483 CALL timeset(routinen//
"_ortho", hand5)
484 CALL parallel_gemm(
'T',
'N', nmo, nmo, nao, rone, c0, c0, rzero, chc)
495 CALL timestop(handle)
499 CALL timestop(handle)
518 eps_iter, ispin, check_moconv_only)
522 REAL(
dp),
DIMENSION(:),
POINTER :: eval
523 INTEGER,
INTENT(IN) :: nao
524 REAL(
dp),
INTENT(IN) :: eps_iter
525 INTEGER,
INTENT(IN) :: ispin
526 LOGICAL,
INTENT(IN),
OPTIONAL :: check_moconv_only
528 CHARACTER(LEN=*),
PARAMETER :: routinen =
'lanczos_refinement_2v'
529 REAL(kind=
dp),
PARAMETER :: rmone = -1.0_dp, rone = 1.0_dp, &
532 INTEGER :: hand1, hand2, hand3, hand4, hand5, hand6, handle, i, ia, ib, ik, imo, imo_low, &
533 imo_up, info, it, j, jt, liwork, lwork, nblock, ndim, nmo, nmo_converged, nmo_nc, nmob, &
535 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: itaken
536 INTEGER,
DIMENSION(:),
POINTER :: iwork
537 LOGICAL :: my_check_moconv_only
538 REAL(
dp) :: max_norm, min_norm, vmax
539 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :) :: a_block, b_block, q_mat, t_mat
540 REAL(
dp),
DIMENSION(:),
POINTER :: c_res, t_eval
541 REAL(kind=
dp),
DIMENSION(:),
POINTER :: work
542 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: a_loc, b_loc
544 TYPE(
cp_fm_type) :: b_mat, c2_tmp, c_tmp, hc, v_tmp
545 TYPE(
cp_fm_type),
ALLOCATABLE,
DIMENSION(:) :: v_mat
548 CALL timeset(routinen, handle)
550 NULLIFY (fm_struct_tmp)
552 NULLIFY (c_res, t_eval)
553 NULLIFY (b_loc, a_loc)
556 my_check_moconv_only = .false.
557 IF (
PRESENT(check_moconv_only)) my_check_moconv_only = check_moconv_only
559 chc => krylov_space%chc_mat(ispin)
560 evec => krylov_space%c_vec(ispin)
561 c_res => krylov_space%c_eval
562 t_eval => krylov_space%t_eval
564 NULLIFY (fm_struct_tmp)
566 para_env=c0%matrix_struct%para_env, &
567 context=c0%matrix_struct%context)
575 CALL parallel_gemm(
'N',
'N', nao, nmo, nao, rone, ks, c0, rzero, hc)
576 CALL parallel_gemm(
'T',
'N', nmo, nmo, nao, rone, c0, hc, rzero, chc)
579 CALL timeset(routinen//
"diag_chc", hand1)
584 CALL timeset(routinen//
"check_conv", hand6)
586 CALL parallel_gemm(
'N',
'N', nao, nmo, nmo, rone, c0, evec, rzero, c1)
589 CALL parallel_gemm(
'N',
'N', nao, nmo, nmo, rone, hc, evec, rzero, c_tmp)
601 max_norm = max(max_norm, c_res(imo))
602 min_norm = min(min_norm, c_res(imo))
605 IF (c_res(imo) <= eps_iter)
THEN
606 nmo_converged = nmo_converged + 1
608 nmo_nc = nmo - nmo_converged
617 krylov_space%nmo_nc = nmo_nc
618 krylov_space%nmo_conv = nmo_converged
619 krylov_space%max_res_norm = max_norm
620 krylov_space%min_res_norm = min_norm
622 IF (my_check_moconv_only)
THEN
624 ELSE IF (krylov_space%nmo_nc > 0)
THEN
626 CALL cp_fm_to_fm(c0, c1, nmo_nc, nmo_converged + 1, 1)
628 nblock = krylov_space%nblock
629 IF (
modulo(nmo_nc, nblock) > 0.0_dp)
THEN
630 num_blocks = nmo_nc/nblock + 1
632 num_blocks = nmo_nc/nblock
635 DO ib = 1, num_blocks
637 imo_low = (ib - 1)*nblock + 1
638 imo_up = min(ib*nblock, nmo_nc)
639 nmob = imo_up - imo_low + 1
640 ndim = krylov_space%nkrylov*nmob
643 CALL timeset(routinen//
"alloc", hand6)
644 ALLOCATE (a_block(nmob, nmob))
645 ALLOCATE (b_block(nmob, nmob))
646 ALLOCATE (t_mat(ndim, ndim))
648 NULLIFY (fm_struct_tmp)
657 para_env=c0%matrix_struct%para_env, &
658 context=c0%matrix_struct%context, &
667 NULLIFY (fm_struct_tmp)
668 ALLOCATE (v_mat(krylov_space%nkrylov))
671 para_env=c0%matrix_struct%para_env, &
672 context=c0%matrix_struct%context, &
674 DO ik = 1, krylov_space%nkrylov
675 CALL cp_fm_create(v_mat(ik), matrix_struct=fm_struct_tmp, &
681 NULLIFY (fm_struct_tmp)
684 para_env=c0%matrix_struct%para_env, &
685 context=c0%matrix_struct%context, &
687 CALL cp_fm_create(c2_tmp, matrix_struct=fm_struct_tmp, &
691 NULLIFY (fm_struct_tmp)
693 para_env=c0%matrix_struct%para_env, &
694 context=c0%matrix_struct%context)
705 CALL timeset(routinen//
"_kry_loop", hand2)
707 CALL parallel_gemm(
'N',
'N', nao, nmob, nao, rone, ks, v_mat(1), rzero, hc)
710 a_loc => v_mat(1)%local_data
711 b_loc => hc%local_data
713 IF (
SIZE(hc%local_data, 2) == nmob)
THEN
715 CALL dgemm(
'T',
'N', nmob, nmob,
SIZE(hc%local_data, 1), 1.0_dp, a_loc(1, 1), &
716 SIZE(hc%local_data, 1), b_loc(1, 1),
SIZE(hc%local_data, 1), 0.0_dp, a_block(1, 1), nmob)
718 CALL hc%matrix_struct%para_env%sum(a_block)
722 c_tmp%local_data = 0.0_dp
723 IF (
SIZE(c_tmp%local_data, 2) == nmob)
THEN
724 b_loc => c_tmp%local_data
725 CALL dgemm(
'N',
'N',
SIZE(c_tmp%local_data, 1), nmob, nmob, 1.0_dp, a_loc(1, 1), &
726 SIZE(c_tmp%local_data, 1), a_block(1, 1), nmob, 0.0_dp, &
727 b_loc(1, 1),
SIZE(c_tmp%local_data, 1))
734 t_mat(1:nmob, i) = a_block(1:nmob, i)
737 DO ik = 2, krylov_space%nkrylov
744 n_rows=nao, n_cols=nmob)
749 CALL parallel_gemm(
'N',
'N', nao, nmob, nao, rone, ks, v_mat(ik), rzero, hc)
752 IF (
SIZE(hc%local_data, 2) == nmob)
THEN
753 a_loc => v_mat(ik)%local_data
754 b_loc => hc%local_data
755 CALL dgemm(
'T',
'N', nmob, nmob,
SIZE(hc%local_data, 1), 1.0_dp, a_loc(1, 1), &
756 SIZE(hc%local_data, 1), b_loc(1, 1),
SIZE(hc%local_data, 1), 0.0_dp, a_block(1, 1), nmob)
758 CALL hc%matrix_struct%para_env%sum(a_block)
762 c_tmp%local_data = 0.0_dp
763 IF (
SIZE(c_tmp%local_data, 2) == nmob)
THEN
764 a_loc => v_mat(ik)%local_data
765 b_loc => c_tmp%local_data
766 CALL dgemm(
'N',
'N',
SIZE(c_tmp%local_data, 1), nmob, nmob, 1.0_dp, a_loc(1, 1), &
767 SIZE(c_tmp%local_data, 1), a_block(1, 1), nmob, 0.0_dp, &
768 b_loc(1, 1),
SIZE(c_tmp%local_data, 1))
772 IF (
SIZE(c_tmp%local_data, 2) == nmob)
THEN
773 a_loc => v_mat(ik - 1)%local_data
776 DO ia = 1,
SIZE(c_tmp%local_data, 1)
777 b_loc(ia, i) = b_loc(ia, i) - a_loc(ia, j)*b_block(i, j)
787 t_mat(jt + 1:jt + nmob, jt + j) = a_block(1:nmob, j)
789 t_mat(it + i, jt + j) = b_block(j, i)
790 t_mat(jt + j, it + i) = b_block(j, i)
796 CALL timeset(routinen//
"_diag_tri", hand3)
797 lwork = 1 + 6*ndim + 2*ndim**2 + 5000
799 ALLOCATE (work(lwork))
800 ALLOCATE (iwork(liwork))
803 CALL dsyevd(
'V',
'U', ndim, t_mat(1, 1), ndim, t_eval(1), &
804 work(1), lwork, iwork(1), liwork, info)
809 CALL timeset(routinen//
"_build_cnew", hand4)
812 c2_tmp%local_data = 0.0_dp
813 ALLOCATE (q_mat(nmob, ndim))
815 b_loc => c2_tmp%local_data
816 DO ik = 1, krylov_space%nkrylov
818 IF (
SIZE(c2_tmp%local_data, 2) == ndim)
THEN
820 a_loc => v_tmp%local_data
822 CALL dgemm(
'N',
'N',
SIZE(c2_tmp%local_data, 1), ndim, nmob, 1.0_dp, a_loc(1, 1), &
823 SIZE(c2_tmp%local_data, 1), t_mat(it + 1, 1), ndim, 1.0_dp, &
824 b_loc(1, 1),
SIZE(c2_tmp%local_data, 1))
830 IF (
SIZE(c2_tmp%local_data, 2) == ndim)
THEN
832 a_loc => v_tmp%local_data
833 b_loc => c2_tmp%local_data
834 CALL dgemm(
'T',
'N', nmob, ndim,
SIZE(v_tmp%local_data, 1), 1.0_dp, a_loc(1, 1), &
835 SIZE(v_tmp%local_data, 1), b_loc(1, 1),
SIZE(v_tmp%local_data, 1), &
836 0.0_dp, q_mat(1, 1), nmob)
838 CALL hc%matrix_struct%para_env%sum(q_mat)
840 ALLOCATE (itaken(ndim))
846 IF (itaken(jt) == 0 .AND. abs(q_mat(it, jt)) > vmax)
THEN
847 vmax = abs(q_mat(it, jt))
859 CALL cp_fm_to_fm(v_mat(1), c0, nmob, 1, (nmo_converged + imo_low))
876 CALL timeset(routinen//
"_ortho", hand5)
877 CALL parallel_gemm(
'T',
'N', nmo, nmo, nao, rone, c0, c0, rzero, chc)
886 CALL timestop(handle)