449 INTEGER,
INTENT(in) :: nsquare, npade
451 CHARACTER(len=*),
PARAMETER :: routinen =
'exp_pade_only_imaginary'
452 REAL(kind=
dp),
PARAMETER :: rone = 1.0_dp, rzero = 0.0_dp
454 COMPLEX(KIND=dp) :: scaled, scalen
455 INTEGER :: handle, i, j, k, ldim, ndim, p, q
456 REAL(kind=
dp) :: my_fac, square_fac
457 REAL(kind=
dp),
CONTIGUOUS,
DIMENSION(:, :), &
458 POINTER :: local_data_im
464 CALL timeset(routinen, handle)
468 CALL cp_fm_get_info(im_part, local_data=local_data_im, ncol_local=ldim, nrow_global=ndim)
469 square_fac = 1.0_dp/(2.0_dp**real(nsquare,
dp))
472 matrix_struct=im_part%matrix_struct, &
476 matrix_struct=dpq%matrix_struct, &
480 matrix_struct=dpq%matrix_struct, &
484 matrix_struct=t1%matrix_struct, &
488 matrix_struct=t1%matrix_struct, &
501 npq%local_data(:, i) = npq%local_data(:, i) + cmplx(rzero, 0.5_dp*square_fac*local_data_im(:, i),
dp)
502 dpq%local_data(:, i) = dpq%local_data(:, i) - cmplx(rzero, 0.5_dp*square_fac*local_data_im(:, i),
dp)
505 IF (npade .GT. 2)
THEN
506 DO j = 1, floor(npade/2.0_dp)
509 IF (i .LE. p) scalen = cmplx(my_fac*
fac(p + q - i)*
fac(p)/(
fac(p + q)*
fac(i)*
fac(p - i)), 0.0_dp,
dp)
510 scaled = cmplx(my_fac*
fac(p + q - i)*
fac(q)/(
fac(p + q)*
fac(i)*
fac(q - i)), 0.0_dp,
dp)
511 CALL parallel_gemm(
"N",
"N", ndim, ndim, ndim, square_fac, im_part, t2, rzero, tres)
514 npq%local_data(:, k) = npq%local_data(:, k) + scalen*tres%local_data(:, k)
515 dpq%local_data(:, k) = dpq%local_data(:, k) + scaled*tres%local_data(:, k)
518 IF (2*j + 1 .LE. q)
THEN
520 IF (i .LE. p) scalen = cmplx(my_fac*
fac(p + q - i)*
fac(p)/(
fac(p + q)*
fac(i)*
fac(p - i)), rzero,
dp)
521 scaled = cmplx(-my_fac*
fac(p + q - i)*
fac(q)/(
fac(p + q)*
fac(i)*
fac(q - i)), rzero,
dp)
522 CALL parallel_gemm(
"N",
"N", ndim, ndim, ndim, square_fac, im_part, tres, rzero, t2)
525 npq%local_data(:, k) = npq%local_data(:, k) + scalen*cmplx(rzero, t2%local_data(:, k),
dp)
526 dpq%local_data(:, k) = dpq%local_data(:, k) + scaled*cmplx(rzero, t2%local_data(:, k),
dp)
536 IF (nsquare .GT. 0)
THEN
538 CALL parallel_gemm(
"N",
"N", ndim, ndim, ndim,
z_one, cmult_p(mod(i, 2) + 1), cmult_p(mod(i, 2) + 1),
z_zero, &
539 cmult_p(mod(i + 1, 2) + 1))
540 fin_p = cmult_p(mod(i + 1, 2) + 1)
547 exp_h(1)%local_data(:, k) = real(fin_p%local_data(:, k), kind=
dp)
548 exp_h(2)%local_data(:, k) = aimag(fin_p%local_data(:, k))
556 CALL timestop(handle)
682 SUBROUTINE arnoldi(mos_old, mos_new, eps_exp, Hre, Him, mos_next, narn_old)
684 TYPE(
cp_fm_type),
DIMENSION(2) :: mos_old, mos_new
685 REAL(kind=
dp),
INTENT(in) :: eps_exp
688 TYPE(
cp_fm_type),
DIMENSION(2),
OPTIONAL :: mos_next
689 INTEGER,
INTENT(inout) :: narn_old
691 CHARACTER(len=*),
PARAMETER :: routinen =
'arnoldi'
692 REAL(kind=
dp),
PARAMETER :: rone = 1.0_dp, rzero = 0.0_dp
694 INTEGER :: handle, i, icol_local, idim, info, j, l, &
695 mydim, nao, narnoldi, ncol_local, &
696 newdim, nmo, npade, pade_step
697 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: ipivot
698 INTEGER,
DIMENSION(:),
POINTER :: col_indices, col_procs
699 LOGICAL :: convergence, double_col, double_row
700 REAL(
dp),
ALLOCATABLE,
DIMENSION(:) :: last_norm, norm1, results
701 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :) :: d, mat1, mat2, mat3, n
702 REAL(
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: h_approx, h_approx_save
703 REAL(kind=
dp) :: conv_norm, prefac, scaled, scalen
705 TYPE(
cp_fm_type),
ALLOCATABLE,
DIMENSION(:) :: v_mats
709 CALL timeset(routinen, handle)
710 para_env => mos_new(1)%matrix_struct%para_env
712 CALL cp_fm_get_info(mos_old(1), ncol_local=ncol_local, col_indices=col_indices, &
713 nrow_global=nao, ncol_global=nmo, matrix_struct=mo_struct)
714 narnoldi = min(18, nao)
716 ALLOCATE (results(ncol_local))
717 ALLOCATE (norm1(ncol_local))
718 ALLOCATE (v_mats(narnoldi + 1))
719 ALLOCATE (last_norm(ncol_local))
720 ALLOCATE (h_approx(narnoldi, narnoldi, ncol_local))
721 ALLOCATE (h_approx_save(narnoldi, narnoldi, ncol_local))
722 col_procs => mo_struct%context%blacs2mpi(:, mo_struct%context%mepos(2))
723 CALL col_group%from_reordering(para_env, col_procs)
728 h_approx_save = rzero
730 DO i = 1, narnoldi + 1
738 DO icol_local = 1, ncol_local
739 v_mats(1)%local_data(:, icol_local) = mos_old(1)%local_data(:, icol_local)
740 v_mats(1)%local_data(:, icol_local + ncol_local) = mos_old(2)%local_data(:, icol_local)
741 norm1(icol_local) = sum(v_mats(1)%local_data(:, icol_local)**2) &
742 + sum(v_mats(1)%local_data(:, icol_local + ncol_local)**2)
745 CALL col_group%sum(norm1)
747 norm1(:) = sqrt(norm1(:))
750 DO icol_local = 1, ncol_local
751 v_mats(1)%local_data(:, icol_local) = v_mats(1)%local_data(:, icol_local)/norm1(icol_local)
752 v_mats(1)%local_data(:, icol_local + ncol_local) = &
753 v_mats(1)%local_data(:, icol_local + ncol_local)/norm1(icol_local)
757 DO i = 2, narnoldi + 1
759 CALL parallel_gemm(
"N",
"N", nao, newdim, nao, 1.0_dp, him, v_mats(i - 1), 0.0_dp, v_mats(i))
762 DO icol_local = 1, ncol_local
763 mos_new(1)%local_data(:, icol_local) = v_mats(i)%local_data(:, icol_local)
764 v_mats(i)%local_data(:, icol_local) = -v_mats(i)%local_data(:, icol_local + ncol_local)
765 v_mats(i)%local_data(:, icol_local + ncol_local) = mos_new(1)%local_data(:, icol_local)
768 IF (
PRESENT(hre))
THEN
769 CALL parallel_gemm(
"N",
"N", nao, newdim, nao, 1.0_dp, hre, v_mats(i - 1), 1.0_dp, v_mats(i))
774 DO icol_local = 1, ncol_local
775 results(icol_local) = sum(v_mats(l)%local_data(:, icol_local)*v_mats(i)%local_data(:, icol_local)) + &
776 sum(v_mats(l)%local_data(:, icol_local + ncol_local)* &
777 v_mats(i)%local_data(:, icol_local + ncol_local))
780 CALL col_group%sum(results)
783 DO icol_local = 1, ncol_local
784 h_approx_save(l, i - 1, icol_local) = results(icol_local)
785 v_mats(i)%local_data(:, icol_local) = v_mats(i)%local_data(:, icol_local) - &
786 results(icol_local)*v_mats(l)%local_data(:, icol_local)
787 v_mats(i)%local_data(:, icol_local + ncol_local) = &
788 v_mats(i)%local_data(:, icol_local + ncol_local) - &
789 results(icol_local)*v_mats(l)%local_data(:, icol_local + ncol_local)
794 DO icol_local = 1, ncol_local
795 results(icol_local) = sum(v_mats(i)%local_data(:, icol_local)**2) + &
796 sum(v_mats(i)%local_data(:, icol_local + ncol_local)**2)
799 CALL col_group%sum(results)
801 IF (i .LE. narnoldi)
THEN
804 DO icol_local = 1, ncol_local
805 h_approx_save(i, i - 1, icol_local) = sqrt(results(icol_local))
806 last_norm(icol_local) = sqrt(results(icol_local))
807 v_mats(i)%local_data(:, icol_local) = v_mats(i)%local_data(:, icol_local)/sqrt(results(icol_local))
808 v_mats(i)%local_data(:, icol_local + ncol_local) = &
809 v_mats(i)%local_data(:, icol_local + ncol_local)/sqrt(results(icol_local))
813 DO icol_local = 1, ncol_local
814 last_norm(icol_local) = sqrt(results(icol_local))
818 h_approx(:, :, :) = h_approx_save
822 convergence = .false.
823 IF (i .GE. narn_old)
THEN
825 mydim = min(i, narnoldi)
826 ALLOCATE (ipivot(mydim))
827 ALLOCATE (mat1(mydim, mydim))
828 ALLOCATE (mat2(mydim, mydim))
829 ALLOCATE (mat3(mydim, mydim))
830 ALLOCATE (n(mydim, mydim))
831 ALLOCATE (d(mydim, mydim))
832 DO icol_local = 1, ncol_local
835 mat1(idim, j) = h_approx(idim, j, icol_local)/16.0_dp
836 mat3(idim, j) = mat1(idim, j)
845 n(:, :) = n + 0.5_dp*mat1
846 d(:, :) = d - 0.5_dp*mat1
849 pade_step = pade_step + 1
850 CALL dgemm(
"N",
'N', mydim, mydim, mydim, rone, mat1(1, 1), &
851 mydim, mat3(1, 1), mydim, rzero, mat2(1, 1), mydim)
852 scalen = real(
fac(2*npade - pade_step)*
fac(npade)/ &
853 (
fac(2*npade)*
fac(pade_step)*
fac(npade - pade_step)),
dp)
854 scaled = real((-1.0_dp)**pade_step*
fac(2*npade - pade_step)*
fac(npade)/ &
855 (
fac(2*npade)*
fac(pade_step)*
fac(npade - pade_step)),
dp)
856 n(:, :) = n + scalen*mat2
857 d(:, :) = d + scaled*mat2
858 pade_step = pade_step + 1
859 CALL dgemm(
"N",
'N', mydim, mydim, mydim, rone, mat2(1, 1), &
860 mydim, mat1(1, 1), mydim, rzero, mat3(1, 1), mydim)
861 scalen = real(
fac(2*npade - pade_step)*
fac(npade)/ &
862 (
fac(2*npade)*
fac(pade_step)*
fac(npade - pade_step)),
dp)
863 scaled = real((-1.0_dp)**pade_step*
fac(2*npade - pade_step)*
fac(npade)/ &
864 (
fac(2*npade)*
fac(pade_step)*
fac(npade - pade_step)),
dp)
865 n(:, :) = n + scalen*mat3
866 d(:, :) = d + scaled*mat3
869 CALL dgetrf(mydim, mydim, d(1, 1), mydim, ipivot, info)
870 CALL dgetrs(
"N", mydim, mydim, d(1, 1), mydim, ipivot, n, mydim, info)
871 CALL dgemm(
"N",
'N', mydim, mydim, mydim, rone, n(1, 1), mydim, n(1, 1), mydim, rzero, mat1(1, 1), mydim)
872 CALL dgemm(
"N",
'N', mydim, mydim, mydim, rone, mat1(1, 1), mydim, mat1(1, 1), mydim, rzero, n(1, 1), mydim)
873 CALL dgemm(
"N",
'N', mydim, mydim, mydim, rone, n(1, 1), mydim, n(1, 1), mydim, rzero, mat1(1, 1), mydim)
874 CALL dgemm(
"N",
'N', mydim, mydim, mydim, rone, mat1(1, 1), mydim, mat1(1, 1), mydim, rzero, n(1, 1), mydim)
877 h_approx(idim, j, icol_local) = n(idim, j)
884 DO icol_local = 1, ncol_local
885 results(icol_local) = last_norm(icol_local)*h_approx(i - 1, 1, icol_local)
886 conv_norm = max(conv_norm, abs(results(icol_local)))
889 CALL para_env%max(conv_norm)
891 IF (conv_norm .LT. eps_exp .OR. i .GT. narnoldi)
THEN
893 mos_new(1)%local_data = rzero
894 mos_new(2)%local_data = rzero
895 DO icol_local = 1, ncol_local
897 prefac = h_approx(idim, 1, icol_local)*norm1(icol_local)
898 mos_new(1)%local_data(:, icol_local) = mos_new(1)%local_data(:, icol_local) + &
899 v_mats(idim)%local_data(:, icol_local)*prefac
900 mos_new(2)%local_data(:, icol_local) = mos_new(2)%local_data(:, icol_local) + &
901 v_mats(idim)%local_data(:, icol_local + ncol_local)*prefac
905 IF (
PRESENT(mos_next))
THEN
906 DO icol_local = 1, ncol_local
909 n(idim, j) = h_approx(idim, j, icol_local)
912 CALL dgemm(
"N",
'N', mydim, mydim, mydim, rone, n(1, 1), mydim, n(1, 1), mydim, rzero, mat1(1, 1), mydim)
915 h_approx(idim, j, icol_local) = mat1(idim, j)
919 mos_next(1)%local_data = rzero
920 mos_next(2)%local_data = rzero
921 DO icol_local = 1, ncol_local
923 prefac = h_approx(idim, 1, icol_local)*norm1(icol_local)
924 mos_next(1)%local_data(:, icol_local) = &
925 mos_next(1)%local_data(:, icol_local) + &
926 v_mats(idim)%local_data(:, icol_local)*prefac
927 mos_next(2)%local_data(:, icol_local) = &
928 mos_next(2)%local_data(:, icol_local) + &
929 v_mats(idim)%local_data(:, icol_local + ncol_local)*prefac
933 IF (conv_norm .LT. eps_exp)
THEN
946 IF (convergence)
EXIT
949 cpwarn_if(.NOT. convergence,
"ARNOLDI method did not converge")
954 CALL col_group%free()
956 DEALLOCATE (h_approx)
957 DEALLOCATE (h_approx_save)
960 DEALLOCATE (last_norm)
961 CALL timestop(handle)