24 #include "../base/base_uses.f90"
30 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'ps_wavelet_kernel'
79 SUBROUTINE createkernel(geocode, n01, n02, n03, hx, hy, hz, itype_scf, iproc, nproc, kernel, mpi_group)
81 CHARACTER(len=1),
INTENT(in) :: geocode
82 INTEGER,
INTENT(in) :: n01, n02, n03
83 REAL(kind=
dp),
INTENT(in) :: hx, hy, hz
84 INTEGER,
INTENT(in) :: itype_scf, iproc, nproc
85 REAL(kind=
dp),
POINTER :: kernel(:)
87 CLASS(mp_comm_type),
INTENT(in) :: mpi_group
89 INTEGER :: m1, m2, m3, md1, md2, md3, n1, n2, n3, &
90 nd1, nd2, nd3, nlimd, nlimk
91 REAL(kind=
dp) :: hgrid
93 hgrid = max(hx, hy, hz)
95 IF (geocode ==
'P')
THEN
98 md1, md2, md3, nd1, nd2, nd3, nproc)
104 ELSE IF (geocode ==
'S')
THEN
107 md1, md2, md3, nd1, nd2, nd3, nproc)
109 ALLOCATE (kernel(nd1*nd2*nd3/nproc))
113 CALL surfaces_kernel(n1, n2, n3, m3, nd1, nd2, nd3, hx, hz, hy, &
114 itype_scf, kernel, iproc, nproc, mpi_group)
119 ELSE IF (geocode ==
'F')
THEN
124 md1, md2, md3, nd1, nd2, nd3, nproc)
125 ALLOCATE (kernel(nd1*nd2*nd3/nproc))
128 CALL free_kernel(n01, n02, n03, n1, n2, n3, nd1, nd2, nd3, hgrid, &
129 itype_scf, iproc, nproc, kernel, mpi_group)
137 cpabort(
"No wavelet based poisson solver for given geometry")
220 SUBROUTINE surfaces_kernel(n1, n2, n3, m3, nker1, nker2, nker3, h1, h2, h3, &
221 itype_scf, karray, iproc, nproc, mpi_group)
223 INTEGER,
INTENT(in) :: n1, n2, n3, m3, nker1, nker2, nker3
224 REAL(kind=
dp),
INTENT(in) :: h1, h2, h3
225 INTEGER,
INTENT(in) :: itype_scf, nproc, iproc
227 DIMENSION(nker1, nker2, nker3/nproc), &
228 INTENT(out) :: karray
229 TYPE(mp_comm_type),
INTENT(in) :: mpi_group
231 INTEGER,
PARAMETER :: n_points = 2**6, ncache_optimal = 8*1024
233 INTEGER :: i, i1, i2, i3, ic, iend, imu, ind1, ind2, inzee, ipolyord, ireim, istart, j2, &
234 j2nd, j2st, jnd1, jp2, jreim, n_cell, n_range, n_scf, nact2, ncache, nfft, num_of_mus, &
236 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: after, before, now
237 REAL(kind=
dp) :: a, b, c, cp, d, diff, dx, fei, fer, foi, &
238 for, fr, mu1, pi, pion, ponx, pony, &
240 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: kernel_scf, x_scf, y_scf
241 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: btrig, cossinarr
242 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: halfft_cache, kernel
243 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :, :) :: kernel_mpi
244 REAL(kind=
dp),
DIMENSION(9, 8) :: cpol
257 cpol(1, 3) = 1._dp/3._dp
259 cpol(1, 4) = 7._dp/12._dp
260 cpol(2, 4) = 8._dp/3._dp
262 cpol(1, 5) = 19._dp/50._dp
263 cpol(2, 5) = 3._dp/2._dp
265 cpol(1, 6) = 41._dp/272._dp
266 cpol(2, 6) = 27._dp/34._dp
267 cpol(3, 6) = 27._dp/272._dp
269 cpol(1, 7) = 751._dp/2989._dp
270 cpol(2, 7) = 73._dp/61._dp
271 cpol(3, 7) = 27._dp/61._dp
273 cpol(1, 8) = -989._dp/4540._dp
274 cpol(2, 8) = -1472._dp/1135._dp
275 cpol(3, 8) = 232._dp/1135._dp
276 cpol(4, 8) = -2624._dp/1135._dp
279 cpol(1, 1) = .5_dp*cpol(1, 1)
280 cpol(1:2, 2) = 2._dp/3._dp*cpol(1:2, 2)
281 cpol(1:2, 3) = 3._dp/8._dp*cpol(1:2, 3)
282 cpol(1:3, 4) = 2._dp/15._dp*cpol(1:3, 4)
283 cpol(1:3, 5) = 25._dp/144._dp*cpol(1:3, 5)
284 cpol(1:4, 6) = 34._dp/105._dp*cpol(1:4, 6)
285 cpol(1:4, 7) = 2989._dp/17280._dp*cpol(1:4, 7)
286 cpol(1:5, 8) = -454._dp/2835._dp*cpol(1:5, 8)
289 cpol(2, 1) = cpol(1, 1)
291 cpol(3, 2) = cpol(1, 2)
293 cpol(3, 3) = cpol(2, 3)
294 cpol(4, 3) = cpol(1, 3)
296 cpol(4, 4) = cpol(2, 4)
297 cpol(5, 4) = cpol(1, 4)
299 cpol(4, 5) = cpol(3, 5)
300 cpol(5, 5) = cpol(2, 5)
301 cpol(6, 5) = cpol(1, 5)
303 cpol(5, 6) = cpol(3, 6)
304 cpol(6, 6) = cpol(2, 6)
305 cpol(7, 6) = cpol(1, 6)
307 cpol(5, 7) = cpol(4, 7)
308 cpol(6, 7) = cpol(3, 7)
309 cpol(7, 7) = cpol(2, 7)
310 cpol(8, 7) = cpol(1, 7)
312 cpol(6, 8) = cpol(4, 8)
313 cpol(7, 8) = cpol(3, 8)
314 cpol(8, 8) = cpol(2, 8)
315 cpol(9, 8) = cpol(1, 8)
318 n_scf = 2*itype_scf*n_points
320 ALLOCATE (x_scf(0:n_scf))
321 ALLOCATE (y_scf(0:n_scf))
326 dx = real(n_range, kind=
dp)/real(n_scf, kind=
dp)
329 n_range = max(n_cell, n_range)
332 ncache = ncache_optimal
335 IF (ncache <= (nker3 - 1)*4) ncache = nker3 - 1*4
340 IF (nproc*(nact2/nproc) /= nact2)
THEN
348 ALLOCATE (kernel(nker1, nact2/nproc, nker3))
349 ALLOCATE (kernel_mpi(nker1, nact2/nproc, nker3/nproc, nproc))
350 ALLOCATE (kernel_scf(n_range))
351 ALLOCATE (halfft_cache(2, ncache/4, 2))
352 ALLOCATE (cossinarr(2, n3/2 - 1))
359 pi = 4._dp*atan(1._dp)
362 CALL ctrig(n3/2, btrig, after, before, now, 1, ic)
365 pion = 2._dp*pi/real(n3, kind=
dp)
367 x = real(i3 - 1, kind=
dp)*pion
368 cossinarr(1, i3 - 1) = cos(x)
369 cossinarr(2, i3 - 1) = -sin(x)
374 kernel_mpi = huge(0._dp)
378 num_of_mus = ncache/(2*n3)
386 j2st = iproc*(nact2/nproc)
387 j2nd = min((iproc + 1)*(nact2/nproc), n2/2 + 1)
389 DO ind2 = (n1/2 + 1)*j2st + 1, (n1/2 + 1)*j2nd, num_of_mus
391 iend = min(ind2 + (num_of_mus - 1), (n1/2 + 1)*j2nd)
392 nfft = iend - istart + 1
396 halfft_cache(:, :, :) = 0._dp
398 IF (istart == 1)
THEN
402 CALL calculates_green_opt_muzero(n_range, n_scf, ipolyord, x_scf, y_scf, &
403 cpol(1, ipolyord), dx, kernel_scf)
406 halfft_cache(1, 1, 1) = 0._dp
410 value = 0.5_dp*h3*kernel_scf(i3)
412 CALL indices(ireim, num_of_mus, n3/2 + i3, 1, ind1)
414 CALL indices(jreim, num_of_mus, n3/2 + 2 - i3, 1, jnd1)
415 halfft_cache(ireim, ind1, 1) =
value
416 halfft_cache(jreim, jnd1, 1) =
value
422 loopimpulses:
DO imu = istart + shift, iend
430 i1 = mod(imu, n1/2 + 1)
431 IF (i1 == 0) i1 = n1/2 + 1
432 i2 = (imu - i1)/(n1/2 + 1) + 1
433 ponx = real(i1 - 1, kind=
dp)/real(n1, kind=
dp)
434 pony = real(i2 - 1, kind=
dp)/real(n2, kind=
dp)
436 mu1 = 2._dp*pi*sqrt((ponx/h1)**2 + (pony/h2)**2)*h3
438 CALL calculates_green_opt(n_range, n_scf, itype_scf, ipolyord, x_scf, y_scf, &
439 cpol(1, ipolyord), mu1, dx, kernel_scf)
444 halfft_cache(1, imu - istart + 1, 1) = 0._dp
446 value = -0.5_dp*h3/mu1*kernel_scf(i3)
449 CALL indices(ireim, num_of_mus, n3/2 + i3, imu - istart + 1, ind1)
451 CALL indices(jreim, num_of_mus, n3/2 + 2 - i3, imu - istart + 1, jnd1)
452 halfft_cache(ireim, ind1, 1) =
value
453 halfft_cache(jreim, jnd1, 1) =
value
461 CALL fftstp(num_of_mus, nfft, n3/2, num_of_mus, n3/2, &
462 halfft_cache(1, 1, inzee), halfft_cache(1, 1, 3 - inzee), &
463 btrig, after(i), now(i), before(i), 1)
468 DO imu = istart, iend
471 i1 = mod(imu, n1/2 + 1)
472 IF (i1 == 0) i1 = n1/2 + 1
473 i2 = (imu - i1)/(n1/2 + 1) + 1
477 a = halfft_cache(1, imu - istart + 1, inzee)
478 b = halfft_cache(2, imu - istart + 1, inzee)
479 kernel(i1, j2, 1) = a + b
480 kernel(i1, j2, n3/2 + 1) = a - b
483 ind1 = imu - istart + 1 + num_of_mus*(i3 - 1)
484 jnd1 = imu - istart + 1 + num_of_mus*(n3/2 + 2 - i3 - 1)
485 cp = cossinarr(1, i3 - 1)
486 sp = cossinarr(2, i3 - 1)
487 a = halfft_cache(1, ind1, inzee)
488 b = halfft_cache(2, ind1, inzee)
489 c = halfft_cache(1, jnd1, inzee)
490 d = halfft_cache(2, jnd1, inzee)
495 fr = fer + cp*foi - sp*
for
496 kernel(i1, j2, i3) = fr
504 CALL mpi_group%alltoall(kernel, &
505 kernel_mpi, nker1*(nact2/nproc)*(nker3/nproc))
508 DO i3 = 1, nker3/nproc
509 DO i2 = 1, nact2/nproc
510 j2 = i2 + (jp2 - 1)*(nact2/nproc)
511 IF (j2 <= nker2)
THEN
513 karray(i1, j2, i3) = &
514 kernel_mpi(i1, i2, i3, jp2)
522 karray(1:nker1, 1:nker2, 1:nker3) = kernel(1:nker1, 1:nker2, 1:nker3)
527 DEALLOCATE (kernel_mpi)
532 DEALLOCATE (halfft_cache)
533 DEALLOCATE (kernel_scf)
537 END SUBROUTINE surfaces_kernel
552 SUBROUTINE calculates_green_opt(n, n_scf, itype_scf, intorder, xval, yval, c, mu, hres, g_mu)
553 INTEGER,
INTENT(in) :: n, n_scf, itype_scf, intorder
554 REAL(kind=
dp),
DIMENSION(0:n_scf),
INTENT(in) :: xval, yval
555 REAL(kind=
dp),
DIMENSION(intorder+1),
INTENT(in) :: c
556 REAL(kind=
dp),
INTENT(in) :: mu, hres
557 REAL(kind=
dp),
DIMENSION(n),
INTENT(out) :: g_mu
559 REAL(kind=
dp),
PARAMETER :: mu_max = 0.2_dp
561 INTEGER :: i, iend, ikern, ivalue, izero, n_iter, &
563 REAL(kind=
dp) :: f, filter, fl, fr, gleft, gltmp, gright, &
564 grtmp, mu0, ratio, x, x0, x1
565 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: green, green1
569 IF (mu <= mu_max)
THEN
575 ratio = real(2**n_iter, kind=
dp)
577 IF (mu0 <= mu_max)
THEN
587 ALLOCATE (green(-nrec:nrec))
593 IF (xval(izero) >= real(ikern, kind=
dp) .OR. izero == n_scf)
EXIT initialization
595 END DO initialization
601 IF (ivalue >= n_scf - intorder - 1)
EXIT loop_right
602 DO i = 1, intorder + 1
603 x = xval(ivalue) - real(ikern, kind=
dp)
604 f = yval(ivalue)*exp(-mu0*x)
605 filter = intorder*c(i)
606 gright = gright + filter*f
611 iend = n_scf - ivalue
613 x = xval(ivalue) - real(ikern, kind=
dp)
614 f = yval(ivalue)*exp(-mu0*x)
615 filter = intorder*c(i)
616 gright = gright + filter*f
624 green(ikern) = gleft + gright
633 IF (izero == n_scf)
EXIT loop_integration
634 DO i = 1, intorder + 1
636 fl = yval(ivalue)*exp(mu0*x)
637 fr = yval(ivalue)*exp(-mu0*x)
638 filter = intorder*c(i)
639 gltmp = gltmp + filter*fl
640 grtmp = grtmp + filter*fr
642 IF (xval(izero) >= real(ikern, kind=
dp) .OR. izero == n_scf)
THEN
644 EXIT loop_integration
650 END DO loop_integration
651 gleft = exp(-mu0)*(gleft + hres*exp(-mu0*real(ikern - 1, kind=
dp))*gltmp)
652 IF (izero == n_scf)
THEN
655 gright = exp(mu0)*(gright - hres*exp(mu0*real(ikern - 1, kind=
dp))*grtmp)
657 green(ikern) = gleft + gright
658 green(-ikern) = gleft + gright
659 IF (abs(green(ikern)) <= 1.e-20_dp)
THEN
666 ALLOCATE (green1(-nrec:nrec))
669 CALL scf_recursion(itype_scf, n_iter, nrec, green(-nrec), green1(-nrec))
671 DO i = 1, min(n, nrec)
672 g_mu(i) = green(i - 1)
674 DO i = min(n, nrec) + 1, n
678 DEALLOCATE (green, green1)
680 END SUBROUTINE calculates_green_opt
693 SUBROUTINE calculates_green_opt_muzero(n, n_scf, intorder, xval, yval, c, hres, green)
694 INTEGER,
INTENT(in) :: n, n_scf, intorder
695 REAL(kind=
dp),
DIMENSION(0:n_scf),
INTENT(in) :: xval, yval
696 REAL(kind=
dp),
DIMENSION(intorder+1),
INTENT(in) :: c
697 REAL(kind=
dp),
INTENT(in) :: hres
698 REAL(kind=
dp),
DIMENSION(n),
INTENT(out) :: green
700 INTEGER :: i, iend, ikern, ivalue, izero
701 REAL(kind=
dp) :: c0, c1, filter, gl0, gl1, gr0, gr1, x, y
708 IF (xval(izero) >= real(ikern, kind=
dp) .OR. izero == n_scf)
EXIT initialization
710 END DO initialization
717 IF (ivalue >= n_scf - intorder - 1)
EXIT loop_right
718 DO i = 1, intorder + 1
721 filter = intorder*c(i)
722 gr1 = gr1 + filter*x*y
727 iend = n_scf - ivalue
731 filter = intorder*c(i)
732 gr1 = gr1 + filter*x*y
749 IF (izero == n_scf)
EXIT loop_integration
750 DO i = 1, intorder + 1
753 filter = intorder*c(i)
757 IF (xval(izero) >= real(ikern, kind=
dp) .OR. izero == n_scf)
THEN
758 EXIT loop_integration
764 END DO loop_integration
773 green(ikern + 1) = real(ikern, kind=
dp)*(gl0 - gr0) + gr1 - gl1
777 END SUBROUTINE calculates_green_opt_muzero
787 SUBROUTINE indices(var_realimag, nelem, intrn, extrn, index)
789 INTEGER,
INTENT(out) :: var_realimag
790 INTEGER,
INTENT(in) :: nelem, intrn, extrn
791 INTEGER,
INTENT(out) :: index
797 var_realimag = 2 - mod(intrn, 2)
802 IF (2*(i - 1) + var_realimag /= intrn)
THEN
803 print *,
'error, index=', intrn,
'var_realimag=', var_realimag,
'i=', i
806 index = extrn + nelem*(i - 1)
808 END SUBROUTINE indices
837 SUBROUTINE free_kernel(n01, n02, n03, nfft1, nfft2, nfft3, n1k, n2k, n3k, &
838 hgrid, itype_scf, iproc, nproc, karray, mpi_group)
840 INTEGER,
INTENT(in) :: n01, n02, n03, nfft1, nfft2, nfft3, n1k, &
842 REAL(kind=
dp),
INTENT(in) :: hgrid
843 INTEGER,
INTENT(in) :: itype_scf, iproc, nproc
844 REAL(kind=
dp),
DIMENSION(n1k, n2k, n3k/nproc), &
845 INTENT(out) :: karray
846 TYPE(mp_comm_type),
INTENT(in) :: mpi_group
848 INTEGER,
PARAMETER :: n_gauss = 89, n_points = 2**6
849 REAL(kind=
dp),
PARAMETER :: p0_ref = 1._dp
851 INTEGER :: i, i01, i02, i03, i1, i2, i3, i_gauss, &
852 i_kern, iend, istart, istart1, n1h, &
853 n2h, n3h, n_cell, n_iter, n_range, &
854 n_scf, nker1, nker2, nker3
855 REAL(kind=
dp) :: a1, a2, a3, a_range, absci, acc_gauss, &
856 dr_gauss, dx, factor, factor2, kern, &
857 p0_cell, p0gauss, pgauss, ur_gauss
858 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: kern_1_scf, kernel_scf, x_scf, y_scf
859 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: kp
860 REAL(kind=
dp),
DIMENSION(n_gauss) :: p_gauss, w_gauss
868 n_scf = 2*itype_scf*n_points
886 IF (
modulo(nker2, nproc) == 0)
EXIT
890 IF (
modulo(nker3, nproc) == 0)
EXIT
895 ALLOCATE (kp(n1h + 1, n3h + 1, nker2/nproc))
900 istart = iproc*nker2/nproc + 1
901 iend = min((iproc + 1)*nker2/nproc, n2h + n03)
904 IF (iproc .EQ. 0) istart1 = n2h - n03 + 2
907 ALLOCATE (x_scf(0:n_scf))
908 ALLOCATE (y_scf(0:n_scf))
913 dx = real(n_range, kind=
dp)/real(n_scf, kind=
dp)
915 n_cell = max(n01, n02, n03)
916 n_range = max(n_cell, n_range)
919 ALLOCATE (kernel_scf(-n_range:n_range))
920 ALLOCATE (kern_1_scf(-n_range:n_range))
923 a1 = hgrid*real(n01, kind=
dp)
924 a2 = hgrid*real(n02, kind=
dp)
925 a3 = hgrid*real(n03, kind=
dp)
927 x_scf(:) = hgrid*x_scf(:)
928 y_scf(:) = 1._dp/hgrid*y_scf(:)
931 p0_cell = p0_ref/(hgrid*hgrid)
934 CALL gequad(p_gauss, w_gauss, ur_gauss, dr_gauss, acc_gauss)
938 a_range = sqrt(a1*a1 + a2*a2 + a3*a3)
939 factor = 1._dp/a_range
941 factor2 = 1._dp/(a1*a1 + a2*a2 + a3*a3)
942 DO i_gauss = 1, n_gauss
943 p_gauss(i_gauss) = factor2*p_gauss(i_gauss)
945 DO i_gauss = 1, n_gauss
946 w_gauss(i_gauss) = factor*w_gauss(i_gauss)
951 loop_gauss:
DO i_gauss = n_gauss, 1, -1
953 pgauss = p_gauss(i_gauss)
956 n_iter = nint((log(pgauss) - log(p0_cell))/log(4._dp))
957 IF (n_iter <= 0)
THEN
961 p0gauss = pgauss/4._dp**n_iter
966 kernel_scf(:) = 0._dp
967 DO i_kern = 0, n_range
970 absci = x_scf(i) - real(i_kern, kind=
dp)*hgrid
972 kern = kern + y_scf(i)*exp(-p0gauss*absci)*dx
974 kernel_scf(i_kern) = kern
975 kernel_scf(-i_kern) = kern
976 IF (abs(kern) < 1.e-18_dp)
THEN
983 CALL scf_recursion(itype_scf, n_iter, n_range, kernel_scf, kern_1_scf)
987 DO i3 = istart1, iend
995 IF (i03 .LT. -n_range .OR. i03 .GT. n_range)
THEN
996 CALL cp_abort(__location__,
"Index out of range in wavelet solver. "// &
997 "Try decreasing the number of MPI processors, or adjust the PW_CUTOFF or cell size "// &
998 "so that 2*(number of RS grid points) matches the allowed FFT dimensions "// &
999 "(see ps_wavelet_fft3d.F) exactly.")
1005 kp(i1, i2, i3 - istart + 1) = kp(i1, i2, i3 - istart + 1) + w_gauss(i_gauss)* &
1006 kernel_scf(i01)*kernel_scf(i02)*kernel_scf(i03)
1014 DEALLOCATE (kernel_scf)
1015 DEALLOCATE (kern_1_scf)
1023 CALL kernelfft(nfft1, nfft2, nfft3, nker1, nker2, nker3, n1k, n2k, n3k, nproc, iproc, &
1024 kp, karray, mpi_group)
1029 END SUBROUTINE free_kernel
1041 SUBROUTINE inserthalf(n1, n3, lot, nfft, i1, zf, zw)
1042 INTEGER,
INTENT(in) :: n1, n3, lot, nfft, i1
1043 REAL(kind=
dp),
DIMENSION(n1/2+1, n3/2+1), &
1045 REAL(kind=
dp),
DIMENSION(2, lot, n3/2), &
1048 INTEGER :: i01, i03i, i03r, i3, l1, l3
1054 i03r = abs(l3 - n3/2 - 1) + 1
1055 i03i = abs(l3 - n3/2) + 1
1057 i01 = abs(l1 - 1 + i1 - n1/2 - 1) + 1
1058 zw(1, l1, i3) = zf(i01, i03r)
1059 zw(2, l1, i3) = zf(i01, i03i)
1063 END SUBROUTINE inserthalf
1097 SUBROUTINE kernelfft(n1, n2, n3, nd1, nd2, nd3, nk1, nk2, nk3, nproc, iproc, zf, zr, mpi_group)
1099 INTEGER,
INTENT(in) :: n1, n2, n3, nd1, nd2, nd3, nk1, nk2, &
1102 DIMENSION(n1/2+1, n3/2+1, nd2/nproc), &
1104 REAL(kind=
dp),
DIMENSION(nk1, nk2, nk3/nproc), &
1106 TYPE(mp_comm_type),
INTENT(in) :: mpi_group
1108 INTEGER,
PARAMETER :: ncache_optimal = 8*1024
1110 INTEGER :: i, i1, i3, ic1, ic2, ic3, inzee, j, j2, &
1111 j2st, j3, jp2st, lot, lzt, ma, mb, &
1113 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: after1, after2, after3, before1, &
1114 before2, before3, now1, now2, now3
1115 REAL(kind=
dp) :: twopion
1116 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: cosinarr, trig1, trig2, trig3
1117 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: zt, zw
1118 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :, :) :: zmpi2
1119 REAL(kind=
dp),
ALLOCATABLE, &
1120 DIMENSION(:, :, :, :, :) :: zmpi1
1130 cpassert(nd1 .GE. n1)
1131 cpassert(nd2 .GE. n2)
1132 cpassert(nd3 .GE. n3/2 + 1)
1133 cpassert(mod(nd3, nproc) .EQ. 0)
1134 cpassert(mod(nd2, nproc) .EQ. 0)
1138 ncache = ncache_optimal
1139 IF (ncache <= max(n1, n2, n3/2)*4) ncache = max(n1, n2, n3/2)*4
1141 IF (mod(n2, 2) .EQ. 0) lzt = lzt + 1
1142 IF (mod(n2, 4) .EQ. 0) lzt = lzt + 1
1146 ALLOCATE (after1(7))
1148 ALLOCATE (before1(7))
1150 ALLOCATE (after2(7))
1152 ALLOCATE (before2(7))
1154 ALLOCATE (after3(7))
1156 ALLOCATE (before3(7))
1157 ALLOCATE (zw(2, ncache/4, 2))
1158 ALLOCATE (zt(2, lzt, n1))
1159 ALLOCATE (zmpi2(2, n1, nd2/nproc, nd3))
1160 ALLOCATE (cosinarr(2, n3/2))
1161 IF (nproc .GT. 1)
THEN
1162 ALLOCATE (zmpi1(2, n1, nd2/nproc, nd3/nproc, nproc))
1168 CALL ctrig(n3/2, trig3, after3, before3, now3, 1, ic3)
1169 CALL ctrig(n1, trig1, after1, before1, now1, 1, ic1)
1170 CALL ctrig(n2, trig2, after2, before2, now2, 1, ic2)
1173 twopion = 8._dp*atan(1._dp)/real(n3, kind=
dp)
1175 cosinarr(1, i3) = cos(twopion*(i3 - 1))
1176 cosinarr(2, i3) = -sin(twopion*(i3 - 1))
1182 cpassert(lot .GE. 1)
1184 DO j2 = 1, nd2/nproc
1186 IF (iproc*(nd2/nproc) + j2 .LE. n2)
THEN
1189 mb = min(i1 + (lot - 1), n1)
1195 CALL inserthalf(n1, n3, lot, nfft, i1, zf(1, 1, j2), zw(1, 1, 1))
1200 CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1201 trig3, after3(i), now3(i), before3(i), 1)
1209 CALL scramble_unpack(i1, j2, lot, nfft, n1, n3, nd2, nproc, nd3, zw(1, 1, inzee), zmpi2, cosinarr)
1217 IF (nproc .GT. 1)
THEN
1219 CALL mpi_group%alltoall(zmpi2, &
1220 zmpi1, 2*n1*(nd2/nproc)*(nd3/nproc))
1224 DO j3 = 1, nd3/nproc
1226 IF (iproc*(nd3/nproc) + j3 .LE. n3/2 + 1)
THEN
1232 cpassert(lot .GE. 1)
1236 mb = min(j + (lot - 1), n2)
1241 IF (nproc .EQ. 1)
THEN
1242 CALL mpiswitch(j3, nfft, jp2st, j2st, lot, n1, nd2, nd3, nproc, zmpi2, zw(1, 1, 1))
1244 CALL mpiswitch(j3, nfft, jp2st, j2st, lot, n1, nd2, nd3, nproc, zmpi1, zw(1, 1, 1))
1252 CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1253 trig1, after1(i), now1(i), before1(i), 1)
1258 CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
1259 trig1, after1(i), now1(i), before1(i), 1)
1265 cpassert(lot .GE. 1)
1269 mb = min(j + (lot - 1), nk1)
1274 CALL switch(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
1281 CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1282 trig2, after2(i), now2(i), before2(i), 1)
1286 CALL realcopy(lot, nfft, n2, nk1, nk2, zw(1, 1, inzee), zr(j, 1, j3))
1297 DEALLOCATE (before1)
1301 DEALLOCATE (before2)
1305 DEALLOCATE (before3)
1309 DEALLOCATE (cosinarr)
1310 IF (nproc .GT. 1)
DEALLOCATE (zmpi1)
1312 END SUBROUTINE kernelfft
1324 SUBROUTINE realcopy(lot, nfft, n2, nk1, nk2, zin, zout)
1325 INTEGER,
INTENT(in) :: lot, nfft, n2, nk1, nk2
1326 REAL(kind=
dp),
DIMENSION(2, lot, n2),
INTENT(in) :: zin
1327 REAL(kind=
dp),
DIMENSION(nk1, nk2),
INTENT(inout) :: zout
1333 zout(j, i) = zin(1, j, i)
1337 END SUBROUTINE realcopy
1349 SUBROUTINE switch(nfft, n2, lot, n1, lzt, zt, zw)
1350 INTEGER :: nfft, n2, lot, n1, lzt
1351 REAL(kind=
dp) :: zt(2, lzt, n1), zw(2, lot, n2)
1357 zw(1, j, i) = zt(1, i, j)
1358 zw(2, j, i) = zt(2, i, j)
1362 END SUBROUTINE switch
1378 SUBROUTINE mpiswitch(j3, nfft, Jp2st, J2st, lot, n1, nd2, nd3, nproc, zmpi1, zw)
1379 INTEGER :: j3, nfft, jp2st, j2st, lot, n1, nd2, &
1381 REAL(kind=
dp) :: zmpi1(2, n1, nd2/nproc, nd3/nproc, nproc), zw(2, lot, n1)
1383 INTEGER :: i1, j2, jp2, mfft
1386 DO 300, jp2 = jp2st, nproc
1387 DO 200, j2 = j2st, nd2/nproc
1389 IF (mfft .GT. nfft)
THEN
1395 zw(1, mfft, i1) = zmpi1(1, i1, j2, j3, jp2)
1396 zw(2, mfft, i1) = zmpi1(2, i1, j2, j3, jp2)
1401 END SUBROUTINE mpiswitch
1411 SUBROUTINE gequad(p, w, urange, drange, acc)
1413 REAL(kind=
dp) :: p(*), w(*), urange, drange, acc
1421 p(1) = 4.96142640560223544e19_dp
1422 p(2) = 1.37454269147978052e19_dp
1423 p(3) = 7.58610013441204679e18_dp
1424 p(4) = 4.42040691347806996e18_dp
1425 p(5) = 2.61986077948367892e18_dp
1426 p(6) = 1.56320138155496681e18_dp
1427 p(7) = 9.35645215863028402e17_dp
1428 p(8) = 5.60962910452691703e17_dp
1429 p(9) = 3.3666225119686761e17_dp
1430 p(10) = 2.0218253197947866e17_dp
1431 p(11) = 1.21477756091902017e17_dp
1432 p(12) = 7.3012982513608503e16_dp
1433 p(13) = 4.38951893556421099e16_dp
1434 p(14) = 2.63949482512262325e16_dp
1435 p(15) = 1.58742054072786174e16_dp
1436 p(16) = 9.54806587737665531e15_dp
1437 p(17) = 5.74353712364571709e15_dp
1438 p(18) = 3.455214877389445e15_dp
1439 p(19) = 2.07871658520326804e15_dp
1440 p(20) = 1.25064667315629928e15_dp
1441 p(21) = 7.52469429541933745e14_dp
1442 p(22) = 4.5274603337253175e14_dp
1443 p(23) = 2.72414006900059548e14_dp
1444 p(24) = 1.63912168349216752e14_dp
1445 p(25) = 9.86275802590865738e13_dp
1446 p(26) = 5.93457701624974985e13_dp
1447 p(27) = 3.5709554322296296e13_dp
1448 p(28) = 2.14872890367310454e13_dp
1449 p(29) = 1.29294719957726902e13_dp
1450 p(30) = 7.78003375426361016e12_dp
1451 p(31) = 4.68148199759876704e12_dp
1452 p(32) = 2.8169955024829868e12_dp
1453 p(33) = 1.69507790481958464e12_dp
1454 p(34) = 1.01998486064607581e12_dp
1455 p(35) = 6.13759486539856459e11_dp
1456 p(36) = 3.69320183828682544e11_dp
1457 p(37) = 2.22232783898905102e11_dp
1458 p(38) = 1.33725247623668682e11_dp
1459 p(39) = 8.0467192739036288e10_dp
1460 p(40) = 4.84199582415144143e10_dp
1461 p(41) = 2.91360091170559564e10_dp
1462 p(42) = 1.75321747475309216e10_dp
1463 p(43) = 1.0549735552210995e10_dp
1464 p(44) = 6.34815321079006586e9_dp
1465 p(45) = 3.81991113733594231e9_dp
1466 p(46) = 2.29857747533101109e9_dp
1467 p(47) = 1.38313653595483694e9_dp
1468 p(48) = 8.32282908580025358e8_dp
1469 p(49) = 5.00814519374587467e8_dp
1470 p(50) = 3.01358090773319025e8_dp
1471 p(51) = 1.81337994217503535e8_dp
1472 p(52) = 1.09117589961086823e8_dp
1473 p(53) = 6.56599771718640323e7_dp
1474 p(54) = 3.95099693638497164e7_dp
1475 p(55) = 2.37745694710665991e7_dp
1476 p(56) = 1.43060135285912813e7_dp
1477 p(57) = 8.60844290313506695e6_dp
1478 p(58) = 5.18000974075383424e6_dp
1479 p(59) = 3.116998193057466e6_dp
1480 p(60) = 1.87560993870024029e6_dp
1481 p(61) = 1.12862197183979562e6_dp
1482 p(62) = 679132.441326077231_dp
1483 p(63) = 408658.421279877969_dp
1484 p(64) = 245904.473450669789_dp
1485 p(65) = 147969.568088321005_dp
1486 p(66) = 89038.612357311147_dp
1487 p(67) = 53577.7362552358895_dp
1488 p(68) = 32239.6513926914668_dp
1489 p(69) = 19399.7580852362791_dp
1490 p(70) = 11673.5323603058634_dp
1491 p(71) = 7024.38438577707758_dp
1492 p(72) = 4226.82479307685999_dp
1493 p(73) = 2543.43254175354295_dp
1494 p(74) = 1530.47486269122675_dp
1495 p(75) = 920.941785160749482_dp
1496 p(76) = 554.163803906291646_dp
1497 p(77) = 333.46029740785694_dp
1498 p(78) = 200.6550575335041_dp
1499 p(79) = 120.741366914147284_dp
1500 p(80) = 72.6544243200329916_dp
1501 p(81) = 43.7187810415471025_dp
1502 p(82) = 26.3071631447061043_dp
1503 p(83) = 15.8299486353816329_dp
1504 p(84) = 9.52493152341244004_dp
1505 p(85) = 5.72200417067776041_dp
1506 p(86) = 3.36242234070940928_dp
1507 p(87) = 1.75371394604499472_dp
1508 p(88) = 0.64705932650658966_dp
1509 p(89) = 0.072765905943708247_dp
1511 w(1) = 47.67445484528304247e10_dp
1512 w(2) = 11.37485774750442175e9_dp
1513 w(3) = 78.64340976880190239e8_dp
1514 w(4) = 46.27335788759590498e8_dp
1515 w(5) = 24.7380464827152951e8_dp
1516 w(6) = 13.62904116438987719e8_dp
1517 w(7) = 92.79560029045882433e8_dp
1518 w(8) = 52.15931216254660251e8_dp
1519 w(9) = 31.67018011061666244e8_dp
1520 w(10) = 1.29291036801493046e8_dp
1521 w(11) = 1.00139319988015862e8_dp
1522 w(12) = 7.75892350510188341e7_dp
1523 w(13) = 6.01333567950731271e7_dp
1524 w(14) = 4.66141178654796875e7_dp
1525 w(15) = 3.61398903394911448e7_dp
1526 w(16) = 2.80225846672956389e7_dp
1527 w(17) = 2.1730509180930247e7_dp
1528 w(18) = 1.68524482625876965e7_dp
1529 w(19) = 1.30701489345870338e7_dp
1530 w(20) = 1.01371784832269282e7_dp
1531 w(21) = 7.86264116300379329e6_dp
1532 w(22) = 6.09861667912273717e6_dp
1533 w(23) = 4.73045784039455683e6_dp
1534 w(24) = 3.66928949951594161e6_dp
1535 w(25) = 2.8462050836230259e6_dp
1536 w(26) = 2.20777394798527011e6_dp
1537 w(27) = 1.71256191589205524e6_dp
1538 w(28) = 1.32843556197737076e6_dp
1539 w(29) = 1.0304731275955989e6_dp
1540 w(30) = 799345.206572271448_dp
1541 w(31) = 620059.354143595343_dp
1542 w(32) = 480986.704107449333_dp
1543 w(33) = 373107.167700228515_dp
1544 w(34) = 289424.08337412132_dp
1545 w(35) = 224510.248231581788_dp
1546 w(36) = 174155.825690028966_dp
1547 w(37) = 135095.256919654065_dp
1548 w(38) = 104795.442776800312_dp
1549 w(39) = 81291.4458222430418_dp
1550 w(40) = 63059.0493649328682_dp
1551 w(41) = 48915.9040455329689_dp
1552 w(42) = 37944.8484018048756_dp
1553 w(43) = 29434.4290473253969_dp
1554 w(44) = 22832.7622054490044_dp
1555 w(45) = 17711.743950151233_dp
1556 w(46) = 13739.287867104177_dp
1557 w(47) = 10657.7895710752585_dp
1558 w(48) = 8267.42141053961834_dp
1559 w(49) = 6413.17397520136448_dp
1560 w(50) = 4974.80402838654277_dp
1561 w(51) = 3859.03698188553047_dp
1562 w(52) = 2993.51824493299154_dp
1563 w(53) = 2322.1211966811754_dp
1564 w(54) = 1801.30750964719641_dp
1565 w(55) = 1397.30379659817038_dp
1566 w(56) = 1083.91149143250697_dp
1567 w(57) = 840.807939169209188_dp
1568 w(58) = 652.228524366749422_dp
1569 w(59) = 505.944376983506128_dp
1570 w(60) = 392.469362317941064_dp
1571 w(61) = 304.444930257324312_dp
1572 w(62) = 236.162932842453601_dp
1573 w(63) = 183.195466078603525_dp
1574 w(64) = 142.107732186551471_dp
1575 w(65) = 110.23530215723992_dp
1576 w(66) = 85.5113346705382257_dp
1577 w(67) = 66.3325469806696621_dp
1578 w(68) = 51.4552463353841373_dp
1579 w(69) = 39.9146798429449273_dp
1580 w(70) = 30.9624728409162095_dp
1581 w(71) = 24.018098812215013_dp
1582 w(72) = 18.6312338024296588_dp
1583 w(73) = 14.4525541233150501_dp
1584 w(74) = 11.2110836519105938_dp
1585 w(75) = 8.69662175848497178_dp
1586 w(76) = 6.74611236165731961_dp
1587 w(77) = 5.23307018057529994_dp
1588 w(78) = 4.05937850501539556_dp
1589 w(79) = 3.14892659076635714_dp
1590 w(80) = 2.44267408211071604_dp
1591 w(81) = 1.89482240522855261_dp
1592 w(82) = 1.46984505907050079_dp
1593 w(83) = 1.14019261330527007_dp
1594 w(84) = 0.884791217422925293_dp
1595 w(85) = 0.692686387080616483_dp
1596 w(86) = 0.585244576897023282_dp
1597 w(87) = 0.576182522545327589_dp
1598 w(88) = 0.596688817388997178_dp
1599 w(89) = 0.607879901151108771_dp
1607 END SUBROUTINE gequad
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
for(int lxp=0;lxp<=lp;lxp++)
Defines the basic variable types.
integer, parameter, public dp
Interface to the message passing library MPI.
Creates the wavelet kernel for the wavelet based poisson solver.
subroutine, public scramble_unpack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw, zmpi2, cosinarr)
(Based on suitable modifications of S.Goedecker routines) Assign the correct planes to the work array...
integer, parameter, public ctrig_length
subroutine, public fftstp(mm, nfft, m, nn, n, zin, zout, trig, after, now, before, isign)
...
subroutine, public ctrig(n, trig, after, before, now, isign, ic)
...
Creates the wavelet kernel for the wavelet based poisson solver.
subroutine, public createkernel(geocode, n01, n02, n03, hx, hy, hz, itype_scf, iproc, nproc, kernel, mpi_group)
Allocate a pointer which corresponds to the zero-padded FFT slice needed for calculating the convolut...
Creates the wavelet kernel for the wavelet based poisson solver.
subroutine, public scf_recursion(itype, n_iter, n_range, kernel_scf, kern_1_scf)
Do iterations to go from p0gauss to pgauss order interpolating scaling function.
subroutine, public scaling_function(itype, nd, nrange, a, x)
Calculate the values of a scaling function in real uniform grid.
Performs a wavelet based solution of the Poisson equation.
subroutine, public s_fft_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, md1, md2, md3, nd1, nd2, nd3, nproc)
Calculate four sets of dimension needed for the calculation of the convolution for the surface system...
subroutine, public f_fft_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, md1, md2, md3, nd1, nd2, nd3, nproc)
Calculate four sets of dimension needed for the calculation of the zero-padded convolution.