19 #include "../base/base_uses.f90"
25 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'ps_wavelet_base'
51 SUBROUTINE p_poissonsolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, zf &
52 , scal, hx, hy, hz, mpi_group)
53 INTEGER,
INTENT(in) :: n1, n2, n3, nd1, nd2, nd3, md1, md2, &
55 REAL(kind=
dp),
DIMENSION(md1, md3, md2/nproc), &
57 REAL(kind=
dp),
INTENT(in) :: scal, hx, hy, hz
59 CLASS(mp_comm_type),
INTENT(in) :: mpi_group
61 INTEGER,
PARAMETER :: ncache_optimal = 8*1024
63 INTEGER :: i, i1, i3, ic1, ic2, ic3, inzee, j, j2, &
64 j2stb, j2stf, j3, jp2stb, jp2stf, lot, &
65 lzt, ma, mb, ncache, nfft
66 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: after1, after2, after3, before1, &
67 before2, before3, now1, now2, now3
68 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: btrig1, btrig2, btrig3, ftrig1, ftrig2, &
70 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: zt, zw
71 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :, :) :: zmpi2
72 REAL(kind=
dp),
ALLOCATABLE, &
73 DIMENSION(:, :, :, :, :) :: zmpi1
75 IF (nd1 .LT. n1/2 + 1) cpabort(
"Parallel convolution:ERROR:nd1")
76 IF (nd2 .LT. n2/2 + 1) cpabort(
"Parallel convolution:ERROR:nd2")
77 IF (nd3 .LT. n3/2 + 1) cpabort(
"Parallel convolution:ERROR:nd3")
78 IF (md1 .LT. n1) cpabort(
"Parallel convolution:ERROR:md1")
79 IF (md2 .LT. n2) cpabort(
"Parallel convolution:ERROR:md2")
80 IF (md3 .LT. n3) cpabort(
"Parallel convolution:ERROR:md3")
81 IF (mod(nd3, nproc) .NE. 0) cpabort(
"Parallel convolution:ERROR:nd3")
82 IF (mod(md2, nproc) .NE. 0) cpabort(
"Parallel convolution:ERROR:md2")
85 ncache = ncache_optimal
86 IF (ncache <= max(n1, n2, n3)*4) ncache = max(n1, n2, n3)*4
89 IF (mod(n2, 2) .EQ. 0) lzt = lzt + 1
90 IF (mod(n2, 4) .EQ. 0) lzt = lzt + 1
102 ALLOCATE (before2(7))
107 ALLOCATE (before3(7))
108 ALLOCATE (zw(2, ncache/4, 2))
109 ALLOCATE (zt(2, lzt, n1))
110 ALLOCATE (zmpi2(2, n1, md2/nproc, nd3))
111 IF (nproc .GT. 1)
ALLOCATE (zmpi1(2, n1, md2/nproc, nd3/nproc, nproc))
114 CALL ctrig(n3, btrig3, after3, before3, now3, 1, ic3)
115 CALL ctrig(n1, btrig1, after1, before1, now1, 1, ic1)
116 CALL ctrig(n2, btrig2, after2, before2, now2, 1, ic2)
118 ftrig1(1, j) = btrig1(1, j)
119 ftrig1(2, j) = -btrig1(2, j)
122 ftrig2(1, j) = btrig2(1, j)
123 ftrig2(2, j) = -btrig2(2, j)
126 ftrig3(1, j) = btrig3(1, j)
127 ftrig3(2, j) = -btrig3(2, j)
134 'convolxc_off:ncache has to be enlarged to be able to hold at'// &
135 'least one 1-d FFT of this size even though this will'// &
136 'reduce the performance for shorter transform lengths'
141 IF (iproc*(md2/nproc) + j2 .LE. n2)
THEN
144 mb = min(i1 + (lot - 1), n1)
147 CALL p_fill_upcorn(md1, md3, lot, nfft, n3, zf(i1, 1, j2), zw(1, 1, 1))
153 CALL fftstp(lot, nfft, n3, lot, n3, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
154 btrig3, after3(i), now3(i), before3(i), 1)
161 CALL scramble_p(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw(1, 1, inzee), zmpi2)
169 IF (nproc .GT. 1)
THEN
171 CALL mpi_group%alltoall(zmpi2, zmpi1, 2*n1*(md2/nproc)*(nd3/nproc))
178 IF (iproc*(nd3/nproc) + j3 .LE. n3/2 + 1)
THEN
188 'convolxc_off:ncache has to be enlarged to be able to hold at'// &
189 'least one 1-d FFT of this size even though this will'// &
190 'reduce the performance for shorter transform lengths'
196 mb = min(j + (lot - 1), n2)
201 IF (nproc .EQ. 1)
THEN
202 CALL p_mpiswitch_upcorn(j3, nfft, jp2stb, j2stb, lot, n1, md2, nd3, nproc, zmpi2, zw(1, 1, 1))
204 CALL p_mpiswitch_upcorn(j3, nfft, jp2stb, j2stb, lot, n1, md2, nd3, nproc, zmpi1, zw(1, 1, 1))
212 CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
213 btrig1, after1(i), now1(i), before1(i), 1)
219 CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
220 btrig1, after1(i), now1(i), before1(i), 1)
228 'convolxc_off:ncache has to be enlarged to be able to hold at'// &
229 'least one 1-d FFT of this size even though this will'// &
230 'reduce the performance for shorter transform lengths'
236 mb = min(j + (lot - 1), n1)
241 CALL p_switch_upcorn(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
248 CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
249 btrig2, after2(i), now2(i), before2(i), 1)
255 i3 = iproc*(nd3/nproc) + j3
256 CALL p_multkernel(n1, n2, n3, lot, nfft, j, i3, zw(1, 1, inzee), hx, hy, hz)
263 CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
264 ftrig2, after2(i), now2(i), before2(i), -1)
270 CALL p_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw(1, 1, inzee), zt(1, 1, j))
279 mb = min(j + (lot - 1), n2)
284 CALL fftstp(lzt, nfft, n1, lot, n1, zt(1, j, 1), zw(1, 1, 1), &
285 ftrig1, after1(i), now1(i), before1(i), -1)
289 CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
290 ftrig1, after1(i), now1(i), before1(i), -1)
297 IF (nproc .EQ. 1)
THEN
298 CALL p_unmpiswitch_downcorn(j3, nfft, jp2stf, j2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi2)
300 CALL p_unmpiswitch_downcorn(j3, nfft, jp2stf, j2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi1)
309 IF (nproc .GT. 1)
THEN
311 CALL mpi_group%alltoall(zmpi1, zmpi2, 2*n1*(md2/nproc)*(nd3/nproc))
319 IF (iproc*(md2/nproc) + j2 .LE. n2)
THEN
322 mb = min(i1 + (lot - 1), n1)
327 CALL unscramble_p(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw(1, 1, 1))
334 CALL fftstp(lot, nfft, n3, lot, n3, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
335 ftrig3, after3(i), now3(i), before3(i), -1)
341 CALL p_unfill_downcorn(md1, md3, lot, nfft, n3, zw(1, 1, inzee), zf(i1, 1, j2), scal)
366 IF (nproc .GT. 1)
DEALLOCATE (zmpi1)
384 SUBROUTINE p_mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw)
385 INTEGER,
INTENT(in) :: j3, nfft
386 INTEGER,
INTENT(inout) :: jp2stb, j2stb
387 INTEGER,
INTENT(in) :: lot, n1, md2, nd3, nproc
389 DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
391 REAL(kind=
dp),
DIMENSION(2, lot, n1), &
394 INTEGER :: i1, j2, jp2, mfft
397 DO jp2 = jp2stb, nproc
398 DO j2 = j2stb, md2/nproc
400 IF (mfft .GT. nfft)
THEN
406 zw(1, mfft, i1) = zmpi1(1, i1, j2, j3, jp2)
407 zw(2, mfft, i1) = zmpi1(2, i1, j2, j3, jp2)
412 END SUBROUTINE p_mpiswitch_upcorn
424 SUBROUTINE p_switch_upcorn(nfft, n2, lot, n1, lzt, zt, zw)
425 INTEGER,
INTENT(in) :: nfft, n2, lot, n1, lzt
426 REAL(kind=
dp),
DIMENSION(2, lzt, n1),
INTENT(in) :: zt
427 REAL(kind=
dp),
DIMENSION(2, lot, n2), &
434 zw(1, j, i) = zt(1, i, j)
435 zw(2, j, i) = zt(2, i, j)
439 END SUBROUTINE p_switch_upcorn
451 SUBROUTINE p_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw, zt)
452 INTEGER,
INTENT(in) :: nfft, n2, lot, n1, lzt
453 REAL(kind=
dp),
DIMENSION(2, lot, n2),
INTENT(in) :: zw
454 REAL(kind=
dp),
DIMENSION(2, lzt, n1), &
461 zt(1, i, j) = zw(1, j, i)
462 zt(2, i, j) = zw(2, j, i)
466 END SUBROUTINE p_unswitch_downcorn
482 SUBROUTINE p_unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw, zmpi1)
483 INTEGER,
INTENT(in) :: j3, nfft
484 INTEGER,
INTENT(inout) :: jp2stf, j2stf
485 INTEGER,
INTENT(in) :: lot, n1, md2, nd3, nproc
486 REAL(kind=
dp),
DIMENSION(2, lot, n1),
INTENT(in) :: zw
488 DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
489 INTENT(inout) :: zmpi1
491 INTEGER :: i1, j2, jp2, mfft
494 DO jp2 = jp2stf, nproc
495 DO j2 = j2stf, md2/nproc
497 IF (mfft .GT. nfft)
THEN
503 zmpi1(1, i1, j2, j3, jp2) = zw(1, mfft, i1)
504 zmpi1(2, i1, j2, j3, jp2) = zw(2, mfft, i1)
509 END SUBROUTINE p_unmpiswitch_downcorn
535 SUBROUTINE p_unfill_downcorn(md1, md3, lot, nfft, n3, zw, zf, scal)
536 INTEGER,
INTENT(in) :: md1, md3, lot, nfft, n3
537 REAL(kind=
dp),
DIMENSION(2, lot, n3),
INTENT(in) :: zw
538 REAL(kind=
dp),
DIMENSION(md1, md3),
INTENT(inout) :: zf
539 REAL(kind=
dp),
INTENT(in) :: scal
542 REAL(kind=
dp) :: pot1
546 pot1 = scal*zw(1, i1, i3)
551 END SUBROUTINE p_unfill_downcorn
563 SUBROUTINE p_fill_upcorn(md1, md3, lot, nfft, n3, zf, zw)
564 INTEGER,
INTENT(in) :: md1, md3, lot, nfft, n3
565 REAL(kind=
dp),
DIMENSION(md1, md3),
INTENT(in) :: zf
566 REAL(kind=
dp),
DIMENSION(2, lot, n3), &
573 zw(1, i1, i3) = zf(i1, i3)
574 zw(2, i1, i3) = 0._dp
578 END SUBROUTINE p_fill_upcorn
605 SUBROUTINE scramble_p(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw, zmpi2)
606 INTEGER,
INTENT(in) :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
608 REAL(kind=
dp),
DIMENSION(2, lot, n3),
INTENT(in) :: zw
609 REAL(kind=
dp),
DIMENSION(2, n1, md2/nproc, nd3), &
610 INTENT(inout) :: zmpi2
616 zmpi2(1, i1 + i, j2, i3) = zw(1, i + 1, i3)
617 zmpi2(2, i1 + i, j2, i3) = zw(2, i + 1, i3)
621 END SUBROUTINE scramble_p
648 SUBROUTINE unscramble_p(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw)
649 INTEGER,
INTENT(in) :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
651 REAL(kind=
dp),
DIMENSION(2, n1, md2/nproc, nd3), &
653 REAL(kind=
dp),
DIMENSION(2, lot, n3), &
660 zw(1, i + 1, i3) = zmpi2(1, i1 + i, j2, i3)
661 zw(2, i + 1, i3) = zmpi2(2, i1 + i, j2, i3)
667 zw(1, i + 1, j3) = zmpi2(1, i1 + i, j2, i3)
668 zw(2, i + 1, j3) = -zmpi2(2, i1 + i, j2, i3)
669 zw(1, i + 1, i3) = zmpi2(1, i1 + i, j2, i3)
670 zw(2, i + 1, i3) = zmpi2(2, i1 + i, j2, i3)
674 END SUBROUTINE unscramble_p
704 SUBROUTINE p_multkernel(n1, n2, n3, lot, nfft, jS, i3, zw, hx, hy, hz)
705 INTEGER,
INTENT(in) :: n1, n2, n3, lot, nfft, js, i3
706 REAL(kind=
dp),
DIMENSION(2, lot, n2), &
708 REAL(kind=
dp),
INTENT(in) :: hx, hy, hz
710 INTEGER :: i1, i2, j1, j2, j3
711 REAL(kind=
dp) :: fourpi2, ker, mu3, p1, p2, pi
713 pi = 4._dp*atan(1._dp)
714 fourpi2 = 4._dp*pi**2
716 mu3 = real(j3 - 1, kind=
dp)/real(n3, kind=
dp)
723 j1 = j1 - (j1/(n1/2 + 2))*n1
724 j2 = i2 - (i2/(n2/2 + 2))*n2
725 p1 = real(j1 - 1, kind=
dp)/real(n1, kind=
dp)
726 p2 = real(j2 - 1, kind=
dp)/real(n2, kind=
dp)
727 ker = -fourpi2*((p1/hx)**2 + (p2/hz)**2 + mu3)
728 IF (ker /= 0._dp) ker = 1._dp/ker
729 zw(1, i1, i2) = zw(1, i1, i2)*ker
730 zw(2, i1, i2) = zw(2, i1, i2)*ker
734 END SUBROUTINE p_multkernel
762 SUBROUTINE multkernel(nd1, nd2, n1, n2, lot, nfft, jS, pot, zw)
763 INTEGER,
INTENT(in) :: nd1, nd2, n1, n2, lot, nfft, js
764 REAL(kind=
dp),
DIMENSION(nd1, nd2),
INTENT(in) :: pot
765 REAL(kind=
dp),
DIMENSION(2, lot, n2), &
768 INTEGER :: i2, j, j1, j2
772 j1 = j1 + (j1/(n1/2 + 2))*(n1 + 2 - 2*j1)
773 zw(1, j, 1) = zw(1, j, 1)*pot(j1, 1)
774 zw(2, j, 1) = zw(2, j, 1)*pot(j1, 1)
781 j1 = j1 + (j1/(n1/2 + 2))*(n1 + 2 - 2*j1)
783 zw(1, j, i2) = zw(1, j, i2)*pot(j1, i2)
784 zw(2, j, i2) = zw(2, j, i2)*pot(j1, i2)
785 zw(1, j, j2) = zw(1, j, j2)*pot(j1, i2)
786 zw(2, j, j2) = zw(2, j, j2)*pot(j1, i2)
793 j1 = j1 + (j1/(n1/2 + 2))*(n1 + 2 - 2*j1)
795 zw(1, j, j2) = zw(1, j, j2)*pot(j1, j2)
796 zw(2, j, j2) = zw(2, j, j2)*pot(j1, j2)
799 END SUBROUTINE multkernel
840 SUBROUTINE s_poissonsolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, pot, zf, &
842 INTEGER,
INTENT(in) :: n1, n2, n3, nd1, nd2, nd3, md1, md2, &
844 REAL(kind=
dp),
DIMENSION(nd1, nd2, nd3/nproc), &
846 REAL(kind=
dp),
DIMENSION(md1, md3, md2/nproc), &
848 REAL(kind=
dp),
INTENT(in) :: scal
850 CLASS(mp_comm_type),
INTENT(in) :: mpi_group
852 CHARACTER(len=*),
PARAMETER :: routinen =
'S_PoissonSolver'
853 INTEGER,
PARAMETER :: ncache_optimal = 8*1024
855 INTEGER :: handle, i, i1, i3, ic1, ic2, ic3, inzee, &
856 j, j2, j2stb, j2stf, j3, jp2stb, &
857 jp2stf, lot, lzt, ma, mb, ncache, nfft
858 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: after1, after2, after3, before1, &
859 before2, before3, now1, now2, now3
860 REAL(kind=
dp) :: twopion
861 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: btrig1, btrig2, btrig3, cosinarr, &
862 ftrig1, ftrig2, ftrig3
863 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: zt, zw
864 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :, :) :: zmpi2
865 REAL(kind=
dp),
ALLOCATABLE, &
866 DIMENSION(:, :, :, :, :) :: zmpi1
868 CALL timeset(routinen, handle)
870 IF (mod(n3, 2) .NE. 0) cpabort(
"Parallel convolution:ERROR:n3")
871 IF (nd1 .LT. n1/2 + 1) cpabort(
"Parallel convolution:ERROR:nd1")
872 IF (nd2 .LT. n2/2 + 1) cpabort(
"Parallel convolution:ERROR:nd2")
873 IF (nd3 .LT. n3/2 + 1) cpabort(
"Parallel convolution:ERROR:nd3")
874 IF (md1 .LT. n1) cpabort(
"Parallel convolution:ERROR:md1")
875 IF (md2 .LT. n2) cpabort(
"Parallel convolution:ERROR:md2")
876 IF (md3 .LT. n3/2) cpabort(
"Parallel convolution:ERROR:md3")
877 IF (mod(nd3, nproc) .NE. 0) cpabort(
"Parallel convolution:ERROR:nd3")
878 IF (mod(md2, nproc) .NE. 0) cpabort(
"Parallel convolution:ERROR:md2")
881 ncache = ncache_optimal
882 IF (ncache <= max(n1, n2, n3/2)*4) ncache = max(n1, n2, n3/2)*4
887 IF (mod(n2, 2) .EQ. 0) lzt = lzt + 1
888 IF (mod(n2, 4) .EQ. 0) lzt = lzt + 1
895 ALLOCATE (before1(7))
900 ALLOCATE (before2(7))
905 ALLOCATE (before3(7))
906 ALLOCATE (zw(2, ncache/4, 2))
907 ALLOCATE (zt(2, lzt, n1))
908 ALLOCATE (zmpi2(2, n1, md2/nproc, nd3))
909 ALLOCATE (cosinarr(2, n3/2))
910 IF (nproc .GT. 1)
THEN
911 ALLOCATE (zmpi1(2, n1, md2/nproc, nd3/nproc, nproc))
919 CALL ctrig(n3/2, btrig3, after3, before3, now3, 1, ic3)
920 CALL ctrig(n1, btrig1, after1, before1, now1, 1, ic1)
921 CALL ctrig(n2, btrig2, after2, before2, now2, 1, ic2)
923 ftrig1(1, j) = btrig1(1, j)
924 ftrig1(2, j) = -btrig1(2, j)
927 ftrig2(1, j) = btrig2(1, j)
928 ftrig2(2, j) = -btrig2(2, j)
931 ftrig3(1, j) = btrig3(1, j)
932 ftrig3(2, j) = -btrig3(2, j)
936 twopion = 8._dp*atan(1._dp)/real(n3, kind=
dp)
938 cosinarr(1, i3) = cos(twopion*(i3 - 1))
939 cosinarr(2, i3) = -sin(twopion*(i3 - 1))
949 'convolxc_off:ncache has to be enlarged to be able to hold at'// &
950 'least one 1-d FFT of this size even though this will'// &
951 'reduce the performance for shorter transform lengths', n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc
957 IF (iproc*(md2/nproc) + j2 .LE. n2)
THEN
960 mb = min(i1 + (lot - 1), n1)
964 CALL halfill_upcorn(md1, md3, lot, nfft, n3, zf(i1, 1, j2), zw(1, 1, 1))
970 CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
971 btrig3, after3(i), now3(i), before3(i), 1)
978 CALL scramble_unpack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw(1, 1, inzee), zmpi2, cosinarr)
985 IF (nproc .GT. 1)
THEN
986 CALL mpi_group%alltoall(zmpi2, zmpi1, 2*n1*(md2/nproc)*(nd3/nproc))
993 IF (iproc*(nd3/nproc) + j3 .LE. n3/2 + 1)
THEN
1001 IF (lot .LT. 1)
THEN
1003 'convolxc_off:ncache has to be enlarged to be able to hold at'// &
1004 'least one 1-d FFT of this size even though this will'// &
1005 'reduce the performance for shorter transform lengths'
1011 mb = min(j + (lot - 1), n2)
1016 IF (nproc .EQ. 1)
THEN
1017 CALL s_mpiswitch_upcorn(j3, nfft, jp2stb, j2stb, lot, n1, md2, nd3, nproc, zmpi2, zw(1, 1, 1))
1019 CALL s_mpiswitch_upcorn(j3, nfft, jp2stb, j2stb, lot, n1, md2, nd3, nproc, zmpi1, zw(1, 1, 1))
1027 CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1028 btrig1, after1(i), now1(i), before1(i), 1)
1034 CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
1035 btrig1, after1(i), now1(i), before1(i), 1)
1041 IF (lot .LT. 1)
THEN
1043 'convolxc_off:ncache has to be enlarged to be able to hold at'// &
1044 'least one 1-d FFT of this size even though this will'// &
1045 'reduce the performance for shorter transform lengths'
1051 mb = min(j + (lot - 1), n1)
1056 CALL s_switch_upcorn(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
1063 CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1064 btrig2, after2(i), now2(i), before2(i), 1)
1070 CALL multkernel(nd1, nd2, n1, n2, lot, nfft, j, pot(1, 1, j3), zw(1, 1, inzee))
1077 CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1078 ftrig2, after2(i), now2(i), before2(i), -1)
1084 CALL s_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw(1, 1, inzee), zt(1, 1, j))
1093 mb = min(j + (lot - 1), n2)
1098 CALL fftstp(lzt, nfft, n1, lot, n1, zt(1, j, 1), zw(1, 1, 1), &
1099 ftrig1, after1(i), now1(i), before1(i), -1)
1103 CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1104 ftrig1, after1(i), now1(i), before1(i), -1)
1111 IF (nproc .EQ. 1)
THEN
1112 CALL s_unmpiswitch_downcorn(j3, nfft, jp2stf, j2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi2)
1114 CALL s_unmpiswitch_downcorn(j3, nfft, jp2stf, j2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi1)
1123 IF (nproc .GT. 1)
THEN
1125 CALL mpi_group%alltoall(zmpi1, zmpi2, 2*n1*(md2/nproc)*(nd3/nproc))
1133 DO j2 = 1, md2/nproc
1135 IF (iproc*(md2/nproc) + j2 .LE. n2)
THEN
1138 mb = min(i1 + (lot - 1), n1)
1143 CALL unscramble_pack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw(1, 1, 1), cosinarr)
1150 CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1151 ftrig3, after3(i), now3(i), before3(i), -1)
1157 CALL unfill_downcorn(md1, md3, lot, nfft, n3, zw(1, 1, inzee), zf(i1, 1, j2) &
1171 DEALLOCATE (before1)
1176 DEALLOCATE (before2)
1181 DEALLOCATE (before3)
1185 DEALLOCATE (cosinarr)
1186 IF (nproc .GT. 1)
DEALLOCATE (zmpi1)
1189 CALL timestop(handle)
1206 SUBROUTINE s_mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw)
1207 INTEGER,
INTENT(in) :: j3, nfft
1208 INTEGER,
INTENT(inout) :: jp2stb, j2stb
1209 INTEGER,
INTENT(in) :: lot, n1, md2, nd3, nproc
1211 DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
1213 REAL(kind=
dp),
DIMENSION(2, lot, n1), &
1216 INTEGER :: i1, j2, jp2, mfft
1219 DO jp2 = jp2stb, nproc
1220 DO j2 = j2stb, md2/nproc
1222 IF (mfft .GT. nfft)
THEN
1228 zw(1, mfft, i1) = zmpi1(1, i1, j2, j3, jp2)
1229 zw(2, mfft, i1) = zmpi1(2, i1, j2, j3, jp2)
1234 END SUBROUTINE s_mpiswitch_upcorn
1246 SUBROUTINE s_switch_upcorn(nfft, n2, lot, n1, lzt, zt, zw)
1247 INTEGER,
INTENT(in) :: nfft, n2, lot, n1, lzt
1248 REAL(kind=
dp),
DIMENSION(2, lzt, n1),
INTENT(in) :: zt
1249 REAL(kind=
dp),
DIMENSION(2, lot, n2), &
1256 zw(1, j, i) = zt(1, i, j)
1257 zw(2, j, i) = zt(2, i, j)
1260 END SUBROUTINE s_switch_upcorn
1272 SUBROUTINE s_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw, zt)
1273 INTEGER,
INTENT(in) :: nfft, n2, lot, n1, lzt
1274 REAL(kind=
dp),
DIMENSION(2, lot, n2),
INTENT(in) :: zw
1275 REAL(kind=
dp),
DIMENSION(2, lzt, n1), &
1282 zt(1, i, j) = zw(1, j, i)
1283 zt(2, i, j) = zw(2, j, i)
1286 END SUBROUTINE s_unswitch_downcorn
1302 SUBROUTINE s_unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw, zmpi1)
1303 INTEGER,
INTENT(in) :: j3, nfft
1304 INTEGER,
INTENT(inout) :: jp2stf, j2stf
1305 INTEGER,
INTENT(in) :: lot, n1, md2, nd3, nproc
1306 REAL(kind=
dp),
DIMENSION(2, lot, n1),
INTENT(in) :: zw
1308 DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
1309 INTENT(inout) :: zmpi1
1311 INTEGER :: i1, j2, jp2, mfft
1314 DO jp2 = jp2stf, nproc
1315 DO j2 = j2stf, md2/nproc
1317 IF (mfft .GT. nfft)
THEN
1323 zmpi1(1, i1, j2, j3, jp2) = zw(1, mfft, i1)
1324 zmpi1(2, i1, j2, j3, jp2) = zw(2, mfft, i1)
1329 END SUBROUTINE s_unmpiswitch_downcorn
1356 SUBROUTINE unfill_downcorn(md1, md3, lot, nfft, n3, zw, zf, scal)
1357 INTEGER,
INTENT(in) :: md1, md3, lot, nfft, n3
1358 REAL(kind=
dp),
DIMENSION(2, lot, n3/2),
INTENT(in) :: zw
1359 REAL(kind=
dp),
DIMENSION(md1, md3),
INTENT(inout) :: zf
1360 REAL(kind=
dp),
INTENT(in) :: scal
1363 REAL(kind=
dp) :: pot1
1367 pot1 = scal*zw(1, i1, i3)
1369 zf(i1, 2*i3 - 1) = pot1
1370 pot1 = scal*zw(2, i1, i3)
1375 END SUBROUTINE unfill_downcorn
1387 SUBROUTINE halfill_upcorn(md1, md3, lot, nfft, n3, zf, zw)
1388 INTEGER :: md1, md3, lot, nfft, n3
1389 REAL(kind=
dp) :: zf(md1, md3), zw(2, lot, n3/2)
1398 zw(1, i1, i3) = 0._dp
1399 zw(2, i1, i3) = 0._dp
1402 DO i3 = n3/4 + 1, n3/2
1404 zw(1, i1, i3) = zf(i1, 2*i3 - 1 - n3/2)
1405 zw(2, i1, i3) = zf(i1, 2*i3 - n3/2)
1409 END SUBROUTINE halfill_upcorn
1439 SUBROUTINE scramble_unpack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw, zmpi2, cosinarr)
1440 INTEGER,
INTENT(in) :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
1442 REAL(kind=
dp),
DIMENSION(2, lot, n3/2),
INTENT(in) :: zw
1443 REAL(kind=
dp),
DIMENSION(2, n1, md2/nproc, nd3), &
1444 INTENT(inout) :: zmpi2
1445 REAL(kind=
dp),
DIMENSION(2, n3/2),
INTENT(in) :: cosinarr
1447 INTEGER :: i, i3, ind1, ind2
1448 REAL(kind=
dp) :: a, b, c, cp, d, fei, fer, fi, foi,
for, &
1456 zmpi2(1, i1 + i, j2, 1) = a + b
1457 zmpi2(2, i1 + i, j2, 1) = 0._dp
1458 zmpi2(1, i1 + i, j2, n3/2 + 1) = a - b
1459 zmpi2(2, i1 + i, j2, n3/2 + 1) = 0._dp
1464 ind2 = n3/2 - i3 + 2
1465 cp = cosinarr(1, i3)
1466 sp = cosinarr(2, i3)
1468 a = zw(1, i + 1, ind1)
1469 b = zw(2, i + 1, ind1)
1470 c = zw(1, i + 1, ind2)
1471 d = zw(2, i + 1, ind2)
1476 fr = fer + cp*foi - sp*
for
1477 fi = fei - cp*
for - sp*foi
1478 zmpi2(1, i1 + i, j2, ind1) = fr
1479 zmpi2(2, i1 + i, j2, ind1) = fi
1513 SUBROUTINE unscramble_pack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw, cosinarr)
1514 INTEGER,
INTENT(in) :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
1516 REAL(kind=
dp),
DIMENSION(2, n1, md2/nproc, nd3), &
1518 REAL(kind=
dp),
DIMENSION(2, lot, n3/2), &
1520 REAL(kind=
dp),
DIMENSION(2, n3/2),
INTENT(in) :: cosinarr
1522 INTEGER :: i, i3, inda, indb
1523 REAL(kind=
dp) :: a, b, c, cp, d, ie, ih, io, re, rh, ro, &
1528 indb = n3/2 + 2 - i3
1529 cp = cosinarr(1, i3)
1530 sp = cosinarr(2, i3)
1532 a = zmpi2(1, i1 + i, j2, inda)
1533 b = zmpi2(2, i1 + i, j2, inda)
1534 c = zmpi2(1, i1 + i, j2, indb)
1535 d = -zmpi2(2, i1 + i, j2, indb)
1538 ro = (a - c)*cp - (b - d)*sp
1539 io = (a - c)*sp + (b - d)*cp
1542 zw(1, i + 1, inda) = rh
1543 zw(2, i + 1, inda) = ih
1547 END SUBROUTINE unscramble_pack
1587 SUBROUTINE f_poissonsolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, pot, zf, &
1589 INTEGER,
INTENT(in) :: n1, n2, n3, nd1, nd2, nd3, md1, md2, &
1591 REAL(kind=
dp),
DIMENSION(nd1, nd2, nd3/nproc), &
1593 REAL(kind=
dp),
DIMENSION(md1, md3, md2/nproc), &
1595 REAL(kind=
dp),
INTENT(in) :: scal
1597 CLASS(mp_comm_type),
INTENT(in) :: mpi_group
1599 INTEGER,
PARAMETER :: ncache_optimal = 8*1024
1601 INTEGER :: i, i1, i3, ic1, ic2, ic3, inzee, j, j2, &
1602 j2stb, j2stf, j3, jp2stb, jp2stf, lot, &
1603 lzt, ma, mb, ncache, nfft
1604 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: after1, after2, after3, before1, &
1605 before2, before3, now1, now2, now3
1606 REAL(kind=
dp) :: twopion
1607 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: btrig1, btrig2, btrig3, cosinarr, &
1608 ftrig1, ftrig2, ftrig3
1609 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: zt, zw
1610 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :, :) :: zmpi2
1611 REAL(kind=
dp),
ALLOCATABLE, &
1612 DIMENSION(:, :, :, :, :) :: zmpi1
1614 IF (mod(n1, 2) .NE. 0) cpabort(
"Parallel convolution:ERROR:n1")
1615 IF (mod(n2, 2) .NE. 0) cpabort(
"Parallel convolution:ERROR:n2")
1616 IF (mod(n3, 2) .NE. 0) cpabort(
"Parallel convolution:ERROR:n3")
1617 IF (nd1 .LT. n1/2 + 1) cpabort(
"Parallel convolution:ERROR:nd1")
1618 IF (nd2 .LT. n2/2 + 1) cpabort(
"Parallel convolution:ERROR:nd2")
1619 IF (nd3 .LT. n3/2 + 1) cpabort(
"Parallel convolution:ERROR:nd3")
1620 IF (md1 .LT. n1/2) cpabort(
"Parallel convolution:ERROR:md1")
1621 IF (md2 .LT. n2/2) cpabort(
"Parallel convolution:ERROR:md2")
1622 IF (md3 .LT. n3/2) cpabort(
"Parallel convolution:ERROR:md3")
1623 IF (mod(nd3, nproc) .NE. 0) cpabort(
"Parallel convolution:ERROR:nd3")
1624 IF (mod(md2, nproc) .NE. 0) cpabort(
"Parallel convolution:ERROR:md2")
1628 ncache = ncache_optimal
1629 IF (ncache <= max(n1, n2, n3/2)*4) ncache = max(n1, n2, n3/2)*4
1631 IF (mod(n2/2, 2) .EQ. 0) lzt = lzt + 1
1632 IF (mod(n2/2, 4) .EQ. 0) lzt = lzt + 1
1637 ALLOCATE (after1(7))
1639 ALLOCATE (before1(7))
1642 ALLOCATE (after2(7))
1644 ALLOCATE (before2(7))
1647 ALLOCATE (after3(7))
1649 ALLOCATE (before3(7))
1650 ALLOCATE (zw(2, ncache/4, 2))
1651 ALLOCATE (zt(2, lzt, n1))
1652 ALLOCATE (zmpi2(2, n1, md2/nproc, nd3))
1654 ALLOCATE (cosinarr(2, n3/2))
1655 IF (nproc .GT. 1)
ALLOCATE (zmpi1(2, n1, md2/nproc, nd3/nproc, nproc))
1658 CALL ctrig(n3/2, btrig3, after3, before3, now3, 1, ic3)
1659 CALL ctrig(n1, btrig1, after1, before1, now1, 1, ic1)
1660 CALL ctrig(n2, btrig2, after2, before2, now2, 1, ic2)
1662 ftrig1(1, j) = btrig1(1, j)
1663 ftrig1(2, j) = -btrig1(2, j)
1666 ftrig2(1, j) = btrig2(1, j)
1667 ftrig2(2, j) = -btrig2(2, j)
1670 ftrig3(1, j) = btrig3(1, j)
1671 ftrig3(2, j) = -btrig3(2, j)
1675 twopion = 8._dp*atan(1._dp)/real(n3, kind=
dp)
1677 cosinarr(1, i3) = cos(twopion*(i3 - 1))
1678 cosinarr(2, i3) = -sin(twopion*(i3 - 1))
1683 IF (lot .LT. 1)
THEN
1685 'convolxc_on:ncache has to be enlarged to be able to hold at'// &
1686 'least one 1-d FFT of this size even though this will'// &
1687 'reduce the performance for shorter transform lengths'
1691 DO j2 = 1, md2/nproc
1693 IF (iproc*(md2/nproc) + j2 .LE. n2/2)
THEN
1694 DO i1 = 1, (n1/2), lot
1696 mb = min(i1 + (lot - 1), (n1/2))
1700 CALL halfill_upcorn(md1, md3, lot, nfft, n3, zf(i1, 1, j2), zw(1, 1, 1))
1706 CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1707 btrig3, after3(i), now3(i), before3(i), 1)
1715 CALL scramble_unpack(i1, j2, lot, nfft, n1/2, n3, md2, nproc, nd3, zw(1, 1, inzee), zmpi2, cosinarr)
1723 IF (nproc .GT. 1)
THEN
1725 CALL mpi_group%alltoall(zmpi2, zmpi1, n1*(md2/nproc)*(nd3/nproc))
1730 DO j3 = 1, nd3/nproc
1732 IF (iproc*(nd3/nproc) + j3 .LE. n3/2 + 1)
THEN
1740 IF (lot .LT. 1)
THEN
1742 'convolxc_on:ncache has to be enlarged to be able to hold at'// &
1743 'least one 1-d FFT of this size even though this will'// &
1744 'reduce the performance for shorter transform lengths'
1750 mb = min(j + (lot - 1), n2/2)
1755 IF (nproc .EQ. 1)
THEN
1756 CALL mpiswitch_upcorn(j3, nfft, jp2stb, j2stb, lot, n1, md2, nd3, nproc, zmpi2, zw(1, 1, 1))
1758 CALL mpiswitch_upcorn(j3, nfft, jp2stb, j2stb, lot, n1, md2, nd3, nproc, zmpi1, zw(1, 1, 1))
1766 CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1767 btrig1, after1(i), now1(i), before1(i), 1)
1773 CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
1774 btrig1, after1(i), now1(i), before1(i), 1)
1780 IF (lot .LT. 1)
THEN
1782 'convolxc_on:ncache has to be enlarged to be able to hold at'// &
1783 'least one 1-d FFT of this size even though this will'// &
1784 'reduce the performance for shorter transform lengths'
1790 mb = min(j + (lot - 1), n1)
1795 CALL switch_upcorn(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
1802 CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1803 btrig2, after2(i), now2(i), before2(i), 1)
1809 CALL multkernel(nd1, nd2, n1, n2, lot, nfft, j, pot(1, 1, j3), zw(1, 1, inzee))
1816 CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1817 ftrig2, after2(i), now2(i), before2(i), -1)
1823 CALL unswitch_downcorn(nfft, n2, lot, n1, lzt, zw(1, 1, inzee), zt(1, 1, j))
1832 mb = min(j + (lot - 1), n2/2)
1837 CALL fftstp(lzt, nfft, n1, lot, n1, zt(1, j, 1), zw(1, 1, 1), &
1838 ftrig1, after1(i), now1(i), before1(i), -1)
1842 CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1843 ftrig1, after1(i), now1(i), before1(i), -1)
1850 IF (nproc .EQ. 1)
THEN
1851 CALL unmpiswitch_downcorn(j3, nfft, jp2stf, j2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi2)
1853 CALL unmpiswitch_downcorn(j3, nfft, jp2stf, j2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi1)
1862 IF (nproc .GT. 1)
THEN
1864 CALL mpi_group%alltoall(zmpi1, zmpi2, n1*(md2/nproc)*(nd3/nproc))
1871 DO j2 = 1, md2/nproc
1873 IF (iproc*(md2/nproc) + j2 .LE. n2/2)
THEN
1874 DO i1 = 1, (n1/2), lot
1876 mb = min(i1 + (lot - 1), (n1/2))
1881 CALL unscramble_pack(i1, j2, lot, nfft, n1/2, n3, md2, nproc, nd3, zmpi2, zw(1, 1, 1), cosinarr)
1888 CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1889 ftrig3, after3(i), now3(i), before3(i), -1)
1895 CALL unfill_downcorn(md1, md3, lot, nfft, n3, zw(1, 1, inzee), zf(i1, 1, j2) &
1909 DEALLOCATE (before1)
1914 DEALLOCATE (before2)
1919 DEALLOCATE (before3)
1923 DEALLOCATE (cosinarr)
1924 IF (nproc .GT. 1)
DEALLOCATE (zmpi1)
1938 SUBROUTINE switch_upcorn(nfft, n2, lot, n1, lzt, zt, zw)
1939 INTEGER :: nfft, n2, lot, n1, lzt
1940 REAL(kind=
dp) :: zt(2, lzt, n1), zw(2, lot, n2)
1950 zw(1, j, i) = zt(1, i - n2/2, j)
1951 zw(2, j, i) = zt(2, i - n2/2, j)
1961 END SUBROUTINE switch_upcorn
1977 SUBROUTINE mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw)
1978 INTEGER :: j3, nfft, jp2stb, j2stb, lot, n1, md2, &
1980 REAL(kind=
dp) :: zmpi1(2, n1/2, md2/nproc, nd3/nproc, nproc), zw(2, lot, n1)
1982 INTEGER :: i1, j2, jp2, mfft
1988 main:
DO jp2 = jp2stb, nproc
1989 DO j2 = j2stb, md2/nproc
1991 IF (mfft .GT. nfft)
THEN
1997 zw(1, mfft, i1) = 0._dp
1998 zw(2, mfft, i1) = 0._dp
2000 DO i1 = n1/2 + 1, n1
2001 zw(1, mfft, i1) = zmpi1(1, i1 - n1/2, j2, j3, jp2)
2002 zw(2, mfft, i1) = zmpi1(2, i1 - n1/2, j2, j3, jp2)
2007 END SUBROUTINE mpiswitch_upcorn
2019 SUBROUTINE unswitch_downcorn(nfft, n2, lot, n1, lzt, zw, zt)
2020 INTEGER :: nfft, n2, lot, n1, lzt
2021 REAL(kind=
dp) :: zw(2, lot, n2), zt(2, lzt, n1)
2031 zt(1, i, j) = zw(1, j, i)
2032 zt(2, i, j) = zw(2, j, i)
2036 END SUBROUTINE unswitch_downcorn
2052 SUBROUTINE unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw, zmpi1)
2053 INTEGER :: j3, nfft, jp2stf, j2stf, lot, n1, md2, &
2055 REAL(kind=
dp) :: zw(2, lot, n1), zmpi1(2, n1/2, md2/nproc, nd3/nproc, nproc)
2057 INTEGER :: i1, j2, jp2, mfft
2063 main:
DO jp2 = jp2stf, nproc
2064 DO j2 = j2stf, md2/nproc
2066 IF (mfft .GT. nfft)
THEN
2072 zmpi1(1, i1, j2, j3, jp2) = zw(1, mfft, i1)
2073 zmpi1(2, i1, j2, j3, jp2) = zw(2, mfft, i1)
2078 END SUBROUTINE unmpiswitch_downcorn
2106 SUBROUTINE f_unfill_downcorn(md1, md3, lot, nfft, n3, zw, zf, scal, ehartreetmp)
2107 INTEGER,
INTENT(in) :: md1, md3, lot, nfft, n3
2108 REAL(kind=
dp),
DIMENSION(2, lot, n3/2),
INTENT(in) :: zw
2109 REAL(kind=
dp),
DIMENSION(md1, md3),
INTENT(inout) :: zf
2110 REAL(kind=
dp),
INTENT(in) :: scal
2111 REAL(kind=
dp),
INTENT(out) :: ehartreetmp
2114 REAL(kind=
dp) :: pot1
2119 pot1 = scal*zw(1, i1, i3)
2120 ehartreetmp = ehartreetmp + pot1*zf(i1, 2*i3 - 1)
2121 zf(i1, 2*i3 - 1) = pot1
2122 pot1 = scal*zw(2, i1, i3)
2123 ehartreetmp = ehartreetmp + pot1*zf(i1, 2*i3)
2127 END SUBROUTINE f_unfill_downcorn
int main(int argc, char *argv[])
Stand-alone miniapp for smoke-testing and benchmarking dbm_multiply.
for(int lxp=0;lxp<=lp;lxp++)
Defines the basic variable types.
integer, parameter, public dp
Interface to the message passing library MPI.
Creates the wavelet kernel for the wavelet based poisson solver.
subroutine, public s_poissonsolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, pot, zf, scal, mpi_group)
!HERE POT MUST BE THE KERNEL (BEWARE THE HALF DIMENSION) ****h* BigDFT/S_PoissonSolver (Based on suit...
subroutine, public scramble_unpack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw, zmpi2, cosinarr)
(Based on suitable modifications of S.Goedecker routines) Assign the correct planes to the work array...
subroutine, public f_poissonsolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, pot, zf, scal, mpi_group)
(Based on suitable modifications of S.Goedecker routines) Applies the local FFT space Kernel to the d...
subroutine, public p_poissonsolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, zf, scal, hx, hy, hz, mpi_group)
...
integer, parameter, public ctrig_length
subroutine, public fftstp(mm, nfft, m, nn, n, zin, zout, trig, after, now, before, isign)
...
subroutine, public ctrig(n, trig, after, before, now, isign, ic)
...