(git:ccc2433)
ps_wavelet_kernel.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \brief Creates the wavelet kernel for the wavelet based poisson solver.
10 !> \author Florian Schiffmann (09.2007,fschiff)
11 ! **************************************************************************************************
13 
14  USE kinds, ONLY: dp
15  USE message_passing, ONLY: mp_comm_type
17  USE ps_wavelet_fft3d, ONLY: ctrig,&
18  ctrig_length,&
19  fftstp
24 #include "../base/base_uses.f90"
25 
26  IMPLICIT NONE
27 
28  PRIVATE
29 
30  CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'ps_wavelet_kernel'
31 
32 ! *** Public data types ***
33 
34  PUBLIC :: createkernel
35 
36 CONTAINS
37 
38 ! **************************************************************************************************
39 !> \brief Allocate a pointer which corresponds to the zero-padded FFT slice needed for
40 !> calculating the convolution with the kernel expressed in the interpolating scaling
41 !> function basis. The kernel pointer is unallocated on input, allocated on output.
42 !> \param geocode Indicates the boundary conditions (BC) of the problem:
43 !> 'F' free BC, isolated systems.
44 !> The program calculates the solution as if the given density is
45 !> "alone" in R^3 space.
46 !> 'S' surface BC, isolated in y direction, periodic in xz plane
47 !> The given density is supposed to be periodic in the xz plane,
48 !> so the dimensions in these direction mus be compatible with the FFT
49 !> Beware of the fact that the isolated direction is y!
50 !> 'P' periodic BC.
51 !> The density is supposed to be periodic in all the three directions,
52 !> then all the dimensions must be compatible with the FFT.
53 !> No need for setting up the kernel.
54 !> \param n01 dimensions of the real space grid to be hit with the Poisson Solver
55 !> \param n02 dimensions of the real space grid to be hit with the Poisson Solver
56 !> \param n03 dimensions of the real space grid to be hit with the Poisson Solver
57 !> \param hx grid spacings. For the isolated BC case for the moment they are supposed to
58 !> be equal in the three directions
59 !> \param hy grid spacings. For the isolated BC case for the moment they are supposed to
60 !> be equal in the three directions
61 !> \param hz grid spacings. For the isolated BC case for the moment they are supposed to
62 !> be equal in the three directions
63 !> \param itype_scf order of the interpolating scaling functions used in the decomposition
64 !> \param iproc ,nproc: number of process, number of processes
65 !> \param nproc ...
66 !> \param kernel pointer for the kernel FFT. Unallocated on input, allocated on output.
67 !> Its dimensions are equivalent to the region of the FFT space for which the
68 !> kernel is injective. This will divide by two each direction,
69 !> since the kernel for the zero-padded convolution is real and symmetric.
70 !> \param mpi_group ...
71 !> \date February 2007
72 !> \author Luigi Genovese
73 !> \note Due to the fact that the kernel dimensions are unknown before the calling, the kernel
74 !> must be declared as pointer in input of this routine.
75 !> To avoid that, one can properly define the kernel dimensions by adding
76 !> the nd1,nd2,nd3 arguments to the PS_dim4allocation routine, then eliminating the pointer
77 !> declaration.
78 ! **************************************************************************************************
79  SUBROUTINE createkernel(geocode, n01, n02, n03, hx, hy, hz, itype_scf, iproc, nproc, kernel, mpi_group)
80 
81  CHARACTER(len=1), INTENT(in) :: geocode
82  INTEGER, INTENT(in) :: n01, n02, n03
83  REAL(kind=dp), INTENT(in) :: hx, hy, hz
84  INTEGER, INTENT(in) :: itype_scf, iproc, nproc
85  REAL(kind=dp), POINTER :: kernel(:)
86 
87  CLASS(mp_comm_type), INTENT(in) :: mpi_group
88 
89  INTEGER :: m1, m2, m3, md1, md2, md3, n1, n2, n3, &
90  nd1, nd2, nd3, nlimd, nlimk
91  REAL(kind=dp) :: hgrid
92 
93  hgrid = max(hx, hy, hz)
94 
95  IF (geocode == 'P') THEN
96 
97  CALL f_fft_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, &
98  md1, md2, md3, nd1, nd2, nd3, nproc)
99 
100  ALLOCATE (kernel(1))
101  nlimd = n2
102  nlimk = 0
103 
104  ELSE IF (geocode == 'S') THEN
105 
106  CALL s_fft_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, &
107  md1, md2, md3, nd1, nd2, nd3, nproc)
108 
109  ALLOCATE (kernel(nd1*nd2*nd3/nproc))
110 
111  !the kernel must be built and scattered to all the processes
112 
113  CALL surfaces_kernel(n1, n2, n3, m3, nd1, nd2, nd3, hx, hz, hy, &
114  itype_scf, kernel, iproc, nproc, mpi_group)
115  !last plane calculated for the density and the kernel
116 
117  nlimd = n2
118  nlimk = n3/2 + 1
119  ELSE IF (geocode == 'F') THEN
120 
121  !Build the Kernel
122 
123  CALL f_fft_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, &
124  md1, md2, md3, nd1, nd2, nd3, nproc)
125  ALLOCATE (kernel(nd1*nd2*nd3/nproc))
126 
127  !the kernel must be built and scattered to all the processes
128  CALL free_kernel(n01, n02, n03, n1, n2, n3, nd1, nd2, nd3, hgrid, &
129  itype_scf, iproc, nproc, kernel, mpi_group)
130 
131  !last plane calculated for the density and the kernel
132  nlimd = n2/2
133  nlimk = n3/2 + 1
134 
135  ELSE
136 
137  cpabort("No wavelet based poisson solver for given geometry")
138 
139  END IF
140 !!! IF (iproc==0) THEN
141 !!! write(*,*)'done.'
142 !!! write(*,'(1x,a,i0)') 'Allocate words for kernel ',nd1*nd2*nd3/nproc
143 !!! !print the load balancing of the different dimensions on screen
144 !!! write(*,'(1x,a,3(i5))')'Grid Dimensions:',n01,n02,n03
145 !!! if (nproc > 1) then
146 !!! write(*,'(1x,a,3(i5),a,3(i5),a,3(i5))')&
147 !!! 'Memory occ. per proc. Density',md1,md3,md2/nproc,' Kernel',nd1,nd2,nd3/nproc
148 !!! write(*,'(1x,a)')'Load Balancing--------------------------------------------'
149 !!! jhd=1000
150 !!! jzd=1000
151 !!! npd=0
152 !!! load_balancing: do jproc=0,nproc-1
153 !!! !print *,'jproc,jfull=',jproc,jproc*md2/nproc,(jproc+1)*md2/nproc
154 !!! if ((jproc+1)*md2/nproc <= nlimd) then
155 !!! jfd=jproc
156 !!! else if (jproc*md2/nproc <= nlimd) then
157 !!! jhd=jproc
158 !!! npd=nint(real(nlimd-(jproc)*md2/nproc,KIND=dp)/real(md2/nproc,KIND=dp)*100._dp)
159 !!! else
160 !!! jzd=jproc
161 !!! exit load_balancing
162 !!! end if
163 !!! end do load_balancing
164 !!! write(*,'(1x,a,i3,a)')'LB_den : processors 0 -',jfd,' work at 100%'
165 !!! if (jfd < nproc-1) write(*,'(1x,a,i3,a,i3,1a)')' processor ',jhd,&
166 !!! ' work at ',npd,'%'
167 !!! if (jhd < nproc-1) write(*,'(1x,a,i3,1a,i3,a)')' processors ',&
168 !!! jzd,' -',nproc-1,' work at 0%'
169 !!! jhk=1000
170 !!! jzk=1000
171 !!! npk=0
172 !!! if (geocode /= 'P') then
173 !!! load_balancingk: do jproc=0,nproc-1
174 !!! !print *,'jproc,jfull=',jproc,jproc*nd3/nproc,(jproc+1)*nd3/nproc
175 !!! if ((jproc+1)*nd3/nproc <= nlimk) then
176 !!! jfk=jproc
177 !!! else if (jproc*nd3/nproc <= nlimk) then
178 !!! jhk=jproc
179 !!! npk=nint(real(nlimk-(jproc)*nd3/nproc,KIND=dp)/real(nd3/nproc,KIND=dp)*100._dp)
180 !!! else
181 !!! jzk=jproc
182 !!! exit load_balancingk
183 !!! end if
184 !!! end do load_balancingk
185 !!! write(*,'(1x,a,i3,a)')'LB_ker : processors 0 -',jfk,' work at 100%'
186 !!! if (jfk < nproc-1) write(*,'(1x,a,i3,a,i3,1a)')' processor ',jhk,&
187 !!! ' work at ',npk,'%'
188 !!! if (jhk < nproc-1) write(*,'(1x,a,i3,1a,i3,a)')' processors ',jzk,' -',nproc-1,&
189 !!! ' work at 0%'
190 !!! end if
191 !!! write(*,'(1x,a)')'The LB per processor is 1/3 LB_den + 2/3 LB_ker-----------'
192 !!! end if
193 !!!
194 !!! END IF
195  END SUBROUTINE createkernel
196 
197 ! **************************************************************************************************
198 !> \brief Build the kernel of the Poisson operator with
199 !> surfaces Boundary conditions
200 !> in an interpolating scaling functions basis.
201 !> Beware of the fact that the nonperiodic direction is y!
202 !> \param n1 Dimensions for the FFT
203 !> \param n2 Dimensions for the FFT
204 !> \param n3 Dimensions for the FFT
205 !> \param m3 Actual dimension in non-periodic direction
206 !> \param nker1 Dimensions of the kernel (nker3=n3/2+1) nker(1,2)=n(1,2)/2+1
207 !> \param nker2 Dimensions of the kernel (nker3=n3/2+1) nker(1,2)=n(1,2)/2+1
208 !> \param nker3 Dimensions of the kernel (nker3=n3/2+1) nker(1,2)=n(1,2)/2+1
209 !> \param h1 Mesh steps in the three dimensions
210 !> \param h2 Mesh steps in the three dimensions
211 !> \param h3 Mesh steps in the three dimensions
212 !> \param itype_scf Order of the scaling function
213 !> \param karray output array
214 !> \param iproc Number of process
215 !> \param nproc number of processes
216 !> \param mpi_group ...
217 !> \date October 2006
218 !> \author L. Genovese
219 ! **************************************************************************************************
220  SUBROUTINE surfaces_kernel(n1, n2, n3, m3, nker1, nker2, nker3, h1, h2, h3, &
221  itype_scf, karray, iproc, nproc, mpi_group)
222 
223  INTEGER, INTENT(in) :: n1, n2, n3, m3, nker1, nker2, nker3
224  REAL(kind=dp), INTENT(in) :: h1, h2, h3
225  INTEGER, INTENT(in) :: itype_scf, nproc, iproc
226  REAL(kind=dp), &
227  DIMENSION(nker1, nker2, nker3/nproc), &
228  INTENT(out) :: karray
229  TYPE(mp_comm_type), INTENT(in) :: mpi_group
230 
231  INTEGER, PARAMETER :: n_points = 2**6, ncache_optimal = 8*1024
232 
233  INTEGER :: i, i1, i2, i3, ic, iend, imu, ind1, ind2, inzee, ipolyord, ireim, istart, j2, &
234  j2nd, j2st, jnd1, jp2, jreim, n_cell, n_range, n_scf, nact2, ncache, nfft, num_of_mus, &
235  shift
236  INTEGER, ALLOCATABLE, DIMENSION(:) :: after, before, now
237  REAL(kind=dp) :: a, b, c, cp, d, diff, dx, fei, fer, foi, &
238  for, fr, mu1, pi, pion, ponx, pony, &
239  sp, value, x
240  REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: kernel_scf, x_scf, y_scf
241  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: btrig, cossinarr
242  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: halfft_cache, kernel
243  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :, :) :: kernel_mpi
244  REAL(kind=dp), DIMENSION(9, 8) :: cpol
245 
246 !Better if higher (1024 points are enough 10^{-14}: 2*itype_scf*n_points)
247 ! include "perfdata.inc"
248 !FFT arrays
249 !coefficients for the polynomial interpolation
250 !assign the values of the coefficients
251 
252  karray = 0.0_dp
253  cpol(:, :) = 1._dp
254 
255  cpol(1, 2) = .25_dp
256 
257  cpol(1, 3) = 1._dp/3._dp
258 
259  cpol(1, 4) = 7._dp/12._dp
260  cpol(2, 4) = 8._dp/3._dp
261 
262  cpol(1, 5) = 19._dp/50._dp
263  cpol(2, 5) = 3._dp/2._dp
264 
265  cpol(1, 6) = 41._dp/272._dp
266  cpol(2, 6) = 27._dp/34._dp
267  cpol(3, 6) = 27._dp/272._dp
268 
269  cpol(1, 7) = 751._dp/2989._dp
270  cpol(2, 7) = 73._dp/61._dp
271  cpol(3, 7) = 27._dp/61._dp
272 
273  cpol(1, 8) = -989._dp/4540._dp
274  cpol(2, 8) = -1472._dp/1135._dp
275  cpol(3, 8) = 232._dp/1135._dp
276  cpol(4, 8) = -2624._dp/1135._dp
277 
278  !renormalize values
279  cpol(1, 1) = .5_dp*cpol(1, 1)
280  cpol(1:2, 2) = 2._dp/3._dp*cpol(1:2, 2)
281  cpol(1:2, 3) = 3._dp/8._dp*cpol(1:2, 3)
282  cpol(1:3, 4) = 2._dp/15._dp*cpol(1:3, 4)
283  cpol(1:3, 5) = 25._dp/144._dp*cpol(1:3, 5)
284  cpol(1:4, 6) = 34._dp/105._dp*cpol(1:4, 6)
285  cpol(1:4, 7) = 2989._dp/17280._dp*cpol(1:4, 7)
286  cpol(1:5, 8) = -454._dp/2835._dp*cpol(1:5, 8)
287 
288  !assign the complete values
289  cpol(2, 1) = cpol(1, 1)
290 
291  cpol(3, 2) = cpol(1, 2)
292 
293  cpol(3, 3) = cpol(2, 3)
294  cpol(4, 3) = cpol(1, 3)
295 
296  cpol(4, 4) = cpol(2, 4)
297  cpol(5, 4) = cpol(1, 4)
298 
299  cpol(4, 5) = cpol(3, 5)
300  cpol(5, 5) = cpol(2, 5)
301  cpol(6, 5) = cpol(1, 5)
302 
303  cpol(5, 6) = cpol(3, 6)
304  cpol(6, 6) = cpol(2, 6)
305  cpol(7, 6) = cpol(1, 6)
306 
307  cpol(5, 7) = cpol(4, 7)
308  cpol(6, 7) = cpol(3, 7)
309  cpol(7, 7) = cpol(2, 7)
310  cpol(8, 7) = cpol(1, 7)
311 
312  cpol(6, 8) = cpol(4, 8)
313  cpol(7, 8) = cpol(3, 8)
314  cpol(8, 8) = cpol(2, 8)
315  cpol(9, 8) = cpol(1, 8)
316 
317  !Number of integration points : 2*itype_scf*n_points
318  n_scf = 2*itype_scf*n_points
319  !Allocations
320  ALLOCATE (x_scf(0:n_scf))
321  ALLOCATE (y_scf(0:n_scf))
322 
323  !Build the scaling function
324  CALL scaling_function(itype_scf, n_scf, n_range, x_scf, y_scf)
325  !Step grid for the integration
326  dx = real(n_range, kind=dp)/real(n_scf, kind=dp)
327  !Extend the range (no more calculations because fill in by 0._dp)
328  n_cell = m3
329  n_range = max(n_cell, n_range)
330 
331  !Allocations
332  ncache = ncache_optimal
333  !the HalFFT must be performed only in the third dimension,
334  !and nker3=n3/2+1, hence
335  IF (ncache <= (nker3 - 1)*4) ncache = nker3 - 1*4
336 
337  !enlarge the second dimension of the kernel to be compatible with nproc
338  nact2 = nker2
339  enlarge_ydim: DO
340  IF (nproc*(nact2/nproc) /= nact2) THEN
341  nact2 = nact2 + 1
342  ELSE
343  EXIT enlarge_ydim
344  END IF
345  END DO enlarge_ydim
346 
347  !array for the MPI procedure
348  ALLOCATE (kernel(nker1, nact2/nproc, nker3))
349  ALLOCATE (kernel_mpi(nker1, nact2/nproc, nker3/nproc, nproc))
350  ALLOCATE (kernel_scf(n_range))
351  ALLOCATE (halfft_cache(2, ncache/4, 2))
352  ALLOCATE (cossinarr(2, n3/2 - 1))
353  ALLOCATE (btrig(2, ctrig_length))
354  ALLOCATE (after(7))
355  ALLOCATE (now(7))
356  ALLOCATE (before(7))
357 
358  !constants
359  pi = 4._dp*atan(1._dp)
360 
361  !arrays for the halFFT
362  CALL ctrig(n3/2, btrig, after, before, now, 1, ic)
363 
364  !build the phases for the HalFFT reconstruction
365  pion = 2._dp*pi/real(n3, kind=dp)
366  DO i3 = 2, n3/2
367  x = real(i3 - 1, kind=dp)*pion
368  cossinarr(1, i3 - 1) = cos(x)
369  cossinarr(2, i3 - 1) = -sin(x)
370  END DO
371 
372  ! satisfy valgrind, init arrays to large value, even if the offending bit is (likely?) padding
373  kernel = huge(0._dp)
374  kernel_mpi = huge(0._dp)
375 
376  !calculate the limits of the FFT calculations
377  !that can be performed in a row remaining inside the cache
378  num_of_mus = ncache/(2*n3)
379 
380  diff = 0._dp
381  !order of the polynomial to be used for integration (must be a power of two)
382  ipolyord = 8 !this part should be incorporated inside the numerical integration
383  !here we have to choice the piece of the x-y grid to cover
384 
385  !let us now calculate the fraction of mu that will be considered
386  j2st = iproc*(nact2/nproc)
387  j2nd = min((iproc + 1)*(nact2/nproc), n2/2 + 1)
388 
389  DO ind2 = (n1/2 + 1)*j2st + 1, (n1/2 + 1)*j2nd, num_of_mus
390  istart = ind2
391  iend = min(ind2 + (num_of_mus - 1), (n1/2 + 1)*j2nd)
392  nfft = iend - istart + 1
393  shift = 0
394 
395  !initialization of the interesting part of the cache array
396  halfft_cache(:, :, :) = 0._dp
397 
398  IF (istart == 1) THEN
399  !i2=1
400  shift = 1
401 
402  CALL calculates_green_opt_muzero(n_range, n_scf, ipolyord, x_scf, y_scf, &
403  cpol(1, ipolyord), dx, kernel_scf)
404 
405  !copy of the first zero value
406  halfft_cache(1, 1, 1) = 0._dp
407 
408  DO i3 = 1, m3
409 
410  value = 0.5_dp*h3*kernel_scf(i3)
411  !index in where to copy the value of the kernel
412  CALL indices(ireim, num_of_mus, n3/2 + i3, 1, ind1)
413  !index in where to copy the symmetric value
414  CALL indices(jreim, num_of_mus, n3/2 + 2 - i3, 1, jnd1)
415  halfft_cache(ireim, ind1, 1) = value
416  halfft_cache(jreim, jnd1, 1) = value
417 
418  END DO
419 
420  END IF
421 
422  loopimpulses: DO imu = istart + shift, iend
423 
424  !here there is the value of mu associated to hgrid
425  !note that we have multiplicated mu for hgrid to be comparable
426  !with mu0ref
427 
428  !calculate the proper value of mu taking into account the periodic dimensions
429  !corresponding value of i1 and i2
430  i1 = mod(imu, n1/2 + 1)
431  IF (i1 == 0) i1 = n1/2 + 1
432  i2 = (imu - i1)/(n1/2 + 1) + 1
433  ponx = real(i1 - 1, kind=dp)/real(n1, kind=dp)
434  pony = real(i2 - 1, kind=dp)/real(n2, kind=dp)
435 
436  mu1 = 2._dp*pi*sqrt((ponx/h1)**2 + (pony/h2)**2)*h3
437 
438  CALL calculates_green_opt(n_range, n_scf, itype_scf, ipolyord, x_scf, y_scf, &
439  cpol(1, ipolyord), mu1, dx, kernel_scf)
440 
441  !readjust the coefficient and define the final kernel
442 
443  !copy of the first zero value
444  halfft_cache(1, imu - istart + 1, 1) = 0._dp
445  DO i3 = 1, m3
446  value = -0.5_dp*h3/mu1*kernel_scf(i3)
447  !write(80,*)mu1,i3,kernel_scf(i03)
448  !index in where to copy the value of the kernel
449  CALL indices(ireim, num_of_mus, n3/2 + i3, imu - istart + 1, ind1)
450  !index in where to copy the symmetric value
451  CALL indices(jreim, num_of_mus, n3/2 + 2 - i3, imu - istart + 1, jnd1)
452  halfft_cache(ireim, ind1, 1) = value
453  halfft_cache(jreim, jnd1, 1) = value
454  END DO
455 
456  END DO loopimpulses
457 
458  !now perform the FFT of the array in cache
459  inzee = 1
460  DO i = 1, ic
461  CALL fftstp(num_of_mus, nfft, n3/2, num_of_mus, n3/2, &
462  halfft_cache(1, 1, inzee), halfft_cache(1, 1, 3 - inzee), &
463  btrig, after(i), now(i), before(i), 1)
464  inzee = 3 - inzee
465  END DO
466  !assign the values of the FFT array
467  !and compare with the good results
468  DO imu = istart, iend
469 
470  !corresponding value of i1 and i2
471  i1 = mod(imu, n1/2 + 1)
472  IF (i1 == 0) i1 = n1/2 + 1
473  i2 = (imu - i1)/(n1/2 + 1) + 1
474 
475  j2 = i2 - j2st
476 
477  a = halfft_cache(1, imu - istart + 1, inzee)
478  b = halfft_cache(2, imu - istart + 1, inzee)
479  kernel(i1, j2, 1) = a + b
480  kernel(i1, j2, n3/2 + 1) = a - b
481 
482  DO i3 = 2, n3/2
483  ind1 = imu - istart + 1 + num_of_mus*(i3 - 1)
484  jnd1 = imu - istart + 1 + num_of_mus*(n3/2 + 2 - i3 - 1)
485  cp = cossinarr(1, i3 - 1)
486  sp = cossinarr(2, i3 - 1)
487  a = halfft_cache(1, ind1, inzee)
488  b = halfft_cache(2, ind1, inzee)
489  c = halfft_cache(1, jnd1, inzee)
490  d = halfft_cache(2, jnd1, inzee)
491  fer = .5_dp*(a + c)
492  fei = .5_dp*(b - d)
493  for = .5_dp*(a - c)
494  foi = .5_dp*(b + d)
495  fr = fer + cp*foi - sp*for
496  kernel(i1, j2, i3) = fr
497  END DO
498  END DO
499 
500  END DO
501 
502  !give to each processor a slice of the third dimension
503  IF (nproc > 1) THEN
504  CALL mpi_group%alltoall(kernel, &!nker1*(nact2/nproc)*(nker3/nproc), &
505  kernel_mpi, nker1*(nact2/nproc)*(nker3/nproc))
506 
507  DO jp2 = 1, nproc
508  DO i3 = 1, nker3/nproc
509  DO i2 = 1, nact2/nproc
510  j2 = i2 + (jp2 - 1)*(nact2/nproc)
511  IF (j2 <= nker2) THEN
512  DO i1 = 1, nker1
513  karray(i1, j2, i3) = &
514  kernel_mpi(i1, i2, i3, jp2)
515  END DO
516  END IF
517  END DO
518  END DO
519  END DO
520 
521  ELSE
522  karray(1:nker1, 1:nker2, 1:nker3) = kernel(1:nker1, 1:nker2, 1:nker3)
523  END IF
524 
525  !De-allocations
526  DEALLOCATE (kernel)
527  DEALLOCATE (kernel_mpi)
528  DEALLOCATE (btrig)
529  DEALLOCATE (after)
530  DEALLOCATE (now)
531  DEALLOCATE (before)
532  DEALLOCATE (halfft_cache)
533  DEALLOCATE (kernel_scf)
534  DEALLOCATE (x_scf)
535  DEALLOCATE (y_scf)
536 
537  END SUBROUTINE surfaces_kernel
538 
539 ! **************************************************************************************************
540 !> \brief ...
541 !> \param n ...
542 !> \param n_scf ...
543 !> \param itype_scf ...
544 !> \param intorder ...
545 !> \param xval ...
546 !> \param yval ...
547 !> \param c ...
548 !> \param mu ...
549 !> \param hres ...
550 !> \param g_mu ...
551 ! **************************************************************************************************
552  SUBROUTINE calculates_green_opt(n, n_scf, itype_scf, intorder, xval, yval, c, mu, hres, g_mu)
553  INTEGER, INTENT(in) :: n, n_scf, itype_scf, intorder
554  REAL(kind=dp), DIMENSION(0:n_scf), INTENT(in) :: xval, yval
555  REAL(kind=dp), DIMENSION(intorder+1), INTENT(in) :: c
556  REAL(kind=dp), INTENT(in) :: mu, hres
557  REAL(kind=dp), DIMENSION(n), INTENT(out) :: g_mu
558 
559  REAL(kind=dp), PARAMETER :: mu_max = 0.2_dp
560 
561  INTEGER :: i, iend, ikern, ivalue, izero, n_iter, &
562  nrec
563  REAL(kind=dp) :: f, filter, fl, fr, gleft, gltmp, gright, &
564  grtmp, mu0, ratio, x, x0, x1
565  REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: green, green1
566 
567  g_mu = 0.0_dp
568  !We calculate the number of iterations to go from mu0 to mu0_ref
569  IF (mu <= mu_max) THEN
570  n_iter = 0
571  mu0 = mu
572  ELSE
573  n_iter = 1
574  loop_iter: DO
575  ratio = real(2**n_iter, kind=dp)
576  mu0 = mu/ratio
577  IF (mu0 <= mu_max) THEN
578  EXIT loop_iter
579  END IF
580  n_iter = n_iter + 1
581  END DO loop_iter
582  END IF
583 
584  !dimension needed for the correct calculation of the recursion
585  nrec = 2**n_iter*n
586 
587  ALLOCATE (green(-nrec:nrec))
588 
589  !initialization of the branching value
590  ikern = 0
591  izero = 0
592  initialization: DO
593  IF (xval(izero) >= real(ikern, kind=dp) .OR. izero == n_scf) EXIT initialization
594  izero = izero + 1
595  END DO initialization
596  green = 0._dp
597  !now perform the interpolation in right direction
598  ivalue = izero
599  gright = 0._dp
600  loop_right: DO
601  IF (ivalue >= n_scf - intorder - 1) EXIT loop_right
602  DO i = 1, intorder + 1
603  x = xval(ivalue) - real(ikern, kind=dp)
604  f = yval(ivalue)*exp(-mu0*x)
605  filter = intorder*c(i)
606  gright = gright + filter*f
607  ivalue = ivalue + 1
608  END DO
609  ivalue = ivalue - 1
610  END DO loop_right
611  iend = n_scf - ivalue
612  DO i = 1, iend
613  x = xval(ivalue) - real(ikern, kind=dp)
614  f = yval(ivalue)*exp(-mu0*x)
615  filter = intorder*c(i)
616  gright = gright + filter*f
617  ivalue = ivalue + 1
618  END DO
619  gright = hres*gright
620 
621  !the scaling function is symmetric, so the same for the other direction
622  gleft = gright
623 
624  green(ikern) = gleft + gright
625 
626  !now the loop until the last value
627  DO ikern = 1, nrec
628  gltmp = 0._dp
629  grtmp = 0._dp
630  ivalue = izero
631  x0 = xval(izero)
632  loop_integration: DO
633  IF (izero == n_scf) EXIT loop_integration
634  DO i = 1, intorder + 1
635  x = xval(ivalue)
636  fl = yval(ivalue)*exp(mu0*x)
637  fr = yval(ivalue)*exp(-mu0*x)
638  filter = intorder*c(i)
639  gltmp = gltmp + filter*fl
640  grtmp = grtmp + filter*fr
641  ivalue = ivalue + 1
642  IF (xval(izero) >= real(ikern, kind=dp) .OR. izero == n_scf) THEN
643  x1 = xval(izero)
644  EXIT loop_integration
645  END IF
646  izero = izero + 1
647  END DO
648  ivalue = ivalue - 1
649  izero = izero - 1
650  END DO loop_integration
651  gleft = exp(-mu0)*(gleft + hres*exp(-mu0*real(ikern - 1, kind=dp))*gltmp)
652  IF (izero == n_scf) THEN
653  gright = 0._dp
654  ELSE
655  gright = exp(mu0)*(gright - hres*exp(mu0*real(ikern - 1, kind=dp))*grtmp)
656  END IF
657  green(ikern) = gleft + gright
658  green(-ikern) = gleft + gright
659  IF (abs(green(ikern)) <= 1.e-20_dp) THEN
660  nrec = ikern
661  EXIT
662  END IF
663  !print *,ikern,izero,n_scf,gltmp,grtmp,gleft,gright,x0,x1,green(ikern)
664  END DO
665  !now we must calculate the recursion
666  ALLOCATE (green1(-nrec:nrec))
667 
668  !Start the iteration to go from mu0 to mu
669  CALL scf_recursion(itype_scf, n_iter, nrec, green(-nrec), green1(-nrec))
670 
671  DO i = 1, min(n, nrec)
672  g_mu(i) = green(i - 1)
673  END DO
674  DO i = min(n, nrec) + 1, n
675  g_mu(i) = 0._dp
676  END DO
677 
678  DEALLOCATE (green, green1)
679 
680  END SUBROUTINE calculates_green_opt
681 
682 ! **************************************************************************************************
683 !> \brief ...
684 !> \param n ...
685 !> \param n_scf ...
686 !> \param intorder ...
687 !> \param xval ...
688 !> \param yval ...
689 !> \param c ...
690 !> \param hres ...
691 !> \param green ...
692 ! **************************************************************************************************
693  SUBROUTINE calculates_green_opt_muzero(n, n_scf, intorder, xval, yval, c, hres, green)
694  INTEGER, INTENT(in) :: n, n_scf, intorder
695  REAL(kind=dp), DIMENSION(0:n_scf), INTENT(in) :: xval, yval
696  REAL(kind=dp), DIMENSION(intorder+1), INTENT(in) :: c
697  REAL(kind=dp), INTENT(in) :: hres
698  REAL(kind=dp), DIMENSION(n), INTENT(out) :: green
699 
700  INTEGER :: i, iend, ikern, ivalue, izero
701  REAL(kind=dp) :: c0, c1, filter, gl0, gl1, gr0, gr1, x, y
702 
703 !initialization of the branching value
704 
705  ikern = 0
706  izero = 0
707  initialization: DO
708  IF (xval(izero) >= real(ikern, kind=dp) .OR. izero == n_scf) EXIT initialization
709  izero = izero + 1
710  END DO initialization
711  green = 0._dp
712  !first case, ikern=0
713  !now perform the interpolation in right direction
714  ivalue = izero
715  gr1 = 0._dp
716  loop_right: DO
717  IF (ivalue >= n_scf - intorder - 1) EXIT loop_right
718  DO i = 1, intorder + 1
719  x = xval(ivalue)
720  y = yval(ivalue)
721  filter = intorder*c(i)
722  gr1 = gr1 + filter*x*y
723  ivalue = ivalue + 1
724  END DO
725  ivalue = ivalue - 1
726  END DO loop_right
727  iend = n_scf - ivalue
728  DO i = 1, iend
729  x = xval(ivalue)
730  y = yval(ivalue)
731  filter = intorder*c(i)
732  gr1 = gr1 + filter*x*y
733  ivalue = ivalue + 1
734  END DO
735  gr1 = hres*gr1
736  !the scaling function is symmetric
737  gl1 = -gr1
738  gl0 = 0.5_dp
739  gr0 = 0.5_dp
740 
741  green(1) = 2._dp*gr1
742 
743  !now the loop until the last value
744  DO ikern = 1, n - 1
745  c0 = 0._dp
746  c1 = 0._dp
747  ivalue = izero
748  loop_integration: DO
749  IF (izero == n_scf) EXIT loop_integration
750  DO i = 1, intorder + 1
751  x = xval(ivalue)
752  y = yval(ivalue)
753  filter = intorder*c(i)
754  c0 = c0 + filter*y
755  c1 = c1 + filter*y*x
756  ivalue = ivalue + 1
757  IF (xval(izero) >= real(ikern, kind=dp) .OR. izero == n_scf) THEN
758  EXIT loop_integration
759  END IF
760  izero = izero + 1
761  END DO
762  ivalue = ivalue - 1
763  izero = izero - 1
764  END DO loop_integration
765  c0 = hres*c0
766  c1 = hres*c1
767 
768  gl0 = gl0 + c0
769  gl1 = gl1 + c1
770  gr0 = gr0 - c0
771  gr1 = gr1 - c1
772  !general case
773  green(ikern + 1) = real(ikern, kind=dp)*(gl0 - gr0) + gr1 - gl1
774  !print *,ikern,izero,n_scf,gltmp,grtmp,gleft,gright,x0,x1,green(ikern)
775  END DO
776 
777  END SUBROUTINE calculates_green_opt_muzero
778 
779 ! **************************************************************************************************
780 !> \brief ...
781 !> \param var_realimag ...
782 !> \param nelem ...
783 !> \param intrn ...
784 !> \param extrn ...
785 !> \param index ...
786 ! **************************************************************************************************
787  SUBROUTINE indices(var_realimag, nelem, intrn, extrn, index)
788 
789  INTEGER, INTENT(out) :: var_realimag
790  INTEGER, INTENT(in) :: nelem, intrn, extrn
791  INTEGER, INTENT(out) :: index
792 
793  INTEGER :: i
794 
795 !real or imaginary part
796 
797  var_realimag = 2 - mod(intrn, 2)
798 !actual index over half the length
799 
800  i = (intrn + 1)/2
801  !check
802  IF (2*(i - 1) + var_realimag /= intrn) THEN
803  print *, 'error, index=', intrn, 'var_realimag=', var_realimag, 'i=', i
804  END IF
805  !complete index to be assigned
806  index = extrn + nelem*(i - 1)
807 
808  END SUBROUTINE indices
809 
810 ! **************************************************************************************************
811 !> \brief Build the kernel of a gaussian function
812 !> for interpolating scaling functions.
813 !> Do the parallel HalFFT of the symmetrized function and stores into
814 !> memory only 1/8 of the grid divided by the number of processes nproc
815 !>
816 !> Build the kernel (karray) of a gaussian function
817 !> for interpolating scaling functions
818 !> $$ K(j) = \sum_k \omega_k \int \int \phi(x) g_k(x'-x) \delta(x'- j) dx dx' $$
819 !> \param n01 Mesh dimensions of the density
820 !> \param n02 Mesh dimensions of the density
821 !> \param n03 Mesh dimensions of the density
822 !> \param nfft1 Dimensions of the FFT grid (HalFFT in the third direction)
823 !> \param nfft2 Dimensions of the FFT grid (HalFFT in the third direction)
824 !> \param nfft3 Dimensions of the FFT grid (HalFFT in the third direction)
825 !> \param n1k Dimensions of the kernel FFT
826 !> \param n2k Dimensions of the kernel FFT
827 !> \param n3k Dimensions of the kernel FFT
828 !> \param hgrid Mesh step
829 !> \param itype_scf Order of the scaling function (8,14,16)
830 !> \param iproc ...
831 !> \param nproc ...
832 !> \param karray ...
833 !> \param mpi_group ...
834 !> \date February 2006
835 !> \author T. Deutsch, L. Genovese
836 ! **************************************************************************************************
837  SUBROUTINE free_kernel(n01, n02, n03, nfft1, nfft2, nfft3, n1k, n2k, n3k, &
838  hgrid, itype_scf, iproc, nproc, karray, mpi_group)
839 
840  INTEGER, INTENT(in) :: n01, n02, n03, nfft1, nfft2, nfft3, n1k, &
841  n2k, n3k
842  REAL(kind=dp), INTENT(in) :: hgrid
843  INTEGER, INTENT(in) :: itype_scf, iproc, nproc
844  REAL(kind=dp), DIMENSION(n1k, n2k, n3k/nproc), &
845  INTENT(out) :: karray
846  TYPE(mp_comm_type), INTENT(in) :: mpi_group
847 
848  INTEGER, PARAMETER :: n_gauss = 89, n_points = 2**6
849  REAL(kind=dp), PARAMETER :: p0_ref = 1._dp
850 
851  INTEGER :: i, i01, i02, i03, i1, i2, i3, i_gauss, &
852  i_kern, iend, istart, istart1, n1h, &
853  n2h, n3h, n_cell, n_iter, n_range, &
854  n_scf, nker1, nker2, nker3
855  REAL(kind=dp) :: a1, a2, a3, a_range, absci, acc_gauss, &
856  dr_gauss, dx, factor, factor2, kern, &
857  p0_cell, p0gauss, pgauss, ur_gauss
858  REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: kern_1_scf, kernel_scf, x_scf, y_scf
859  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: kp
860  REAL(kind=dp), DIMENSION(n_gauss) :: p_gauss, w_gauss
861 
862 !Do not touch !!!!
863 !Better if higher (1024 points are enough 10^{-14}: 2*itype_scf*n_points)
864 !Better p_gauss for calculation
865 !(the support of the exponential should be inside [-n_range/2,n_range/2])
866 !Number of integration points : 2*itype_scf*n_points
867 
868  n_scf = 2*itype_scf*n_points
869  !Set karray
870  karray = 0.0_dp
871  !here we must set the dimensions for the fft part, starting from the nfft
872  !remember that actually nfft2 is associated to n03 and viceversa
873 
874  !dimensions that define the center of symmetry
875  n1h = nfft1/2
876  n2h = nfft2/2
877  n3h = nfft3/2
878 
879  !Auxiliary dimensions only for building the FFT part
880  nker1 = nfft1
881  nker2 = nfft2
882  nker3 = nfft3/2 + 1
883 
884  !adjusting the last two dimensions to be multiples of nproc
885  DO
886  IF (modulo(nker2, nproc) == 0) EXIT
887  nker2 = nker2 + 1
888  END DO
889  DO
890  IF (modulo(nker3, nproc) == 0) EXIT
891  nker3 = nker3 + 1
892  END DO
893 
894  !this will be the array of the kernel in the real space
895  ALLOCATE (kp(n1h + 1, n3h + 1, nker2/nproc))
896 
897  !defining proper extremes for the calculation of the
898  !local part of the kernel
899 
900  istart = iproc*nker2/nproc + 1
901  iend = min((iproc + 1)*nker2/nproc, n2h + n03)
902 
903  istart1 = istart
904  IF (iproc .EQ. 0) istart1 = n2h - n03 + 2
905 
906  !Allocations
907  ALLOCATE (x_scf(0:n_scf))
908  ALLOCATE (y_scf(0:n_scf))
909 
910  !Build the scaling function
911  CALL scaling_function(itype_scf, n_scf, n_range, x_scf, y_scf)
912  !Step grid for the integration
913  dx = real(n_range, kind=dp)/real(n_scf, kind=dp)
914  !Extend the range (no more calculations because fill in by 0._dp)
915  n_cell = max(n01, n02, n03)
916  n_range = max(n_cell, n_range)
917 
918  !Allocations
919  ALLOCATE (kernel_scf(-n_range:n_range))
920  ALLOCATE (kern_1_scf(-n_range:n_range))
921 
922  !Lengthes of the box (use FFT dimension)
923  a1 = hgrid*real(n01, kind=dp)
924  a2 = hgrid*real(n02, kind=dp)
925  a3 = hgrid*real(n03, kind=dp)
926 
927  x_scf(:) = hgrid*x_scf(:)
928  y_scf(:) = 1._dp/hgrid*y_scf(:)
929  dx = hgrid*dx
930  !To have a correct integration
931  p0_cell = p0_ref/(hgrid*hgrid)
932 
933  !Initialization of the gaussian (Beylkin)
934  CALL gequad(p_gauss, w_gauss, ur_gauss, dr_gauss, acc_gauss)
935  !In order to have a range from a_range=sqrt(a1*a1+a2*a2+a3*a3)
936  !(biggest length in the cube)
937  !We divide the p_gauss by a_range**2 and a_gauss by a_range
938  a_range = sqrt(a1*a1 + a2*a2 + a3*a3)
939  factor = 1._dp/a_range
940  !factor2 = factor*factor
941  factor2 = 1._dp/(a1*a1 + a2*a2 + a3*a3)
942  DO i_gauss = 1, n_gauss
943  p_gauss(i_gauss) = factor2*p_gauss(i_gauss)
944  END DO
945  DO i_gauss = 1, n_gauss
946  w_gauss(i_gauss) = factor*w_gauss(i_gauss)
947  END DO
948 
949  kp(:, :, :) = 0._dp
950  !Use in this order (better for accuracy).
951  loop_gauss: DO i_gauss = n_gauss, 1, -1
952  !Gaussian
953  pgauss = p_gauss(i_gauss)
954 
955  !We calculate the number of iterations to go from pgauss to p0_ref
956  n_iter = nint((log(pgauss) - log(p0_cell))/log(4._dp))
957  IF (n_iter <= 0) THEN
958  n_iter = 0
959  p0gauss = pgauss
960  ELSE
961  p0gauss = pgauss/4._dp**n_iter
962  END IF
963 
964  !Stupid integration
965  !Do the integration with the exponential centered in i_kern
966  kernel_scf(:) = 0._dp
967  DO i_kern = 0, n_range
968  kern = 0._dp
969  DO i = 0, n_scf
970  absci = x_scf(i) - real(i_kern, kind=dp)*hgrid
971  absci = absci*absci
972  kern = kern + y_scf(i)*exp(-p0gauss*absci)*dx
973  END DO
974  kernel_scf(i_kern) = kern
975  kernel_scf(-i_kern) = kern
976  IF (abs(kern) < 1.e-18_dp) THEN
977  !Too small not useful to calculate
978  EXIT
979  END IF
980  END DO
981 
982  !Start the iteration to go from p0gauss to pgauss
983  CALL scf_recursion(itype_scf, n_iter, n_range, kernel_scf, kern_1_scf)
984 
985  !Add to the kernel (only the local part)
986 
987  DO i3 = istart1, iend
988  i03 = i3 - n2h - 1
989  ! Crash if index out of range
990  ! Without compiler bounds checking, the calculation might run successfully but
991  ! it is also possible that the Hartree energy will blow up
992  ! This seems to happen with large MPI processor counts if the size of the
993  ! RS grid is not directly compatible with the allowed FFT dimensions in
994  ! subroutine fourier_dim (ps_wavelet_fft3d.F)
995  IF (i03 .LT. -n_range .OR. i03 .GT. n_range) THEN
996  CALL cp_abort(__location__, "Index out of range in wavelet solver. "// &
997  "Try decreasing the number of MPI processors, or adjust the PW_CUTOFF or cell size "// &
998  "so that 2*(number of RS grid points) matches the allowed FFT dimensions "// &
999  "(see ps_wavelet_fft3d.F) exactly.")
1000  END IF
1001  DO i2 = 1, n02
1002  i02 = i2 - 1
1003  DO i1 = 1, n01
1004  i01 = i1 - 1
1005  kp(i1, i2, i3 - istart + 1) = kp(i1, i2, i3 - istart + 1) + w_gauss(i_gauss)* &
1006  kernel_scf(i01)*kernel_scf(i02)*kernel_scf(i03)
1007  END DO
1008  END DO
1009  END DO
1010 
1011  END DO loop_gauss
1012 
1013  !De-allocations
1014  DEALLOCATE (kernel_scf)
1015  DEALLOCATE (kern_1_scf)
1016  DEALLOCATE (x_scf)
1017  DEALLOCATE (y_scf)
1018 
1019 !!!!END KERNEL CONSTRUCTION
1020 
1021 !!$ if(iproc .eq. 0) print *,"Do a 3D PHalFFT for the kernel"
1022 
1023  CALL kernelfft(nfft1, nfft2, nfft3, nker1, nker2, nker3, n1k, n2k, n3k, nproc, iproc, &
1024  kp, karray, mpi_group)
1025 
1026  !De-allocations
1027  DEALLOCATE (kp)
1028 
1029  END SUBROUTINE free_kernel
1030 
1031 ! **************************************************************************************************
1032 !> \brief ...
1033 !> \param n1 ...
1034 !> \param n3 ...
1035 !> \param lot ...
1036 !> \param nfft ...
1037 !> \param i1 ...
1038 !> \param zf ...
1039 !> \param zw ...
1040 ! **************************************************************************************************
1041  SUBROUTINE inserthalf(n1, n3, lot, nfft, i1, zf, zw)
1042  INTEGER, INTENT(in) :: n1, n3, lot, nfft, i1
1043  REAL(kind=dp), DIMENSION(n1/2+1, n3/2+1), &
1044  INTENT(in) :: zf
1045  REAL(kind=dp), DIMENSION(2, lot, n3/2), &
1046  INTENT(out) :: zw
1047 
1048  INTEGER :: i01, i03i, i03r, i3, l1, l3
1049 
1050  zw = 0.0_dp
1051  i3 = 0
1052  DO l3 = 1, n3, 2
1053  i3 = i3 + 1
1054  i03r = abs(l3 - n3/2 - 1) + 1
1055  i03i = abs(l3 - n3/2) + 1
1056  DO l1 = 1, nfft
1057  i01 = abs(l1 - 1 + i1 - n1/2 - 1) + 1
1058  zw(1, l1, i3) = zf(i01, i03r)
1059  zw(2, l1, i3) = zf(i01, i03i)
1060  END DO
1061  END DO
1062 
1063  END SUBROUTINE inserthalf
1064 
1065 ! **************************************************************************************************
1066 !> \brief (Based on suitable modifications of S.Goedecker routines)
1067 !> Calculates the FFT of the distributed kernel
1068 !> \param n1 logical dimension of the transform.
1069 !> \param n2 logical dimension of the transform.
1070 !> \param n3 logical dimension of the transform.
1071 !> \param nd1 Dimensions of work arrays
1072 !> \param nd2 Dimensions of work arrays
1073 !> \param nd3 Dimensions of work arrays
1074 !> \param nk1 ...
1075 !> \param nk2 ...
1076 !> \param nk3 ...
1077 !> \param nproc number of processors used as returned by MPI_COMM_SIZE
1078 !> \param iproc [0:nproc-1] number of processor as returned by MPI_COMM_RANK
1079 !> \param zf Real kernel (input)
1080 !> zf(i1,i2,i3)
1081 !> \param zr Distributed Kernel FFT
1082 !> zr(2,i1,i2,i3)
1083 !> \param mpi_group ...
1084 !> \date February 2006
1085 !> \par Restrictions
1086 !> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
1087 !> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
1088 !> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
1089 !> This file is distributed under the terms of the
1090 !> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
1091 !> \author S. Goedecker, L. Genovese
1092 !> \note As transform lengths
1093 !> most products of the prime factors 2,3,5 are allowed.
1094 !> The detailed table with allowed transform lengths can
1095 !> be found in subroutine CTRIG
1096 ! **************************************************************************************************
1097  SUBROUTINE kernelfft(n1, n2, n3, nd1, nd2, nd3, nk1, nk2, nk3, nproc, iproc, zf, zr, mpi_group)
1098 
1099  INTEGER, INTENT(in) :: n1, n2, n3, nd1, nd2, nd3, nk1, nk2, &
1100  nk3, nproc, iproc
1101  REAL(kind=dp), &
1102  DIMENSION(n1/2+1, n3/2+1, nd2/nproc), &
1103  INTENT(in) :: zf
1104  REAL(kind=dp), DIMENSION(nk1, nk2, nk3/nproc), &
1105  INTENT(inout) :: zr
1106  TYPE(mp_comm_type), INTENT(in) :: mpi_group
1107 
1108  INTEGER, PARAMETER :: ncache_optimal = 8*1024
1109 
1110  INTEGER :: i, i1, i3, ic1, ic2, ic3, inzee, j, j2, &
1111  j2st, j3, jp2st, lot, lzt, ma, mb, &
1112  ncache, nfft
1113  INTEGER, ALLOCATABLE, DIMENSION(:) :: after1, after2, after3, before1, &
1114  before2, before3, now1, now2, now3
1115  REAL(kind=dp) :: twopion
1116  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: cosinarr, trig1, trig2, trig3
1117  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: zt, zw
1118  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :, :) :: zmpi2
1119  REAL(kind=dp), ALLOCATABLE, &
1120  DIMENSION(:, :, :, :, :) :: zmpi1
1121 
1122 ! include "perfdata.inc"
1123 !work arrays for transpositions
1124 !work arrays for MPI
1125 !cache work array
1126 !FFT work arrays
1127 !Body
1128 !check input
1129 
1130  cpassert(nd1 .GE. n1)
1131  cpassert(nd2 .GE. n2)
1132  cpassert(nd3 .GE. n3/2 + 1)
1133  cpassert(mod(nd3, nproc) .EQ. 0)
1134  cpassert(mod(nd2, nproc) .EQ. 0)
1135  mark_used(nd1)
1136 
1137  !defining work arrays dimensions
1138  ncache = ncache_optimal
1139  IF (ncache <= max(n1, n2, n3/2)*4) ncache = max(n1, n2, n3/2)*4
1140  lzt = n2
1141  IF (mod(n2, 2) .EQ. 0) lzt = lzt + 1
1142  IF (mod(n2, 4) .EQ. 0) lzt = lzt + 1
1143 
1144  !Allocations
1145  ALLOCATE (trig1(2, ctrig_length))
1146  ALLOCATE (after1(7))
1147  ALLOCATE (now1(7))
1148  ALLOCATE (before1(7))
1149  ALLOCATE (trig2(2, ctrig_length))
1150  ALLOCATE (after2(7))
1151  ALLOCATE (now2(7))
1152  ALLOCATE (before2(7))
1153  ALLOCATE (trig3(2, ctrig_length))
1154  ALLOCATE (after3(7))
1155  ALLOCATE (now3(7))
1156  ALLOCATE (before3(7))
1157  ALLOCATE (zw(2, ncache/4, 2))
1158  ALLOCATE (zt(2, lzt, n1))
1159  ALLOCATE (zmpi2(2, n1, nd2/nproc, nd3))
1160  ALLOCATE (cosinarr(2, n3/2))
1161  IF (nproc .GT. 1) THEN
1162  ALLOCATE (zmpi1(2, n1, nd2/nproc, nd3/nproc, nproc))
1163  zmpi1 = 0.0_dp
1164  END IF
1165 
1166  zmpi2 = 0.0_dp
1167  !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
1168  CALL ctrig(n3/2, trig3, after3, before3, now3, 1, ic3)
1169  CALL ctrig(n1, trig1, after1, before1, now1, 1, ic1)
1170  CALL ctrig(n2, trig2, after2, before2, now2, 1, ic2)
1171 
1172  !Calculating array of phases for HalFFT decoding
1173  twopion = 8._dp*atan(1._dp)/real(n3, kind=dp)
1174  DO i3 = 1, n3/2
1175  cosinarr(1, i3) = cos(twopion*(i3 - 1))
1176  cosinarr(2, i3) = -sin(twopion*(i3 - 1))
1177  END DO
1178 
1179  !transform along z axis
1180 
1181  lot = ncache/(2*n3)
1182  cpassert(lot .GE. 1)
1183 
1184  DO j2 = 1, nd2/nproc
1185  !this condition ensures that we manage only the interesting part for the FFT
1186  IF (iproc*(nd2/nproc) + j2 .LE. n2) THEN
1187  DO i1 = 1, n1, lot
1188  ma = i1
1189  mb = min(i1 + (lot - 1), n1)
1190  nfft = mb - ma + 1
1191 
1192  !inserting real data into complex array of half length
1193  !input: I1,I3,J2,(Jp2)
1194 
1195  CALL inserthalf(n1, n3, lot, nfft, i1, zf(1, 1, j2), zw(1, 1, 1))
1196 
1197  !performing FFT
1198  inzee = 1
1199  DO i = 1, ic3
1200  CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1201  trig3, after3(i), now3(i), before3(i), 1)
1202  inzee = 3 - inzee
1203  END DO
1204  !output: I1,i3,J2,(Jp2)
1205 
1206  !unpacking FFT in order to restore correct result,
1207  !while exchanging components
1208  !input: I1,i3,J2,(Jp2)
1209  CALL scramble_unpack(i1, j2, lot, nfft, n1, n3, nd2, nproc, nd3, zw(1, 1, inzee), zmpi2, cosinarr)
1210  !output: I1,J2,i3,(Jp2)
1211  END DO
1212  END IF
1213  END DO
1214 
1215  !Interprocessor data transposition
1216  !input: I1,J2,j3,jp3,(Jp2)
1217  IF (nproc .GT. 1) THEN
1218  !communication scheduling
1219  CALL mpi_group%alltoall(zmpi2, &!2*n1*(nd2/nproc)*(nd3/nproc), &
1220  zmpi1, 2*n1*(nd2/nproc)*(nd3/nproc))
1221  ! output: I1,J2,j3,Jp2,(jp3)
1222  END IF
1223 
1224  DO j3 = 1, nd3/nproc
1225  !this condition ensures that we manage only the interesting part for the FFT
1226  IF (iproc*(nd3/nproc) + j3 .LE. n3/2 + 1) THEN
1227  jp2st = 1
1228  j2st = 1
1229 
1230  !transform along x axis
1231  lot = ncache/(4*n1)
1232  cpassert(lot .GE. 1)
1233 
1234  DO j = 1, n2, lot
1235  ma = j
1236  mb = min(j + (lot - 1), n2)
1237  nfft = mb - ma + 1
1238 
1239  !reverse ordering
1240  !input: I1,J2,j3,Jp2,(jp3)
1241  IF (nproc .EQ. 1) THEN
1242  CALL mpiswitch(j3, nfft, jp2st, j2st, lot, n1, nd2, nd3, nproc, zmpi2, zw(1, 1, 1))
1243  ELSE
1244  CALL mpiswitch(j3, nfft, jp2st, j2st, lot, n1, nd2, nd3, nproc, zmpi1, zw(1, 1, 1))
1245  END IF
1246  !output: J2,Jp2,I1,j3,(jp3)
1247 
1248  !performing FFT
1249  !input: I2,I1,j3,(jp3)
1250  inzee = 1
1251  DO i = 1, ic1 - 1
1252  CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1253  trig1, after1(i), now1(i), before1(i), 1)
1254  inzee = 3 - inzee
1255  END DO
1256  !storing the last step into zt
1257  i = ic1
1258  CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
1259  trig1, after1(i), now1(i), before1(i), 1)
1260  !output: I2,i1,j3,(jp3)
1261  END DO
1262 
1263  !transform along y axis, and taking only the first half
1264  lot = ncache/(4*n2)
1265  cpassert(lot .GE. 1)
1266 
1267  DO j = 1, nk1, lot
1268  ma = j
1269  mb = min(j + (lot - 1), nk1)
1270  nfft = mb - ma + 1
1271 
1272  !reverse ordering
1273  !input: I2,i1,j3,(jp3)
1274  CALL switch(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
1275  !output: i1,I2,j3,(jp3)
1276 
1277  !performing FFT
1278  !input: i1,I2,j3,(jp3)
1279  inzee = 1
1280  DO i = 1, ic2
1281  CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1282  trig2, after2(i), now2(i), before2(i), 1)
1283  inzee = 3 - inzee
1284  END DO
1285 
1286  CALL realcopy(lot, nfft, n2, nk1, nk2, zw(1, 1, inzee), zr(j, 1, j3))
1287 
1288  END DO
1289  !output: i1,i2,j3,(jp3)
1290  END IF
1291  END DO
1292 
1293  !De-allocations
1294  DEALLOCATE (trig1)
1295  DEALLOCATE (after1)
1296  DEALLOCATE (now1)
1297  DEALLOCATE (before1)
1298  DEALLOCATE (trig2)
1299  DEALLOCATE (after2)
1300  DEALLOCATE (now2)
1301  DEALLOCATE (before2)
1302  DEALLOCATE (trig3)
1303  DEALLOCATE (after3)
1304  DEALLOCATE (now3)
1305  DEALLOCATE (before3)
1306  DEALLOCATE (zmpi2)
1307  DEALLOCATE (zw)
1308  DEALLOCATE (zt)
1309  DEALLOCATE (cosinarr)
1310  IF (nproc .GT. 1) DEALLOCATE (zmpi1)
1311 
1312  END SUBROUTINE kernelfft
1313 
1314 ! **************************************************************************************************
1315 !> \brief ...
1316 !> \param lot ...
1317 !> \param nfft ...
1318 !> \param n2 ...
1319 !> \param nk1 ...
1320 !> \param nk2 ...
1321 !> \param zin ...
1322 !> \param zout ...
1323 ! **************************************************************************************************
1324  SUBROUTINE realcopy(lot, nfft, n2, nk1, nk2, zin, zout)
1325  INTEGER, INTENT(in) :: lot, nfft, n2, nk1, nk2
1326  REAL(kind=dp), DIMENSION(2, lot, n2), INTENT(in) :: zin
1327  REAL(kind=dp), DIMENSION(nk1, nk2), INTENT(inout) :: zout
1328 
1329  INTEGER :: i, j
1330 
1331  DO i = 1, nk2
1332  DO j = 1, nfft
1333  zout(j, i) = zin(1, j, i)
1334  END DO
1335  END DO
1336 
1337  END SUBROUTINE realcopy
1338 
1339 ! **************************************************************************************************
1340 !> \brief ...
1341 !> \param nfft ...
1342 !> \param n2 ...
1343 !> \param lot ...
1344 !> \param n1 ...
1345 !> \param lzt ...
1346 !> \param zt ...
1347 !> \param zw ...
1348 ! **************************************************************************************************
1349  SUBROUTINE switch(nfft, n2, lot, n1, lzt, zt, zw)
1350  INTEGER :: nfft, n2, lot, n1, lzt
1351  REAL(kind=dp) :: zt(2, lzt, n1), zw(2, lot, n2)
1352 
1353  INTEGER :: i, j
1354 
1355  DO 200, j = 1, nfft
1356  DO 100, i = 1, n2
1357  zw(1, j, i) = zt(1, i, j)
1358  zw(2, j, i) = zt(2, i, j)
1359 100 CONTINUE
1360 200 CONTINUE
1361  RETURN
1362  END SUBROUTINE switch
1363 
1364 ! **************************************************************************************************
1365 !> \brief ...
1366 !> \param j3 ...
1367 !> \param nfft ...
1368 !> \param Jp2st ...
1369 !> \param J2st ...
1370 !> \param lot ...
1371 !> \param n1 ...
1372 !> \param nd2 ...
1373 !> \param nd3 ...
1374 !> \param nproc ...
1375 !> \param zmpi1 ...
1376 !> \param zw ...
1377 ! **************************************************************************************************
1378  SUBROUTINE mpiswitch(j3, nfft, Jp2st, J2st, lot, n1, nd2, nd3, nproc, zmpi1, zw)
1379  INTEGER :: j3, nfft, jp2st, j2st, lot, n1, nd2, &
1380  nd3, nproc
1381  REAL(kind=dp) :: zmpi1(2, n1, nd2/nproc, nd3/nproc, nproc), zw(2, lot, n1)
1382 
1383  INTEGER :: i1, j2, jp2, mfft
1384 
1385  mfft = 0
1386  DO 300, jp2 = jp2st, nproc
1387  DO 200, j2 = j2st, nd2/nproc
1388  mfft = mfft + 1
1389  IF (mfft .GT. nfft) THEN
1390  jp2st = jp2
1391  j2st = j2
1392  RETURN
1393  END IF
1394  DO 100, i1 = 1, n1
1395  zw(1, mfft, i1) = zmpi1(1, i1, j2, j3, jp2)
1396  zw(2, mfft, i1) = zmpi1(2, i1, j2, j3, jp2)
1397 100 CONTINUE
1398 200 CONTINUE
1399  j2st = 1
1400 300 CONTINUE
1401  END SUBROUTINE mpiswitch
1402 
1403 ! **************************************************************************************************
1404 !> \brief ...
1405 !> \param p ...
1406 !> \param w ...
1407 !> \param urange ...
1408 !> \param drange ...
1409 !> \param acc ...
1410 ! **************************************************************************************************
1411  SUBROUTINE gequad(p, w, urange, drange, acc)
1412 !
1413  REAL(kind=dp) :: p(*), w(*), urange, drange, acc
1414 
1415 !
1416 !
1417 ! range [10^(-9),1] and accuracy ~10^(-8);
1418 !
1419 !
1420 
1421  p(1) = 4.96142640560223544e19_dp
1422  p(2) = 1.37454269147978052e19_dp
1423  p(3) = 7.58610013441204679e18_dp
1424  p(4) = 4.42040691347806996e18_dp
1425  p(5) = 2.61986077948367892e18_dp
1426  p(6) = 1.56320138155496681e18_dp
1427  p(7) = 9.35645215863028402e17_dp
1428  p(8) = 5.60962910452691703e17_dp
1429  p(9) = 3.3666225119686761e17_dp
1430  p(10) = 2.0218253197947866e17_dp
1431  p(11) = 1.21477756091902017e17_dp
1432  p(12) = 7.3012982513608503e16_dp
1433  p(13) = 4.38951893556421099e16_dp
1434  p(14) = 2.63949482512262325e16_dp
1435  p(15) = 1.58742054072786174e16_dp
1436  p(16) = 9.54806587737665531e15_dp
1437  p(17) = 5.74353712364571709e15_dp
1438  p(18) = 3.455214877389445e15_dp
1439  p(19) = 2.07871658520326804e15_dp
1440  p(20) = 1.25064667315629928e15_dp
1441  p(21) = 7.52469429541933745e14_dp
1442  p(22) = 4.5274603337253175e14_dp
1443  p(23) = 2.72414006900059548e14_dp
1444  p(24) = 1.63912168349216752e14_dp
1445  p(25) = 9.86275802590865738e13_dp
1446  p(26) = 5.93457701624974985e13_dp
1447  p(27) = 3.5709554322296296e13_dp
1448  p(28) = 2.14872890367310454e13_dp
1449  p(29) = 1.29294719957726902e13_dp
1450  p(30) = 7.78003375426361016e12_dp
1451  p(31) = 4.68148199759876704e12_dp
1452  p(32) = 2.8169955024829868e12_dp
1453  p(33) = 1.69507790481958464e12_dp
1454  p(34) = 1.01998486064607581e12_dp
1455  p(35) = 6.13759486539856459e11_dp
1456  p(36) = 3.69320183828682544e11_dp
1457  p(37) = 2.22232783898905102e11_dp
1458  p(38) = 1.33725247623668682e11_dp
1459  p(39) = 8.0467192739036288e10_dp
1460  p(40) = 4.84199582415144143e10_dp
1461  p(41) = 2.91360091170559564e10_dp
1462  p(42) = 1.75321747475309216e10_dp
1463  p(43) = 1.0549735552210995e10_dp
1464  p(44) = 6.34815321079006586e9_dp
1465  p(45) = 3.81991113733594231e9_dp
1466  p(46) = 2.29857747533101109e9_dp
1467  p(47) = 1.38313653595483694e9_dp
1468  p(48) = 8.32282908580025358e8_dp
1469  p(49) = 5.00814519374587467e8_dp
1470  p(50) = 3.01358090773319025e8_dp
1471  p(51) = 1.81337994217503535e8_dp
1472  p(52) = 1.09117589961086823e8_dp
1473  p(53) = 6.56599771718640323e7_dp
1474  p(54) = 3.95099693638497164e7_dp
1475  p(55) = 2.37745694710665991e7_dp
1476  p(56) = 1.43060135285912813e7_dp
1477  p(57) = 8.60844290313506695e6_dp
1478  p(58) = 5.18000974075383424e6_dp
1479  p(59) = 3.116998193057466e6_dp
1480  p(60) = 1.87560993870024029e6_dp
1481  p(61) = 1.12862197183979562e6_dp
1482  p(62) = 679132.441326077231_dp
1483  p(63) = 408658.421279877969_dp
1484  p(64) = 245904.473450669789_dp
1485  p(65) = 147969.568088321005_dp
1486  p(66) = 89038.612357311147_dp
1487  p(67) = 53577.7362552358895_dp
1488  p(68) = 32239.6513926914668_dp
1489  p(69) = 19399.7580852362791_dp
1490  p(70) = 11673.5323603058634_dp
1491  p(71) = 7024.38438577707758_dp
1492  p(72) = 4226.82479307685999_dp
1493  p(73) = 2543.43254175354295_dp
1494  p(74) = 1530.47486269122675_dp
1495  p(75) = 920.941785160749482_dp
1496  p(76) = 554.163803906291646_dp
1497  p(77) = 333.46029740785694_dp
1498  p(78) = 200.6550575335041_dp
1499  p(79) = 120.741366914147284_dp
1500  p(80) = 72.6544243200329916_dp
1501  p(81) = 43.7187810415471025_dp
1502  p(82) = 26.3071631447061043_dp
1503  p(83) = 15.8299486353816329_dp
1504  p(84) = 9.52493152341244004_dp
1505  p(85) = 5.72200417067776041_dp
1506  p(86) = 3.36242234070940928_dp
1507  p(87) = 1.75371394604499472_dp
1508  p(88) = 0.64705932650658966_dp
1509  p(89) = 0.072765905943708247_dp
1510  !
1511  w(1) = 47.67445484528304247e10_dp
1512  w(2) = 11.37485774750442175e9_dp
1513  w(3) = 78.64340976880190239e8_dp
1514  w(4) = 46.27335788759590498e8_dp
1515  w(5) = 24.7380464827152951e8_dp
1516  w(6) = 13.62904116438987719e8_dp
1517  w(7) = 92.79560029045882433e8_dp
1518  w(8) = 52.15931216254660251e8_dp
1519  w(9) = 31.67018011061666244e8_dp
1520  w(10) = 1.29291036801493046e8_dp
1521  w(11) = 1.00139319988015862e8_dp
1522  w(12) = 7.75892350510188341e7_dp
1523  w(13) = 6.01333567950731271e7_dp
1524  w(14) = 4.66141178654796875e7_dp
1525  w(15) = 3.61398903394911448e7_dp
1526  w(16) = 2.80225846672956389e7_dp
1527  w(17) = 2.1730509180930247e7_dp
1528  w(18) = 1.68524482625876965e7_dp
1529  w(19) = 1.30701489345870338e7_dp
1530  w(20) = 1.01371784832269282e7_dp
1531  w(21) = 7.86264116300379329e6_dp
1532  w(22) = 6.09861667912273717e6_dp
1533  w(23) = 4.73045784039455683e6_dp
1534  w(24) = 3.66928949951594161e6_dp
1535  w(25) = 2.8462050836230259e6_dp
1536  w(26) = 2.20777394798527011e6_dp
1537  w(27) = 1.71256191589205524e6_dp
1538  w(28) = 1.32843556197737076e6_dp
1539  w(29) = 1.0304731275955989e6_dp
1540  w(30) = 799345.206572271448_dp
1541  w(31) = 620059.354143595343_dp
1542  w(32) = 480986.704107449333_dp
1543  w(33) = 373107.167700228515_dp
1544  w(34) = 289424.08337412132_dp
1545  w(35) = 224510.248231581788_dp
1546  w(36) = 174155.825690028966_dp
1547  w(37) = 135095.256919654065_dp
1548  w(38) = 104795.442776800312_dp
1549  w(39) = 81291.4458222430418_dp
1550  w(40) = 63059.0493649328682_dp
1551  w(41) = 48915.9040455329689_dp
1552  w(42) = 37944.8484018048756_dp
1553  w(43) = 29434.4290473253969_dp
1554  w(44) = 22832.7622054490044_dp
1555  w(45) = 17711.743950151233_dp
1556  w(46) = 13739.287867104177_dp
1557  w(47) = 10657.7895710752585_dp
1558  w(48) = 8267.42141053961834_dp
1559  w(49) = 6413.17397520136448_dp
1560  w(50) = 4974.80402838654277_dp
1561  w(51) = 3859.03698188553047_dp
1562  w(52) = 2993.51824493299154_dp
1563  w(53) = 2322.1211966811754_dp
1564  w(54) = 1801.30750964719641_dp
1565  w(55) = 1397.30379659817038_dp
1566  w(56) = 1083.91149143250697_dp
1567  w(57) = 840.807939169209188_dp
1568  w(58) = 652.228524366749422_dp
1569  w(59) = 505.944376983506128_dp
1570  w(60) = 392.469362317941064_dp
1571  w(61) = 304.444930257324312_dp
1572  w(62) = 236.162932842453601_dp
1573  w(63) = 183.195466078603525_dp
1574  w(64) = 142.107732186551471_dp
1575  w(65) = 110.23530215723992_dp
1576  w(66) = 85.5113346705382257_dp
1577  w(67) = 66.3325469806696621_dp
1578  w(68) = 51.4552463353841373_dp
1579  w(69) = 39.9146798429449273_dp
1580  w(70) = 30.9624728409162095_dp
1581  w(71) = 24.018098812215013_dp
1582  w(72) = 18.6312338024296588_dp
1583  w(73) = 14.4525541233150501_dp
1584  w(74) = 11.2110836519105938_dp
1585  w(75) = 8.69662175848497178_dp
1586  w(76) = 6.74611236165731961_dp
1587  w(77) = 5.23307018057529994_dp
1588  w(78) = 4.05937850501539556_dp
1589  w(79) = 3.14892659076635714_dp
1590  w(80) = 2.44267408211071604_dp
1591  w(81) = 1.89482240522855261_dp
1592  w(82) = 1.46984505907050079_dp
1593  w(83) = 1.14019261330527007_dp
1594  w(84) = 0.884791217422925293_dp
1595  w(85) = 0.692686387080616483_dp
1596  w(86) = 0.585244576897023282_dp
1597  w(87) = 0.576182522545327589_dp
1598  w(88) = 0.596688817388997178_dp
1599  w(89) = 0.607879901151108771_dp
1600  !
1601  !
1602  urange = 1._dp
1603  drange = 1e-08_dp
1604  acc = 1e-08_dp
1605  !
1606  RETURN
1607  END SUBROUTINE gequad
1608 
1609  END MODULE ps_wavelet_kernel
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
Definition: grid_common.h:117
for(int lxp=0;lxp<=lp;lxp++)
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public dp
Definition: kinds.F:34
Interface to the message passing library MPI.
Creates the wavelet kernel for the wavelet based poisson solver.
subroutine, public scramble_unpack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw, zmpi2, cosinarr)
(Based on suitable modifications of S.Goedecker routines) Assign the correct planes to the work array...
integer, parameter, public ctrig_length
subroutine, public fftstp(mm, nfft, m, nn, n, zin, zout, trig, after, now, before, isign)
...
subroutine, public ctrig(n, trig, after, before, now, isign, ic)
...
Creates the wavelet kernel for the wavelet based poisson solver.
subroutine, public createkernel(geocode, n01, n02, n03, hx, hy, hz, itype_scf, iproc, nproc, kernel, mpi_group)
Allocate a pointer which corresponds to the zero-padded FFT slice needed for calculating the convolut...
Creates the wavelet kernel for the wavelet based poisson solver.
subroutine, public scf_recursion(itype, n_iter, n_range, kernel_scf, kern_1_scf)
Do iterations to go from p0gauss to pgauss order interpolating scaling function.
subroutine, public scaling_function(itype, nd, nrange, a, x)
Calculate the values of a scaling function in real uniform grid.
Performs a wavelet based solution of the Poisson equation.
subroutine, public s_fft_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, md1, md2, md3, nd1, nd2, nd3, nproc)
Calculate four sets of dimension needed for the calculation of the convolution for the surface system...
subroutine, public f_fft_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, md1, md2, md3, nd1, nd2, nd3, nproc)
Calculate four sets of dimension needed for the calculation of the zero-padded convolution.