(git:ccc2433)
ps_wavelet_base.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \brief Creates the wavelet kernel for the wavelet based poisson solver.
10 !> \author Florian Schiffmann (09.2007,fschiff)
11 ! **************************************************************************************************
13 
14  USE kinds, ONLY: dp
15  USE message_passing, ONLY: mp_comm_type
16  USE ps_wavelet_fft3d, ONLY: ctrig,&
17  ctrig_length,&
18  fftstp
19 #include "../base/base_uses.f90"
20 
21  IMPLICIT NONE
22 
23  PRIVATE
24 
25  CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'ps_wavelet_base'
26 
28 
29 CONTAINS
30 
31 ! **************************************************************************************************
32 !> \brief ...
33 !> \param n1 ...
34 !> \param n2 ...
35 !> \param n3 ...
36 !> \param nd1 ...
37 !> \param nd2 ...
38 !> \param nd3 ...
39 !> \param md1 ...
40 !> \param md2 ...
41 !> \param md3 ...
42 !> \param nproc ...
43 !> \param iproc ...
44 !> \param zf ...
45 !> \param scal ...
46 !> \param hx ...
47 !> \param hy ...
48 !> \param hz ...
49 !> \param mpi_group ...
50 ! **************************************************************************************************
51  SUBROUTINE p_poissonsolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, zf &
52  , scal, hx, hy, hz, mpi_group)
53  INTEGER, INTENT(in) :: n1, n2, n3, nd1, nd2, nd3, md1, md2, &
54  md3, nproc, iproc
55  REAL(kind=dp), DIMENSION(md1, md3, md2/nproc), &
56  INTENT(inout) :: zf
57  REAL(kind=dp), INTENT(in) :: scal, hx, hy, hz
58 
59  CLASS(mp_comm_type), INTENT(in) :: mpi_group
60 
61  INTEGER, PARAMETER :: ncache_optimal = 8*1024
62 
63  INTEGER :: i, i1, i3, ic1, ic2, ic3, inzee, j, j2, &
64  j2stb, j2stf, j3, jp2stb, jp2stf, lot, &
65  lzt, ma, mb, ncache, nfft
66  INTEGER, ALLOCATABLE, DIMENSION(:) :: after1, after2, after3, before1, &
67  before2, before3, now1, now2, now3
68  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: btrig1, btrig2, btrig3, ftrig1, ftrig2, &
69  ftrig3
70  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: zt, zw
71  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :, :) :: zmpi2
72  REAL(kind=dp), ALLOCATABLE, &
73  DIMENSION(:, :, :, :, :) :: zmpi1
74 
75  IF (nd1 .LT. n1/2 + 1) cpabort("Parallel convolution:ERROR:nd1")
76  IF (nd2 .LT. n2/2 + 1) cpabort("Parallel convolution:ERROR:nd2")
77  IF (nd3 .LT. n3/2 + 1) cpabort("Parallel convolution:ERROR:nd3")
78  IF (md1 .LT. n1) cpabort("Parallel convolution:ERROR:md1")
79  IF (md2 .LT. n2) cpabort("Parallel convolution:ERROR:md2")
80  IF (md3 .LT. n3) cpabort("Parallel convolution:ERROR:md3")
81  IF (mod(nd3, nproc) .NE. 0) cpabort("Parallel convolution:ERROR:nd3")
82  IF (mod(md2, nproc) .NE. 0) cpabort("Parallel convolution:ERROR:md2")
83 
84  !defining work arrays dimensions
85  ncache = ncache_optimal
86  IF (ncache <= max(n1, n2, n3)*4) ncache = max(n1, n2, n3)*4
87 
88  lzt = n2
89  IF (mod(n2, 2) .EQ. 0) lzt = lzt + 1
90  IF (mod(n2, 4) .EQ. 0) lzt = lzt + 1 !maybe this is useless
91 
92  !Allocations
93  ALLOCATE (btrig1(2, ctrig_length))
94  ALLOCATE (ftrig1(2, ctrig_length))
95  ALLOCATE (after1(7))
96  ALLOCATE (now1(7))
97  ALLOCATE (before1(7))
98  ALLOCATE (btrig2(2, ctrig_length))
99  ALLOCATE (ftrig2(2, ctrig_length))
100  ALLOCATE (after2(7))
101  ALLOCATE (now2(7))
102  ALLOCATE (before2(7))
103  ALLOCATE (btrig3(2, ctrig_length))
104  ALLOCATE (ftrig3(2, ctrig_length))
105  ALLOCATE (after3(7))
106  ALLOCATE (now3(7))
107  ALLOCATE (before3(7))
108  ALLOCATE (zw(2, ncache/4, 2))
109  ALLOCATE (zt(2, lzt, n1))
110  ALLOCATE (zmpi2(2, n1, md2/nproc, nd3))
111  IF (nproc .GT. 1) ALLOCATE (zmpi1(2, n1, md2/nproc, nd3/nproc, nproc))
112 
113  !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
114  CALL ctrig(n3, btrig3, after3, before3, now3, 1, ic3)
115  CALL ctrig(n1, btrig1, after1, before1, now1, 1, ic1)
116  CALL ctrig(n2, btrig2, after2, before2, now2, 1, ic2)
117  DO j = 1, n1
118  ftrig1(1, j) = btrig1(1, j)
119  ftrig1(2, j) = -btrig1(2, j)
120  END DO
121  DO j = 1, n2
122  ftrig2(1, j) = btrig2(1, j)
123  ftrig2(2, j) = -btrig2(2, j)
124  END DO
125  DO j = 1, n3
126  ftrig3(1, j) = btrig3(1, j)
127  ftrig3(2, j) = -btrig3(2, j)
128  END DO
129 
130  ! transform along z axis
131  lot = ncache/(4*n3)
132  IF (lot .LT. 1) THEN
133  WRITE (*, *) &
134  'convolxc_off:ncache has to be enlarged to be able to hold at'// &
135  'least one 1-d FFT of this size even though this will'// &
136  'reduce the performance for shorter transform lengths'
137  cpabort("")
138  END IF
139  DO j2 = 1, md2/nproc
140  !this condition ensures that we manage only the interesting part for the FFT
141  IF (iproc*(md2/nproc) + j2 .LE. n2) THEN
142  DO i1 = 1, n1, lot
143  ma = i1
144  mb = min(i1 + (lot - 1), n1)
145  nfft = mb - ma + 1
146  !inserting real data into complex array of half length
147  CALL p_fill_upcorn(md1, md3, lot, nfft, n3, zf(i1, 1, j2), zw(1, 1, 1))
148 
149  !performing FFT
150  !input: I1,I3,J2,(Jp2)
151  inzee = 1
152  DO i = 1, ic3
153  CALL fftstp(lot, nfft, n3, lot, n3, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
154  btrig3, after3(i), now3(i), before3(i), 1)
155  inzee = 3 - inzee
156  END DO
157 
158  !output: I1,i3,J2,(Jp2)
159  !exchanging components
160  !input: I1,i3,J2,(Jp2)
161  CALL scramble_p(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw(1, 1, inzee), zmpi2)
162  !output: I1,J2,i3,(Jp2)
163  END DO
164  END IF
165  END DO
166 
167  !Interprocessor data transposition
168  !input: I1,J2,j3,jp3,(Jp2)
169  IF (nproc .GT. 1) THEN
170  !communication scheduling
171  CALL mpi_group%alltoall(zmpi2, zmpi1, 2*n1*(md2/nproc)*(nd3/nproc))
172  END IF
173  !output: I1,J2,j3,Jp2,(jp3)
174 
175  !now each process perform complete convolution of its planes
176  DO j3 = 1, nd3/nproc
177  !this condition ensures that we manage only the interesting part for the FFT
178  IF (iproc*(nd3/nproc) + j3 .LE. n3/2 + 1) THEN
179  jp2stb = 1
180  j2stb = 1
181  jp2stf = 1
182  j2stf = 1
183 
184  ! transform along x axis
185  lot = ncache/(4*n1)
186  IF (lot .LT. 1) THEN
187  WRITE (*, *) &
188  'convolxc_off:ncache has to be enlarged to be able to hold at'// &
189  'least one 1-d FFT of this size even though this will'// &
190  'reduce the performance for shorter transform lengths'
191  cpabort("")
192  END IF
193 
194  DO j = 1, n2, lot
195  ma = j
196  mb = min(j + (lot - 1), n2)
197  nfft = mb - ma + 1
198 
199  !reverse index ordering, leaving the planes to be transformed at the end
200  !input: I1,J2,j3,Jp2,(jp3)
201  IF (nproc .EQ. 1) THEN
202  CALL p_mpiswitch_upcorn(j3, nfft, jp2stb, j2stb, lot, n1, md2, nd3, nproc, zmpi2, zw(1, 1, 1))
203  ELSE
204  CALL p_mpiswitch_upcorn(j3, nfft, jp2stb, j2stb, lot, n1, md2, nd3, nproc, zmpi1, zw(1, 1, 1))
205  END IF
206  !output: J2,Jp2,I1,j3,(jp3)
207 
208  !performing FFT
209  !input: I2,I1,j3,(jp3)
210  inzee = 1
211  DO i = 1, ic1 - 1
212  CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
213  btrig1, after1(i), now1(i), before1(i), 1)
214  inzee = 3 - inzee
215  END DO
216 
217  !storing the last step into zt array
218  i = ic1
219  CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
220  btrig1, after1(i), now1(i), before1(i), 1)
221  !output: I2,i1,j3,(jp3)
222  END DO
223 
224  !transform along y axis
225  lot = ncache/(4*n2)
226  IF (lot .LT. 1) THEN
227  WRITE (*, *) &
228  'convolxc_off:ncache has to be enlarged to be able to hold at'// &
229  'least one 1-d FFT of this size even though this will'// &
230  'reduce the performance for shorter transform lengths'
231  cpabort("")
232  END IF
233 
234  DO j = 1, n1, lot
235  ma = j
236  mb = min(j + (lot - 1), n1)
237  nfft = mb - ma + 1
238 
239  !reverse ordering
240  !input: I2,i1,j3,(jp3)
241  CALL p_switch_upcorn(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
242  !output: i1,I2,j3,(jp3)
243 
244  !performing FFT
245  !input: i1,I2,j3,(jp3)
246  inzee = 1
247  DO i = 1, ic2
248  CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
249  btrig2, after2(i), now2(i), before2(i), 1)
250  inzee = 3 - inzee
251  END DO
252  !output: i1,i2,j3,(jp3)
253 
254  !Multiply with kernel in fourier space
255  i3 = iproc*(nd3/nproc) + j3
256  CALL p_multkernel(n1, n2, n3, lot, nfft, j, i3, zw(1, 1, inzee), hx, hy, hz)
257 
258  !TRANSFORM BACK IN REAL SPACE
259 
260  !transform along y axis
261  !input: i1,i2,j3,(jp3)
262  DO i = 1, ic2
263  CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
264  ftrig2, after2(i), now2(i), before2(i), -1)
265  inzee = 3 - inzee
266  END DO
267 
268  !reverse ordering
269  !input: i1,I2,j3,(jp3)
270  CALL p_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw(1, 1, inzee), zt(1, 1, j))
271  !output: I2,i1,j3,(jp3)
272  END DO
273 
274  !transform along x axis
275  !input: I2,i1,j3,(jp3)
276  lot = ncache/(4*n1)
277  DO j = 1, n2, lot
278  ma = j
279  mb = min(j + (lot - 1), n2)
280  nfft = mb - ma + 1
281 
282  !performing FFT
283  i = 1
284  CALL fftstp(lzt, nfft, n1, lot, n1, zt(1, j, 1), zw(1, 1, 1), &
285  ftrig1, after1(i), now1(i), before1(i), -1)
286 
287  inzee = 1
288  DO i = 2, ic1
289  CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
290  ftrig1, after1(i), now1(i), before1(i), -1)
291  inzee = 3 - inzee
292  END DO
293  !output: I2,I1,j3,(jp3)
294 
295  !reverse ordering
296  !input: J2,Jp2,I1,j3,(jp3)
297  IF (nproc .EQ. 1) THEN
298  CALL p_unmpiswitch_downcorn(j3, nfft, jp2stf, j2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi2)
299  ELSE
300  CALL p_unmpiswitch_downcorn(j3, nfft, jp2stf, j2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi1)
301  END IF
302  ! output: I1,J2,j3,Jp2,(jp3)
303  END DO
304  END IF
305  END DO
306 
307  !Interprocessor data transposition
308  !input: I1,J2,j3,Jp2,(jp3)
309  IF (nproc .GT. 1) THEN
310  !communication scheduling
311  CALL mpi_group%alltoall(zmpi1, zmpi2, 2*n1*(md2/nproc)*(nd3/nproc))
312  END IF
313  !output: I1,J2,j3,jp3,(Jp2)
314  !transform along z axis
315  !input: I1,J2,i3,(Jp2)
316  lot = ncache/(4*n3)
317  DO j2 = 1, md2/nproc
318  !this condition ensures that we manage only the interesting part for the FFT
319  IF (iproc*(md2/nproc) + j2 .LE. n2) THEN
320  DO i1 = 1, n1, lot
321  ma = i1
322  mb = min(i1 + (lot - 1), n1)
323  nfft = mb - ma + 1
324 
325  !reverse ordering
326  !input: I1,J2,i3,(Jp2)
327  CALL unscramble_p(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw(1, 1, 1))
328  !output: I1,i3,J2,(Jp2)
329 
330  !performing FFT
331  !input: I1,i3,J2,(Jp2)
332  inzee = 1
333  DO i = 1, ic3
334  CALL fftstp(lot, nfft, n3, lot, n3, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
335  ftrig3, after3(i), now3(i), before3(i), -1)
336  inzee = 3 - inzee
337  END DO
338  !output: I1,I3,J2,(Jp2)
339 
340  !rebuild the output array
341  CALL p_unfill_downcorn(md1, md3, lot, nfft, n3, zw(1, 1, inzee), zf(i1, 1, j2), scal)
342 
343  END DO
344  END IF
345  END DO
346 
347  !De-allocations
348  DEALLOCATE (btrig1)
349  DEALLOCATE (ftrig1)
350  DEALLOCATE (after1)
351  DEALLOCATE (now1)
352  DEALLOCATE (before1)
353  DEALLOCATE (btrig2)
354  DEALLOCATE (ftrig2)
355  DEALLOCATE (after2)
356  DEALLOCATE (now2)
357  DEALLOCATE (before2)
358  DEALLOCATE (btrig3)
359  DEALLOCATE (ftrig3)
360  DEALLOCATE (after3)
361  DEALLOCATE (now3)
362  DEALLOCATE (before3)
363  DEALLOCATE (zmpi2)
364  DEALLOCATE (zw)
365  DEALLOCATE (zt)
366  IF (nproc .GT. 1) DEALLOCATE (zmpi1)
367  ! call timing(iproc,'PSolv_comput ','OF')
368  END SUBROUTINE p_poissonsolver
369 
370 ! **************************************************************************************************
371 !> \brief ...
372 !> \param j3 ...
373 !> \param nfft ...
374 !> \param Jp2stb ...
375 !> \param J2stb ...
376 !> \param lot ...
377 !> \param n1 ...
378 !> \param md2 ...
379 !> \param nd3 ...
380 !> \param nproc ...
381 !> \param zmpi1 ...
382 !> \param zw ...
383 ! **************************************************************************************************
384  SUBROUTINE p_mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw)
385  INTEGER, INTENT(in) :: j3, nfft
386  INTEGER, INTENT(inout) :: jp2stb, j2stb
387  INTEGER, INTENT(in) :: lot, n1, md2, nd3, nproc
388  REAL(kind=dp), &
389  DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
390  INTENT(in) :: zmpi1
391  REAL(kind=dp), DIMENSION(2, lot, n1), &
392  INTENT(inout) :: zw
393 
394  INTEGER :: i1, j2, jp2, mfft
395 
396  mfft = 0
397  DO jp2 = jp2stb, nproc
398  DO j2 = j2stb, md2/nproc
399  mfft = mfft + 1
400  IF (mfft .GT. nfft) THEN
401  jp2stb = jp2
402  j2stb = j2
403  RETURN
404  END IF
405  DO i1 = 1, n1
406  zw(1, mfft, i1) = zmpi1(1, i1, j2, j3, jp2)
407  zw(2, mfft, i1) = zmpi1(2, i1, j2, j3, jp2)
408  END DO
409  END DO
410  j2stb = 1
411  END DO
412  END SUBROUTINE p_mpiswitch_upcorn
413 
414 ! **************************************************************************************************
415 !> \brief ...
416 !> \param nfft ...
417 !> \param n2 ...
418 !> \param lot ...
419 !> \param n1 ...
420 !> \param lzt ...
421 !> \param zt ...
422 !> \param zw ...
423 ! **************************************************************************************************
424  SUBROUTINE p_switch_upcorn(nfft, n2, lot, n1, lzt, zt, zw)
425  INTEGER, INTENT(in) :: nfft, n2, lot, n1, lzt
426  REAL(kind=dp), DIMENSION(2, lzt, n1), INTENT(in) :: zt
427  REAL(kind=dp), DIMENSION(2, lot, n2), &
428  INTENT(inout) :: zw
429 
430  INTEGER :: i, j
431 
432  DO j = 1, nfft
433  DO i = 1, n2
434  zw(1, j, i) = zt(1, i, j)
435  zw(2, j, i) = zt(2, i, j)
436  END DO
437  END DO
438 
439  END SUBROUTINE p_switch_upcorn
440 
441 ! **************************************************************************************************
442 !> \brief ...
443 !> \param nfft ...
444 !> \param n2 ...
445 !> \param lot ...
446 !> \param n1 ...
447 !> \param lzt ...
448 !> \param zw ...
449 !> \param zt ...
450 ! **************************************************************************************************
451  SUBROUTINE p_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw, zt)
452  INTEGER, INTENT(in) :: nfft, n2, lot, n1, lzt
453  REAL(kind=dp), DIMENSION(2, lot, n2), INTENT(in) :: zw
454  REAL(kind=dp), DIMENSION(2, lzt, n1), &
455  INTENT(inout) :: zt
456 
457  INTEGER :: i, j
458 
459  DO j = 1, nfft
460  DO i = 1, n2
461  zt(1, i, j) = zw(1, j, i)
462  zt(2, i, j) = zw(2, j, i)
463  END DO
464  END DO
465 
466  END SUBROUTINE p_unswitch_downcorn
467 
468 ! **************************************************************************************************
469 !> \brief ...
470 !> \param j3 ...
471 !> \param nfft ...
472 !> \param Jp2stf ...
473 !> \param J2stf ...
474 !> \param lot ...
475 !> \param n1 ...
476 !> \param md2 ...
477 !> \param nd3 ...
478 !> \param nproc ...
479 !> \param zw ...
480 !> \param zmpi1 ...
481 ! **************************************************************************************************
482  SUBROUTINE p_unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw, zmpi1)
483  INTEGER, INTENT(in) :: j3, nfft
484  INTEGER, INTENT(inout) :: jp2stf, j2stf
485  INTEGER, INTENT(in) :: lot, n1, md2, nd3, nproc
486  REAL(kind=dp), DIMENSION(2, lot, n1), INTENT(in) :: zw
487  REAL(kind=dp), &
488  DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
489  INTENT(inout) :: zmpi1
490 
491  INTEGER :: i1, j2, jp2, mfft
492 
493  mfft = 0
494  DO jp2 = jp2stf, nproc
495  DO j2 = j2stf, md2/nproc
496  mfft = mfft + 1
497  IF (mfft .GT. nfft) THEN
498  jp2stf = jp2
499  j2stf = j2
500  RETURN
501  END IF
502  DO i1 = 1, n1
503  zmpi1(1, i1, j2, j3, jp2) = zw(1, mfft, i1)
504  zmpi1(2, i1, j2, j3, jp2) = zw(2, mfft, i1)
505  END DO
506  END DO
507  j2stf = 1
508  END DO
509  END SUBROUTINE p_unmpiswitch_downcorn
510 
511 ! **************************************************************************************************
512 !> \brief (Based on suitable modifications of S.Goedecker routines)
513 !> Restore data into output array
514 !> \param md1 Dimensions of the undistributed part of the real grid
515 !> \param md3 Dimensions of the undistributed part of the real grid
516 !> \param lot ...
517 !> \param nfft number of planes
518 !> \param n3 (twice the) dimension of the last FFTtransform
519 !> \param zw FFT work array
520 !> \param zf Original distributed density as well as
521 !> Distributed solution of the poisson equation (inout)
522 !> \param scal Needed to achieve unitarity and correct dimensions
523 !> \date February 2006
524 !> \author S. Goedecker, L. Genovese
525 !> \note Assuming that high frequencies are in the corners
526 !> and that n3 is multiple of 4
527 !>
528 !> RESTRICTIONS on USAGE
529 !> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
530 !> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
531 !> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
532 !> This file is distributed under the terms of the
533 !> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
534 ! **************************************************************************************************
535  SUBROUTINE p_unfill_downcorn(md1, md3, lot, nfft, n3, zw, zf, scal)
536  INTEGER, INTENT(in) :: md1, md3, lot, nfft, n3
537  REAL(kind=dp), DIMENSION(2, lot, n3), INTENT(in) :: zw
538  REAL(kind=dp), DIMENSION(md1, md3), INTENT(inout) :: zf
539  REAL(kind=dp), INTENT(in) :: scal
540 
541  INTEGER :: i1, i3
542  REAL(kind=dp) :: pot1
543 
544  DO i3 = 1, n3
545  DO i1 = 1, nfft
546  pot1 = scal*zw(1, i1, i3)
547  zf(i1, i3) = pot1
548  END DO
549  END DO
550 
551  END SUBROUTINE p_unfill_downcorn
552 
553 ! **************************************************************************************************
554 !> \brief ...
555 !> \param md1 ...
556 !> \param md3 ...
557 !> \param lot ...
558 !> \param nfft ...
559 !> \param n3 ...
560 !> \param zf ...
561 !> \param zw ...
562 ! **************************************************************************************************
563  SUBROUTINE p_fill_upcorn(md1, md3, lot, nfft, n3, zf, zw)
564  INTEGER, INTENT(in) :: md1, md3, lot, nfft, n3
565  REAL(kind=dp), DIMENSION(md1, md3), INTENT(in) :: zf
566  REAL(kind=dp), DIMENSION(2, lot, n3), &
567  INTENT(inout) :: zw
568 
569  INTEGER :: i1, i3
570 
571  DO i3 = 1, n3
572  DO i1 = 1, nfft
573  zw(1, i1, i3) = zf(i1, i3)
574  zw(2, i1, i3) = 0._dp
575  END DO
576  END DO
577 
578  END SUBROUTINE p_fill_upcorn
579 
580 ! **************************************************************************************************
581 !> \brief (Based on suitable modifications of S.Goedecker routines)
582 !> Assign the correct planes to the work array zmpi2
583 !> in order to prepare for interprocessor data transposition.
584 !> \param i1 Starting points of the plane and number of remaining lines
585 !> \param j2 Starting points of the plane and number of remaining lines
586 !> \param lot Starting points of the plane and number of remaining lines
587 !> \param nfft Starting points of the plane and number of remaining lines
588 !> \param n1 logical dimension of the FFT transform, reference for work arrays
589 !> \param n3 logical dimension of the FFT transform, reference for work arrays
590 !> \param md2 Dimensions of real grid
591 !> \param nproc ...
592 !> \param nd3 Dimensions of the kernel
593 !> \param zw Work array (input)
594 !> \param zmpi2 Work array for multiprocessor manipulation (output)
595 !> \date February 2006
596 !> \author S. Goedecker, L. Genovese
597 !> \note
598 !> RESTRICTIONS on USAGE
599 !> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
600 !> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
601 !> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
602 !> This file is distributed under the terms of the
603 !> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
604 ! **************************************************************************************************
605  SUBROUTINE scramble_p(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw, zmpi2)
606  INTEGER, INTENT(in) :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
607  nd3
608  REAL(kind=dp), DIMENSION(2, lot, n3), INTENT(in) :: zw
609  REAL(kind=dp), DIMENSION(2, n1, md2/nproc, nd3), &
610  INTENT(inout) :: zmpi2
611 
612  INTEGER :: i, i3
613 
614  DO i3 = 1, n3/2 + 1
615  DO i = 0, nfft - 1
616  zmpi2(1, i1 + i, j2, i3) = zw(1, i + 1, i3)
617  zmpi2(2, i1 + i, j2, i3) = zw(2, i + 1, i3)
618  END DO
619  END DO
620 
621  END SUBROUTINE scramble_p
622 
623 ! **************************************************************************************************
624 !> \brief (Based on suitable modifications of S.Goedecker routines)
625 !> Insert the correct planes of the work array zmpi2
626 !> in order to prepare for backward FFT transform
627 !> \param i1 Starting points of the plane and number of remaining lines
628 !> \param j2 Starting points of the plane and number of remaining lines
629 !> \param lot Starting points of the plane and number of remaining lines
630 !> \param nfft Starting points of the plane and number of remaining lines
631 !> \param n1 logical dimension of the FFT transform, reference for work arrays
632 !> \param n3 logical dimension of the FFT transform, reference for work arrays
633 !> \param md2 Dimensions of real grid
634 !> \param nproc ...
635 !> \param nd3 Dimensions of the kernel
636 !> \param zmpi2 Work array for multiprocessor manipulation (output)
637 !> \param zw Work array (input)
638 !> \date February 2006
639 !> \author S. Goedecker, L. Genovese
640 !> \note
641 !> RESTRICTIONS on USAGE
642 !> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
643 !> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
644 !> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
645 !> This file is distributed under the terms of the
646 !> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
647 ! **************************************************************************************************
648  SUBROUTINE unscramble_p(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw)
649  INTEGER, INTENT(in) :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
650  nd3
651  REAL(kind=dp), DIMENSION(2, n1, md2/nproc, nd3), &
652  INTENT(in) :: zmpi2
653  REAL(kind=dp), DIMENSION(2, lot, n3), &
654  INTENT(inout) :: zw
655 
656  INTEGER :: i, i3, j3
657 
658  i3 = 1
659  DO i = 0, nfft - 1
660  zw(1, i + 1, i3) = zmpi2(1, i1 + i, j2, i3)
661  zw(2, i + 1, i3) = zmpi2(2, i1 + i, j2, i3)
662  END DO
663 
664  DO i3 = 2, n3/2 + 1
665  j3 = n3 + 2 - i3
666  DO i = 0, nfft - 1
667  zw(1, i + 1, j3) = zmpi2(1, i1 + i, j2, i3)
668  zw(2, i + 1, j3) = -zmpi2(2, i1 + i, j2, i3)
669  zw(1, i + 1, i3) = zmpi2(1, i1 + i, j2, i3)
670  zw(2, i + 1, i3) = zmpi2(2, i1 + i, j2, i3)
671  END DO
672  END DO
673 
674  END SUBROUTINE unscramble_p
675 
676 ! **************************************************************************************************
677 !> \brief (Based on suitable modifications of S.Goedecker routines)
678 !> Multiply with the kernel taking into account its symmetry
679 !> Conceived to be used into convolution loops
680 !> \param n1 ...
681 !> \param n2 ...
682 !> \param n3 ...
683 !> \param lot ...
684 !> \param nfft ...
685 !> \param jS ...
686 !> \param i3 ...
687 !> \param zw Work array (input/output)
688 !> n1,n2: logical dimension of the FFT transform, reference for zw
689 !> nd1,nd2: Dimensions of POT
690 !> jS,j3,nfft: starting point of the plane and number of remaining lines
691 !>
692 !> RESTRICTIONS on USAGE
693 !> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
694 !> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
695 !> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
696 !> This file is distributed under the terms of the
697 !> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
698 !> \param hx ...
699 !> \param hy ...
700 !> \param hz ...
701 !> \date February 2006
702 !> \author S. Goedecker, L. Genovese
703 ! **************************************************************************************************
704  SUBROUTINE p_multkernel(n1, n2, n3, lot, nfft, jS, i3, zw, hx, hy, hz)
705  INTEGER, INTENT(in) :: n1, n2, n3, lot, nfft, js, i3
706  REAL(kind=dp), DIMENSION(2, lot, n2), &
707  INTENT(inout) :: zw
708  REAL(kind=dp), INTENT(in) :: hx, hy, hz
709 
710  INTEGER :: i1, i2, j1, j2, j3
711  REAL(kind=dp) :: fourpi2, ker, mu3, p1, p2, pi
712 
713  pi = 4._dp*atan(1._dp)
714  fourpi2 = 4._dp*pi**2
715  j3 = i3 !n3/2+1-abs(n3/2+2-i3)
716  mu3 = real(j3 - 1, kind=dp)/real(n3, kind=dp)
717  mu3 = (mu3/hy)**2 !beware of the exchanged dimension
718  !Body
719  !generic case
720  DO i2 = 1, n2
721  DO i1 = 1, nfft
722  j1 = i1 + js - 1
723  j1 = j1 - (j1/(n1/2 + 2))*n1 !n1/2+1-abs(n1/2+2-jS-i1)
724  j2 = i2 - (i2/(n2/2 + 2))*n2 !n2/2+1-abs(n2/2+1-i2)
725  p1 = real(j1 - 1, kind=dp)/real(n1, kind=dp)
726  p2 = real(j2 - 1, kind=dp)/real(n2, kind=dp)
727  ker = -fourpi2*((p1/hx)**2 + (p2/hz)**2 + mu3) !beware of the exchanged dimension
728  IF (ker /= 0._dp) ker = 1._dp/ker
729  zw(1, i1, i2) = zw(1, i1, i2)*ker
730  zw(2, i1, i2) = zw(2, i1, i2)*ker
731  END DO
732  END DO
733 
734  END SUBROUTINE p_multkernel
735 
736 ! **************************************************************************************************
737 !> \brief (Based on suitable modifications of S.Goedecker routines)
738 !> Multiply with the kernel taking into account its symmetry
739 !> Conceived to be used into convolution loops
740 !> \param nd1 ...
741 !> \param nd2 ...
742 !> \param n1 ...
743 !> \param n2 ...
744 !> \param lot ...
745 !> \param nfft ...
746 !> \param jS ...
747 !> \param pot Kernel, symmetric and real, half the length
748 !> \param zw Work array (input/output)
749 !> n1,n2: logical dimension of the FFT transform, reference for zw
750 !> nd1,nd2: Dimensions of POT
751 !> jS, nfft: starting point of the plane and number of remaining lines
752 !>
753 !> RESTRICTIONS on USAGE
754 !> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
755 !> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
756 !> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
757 !> This file is distributed under the terms of the
758 !> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
759 !> \date February 2006
760 !> \author S. Goedecker, L. Genovese
761 ! **************************************************************************************************
762  SUBROUTINE multkernel(nd1, nd2, n1, n2, lot, nfft, jS, pot, zw)
763  INTEGER, INTENT(in) :: nd1, nd2, n1, n2, lot, nfft, js
764  REAL(kind=dp), DIMENSION(nd1, nd2), INTENT(in) :: pot
765  REAL(kind=dp), DIMENSION(2, lot, n2), &
766  INTENT(inout) :: zw
767 
768  INTEGER :: i2, j, j1, j2
769 
770  DO j = 1, nfft
771  j1 = j + js - 1
772  j1 = j1 + (j1/(n1/2 + 2))*(n1 + 2 - 2*j1)
773  zw(1, j, 1) = zw(1, j, 1)*pot(j1, 1)
774  zw(2, j, 1) = zw(2, j, 1)*pot(j1, 1)
775  END DO
776 
777  !generic case
778  DO i2 = 2, n2/2
779  DO j = 1, nfft
780  j1 = j + js - 1
781  j1 = j1 + (j1/(n1/2 + 2))*(n1 + 2 - 2*j1)
782  j2 = n2 + 2 - i2
783  zw(1, j, i2) = zw(1, j, i2)*pot(j1, i2)
784  zw(2, j, i2) = zw(2, j, i2)*pot(j1, i2)
785  zw(1, j, j2) = zw(1, j, j2)*pot(j1, i2)
786  zw(2, j, j2) = zw(2, j, j2)*pot(j1, i2)
787  END DO
788  END DO
789 
790  !case i2=n2/2+1
791  DO j = 1, nfft
792  j1 = j + js - 1
793  j1 = j1 + (j1/(n1/2 + 2))*(n1 + 2 - 2*j1)
794  j2 = n2/2 + 1
795  zw(1, j, j2) = zw(1, j, j2)*pot(j1, j2)
796  zw(2, j, j2) = zw(2, j, j2)*pot(j1, j2)
797  END DO
798 
799  END SUBROUTINE multkernel
800 
801 ! **************************************************************************************************
802 !> \brief !HERE POT MUST BE THE KERNEL (BEWARE THE HALF DIMENSION)
803 !> ****h* BigDFT/S_PoissonSolver
804 !> (Based on suitable modifications of S.Goedecker routines)
805 !> Applies the local FFT space Kernel to the density in Real space.
806 !> Does NOT calculate the LDA exchange-correlation terms
807 !> \param n1 logical dimension of the transform.
808 !> \param n2 logical dimension of the transform.
809 !> \param n3 logical dimension of the transform.
810 !> \param nd1 Dimension of POT
811 !> \param nd2 Dimension of POT
812 !> \param nd3 Dimension of POT
813 !> \param md1 Dimension of ZF
814 !> \param md2 Dimension of ZF
815 !> \param md3 Dimension of ZF
816 !> \param nproc number of processors used as returned by MPI_COMM_SIZE
817 !> \param iproc [0:nproc-1] number of processor as returned by MPI_COMM_RANK
818 !> \param pot Kernel, only the distributed part (REAL)
819 !> POT(i1,i2,i3)
820 !> i1=1,nd1 , i2=1,nd2 , i3=1,nd3/nproc
821 !> \param zf Density (input/output)
822 !> ZF(i1,i3,i2)
823 !> i1=1,md1 , i2=1,md2/nproc , i3=1,md3
824 !> \param scal factor of renormalization of the FFT in order to acheve unitarity
825 !> and the correct dimension
826 !> \param mpi_group ...
827 !> \date October 2006
828 !> \author S. Goedecker, L. Genovese
829 !> \note As transform lengths most products of the prime factors 2,3,5 are allowed.
830 !> The detailed table with allowed transform lengths can
831 !> be found in subroutine CTRIG
832 !> \note
833 !> RESTRICTIONS on USAGE
834 !> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
835 !> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
836 !> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
837 !> This file is distributed under the terms of the
838 !> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
839 ! **************************************************************************************************
840  SUBROUTINE s_poissonsolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, pot, zf, &
841  scal, mpi_group)
842  INTEGER, INTENT(in) :: n1, n2, n3, nd1, nd2, nd3, md1, md2, &
843  md3, nproc, iproc
844  REAL(kind=dp), DIMENSION(nd1, nd2, nd3/nproc), &
845  INTENT(in) :: pot
846  REAL(kind=dp), DIMENSION(md1, md3, md2/nproc), &
847  INTENT(inout) :: zf
848  REAL(kind=dp), INTENT(in) :: scal
849 
850  CLASS(mp_comm_type), INTENT(in) :: mpi_group
851 
852  CHARACTER(len=*), PARAMETER :: routinen = 'S_PoissonSolver'
853  INTEGER, PARAMETER :: ncache_optimal = 8*1024
854 
855  INTEGER :: handle, i, i1, i3, ic1, ic2, ic3, inzee, &
856  j, j2, j2stb, j2stf, j3, jp2stb, &
857  jp2stf, lot, lzt, ma, mb, ncache, nfft
858  INTEGER, ALLOCATABLE, DIMENSION(:) :: after1, after2, after3, before1, &
859  before2, before3, now1, now2, now3
860  REAL(kind=dp) :: twopion
861  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: btrig1, btrig2, btrig3, cosinarr, &
862  ftrig1, ftrig2, ftrig3
863  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: zt, zw
864  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :, :) :: zmpi2
865  REAL(kind=dp), ALLOCATABLE, &
866  DIMENSION(:, :, :, :, :) :: zmpi1
867 
868  CALL timeset(routinen, handle)
869  ! check input
870  IF (mod(n3, 2) .NE. 0) cpabort("Parallel convolution:ERROR:n3")
871  IF (nd1 .LT. n1/2 + 1) cpabort("Parallel convolution:ERROR:nd1")
872  IF (nd2 .LT. n2/2 + 1) cpabort("Parallel convolution:ERROR:nd2")
873  IF (nd3 .LT. n3/2 + 1) cpabort("Parallel convolution:ERROR:nd3")
874  IF (md1 .LT. n1) cpabort("Parallel convolution:ERROR:md1")
875  IF (md2 .LT. n2) cpabort("Parallel convolution:ERROR:md2")
876  IF (md3 .LT. n3/2) cpabort("Parallel convolution:ERROR:md3")
877  IF (mod(nd3, nproc) .NE. 0) cpabort("Parallel convolution:ERROR:nd3")
878  IF (mod(md2, nproc) .NE. 0) cpabort("Parallel convolution:ERROR:md2")
879 
880  !defining work arrays dimensions
881  ncache = ncache_optimal
882  IF (ncache <= max(n1, n2, n3/2)*4) ncache = max(n1, n2, n3/2)*4
883 
884  ! if (timing_flag == 1 .and. iproc ==0) print *,'parallel ncache=',ncache
885 
886  lzt = n2
887  IF (mod(n2, 2) .EQ. 0) lzt = lzt + 1
888  IF (mod(n2, 4) .EQ. 0) lzt = lzt + 1 !maybe this is useless
889 
890  !Allocations
891  ALLOCATE (btrig1(2, ctrig_length))
892  ALLOCATE (ftrig1(2, ctrig_length))
893  ALLOCATE (after1(7))
894  ALLOCATE (now1(7))
895  ALLOCATE (before1(7))
896  ALLOCATE (btrig2(2, ctrig_length))
897  ALLOCATE (ftrig2(2, ctrig_length))
898  ALLOCATE (after2(7))
899  ALLOCATE (now2(7))
900  ALLOCATE (before2(7))
901  ALLOCATE (btrig3(2, ctrig_length))
902  ALLOCATE (ftrig3(2, ctrig_length))
903  ALLOCATE (after3(7))
904  ALLOCATE (now3(7))
905  ALLOCATE (before3(7))
906  ALLOCATE (zw(2, ncache/4, 2))
907  ALLOCATE (zt(2, lzt, n1))
908  ALLOCATE (zmpi2(2, n1, md2/nproc, nd3))
909  ALLOCATE (cosinarr(2, n3/2))
910  IF (nproc .GT. 1) THEN
911  ALLOCATE (zmpi1(2, n1, md2/nproc, nd3/nproc, nproc))
912  zmpi1 = 0.0_dp
913  END IF
914  zmpi2 = 0.0_dp
915  zw = 0.0_dp
916  zt = 0.0_dp
917 
918  !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
919  CALL ctrig(n3/2, btrig3, after3, before3, now3, 1, ic3)
920  CALL ctrig(n1, btrig1, after1, before1, now1, 1, ic1)
921  CALL ctrig(n2, btrig2, after2, before2, now2, 1, ic2)
922  DO j = 1, n1
923  ftrig1(1, j) = btrig1(1, j)
924  ftrig1(2, j) = -btrig1(2, j)
925  END DO
926  DO j = 1, n2
927  ftrig2(1, j) = btrig2(1, j)
928  ftrig2(2, j) = -btrig2(2, j)
929  END DO
930  DO j = 1, n3/2
931  ftrig3(1, j) = btrig3(1, j)
932  ftrig3(2, j) = -btrig3(2, j)
933  END DO
934 
935  !Calculating array of phases for HalFFT decoding
936  twopion = 8._dp*atan(1._dp)/real(n3, kind=dp)
937  DO i3 = 1, n3/2
938  cosinarr(1, i3) = cos(twopion*(i3 - 1))
939  cosinarr(2, i3) = -sin(twopion*(i3 - 1))
940  END DO
941 
942  !initializing integral
943  !ehartree=0._dp
944 
945  ! transform along z axis
946  lot = ncache/(2*n3)
947  IF (lot .LT. 1) THEN
948  WRITE (*, *) &
949  'convolxc_off:ncache has to be enlarged to be able to hold at'// &
950  'least one 1-d FFT of this size even though this will'// &
951  'reduce the performance for shorter transform lengths', n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc
952  cpabort("")
953  END IF
954 
955  DO j2 = 1, md2/nproc
956  !this condition ensures that we manage only the interesting part for the FFT
957  IF (iproc*(md2/nproc) + j2 .LE. n2) THEN
958  DO i1 = 1, n1, lot
959  ma = i1
960  mb = min(i1 + (lot - 1), n1)
961  nfft = mb - ma + 1
962 
963  !inserting real data into complex array of half length
964  CALL halfill_upcorn(md1, md3, lot, nfft, n3, zf(i1, 1, j2), zw(1, 1, 1))
965 
966  !performing FFT
967  !input: I1,I3,J2,(Jp2)
968  inzee = 1
969  DO i = 1, ic3
970  CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
971  btrig3, after3(i), now3(i), before3(i), 1)
972  inzee = 3 - inzee
973  END DO
974  !output: I1,i3,J2,(Jp2)
975  !unpacking FFT in order to restore correct result,
976  !while exchanging components
977  !input: I1,i3,J2,(Jp2)
978  CALL scramble_unpack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw(1, 1, inzee), zmpi2, cosinarr)
979  !output: I1,J2,i3,(Jp2)
980  END DO
981  END IF
982  END DO
983  !Interprocessor data transposition
984  !input: I1,J2,j3,jp3,(Jp2)
985  IF (nproc .GT. 1) THEN
986  CALL mpi_group%alltoall(zmpi2, zmpi1, 2*n1*(md2/nproc)*(nd3/nproc))
987  END IF
988  !output: I1,J2,j3,Jp2,(jp3)
989 
990  !now each process perform complete convolution of its planes
991  DO j3 = 1, nd3/nproc
992  !this condition ensures that we manage only the interesting part for the FFT
993  IF (iproc*(nd3/nproc) + j3 .LE. n3/2 + 1) THEN
994  jp2stb = 1
995  j2stb = 1
996  jp2stf = 1
997  j2stf = 1
998 
999  ! transform along x axis
1000  lot = ncache/(4*n1)
1001  IF (lot .LT. 1) THEN
1002  WRITE (*, *) &
1003  'convolxc_off:ncache has to be enlarged to be able to hold at'// &
1004  'least one 1-d FFT of this size even though this will'// &
1005  'reduce the performance for shorter transform lengths'
1006  cpabort("")
1007  END IF
1008 
1009  DO j = 1, n2, lot
1010  ma = j
1011  mb = min(j + (lot - 1), n2)
1012  nfft = mb - ma + 1
1013 
1014  !reverse index ordering, leaving the planes to be transformed at the end
1015  !input: I1,J2,j3,Jp2,(jp3)
1016  IF (nproc .EQ. 1) THEN
1017  CALL s_mpiswitch_upcorn(j3, nfft, jp2stb, j2stb, lot, n1, md2, nd3, nproc, zmpi2, zw(1, 1, 1))
1018  ELSE
1019  CALL s_mpiswitch_upcorn(j3, nfft, jp2stb, j2stb, lot, n1, md2, nd3, nproc, zmpi1, zw(1, 1, 1))
1020  END IF
1021  !output: J2,Jp2,I1,j3,(jp3)
1022 
1023  !performing FFT
1024  !input: I2,I1,j3,(jp3)
1025  inzee = 1
1026  DO i = 1, ic1 - 1
1027  CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1028  btrig1, after1(i), now1(i), before1(i), 1)
1029  inzee = 3 - inzee
1030  END DO
1031 
1032  !storing the last step into zt array
1033  i = ic1
1034  CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
1035  btrig1, after1(i), now1(i), before1(i), 1)
1036  !output: I2,i1,j3,(jp3)
1037  END DO
1038 
1039  !transform along y axis
1040  lot = ncache/(4*n2)
1041  IF (lot .LT. 1) THEN
1042  WRITE (*, *) &
1043  'convolxc_off:ncache has to be enlarged to be able to hold at'// &
1044  'least one 1-d FFT of this size even though this will'// &
1045  'reduce the performance for shorter transform lengths'
1046  cpabort("")
1047  END IF
1048 
1049  DO j = 1, n1, lot
1050  ma = j
1051  mb = min(j + (lot - 1), n1)
1052  nfft = mb - ma + 1
1053 
1054  !reverse ordering
1055  !input: I2,i1,j3,(jp3)
1056  CALL s_switch_upcorn(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
1057  !output: i1,I2,j3,(jp3)
1058 
1059  !performing FFT
1060  !input: i1,I2,j3,(jp3)
1061  inzee = 1
1062  DO i = 1, ic2
1063  CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1064  btrig2, after2(i), now2(i), before2(i), 1)
1065  inzee = 3 - inzee
1066  END DO
1067  !output: i1,i2,j3,(jp3)
1068 
1069  !Multiply with kernel in fourier space
1070  CALL multkernel(nd1, nd2, n1, n2, lot, nfft, j, pot(1, 1, j3), zw(1, 1, inzee))
1071 
1072  !TRANSFORM BACK IN REAL SPACE
1073 
1074  !transform along y axis
1075  !input: i1,i2,j3,(jp3)
1076  DO i = 1, ic2
1077  CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1078  ftrig2, after2(i), now2(i), before2(i), -1)
1079  inzee = 3 - inzee
1080  END DO
1081 
1082  !reverse ordering
1083  !input: i1,I2,j3,(jp3)
1084  CALL s_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw(1, 1, inzee), zt(1, 1, j))
1085  !output: I2,i1,j3,(jp3)
1086  END DO
1087 
1088  !transform along x axis
1089  !input: I2,i1,j3,(jp3)
1090  lot = ncache/(4*n1)
1091  DO j = 1, n2, lot
1092  ma = j
1093  mb = min(j + (lot - 1), n2)
1094  nfft = mb - ma + 1
1095 
1096  !performing FFT
1097  i = 1
1098  CALL fftstp(lzt, nfft, n1, lot, n1, zt(1, j, 1), zw(1, 1, 1), &
1099  ftrig1, after1(i), now1(i), before1(i), -1)
1100 
1101  inzee = 1
1102  DO i = 2, ic1
1103  CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1104  ftrig1, after1(i), now1(i), before1(i), -1)
1105  inzee = 3 - inzee
1106  END DO
1107  !output: I2,I1,j3,(jp3)
1108 
1109  !reverse ordering
1110  !input: J2,Jp2,I1,j3,(jp3)
1111  IF (nproc .EQ. 1) THEN
1112  CALL s_unmpiswitch_downcorn(j3, nfft, jp2stf, j2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi2)
1113  ELSE
1114  CALL s_unmpiswitch_downcorn(j3, nfft, jp2stf, j2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi1)
1115  END IF
1116  ! output: I1,J2,j3,Jp2,(jp3)
1117  END DO
1118  END IF
1119  END DO
1120 
1121  !Interprocessor data transposition
1122  !input: I1,J2,j3,Jp2,(jp3)
1123  IF (nproc .GT. 1) THEN
1124  !communication scheduling
1125  CALL mpi_group%alltoall(zmpi1, zmpi2, 2*n1*(md2/nproc)*(nd3/nproc))
1126  END IF
1127 
1128  !output: I1,J2,j3,jp3,(Jp2)
1129 
1130  !transform along z axis
1131  !input: I1,J2,i3,(Jp2)
1132  lot = ncache/(2*n3)
1133  DO j2 = 1, md2/nproc
1134  !this condition ensures that we manage only the interesting part for the FFT
1135  IF (iproc*(md2/nproc) + j2 .LE. n2) THEN
1136  DO i1 = 1, n1, lot
1137  ma = i1
1138  mb = min(i1 + (lot - 1), n1)
1139  nfft = mb - ma + 1
1140 
1141  !reverse ordering and repack the FFT data in order to be backward HalFFT transformed
1142  !input: I1,J2,i3,(Jp2)
1143  CALL unscramble_pack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw(1, 1, 1), cosinarr)
1144  !output: I1,i3,J2,(Jp2)
1145 
1146  !performing FFT
1147  !input: I1,i3,J2,(Jp2)
1148  inzee = 1
1149  DO i = 1, ic3
1150  CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1151  ftrig3, after3(i), now3(i), before3(i), -1)
1152  inzee = 3 - inzee
1153  END DO
1154  !output: I1,I3,J2,(Jp2)
1155 
1156  !rebuild the output array
1157  CALL unfill_downcorn(md1, md3, lot, nfft, n3, zw(1, 1, inzee), zf(i1, 1, j2) &
1158  , scal)
1159 
1160  !integrate local pieces together
1161  !ehartree=ehartree+0.5_dp*ehartreetmp*hx*hy*hz
1162  END DO
1163  END IF
1164  END DO
1165 
1166  !De-allocations
1167  DEALLOCATE (btrig1)
1168  DEALLOCATE (ftrig1)
1169  DEALLOCATE (after1)
1170  DEALLOCATE (now1)
1171  DEALLOCATE (before1)
1172  DEALLOCATE (btrig2)
1173  DEALLOCATE (ftrig2)
1174  DEALLOCATE (after2)
1175  DEALLOCATE (now2)
1176  DEALLOCATE (before2)
1177  DEALLOCATE (btrig3)
1178  DEALLOCATE (ftrig3)
1179  DEALLOCATE (after3)
1180  DEALLOCATE (now3)
1181  DEALLOCATE (before3)
1182  DEALLOCATE (zmpi2)
1183  DEALLOCATE (zw)
1184  DEALLOCATE (zt)
1185  DEALLOCATE (cosinarr)
1186  IF (nproc .GT. 1) DEALLOCATE (zmpi1)
1187 
1188  ! call timing(iproc,'PSolv_comput ','OF')
1189  CALL timestop(handle)
1190  END SUBROUTINE s_poissonsolver
1191 
1192 ! **************************************************************************************************
1193 !> \brief ...
1194 !> \param j3 ...
1195 !> \param nfft ...
1196 !> \param Jp2stb ...
1197 !> \param J2stb ...
1198 !> \param lot ...
1199 !> \param n1 ...
1200 !> \param md2 ...
1201 !> \param nd3 ...
1202 !> \param nproc ...
1203 !> \param zmpi1 ...
1204 !> \param zw ...
1205 ! **************************************************************************************************
1206  SUBROUTINE s_mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw)
1207  INTEGER, INTENT(in) :: j3, nfft
1208  INTEGER, INTENT(inout) :: jp2stb, j2stb
1209  INTEGER, INTENT(in) :: lot, n1, md2, nd3, nproc
1210  REAL(kind=dp), &
1211  DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
1212  INTENT(in) :: zmpi1
1213  REAL(kind=dp), DIMENSION(2, lot, n1), &
1214  INTENT(inout) :: zw
1215 
1216  INTEGER :: i1, j2, jp2, mfft
1217 
1218  mfft = 0
1219  DO jp2 = jp2stb, nproc
1220  DO j2 = j2stb, md2/nproc
1221  mfft = mfft + 1
1222  IF (mfft .GT. nfft) THEN
1223  jp2stb = jp2
1224  j2stb = j2
1225  RETURN
1226  END IF
1227  DO i1 = 1, n1
1228  zw(1, mfft, i1) = zmpi1(1, i1, j2, j3, jp2)
1229  zw(2, mfft, i1) = zmpi1(2, i1, j2, j3, jp2)
1230  END DO
1231  END DO
1232  j2stb = 1
1233  END DO
1234  END SUBROUTINE s_mpiswitch_upcorn
1235 
1236 ! **************************************************************************************************
1237 !> \brief ...
1238 !> \param nfft ...
1239 !> \param n2 ...
1240 !> \param lot ...
1241 !> \param n1 ...
1242 !> \param lzt ...
1243 !> \param zt ...
1244 !> \param zw ...
1245 ! **************************************************************************************************
1246  SUBROUTINE s_switch_upcorn(nfft, n2, lot, n1, lzt, zt, zw)
1247  INTEGER, INTENT(in) :: nfft, n2, lot, n1, lzt
1248  REAL(kind=dp), DIMENSION(2, lzt, n1), INTENT(in) :: zt
1249  REAL(kind=dp), DIMENSION(2, lot, n2), &
1250  INTENT(inout) :: zw
1251 
1252  INTEGER :: i, j
1253 
1254  DO j = 1, nfft
1255  DO i = 1, n2
1256  zw(1, j, i) = zt(1, i, j)
1257  zw(2, j, i) = zt(2, i, j)
1258  END DO
1259  END DO
1260  END SUBROUTINE s_switch_upcorn
1261 
1262 ! **************************************************************************************************
1263 !> \brief ...
1264 !> \param nfft ...
1265 !> \param n2 ...
1266 !> \param lot ...
1267 !> \param n1 ...
1268 !> \param lzt ...
1269 !> \param zw ...
1270 !> \param zt ...
1271 ! **************************************************************************************************
1272  SUBROUTINE s_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw, zt)
1273  INTEGER, INTENT(in) :: nfft, n2, lot, n1, lzt
1274  REAL(kind=dp), DIMENSION(2, lot, n2), INTENT(in) :: zw
1275  REAL(kind=dp), DIMENSION(2, lzt, n1), &
1276  INTENT(inout) :: zt
1277 
1278  INTEGER :: i, j
1279 
1280  DO j = 1, nfft
1281  DO i = 1, n2
1282  zt(1, i, j) = zw(1, j, i)
1283  zt(2, i, j) = zw(2, j, i)
1284  END DO
1285  END DO
1286  END SUBROUTINE s_unswitch_downcorn
1287 
1288 ! **************************************************************************************************
1289 !> \brief ...
1290 !> \param j3 ...
1291 !> \param nfft ...
1292 !> \param Jp2stf ...
1293 !> \param J2stf ...
1294 !> \param lot ...
1295 !> \param n1 ...
1296 !> \param md2 ...
1297 !> \param nd3 ...
1298 !> \param nproc ...
1299 !> \param zw ...
1300 !> \param zmpi1 ...
1301 ! **************************************************************************************************
1302  SUBROUTINE s_unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw, zmpi1)
1303  INTEGER, INTENT(in) :: j3, nfft
1304  INTEGER, INTENT(inout) :: jp2stf, j2stf
1305  INTEGER, INTENT(in) :: lot, n1, md2, nd3, nproc
1306  REAL(kind=dp), DIMENSION(2, lot, n1), INTENT(in) :: zw
1307  REAL(kind=dp), &
1308  DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
1309  INTENT(inout) :: zmpi1
1310 
1311  INTEGER :: i1, j2, jp2, mfft
1312 
1313  mfft = 0
1314  DO jp2 = jp2stf, nproc
1315  DO j2 = j2stf, md2/nproc
1316  mfft = mfft + 1
1317  IF (mfft .GT. nfft) THEN
1318  jp2stf = jp2
1319  j2stf = j2
1320  RETURN
1321  END IF
1322  DO i1 = 1, n1
1323  zmpi1(1, i1, j2, j3, jp2) = zw(1, mfft, i1)
1324  zmpi1(2, i1, j2, j3, jp2) = zw(2, mfft, i1)
1325  END DO
1326  END DO
1327  j2stf = 1
1328  END DO
1329  END SUBROUTINE s_unmpiswitch_downcorn
1330 
1331 ! **************************************************************************************************
1332 !> \brief (Based on suitable modifications of S.Goedecker routines)
1333 !> Restore data into output array, calculating in the meanwhile
1334 !> Hartree energy of the potential
1335 !> \param md1 Dimensions of the undistributed part of the real grid
1336 !> \param md3 Dimensions of the undistributed part of the real grid
1337 !> \param lot ...
1338 !> \param nfft number of planes
1339 !> \param n3 (twice the) dimension of the last FFTtransform.
1340 !> \param zw FFT work array
1341 !> \param zf Original distributed density as well as
1342 !> Distributed solution of the poisson equation (inout)
1343 !> \param scal Needed to achieve unitarity and correct dimensions
1344 !> \date February 2006
1345 !> \author S. Goedecker, L. Genovese
1346 !> \note Assuming that high frequencies are in the corners
1347 !> and that n3 is multiple of 4
1348 !>
1349 !> RESTRICTIONS on USAGE
1350 !> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
1351 !> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
1352 !> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
1353 !> This file is distributed under the terms of the
1354 !> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
1355 ! **************************************************************************************************
1356  SUBROUTINE unfill_downcorn(md1, md3, lot, nfft, n3, zw, zf, scal)
1357  INTEGER, INTENT(in) :: md1, md3, lot, nfft, n3
1358  REAL(kind=dp), DIMENSION(2, lot, n3/2), INTENT(in) :: zw
1359  REAL(kind=dp), DIMENSION(md1, md3), INTENT(inout) :: zf
1360  REAL(kind=dp), INTENT(in) :: scal
1361 
1362  INTEGER :: i1, i3
1363  REAL(kind=dp) :: pot1
1364 
1365  DO i3 = 1, n3/4
1366  DO i1 = 1, nfft
1367  pot1 = scal*zw(1, i1, i3)
1368  !ehartreetmp =ehartreetmp + pot1* zf(i1,2*i3-1)
1369  zf(i1, 2*i3 - 1) = pot1
1370  pot1 = scal*zw(2, i1, i3)
1371  !ehartreetmp =ehartreetmp + pot1* zf(i1,2*i3)
1372  zf(i1, 2*i3) = pot1
1373  END DO
1374  END DO
1375  END SUBROUTINE unfill_downcorn
1376 
1377 ! **************************************************************************************************
1378 !> \brief ...
1379 !> \param md1 ...
1380 !> \param md3 ...
1381 !> \param lot ...
1382 !> \param nfft ...
1383 !> \param n3 ...
1384 !> \param zf ...
1385 !> \param zw ...
1386 ! **************************************************************************************************
1387  SUBROUTINE halfill_upcorn(md1, md3, lot, nfft, n3, zf, zw)
1388  INTEGER :: md1, md3, lot, nfft, n3
1389  REAL(kind=dp) :: zf(md1, md3), zw(2, lot, n3/2)
1390 
1391  INTEGER :: i1, i3
1392 
1393  DO i3 = 1, n3/4
1394  ! WARNING: Assuming that high frequencies are in the corners
1395  ! and that n3 is multiple of 4
1396  !in principle we can relax this condition
1397  DO i1 = 1, nfft
1398  zw(1, i1, i3) = 0._dp
1399  zw(2, i1, i3) = 0._dp
1400  END DO
1401  END DO
1402  DO i3 = n3/4 + 1, n3/2
1403  DO i1 = 1, nfft
1404  zw(1, i1, i3) = zf(i1, 2*i3 - 1 - n3/2)
1405  zw(2, i1, i3) = zf(i1, 2*i3 - n3/2)
1406  END DO
1407  END DO
1408 
1409  END SUBROUTINE halfill_upcorn
1410 
1411 ! **************************************************************************************************
1412 !> \brief (Based on suitable modifications of S.Goedecker routines)
1413 !> Assign the correct planes to the work array zmpi2
1414 !> in order to prepare for interprocessor data transposition.
1415 !> In the meanwhile, it unpacks the data of the HalFFT in order to prepare for
1416 !> multiplication with the kernel
1417 !> \param i1 Starting points of the plane and number of remaining lines
1418 !> \param j2 Starting points of the plane and number of remaining lines
1419 !> \param lot Starting points of the plane and number of remaining lines
1420 !> \param nfft Starting points of the plane and number of remaining lines
1421 !> \param n1 logical dimension of the FFT transform, reference for work arrays
1422 !> \param n3 logical dimension of the FFT transform, reference for work arrays
1423 !> \param md2 Dimensions of real grid
1424 !> \param nproc ...
1425 !> \param nd3 Dimensions of the kernel
1426 !> \param zw Work array (input)
1427 !> \param zmpi2 Work array for multiprocessor manipulation (output)
1428 !> \param cosinarr Array of the phases needed for unpacking
1429 !> \date February 2006
1430 !> \author S. Goedecker, L. Genovese
1431 !> \note
1432 !> RESTRICTIONS on USAGE
1433 !> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
1434 !> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
1435 !> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
1436 !> This file is distributed under the terms of the
1437 !> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
1438 ! **************************************************************************************************
1439  SUBROUTINE scramble_unpack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw, zmpi2, cosinarr)
1440  INTEGER, INTENT(in) :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
1441  nd3
1442  REAL(kind=dp), DIMENSION(2, lot, n3/2), INTENT(in) :: zw
1443  REAL(kind=dp), DIMENSION(2, n1, md2/nproc, nd3), &
1444  INTENT(inout) :: zmpi2
1445  REAL(kind=dp), DIMENSION(2, n3/2), INTENT(in) :: cosinarr
1446 
1447  INTEGER :: i, i3, ind1, ind2
1448  REAL(kind=dp) :: a, b, c, cp, d, fei, fer, fi, foi, for, &
1449  fr, sp
1450 
1451 !case i3=1 and i3=n3/2+1
1452 
1453  DO i = 0, nfft - 1
1454  a = zw(1, i + 1, 1)
1455  b = zw(2, i + 1, 1)
1456  zmpi2(1, i1 + i, j2, 1) = a + b
1457  zmpi2(2, i1 + i, j2, 1) = 0._dp
1458  zmpi2(1, i1 + i, j2, n3/2 + 1) = a - b
1459  zmpi2(2, i1 + i, j2, n3/2 + 1) = 0._dp
1460  END DO
1461  !case 2<=i3<=n3/2
1462  DO i3 = 2, n3/2
1463  ind1 = i3
1464  ind2 = n3/2 - i3 + 2
1465  cp = cosinarr(1, i3)
1466  sp = cosinarr(2, i3)
1467  DO i = 0, nfft - 1
1468  a = zw(1, i + 1, ind1)
1469  b = zw(2, i + 1, ind1)
1470  c = zw(1, i + 1, ind2)
1471  d = zw(2, i + 1, ind2)
1472  fer = .5_dp*(a + c)
1473  fei = .5_dp*(b - d)
1474  for = .5_dp*(a - c)
1475  foi = .5_dp*(b + d)
1476  fr = fer + cp*foi - sp*for
1477  fi = fei - cp*for - sp*foi
1478  zmpi2(1, i1 + i, j2, ind1) = fr
1479  zmpi2(2, i1 + i, j2, ind1) = fi
1480  END DO
1481  END DO
1482 
1483  END SUBROUTINE scramble_unpack
1484 
1485 ! **************************************************************************************************
1486 !> \brief (Based on suitable modifications of S.Goedecker routines)
1487 !> Insert the correct planes of the work array zmpi2
1488 !> in order to prepare for backward FFT transform
1489 !> In the meanwhile, it packs the data in order to be transformed with the HalFFT
1490 !> procedure
1491 !> \param i1 Starting points of the plane and number of remaining lines
1492 !> \param j2 Starting points of the plane and number of remaining lines
1493 !> \param lot Starting points of the plane and number of remaining lines
1494 !> \param nfft Starting points of the plane and number of remaining lines
1495 !> \param n1 logical dimension of the FFT transform, reference for work arrays
1496 !> \param n3 logical dimension of the FFT transform, reference for work arrays
1497 !> \param md2 Dimensions of real grid
1498 !> \param nproc ...
1499 !> \param nd3 Dimensions of the kernel
1500 !> \param zmpi2 Work array for multiprocessor manipulation (output)
1501 !> \param zw Work array (inout)
1502 !> \param cosinarr Array of the phases needed for packing
1503 !> \date February 2006
1504 !> \author S. Goedecker, L. Genovese
1505 !> \note
1506 !> RESTRICTIONS on USAGE
1507 !> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
1508 !> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
1509 !> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
1510 !> This file is distributed under the terms of the
1511 !> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
1512 ! **************************************************************************************************
1513  SUBROUTINE unscramble_pack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw, cosinarr)
1514  INTEGER, INTENT(in) :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
1515  nd3
1516  REAL(kind=dp), DIMENSION(2, n1, md2/nproc, nd3), &
1517  INTENT(in) :: zmpi2
1518  REAL(kind=dp), DIMENSION(2, lot, n3/2), &
1519  INTENT(inout) :: zw
1520  REAL(kind=dp), DIMENSION(2, n3/2), INTENT(in) :: cosinarr
1521 
1522  INTEGER :: i, i3, inda, indb
1523  REAL(kind=dp) :: a, b, c, cp, d, ie, ih, io, re, rh, ro, &
1524  sp
1525 
1526  DO i3 = 1, n3/2
1527  inda = i3
1528  indb = n3/2 + 2 - i3
1529  cp = cosinarr(1, i3)
1530  sp = cosinarr(2, i3)
1531  DO i = 0, nfft - 1
1532  a = zmpi2(1, i1 + i, j2, inda)
1533  b = zmpi2(2, i1 + i, j2, inda)
1534  c = zmpi2(1, i1 + i, j2, indb)
1535  d = -zmpi2(2, i1 + i, j2, indb)
1536  re = (a + c)
1537  ie = (b + d)
1538  ro = (a - c)*cp - (b - d)*sp
1539  io = (a - c)*sp + (b - d)*cp
1540  rh = re - io
1541  ih = ie + ro
1542  zw(1, i + 1, inda) = rh
1543  zw(2, i + 1, inda) = ih
1544  END DO
1545  END DO
1546 
1547  END SUBROUTINE unscramble_pack
1548 
1549 ! **************************************************************************************************
1550 !> \brief (Based on suitable modifications of S.Goedecker routines)
1551 !> Applies the local FFT space Kernel to the density in Real space.
1552 !> Calculates also the LDA exchange-correlation terms
1553 !> \param n1 logical dimension of the transform.
1554 !> \param n2 logical dimension of the transform.
1555 !> \param n3 logical dimension of the transform.
1556 !> \param nd1 Dimension of POT
1557 !> \param nd2 Dimension of POT
1558 !> \param nd3 Dimension of POT
1559 !> \param md1 Dimension of ZF
1560 !> \param md2 Dimension of ZF
1561 !> \param md3 Dimension of ZF
1562 !> \param nproc number of processors used as returned by MPI_COMM_SIZE
1563 !> \param iproc [0:nproc-1] number of processor as returned by MPI_COMM_RANK
1564 !> \param pot Kernel, only the distributed part (REAL)
1565 !> POT(i1,i2,i3)
1566 !> i1=1,nd1 , i2=1,nd2 , i3=1,nd3/nproc
1567 !> \param zf Density (input/output)
1568 !> ZF(i1,i3,i2)
1569 !> i1=1,md1 , i2=1,md2/nproc , i3=1,md3
1570 !> \param scal factor of renormalization of the FFT in order to acheve unitarity
1571 !> and the correct dimension
1572 !> \param mpi_group ...
1573 !> \date February 2006
1574 !> \author S. Goedecker, L. Genovese
1575 !> \note As transform lengths
1576 !> most products of the prime factors 2,3,5 are allowed.
1577 !> The detailed table with allowed transform lengths can
1578 !> be found in subroutine CTRIG
1579 !> \note
1580 !> RESTRICTIONS on USAGE
1581 !> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
1582 !> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
1583 !> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
1584 !> This file is distributed under the terms of the
1585 !> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
1586 ! **************************************************************************************************
1587  SUBROUTINE f_poissonsolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, pot, zf, &
1588  scal, mpi_group)
1589  INTEGER, INTENT(in) :: n1, n2, n3, nd1, nd2, nd3, md1, md2, &
1590  md3, nproc, iproc
1591  REAL(kind=dp), DIMENSION(nd1, nd2, nd3/nproc), &
1592  INTENT(in) :: pot
1593  REAL(kind=dp), DIMENSION(md1, md3, md2/nproc), &
1594  INTENT(inout) :: zf
1595  REAL(kind=dp), INTENT(in) :: scal
1596 
1597  CLASS(mp_comm_type), INTENT(in) :: mpi_group
1598 
1599  INTEGER, PARAMETER :: ncache_optimal = 8*1024
1600 
1601  INTEGER :: i, i1, i3, ic1, ic2, ic3, inzee, j, j2, &
1602  j2stb, j2stf, j3, jp2stb, jp2stf, lot, &
1603  lzt, ma, mb, ncache, nfft
1604  INTEGER, ALLOCATABLE, DIMENSION(:) :: after1, after2, after3, before1, &
1605  before2, before3, now1, now2, now3
1606  REAL(kind=dp) :: twopion
1607  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: btrig1, btrig2, btrig3, cosinarr, &
1608  ftrig1, ftrig2, ftrig3
1609  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: zt, zw
1610  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :, :) :: zmpi2
1611  REAL(kind=dp), ALLOCATABLE, &
1612  DIMENSION(:, :, :, :, :) :: zmpi1
1613 
1614  IF (mod(n1, 2) .NE. 0) cpabort("Parallel convolution:ERROR:n1")
1615  IF (mod(n2, 2) .NE. 0) cpabort("Parallel convolution:ERROR:n2")
1616  IF (mod(n3, 2) .NE. 0) cpabort("Parallel convolution:ERROR:n3")
1617  IF (nd1 .LT. n1/2 + 1) cpabort("Parallel convolution:ERROR:nd1")
1618  IF (nd2 .LT. n2/2 + 1) cpabort("Parallel convolution:ERROR:nd2")
1619  IF (nd3 .LT. n3/2 + 1) cpabort("Parallel convolution:ERROR:nd3")
1620  IF (md1 .LT. n1/2) cpabort("Parallel convolution:ERROR:md1")
1621  IF (md2 .LT. n2/2) cpabort("Parallel convolution:ERROR:md2")
1622  IF (md3 .LT. n3/2) cpabort("Parallel convolution:ERROR:md3")
1623  IF (mod(nd3, nproc) .NE. 0) cpabort("Parallel convolution:ERROR:nd3")
1624  IF (mod(md2, nproc) .NE. 0) cpabort("Parallel convolution:ERROR:md2")
1625 
1626  !defining work arrays dimensions
1627 
1628  ncache = ncache_optimal
1629  IF (ncache <= max(n1, n2, n3/2)*4) ncache = max(n1, n2, n3/2)*4
1630  lzt = n2/2
1631  IF (mod(n2/2, 2) .EQ. 0) lzt = lzt + 1
1632  IF (mod(n2/2, 4) .EQ. 0) lzt = lzt + 1
1633 
1634  !Allocations
1635  ALLOCATE (btrig1(2, ctrig_length))
1636  ALLOCATE (ftrig1(2, ctrig_length))
1637  ALLOCATE (after1(7))
1638  ALLOCATE (now1(7))
1639  ALLOCATE (before1(7))
1640  ALLOCATE (btrig2(2, ctrig_length))
1641  ALLOCATE (ftrig2(2, ctrig_length))
1642  ALLOCATE (after2(7))
1643  ALLOCATE (now2(7))
1644  ALLOCATE (before2(7))
1645  ALLOCATE (btrig3(2, ctrig_length))
1646  ALLOCATE (ftrig3(2, ctrig_length))
1647  ALLOCATE (after3(7))
1648  ALLOCATE (now3(7))
1649  ALLOCATE (before3(7))
1650  ALLOCATE (zw(2, ncache/4, 2))
1651  ALLOCATE (zt(2, lzt, n1))
1652  ALLOCATE (zmpi2(2, n1, md2/nproc, nd3))
1653  zmpi2 = 1.0e99_dp ! init mpi'ed buffers to silence warnings under valgrind
1654  ALLOCATE (cosinarr(2, n3/2))
1655  IF (nproc .GT. 1) ALLOCATE (zmpi1(2, n1, md2/nproc, nd3/nproc, nproc))
1656 
1657  !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
1658  CALL ctrig(n3/2, btrig3, after3, before3, now3, 1, ic3)
1659  CALL ctrig(n1, btrig1, after1, before1, now1, 1, ic1)
1660  CALL ctrig(n2, btrig2, after2, before2, now2, 1, ic2)
1661  DO j = 1, n1
1662  ftrig1(1, j) = btrig1(1, j)
1663  ftrig1(2, j) = -btrig1(2, j)
1664  END DO
1665  DO j = 1, n2
1666  ftrig2(1, j) = btrig2(1, j)
1667  ftrig2(2, j) = -btrig2(2, j)
1668  END DO
1669  DO j = 1, n3
1670  ftrig3(1, j) = btrig3(1, j)
1671  ftrig3(2, j) = -btrig3(2, j)
1672  END DO
1673 
1674  !Calculating array of phases for HalFFT decoding
1675  twopion = 8._dp*atan(1._dp)/real(n3, kind=dp)
1676  DO i3 = 1, n3/2
1677  cosinarr(1, i3) = cos(twopion*(i3 - 1))
1678  cosinarr(2, i3) = -sin(twopion*(i3 - 1))
1679  END DO
1680 
1681  ! transform along z axis
1682  lot = ncache/(2*n3)
1683  IF (lot .LT. 1) THEN
1684  WRITE (*, *) &
1685  'convolxc_on:ncache has to be enlarged to be able to hold at'// &
1686  'least one 1-d FFT of this size even though this will'// &
1687  'reduce the performance for shorter transform lengths'
1688  cpabort("")
1689  END IF
1690 
1691  DO j2 = 1, md2/nproc
1692  !this condition ensures that we manage only the interesting part for the FFT
1693  IF (iproc*(md2/nproc) + j2 .LE. n2/2) THEN
1694  DO i1 = 1, (n1/2), lot
1695  ma = i1
1696  mb = min(i1 + (lot - 1), (n1/2))
1697  nfft = mb - ma + 1
1698 
1699  !inserting real data into complex array of half length
1700  CALL halfill_upcorn(md1, md3, lot, nfft, n3, zf(i1, 1, j2), zw(1, 1, 1))
1701 
1702  !performing FFT
1703  !input: I1,I3,J2,(Jp2)
1704  inzee = 1
1705  DO i = 1, ic3
1706  CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1707  btrig3, after3(i), now3(i), before3(i), 1)
1708  inzee = 3 - inzee
1709  END DO
1710  !output: I1,i3,J2,(Jp2)
1711 
1712  !unpacking FFT in order to restore correct result,
1713  !while exchanging components
1714  !input: I1,i3,J2,(Jp2)
1715  CALL scramble_unpack(i1, j2, lot, nfft, n1/2, n3, md2, nproc, nd3, zw(1, 1, inzee), zmpi2, cosinarr)
1716  !output: I1,J2,i3,(Jp2)
1717  END DO
1718  END IF
1719  END DO
1720 
1721  !Interprocessor data transposition
1722  !input: I1,J2,j3,jp3,(Jp2)
1723  IF (nproc .GT. 1) THEN
1724  !communication scheduling
1725  CALL mpi_group%alltoall(zmpi2, zmpi1, n1*(md2/nproc)*(nd3/nproc))
1726  END IF
1727  !output: I1,J2,j3,Jp2,(jp3)
1728 
1729  !now each process perform complete convolution of its planes
1730  DO j3 = 1, nd3/nproc
1731  !this condition ensures that we manage only the interesting part for the FFT
1732  IF (iproc*(nd3/nproc) + j3 .LE. n3/2 + 1) THEN
1733  jp2stb = 1
1734  j2stb = 1
1735  jp2stf = 1
1736  j2stf = 1
1737 
1738  ! transform along x axis
1739  lot = ncache/(4*n1)
1740  IF (lot .LT. 1) THEN
1741  WRITE (*, *) &
1742  'convolxc_on:ncache has to be enlarged to be able to hold at'// &
1743  'least one 1-d FFT of this size even though this will'// &
1744  'reduce the performance for shorter transform lengths'
1745  cpabort("")
1746  END IF
1747 
1748  DO j = 1, n2/2, lot
1749  ma = j
1750  mb = min(j + (lot - 1), n2/2)
1751  nfft = mb - ma + 1
1752 
1753  !reverse index ordering, leaving the planes to be transformed at the end
1754  !input: I1,J2,j3,Jp2,(jp3)
1755  IF (nproc .EQ. 1) THEN
1756  CALL mpiswitch_upcorn(j3, nfft, jp2stb, j2stb, lot, n1, md2, nd3, nproc, zmpi2, zw(1, 1, 1))
1757  ELSE
1758  CALL mpiswitch_upcorn(j3, nfft, jp2stb, j2stb, lot, n1, md2, nd3, nproc, zmpi1, zw(1, 1, 1))
1759  END IF
1760  !output: J2,Jp2,I1,j3,(jp3)
1761 
1762  !performing FFT
1763  !input: I2,I1,j3,(jp3)
1764  inzee = 1
1765  DO i = 1, ic1 - 1
1766  CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1767  btrig1, after1(i), now1(i), before1(i), 1)
1768  inzee = 3 - inzee
1769  END DO
1770 
1771  !storing the last step into zt array
1772  i = ic1
1773  CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
1774  btrig1, after1(i), now1(i), before1(i), 1)
1775  !output: I2,i1,j3,(jp3)
1776  END DO
1777 
1778  !transform along y axis
1779  lot = ncache/(4*n2)
1780  IF (lot .LT. 1) THEN
1781  WRITE (*, *) &
1782  'convolxc_on:ncache has to be enlarged to be able to hold at'// &
1783  'least one 1-d FFT of this size even though this will'// &
1784  'reduce the performance for shorter transform lengths'
1785  cpabort("")
1786  END IF
1787 
1788  DO j = 1, n1, lot
1789  ma = j
1790  mb = min(j + (lot - 1), n1)
1791  nfft = mb - ma + 1
1792 
1793  !reverse ordering
1794  !input: I2,i1,j3,(jp3)
1795  CALL switch_upcorn(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
1796  !output: i1,I2,j3,(jp3)
1797 
1798  !performing FFT
1799  !input: i1,I2,j3,(jp3)
1800  inzee = 1
1801  DO i = 1, ic2
1802  CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1803  btrig2, after2(i), now2(i), before2(i), 1)
1804  inzee = 3 - inzee
1805  END DO
1806  !output: i1,i2,j3,(jp3)
1807 
1808  !Multiply with kernel in fourier space
1809  CALL multkernel(nd1, nd2, n1, n2, lot, nfft, j, pot(1, 1, j3), zw(1, 1, inzee))
1810 
1811  !TRANSFORM BACK IN REAL SPACE
1812 
1813  !transform along y axis
1814  !input: i1,i2,j3,(jp3)
1815  DO i = 1, ic2
1816  CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1817  ftrig2, after2(i), now2(i), before2(i), -1)
1818  inzee = 3 - inzee
1819  END DO
1820 
1821  !reverse ordering
1822  !input: i1,I2,j3,(jp3)
1823  CALL unswitch_downcorn(nfft, n2, lot, n1, lzt, zw(1, 1, inzee), zt(1, 1, j))
1824  !output: I2,i1,j3,(jp3)
1825  END DO
1826 
1827  !transform along x axis
1828  !input: I2,i1,j3,(jp3)
1829  lot = ncache/(4*n1)
1830  DO j = 1, n2/2, lot
1831  ma = j
1832  mb = min(j + (lot - 1), n2/2)
1833  nfft = mb - ma + 1
1834 
1835  !performing FFT
1836  i = 1
1837  CALL fftstp(lzt, nfft, n1, lot, n1, zt(1, j, 1), zw(1, 1, 1), &
1838  ftrig1, after1(i), now1(i), before1(i), -1)
1839 
1840  inzee = 1
1841  DO i = 2, ic1
1842  CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1843  ftrig1, after1(i), now1(i), before1(i), -1)
1844  inzee = 3 - inzee
1845  END DO
1846  !output: I2,I1,j3,(jp3)
1847 
1848  !reverse ordering
1849  !input: J2,Jp2,I1,j3,(jp3)
1850  IF (nproc .EQ. 1) THEN
1851  CALL unmpiswitch_downcorn(j3, nfft, jp2stf, j2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi2)
1852  ELSE
1853  CALL unmpiswitch_downcorn(j3, nfft, jp2stf, j2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi1)
1854  END IF
1855  ! output: I1,J2,j3,Jp2,(jp3)
1856  END DO
1857  END IF
1858  END DO
1859 
1860  !Interprocessor data transposition
1861  !input: I1,J2,j3,Jp2,(jp3)
1862  IF (nproc .GT. 1) THEN
1863  !communication scheduling
1864  CALL mpi_group%alltoall(zmpi1, zmpi2, n1*(md2/nproc)*(nd3/nproc))
1865  !output: I1,J2,j3,jp3,(Jp2)
1866  END IF
1867 
1868  !transform along z axis
1869  !input: I1,J2,i3,(Jp2)
1870  lot = ncache/(2*n3)
1871  DO j2 = 1, md2/nproc
1872  !this condition ensures that we manage only the interesting part for the FFT
1873  IF (iproc*(md2/nproc) + j2 .LE. n2/2) THEN
1874  DO i1 = 1, (n1/2), lot
1875  ma = i1
1876  mb = min(i1 + (lot - 1), (n1/2))
1877  nfft = mb - ma + 1
1878 
1879  !reverse ordering and repack the FFT data in order to be backward HalFFT transformed
1880  !input: I1,J2,i3,(Jp2)
1881  CALL unscramble_pack(i1, j2, lot, nfft, n1/2, n3, md2, nproc, nd3, zmpi2, zw(1, 1, 1), cosinarr)
1882  !output: I1,i3,J2,(Jp2)
1883 
1884  !performing FFT
1885  !input: I1,i3,J2,(Jp2)
1886  inzee = 1
1887  DO i = 1, ic3
1888  CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1889  ftrig3, after3(i), now3(i), before3(i), -1)
1890  inzee = 3 - inzee
1891  END DO
1892  !output: I1,I3,J2,(Jp2)
1893 
1894  !calculates the exchange correlation terms locally and rebuild the output array
1895  CALL unfill_downcorn(md1, md3, lot, nfft, n3, zw(1, 1, inzee), zf(i1, 1, j2) &
1896  , scal) !,ehartreetmp)
1897 
1898  !integrate local pieces together
1899  !ehartree=ehartree+0.5_dp*ehartreetmp*hgrid**3
1900  END DO
1901  END IF
1902  END DO
1903 
1904  !De-allocations
1905  DEALLOCATE (btrig1)
1906  DEALLOCATE (ftrig1)
1907  DEALLOCATE (after1)
1908  DEALLOCATE (now1)
1909  DEALLOCATE (before1)
1910  DEALLOCATE (btrig2)
1911  DEALLOCATE (ftrig2)
1912  DEALLOCATE (after2)
1913  DEALLOCATE (now2)
1914  DEALLOCATE (before2)
1915  DEALLOCATE (btrig3)
1916  DEALLOCATE (ftrig3)
1917  DEALLOCATE (after3)
1918  DEALLOCATE (now3)
1919  DEALLOCATE (before3)
1920  DEALLOCATE (zmpi2)
1921  DEALLOCATE (zw)
1922  DEALLOCATE (zt)
1923  DEALLOCATE (cosinarr)
1924  IF (nproc .GT. 1) DEALLOCATE (zmpi1)
1925 
1926  END SUBROUTINE f_poissonsolver
1927 
1928 ! **************************************************************************************************
1929 !> \brief ...
1930 !> \param nfft ...
1931 !> \param n2 ...
1932 !> \param lot ...
1933 !> \param n1 ...
1934 !> \param lzt ...
1935 !> \param zt ...
1936 !> \param zw ...
1937 ! **************************************************************************************************
1938  SUBROUTINE switch_upcorn(nfft, n2, lot, n1, lzt, zt, zw)
1939  INTEGER :: nfft, n2, lot, n1, lzt
1940  REAL(kind=dp) :: zt(2, lzt, n1), zw(2, lot, n2)
1941 
1942  INTEGER :: i, j
1943 
1944 ! WARNING: Assuming that high frequencies are in the corners
1945 ! and that n2 is multiple of 2
1946 ! Low frequencies
1947 
1948  DO j = 1, nfft
1949  DO i = n2/2 + 1, n2
1950  zw(1, j, i) = zt(1, i - n2/2, j)
1951  zw(2, j, i) = zt(2, i - n2/2, j)
1952  END DO
1953  END DO
1954  ! High frequencies
1955  DO i = 1, n2/2
1956  DO j = 1, nfft
1957  zw(1, j, i) = 0._dp
1958  zw(2, j, i) = 0._dp
1959  END DO
1960  END DO
1961  END SUBROUTINE switch_upcorn
1962 
1963 ! **************************************************************************************************
1964 !> \brief ...
1965 !> \param j3 ...
1966 !> \param nfft ...
1967 !> \param Jp2stb ...
1968 !> \param J2stb ...
1969 !> \param lot ...
1970 !> \param n1 ...
1971 !> \param md2 ...
1972 !> \param nd3 ...
1973 !> \param nproc ...
1974 !> \param zmpi1 ...
1975 !> \param zw ...
1976 ! **************************************************************************************************
1977  SUBROUTINE mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw)
1978  INTEGER :: j3, nfft, jp2stb, j2stb, lot, n1, md2, &
1979  nd3, nproc
1980  REAL(kind=dp) :: zmpi1(2, n1/2, md2/nproc, nd3/nproc, nproc), zw(2, lot, n1)
1981 
1982  INTEGER :: i1, j2, jp2, mfft
1983 
1984 ! WARNING: Assuming that high frequencies are in the corners
1985 ! and that n1 is multiple of 2
1986 
1987  mfft = 0
1988  main: DO jp2 = jp2stb, nproc
1989  DO j2 = j2stb, md2/nproc
1990  mfft = mfft + 1
1991  IF (mfft .GT. nfft) THEN
1992  jp2stb = jp2
1993  j2stb = j2
1994  EXIT main
1995  END IF
1996  DO i1 = 1, n1/2
1997  zw(1, mfft, i1) = 0._dp
1998  zw(2, mfft, i1) = 0._dp
1999  END DO
2000  DO i1 = n1/2 + 1, n1
2001  zw(1, mfft, i1) = zmpi1(1, i1 - n1/2, j2, j3, jp2)
2002  zw(2, mfft, i1) = zmpi1(2, i1 - n1/2, j2, j3, jp2)
2003  END DO
2004  END DO
2005  j2stb = 1
2006  END DO main
2007  END SUBROUTINE mpiswitch_upcorn
2008 
2009 ! **************************************************************************************************
2010 !> \brief ...
2011 !> \param nfft ...
2012 !> \param n2 ...
2013 !> \param lot ...
2014 !> \param n1 ...
2015 !> \param lzt ...
2016 !> \param zw ...
2017 !> \param zt ...
2018 ! **************************************************************************************************
2019  SUBROUTINE unswitch_downcorn(nfft, n2, lot, n1, lzt, zw, zt)
2020  INTEGER :: nfft, n2, lot, n1, lzt
2021  REAL(kind=dp) :: zw(2, lot, n2), zt(2, lzt, n1)
2022 
2023  INTEGER :: i, j
2024 
2025 ! WARNING: Assuming that high frequencies are in the corners
2026 ! and that n2 is multiple of 2
2027 ! Low frequencies
2028 
2029  DO j = 1, nfft
2030  DO i = 1, n2/2
2031  zt(1, i, j) = zw(1, j, i)
2032  zt(2, i, j) = zw(2, j, i)
2033  END DO
2034  END DO
2035  RETURN
2036  END SUBROUTINE unswitch_downcorn
2037 
2038 ! **************************************************************************************************
2039 !> \brief ...
2040 !> \param j3 ...
2041 !> \param nfft ...
2042 !> \param Jp2stf ...
2043 !> \param J2stf ...
2044 !> \param lot ...
2045 !> \param n1 ...
2046 !> \param md2 ...
2047 !> \param nd3 ...
2048 !> \param nproc ...
2049 !> \param zw ...
2050 !> \param zmpi1 ...
2051 ! **************************************************************************************************
2052  SUBROUTINE unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw, zmpi1)
2053  INTEGER :: j3, nfft, jp2stf, j2stf, lot, n1, md2, &
2054  nd3, nproc
2055  REAL(kind=dp) :: zw(2, lot, n1), zmpi1(2, n1/2, md2/nproc, nd3/nproc, nproc)
2056 
2057  INTEGER :: i1, j2, jp2, mfft
2058 
2059 ! WARNING: Assuming that high frequencies are in the corners
2060 ! and that n1 is multiple of 2
2061 
2062  mfft = 0
2063  main: DO jp2 = jp2stf, nproc
2064  DO j2 = j2stf, md2/nproc
2065  mfft = mfft + 1
2066  IF (mfft .GT. nfft) THEN
2067  jp2stf = jp2
2068  j2stf = j2
2069  EXIT main
2070  END IF
2071  DO i1 = 1, n1/2
2072  zmpi1(1, i1, j2, j3, jp2) = zw(1, mfft, i1)
2073  zmpi1(2, i1, j2, j3, jp2) = zw(2, mfft, i1)
2074  END DO
2075  END DO
2076  j2stf = 1
2077  END DO main
2078  END SUBROUTINE unmpiswitch_downcorn
2079 
2080 ! **************************************************************************************************
2081 !> \brief (Based on suitable modifications of S.Goedecker routines)
2082 !> Restore data into output array, calculating in the meanwhile
2083 !> Hartree energy of the potential
2084 !> \param md1 Dimensions of the undistributed part of the real grid
2085 !> \param md3 Dimensions of the undistributed part of the real grid
2086 !> \param lot ...
2087 !> \param nfft number of planes
2088 !> \param n3 (twice the) dimension of the last FFTtransform.
2089 !> \param zw FFT work array
2090 !> \param zf Original distributed density as well as
2091 !> Distributed solution of the poisson equation (inout)
2092 !> \param scal Needed to achieve unitarity and correct dimensions
2093 !> \param ehartreetmp Hartree energy
2094 !> \date February 2006
2095 !> \author S. Goedecker, L. Genovese
2096 !> \note Assuming that high frequencies are in the corners
2097 !> and that n3 is multiple of 4
2098 !>
2099 !> RESTRICTIONS on USAGE
2100 !> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
2101 !> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
2102 !> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
2103 !> This file is distributed under the terms of the
2104 !> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
2105 ! **************************************************************************************************
2106  SUBROUTINE f_unfill_downcorn(md1, md3, lot, nfft, n3, zw, zf, scal, ehartreetmp)
2107  INTEGER, INTENT(in) :: md1, md3, lot, nfft, n3
2108  REAL(kind=dp), DIMENSION(2, lot, n3/2), INTENT(in) :: zw
2109  REAL(kind=dp), DIMENSION(md1, md3), INTENT(inout) :: zf
2110  REAL(kind=dp), INTENT(in) :: scal
2111  REAL(kind=dp), INTENT(out) :: ehartreetmp
2112 
2113  INTEGER :: i1, i3
2114  REAL(kind=dp) :: pot1
2115 
2116  ehartreetmp = 0._dp
2117  DO i3 = 1, n3/4
2118  DO i1 = 1, nfft
2119  pot1 = scal*zw(1, i1, i3)
2120  ehartreetmp = ehartreetmp + pot1*zf(i1, 2*i3 - 1)
2121  zf(i1, 2*i3 - 1) = pot1
2122  pot1 = scal*zw(2, i1, i3)
2123  ehartreetmp = ehartreetmp + pot1*zf(i1, 2*i3)
2124  zf(i1, 2*i3) = pot1
2125  END DO
2126  END DO
2127  END SUBROUTINE f_unfill_downcorn
2128 
2129 END MODULE ps_wavelet_base
int main(int argc, char *argv[])
Stand-alone miniapp for smoke-testing and benchmarking dbm_multiply.
Definition: dbm_miniapp.c:197
for(int lxp=0;lxp<=lp;lxp++)
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public dp
Definition: kinds.F:34
Interface to the message passing library MPI.
Creates the wavelet kernel for the wavelet based poisson solver.
subroutine, public s_poissonsolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, pot, zf, scal, mpi_group)
!HERE POT MUST BE THE KERNEL (BEWARE THE HALF DIMENSION) ****h* BigDFT/S_PoissonSolver (Based on suit...
subroutine, public scramble_unpack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw, zmpi2, cosinarr)
(Based on suitable modifications of S.Goedecker routines) Assign the correct planes to the work array...
subroutine, public f_poissonsolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, pot, zf, scal, mpi_group)
(Based on suitable modifications of S.Goedecker routines) Applies the local FFT space Kernel to the d...
subroutine, public p_poissonsolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, zf, scal, hx, hy, hz, mpi_group)
...
integer, parameter, public ctrig_length
subroutine, public fftstp(mm, nfft, m, nn, n, zin, zout, trig, after, now, before, isign)
...
subroutine, public ctrig(n, trig, after, before, now, isign, ic)
...