(git:374b731)
Loading...
Searching...
No Matches
ps_wavelet_base.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Creates the wavelet kernel for the wavelet based poisson solver.
10!> \author Florian Schiffmann (09.2007,fschiff)
11! **************************************************************************************************
13
14 USE kinds, ONLY: dp
16 USE ps_wavelet_fft3d, ONLY: ctrig,&
18 fftstp
19#include "../base/base_uses.f90"
20
21 IMPLICIT NONE
22
23 PRIVATE
24
25 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'ps_wavelet_base'
26
28
29CONTAINS
30
31! **************************************************************************************************
32!> \brief ...
33!> \param n1 ...
34!> \param n2 ...
35!> \param n3 ...
36!> \param nd1 ...
37!> \param nd2 ...
38!> \param nd3 ...
39!> \param md1 ...
40!> \param md2 ...
41!> \param md3 ...
42!> \param nproc ...
43!> \param iproc ...
44!> \param zf ...
45!> \param scal ...
46!> \param hx ...
47!> \param hy ...
48!> \param hz ...
49!> \param mpi_group ...
50! **************************************************************************************************
51 SUBROUTINE p_poissonsolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, zf &
52 , scal, hx, hy, hz, mpi_group)
53 INTEGER, INTENT(in) :: n1, n2, n3, nd1, nd2, nd3, md1, md2, &
54 md3, nproc, iproc
55 REAL(kind=dp), DIMENSION(md1, md3, md2/nproc), &
56 INTENT(inout) :: zf
57 REAL(kind=dp), INTENT(in) :: scal, hx, hy, hz
58
59 CLASS(mp_comm_type), INTENT(in) :: mpi_group
60
61 INTEGER, PARAMETER :: ncache_optimal = 8*1024
62
63 INTEGER :: i, i1, i3, ic1, ic2, ic3, inzee, j, j2, &
64 j2stb, j2stf, j3, jp2stb, jp2stf, lot, &
65 lzt, ma, mb, ncache, nfft
66 INTEGER, ALLOCATABLE, DIMENSION(:) :: after1, after2, after3, before1, &
67 before2, before3, now1, now2, now3
68 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: btrig1, btrig2, btrig3, ftrig1, ftrig2, &
69 ftrig3
70 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: zt, zw
71 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :, :) :: zmpi2
72 REAL(kind=dp), ALLOCATABLE, &
73 DIMENSION(:, :, :, :, :) :: zmpi1
74
75 IF (nd1 .LT. n1/2 + 1) cpabort("Parallel convolution:ERROR:nd1")
76 IF (nd2 .LT. n2/2 + 1) cpabort("Parallel convolution:ERROR:nd2")
77 IF (nd3 .LT. n3/2 + 1) cpabort("Parallel convolution:ERROR:nd3")
78 IF (md1 .LT. n1) cpabort("Parallel convolution:ERROR:md1")
79 IF (md2 .LT. n2) cpabort("Parallel convolution:ERROR:md2")
80 IF (md3 .LT. n3) cpabort("Parallel convolution:ERROR:md3")
81 IF (mod(nd3, nproc) .NE. 0) cpabort("Parallel convolution:ERROR:nd3")
82 IF (mod(md2, nproc) .NE. 0) cpabort("Parallel convolution:ERROR:md2")
83
84 !defining work arrays dimensions
85 ncache = ncache_optimal
86 IF (ncache <= max(n1, n2, n3)*4) ncache = max(n1, n2, n3)*4
87
88 lzt = n2
89 IF (mod(n2, 2) .EQ. 0) lzt = lzt + 1
90 IF (mod(n2, 4) .EQ. 0) lzt = lzt + 1 !maybe this is useless
91
92 !Allocations
93 ALLOCATE (btrig1(2, ctrig_length))
94 ALLOCATE (ftrig1(2, ctrig_length))
95 ALLOCATE (after1(7))
96 ALLOCATE (now1(7))
97 ALLOCATE (before1(7))
98 ALLOCATE (btrig2(2, ctrig_length))
99 ALLOCATE (ftrig2(2, ctrig_length))
100 ALLOCATE (after2(7))
101 ALLOCATE (now2(7))
102 ALLOCATE (before2(7))
103 ALLOCATE (btrig3(2, ctrig_length))
104 ALLOCATE (ftrig3(2, ctrig_length))
105 ALLOCATE (after3(7))
106 ALLOCATE (now3(7))
107 ALLOCATE (before3(7))
108 ALLOCATE (zw(2, ncache/4, 2))
109 ALLOCATE (zt(2, lzt, n1))
110 ALLOCATE (zmpi2(2, n1, md2/nproc, nd3))
111 IF (nproc .GT. 1) ALLOCATE (zmpi1(2, n1, md2/nproc, nd3/nproc, nproc))
112
113 !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
114 CALL ctrig(n3, btrig3, after3, before3, now3, 1, ic3)
115 CALL ctrig(n1, btrig1, after1, before1, now1, 1, ic1)
116 CALL ctrig(n2, btrig2, after2, before2, now2, 1, ic2)
117 DO j = 1, n1
118 ftrig1(1, j) = btrig1(1, j)
119 ftrig1(2, j) = -btrig1(2, j)
120 END DO
121 DO j = 1, n2
122 ftrig2(1, j) = btrig2(1, j)
123 ftrig2(2, j) = -btrig2(2, j)
124 END DO
125 DO j = 1, n3
126 ftrig3(1, j) = btrig3(1, j)
127 ftrig3(2, j) = -btrig3(2, j)
128 END DO
129
130 ! transform along z axis
131 lot = ncache/(4*n3)
132 IF (lot .LT. 1) THEN
133 WRITE (*, *) &
134 'convolxc_off:ncache has to be enlarged to be able to hold at'// &
135 'least one 1-d FFT of this size even though this will'// &
136 'reduce the performance for shorter transform lengths'
137 cpabort("")
138 END IF
139 DO j2 = 1, md2/nproc
140 !this condition ensures that we manage only the interesting part for the FFT
141 IF (iproc*(md2/nproc) + j2 .LE. n2) THEN
142 DO i1 = 1, n1, lot
143 ma = i1
144 mb = min(i1 + (lot - 1), n1)
145 nfft = mb - ma + 1
146 !inserting real data into complex array of half length
147 CALL p_fill_upcorn(md1, md3, lot, nfft, n3, zf(i1, 1, j2), zw(1, 1, 1))
148
149 !performing FFT
150 !input: I1,I3,J2,(Jp2)
151 inzee = 1
152 DO i = 1, ic3
153 CALL fftstp(lot, nfft, n3, lot, n3, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
154 btrig3, after3(i), now3(i), before3(i), 1)
155 inzee = 3 - inzee
156 END DO
157
158 !output: I1,i3,J2,(Jp2)
159 !exchanging components
160 !input: I1,i3,J2,(Jp2)
161 CALL scramble_p(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw(1, 1, inzee), zmpi2)
162 !output: I1,J2,i3,(Jp2)
163 END DO
164 END IF
165 END DO
166
167 !Interprocessor data transposition
168 !input: I1,J2,j3,jp3,(Jp2)
169 IF (nproc .GT. 1) THEN
170 !communication scheduling
171 CALL mpi_group%alltoall(zmpi2, zmpi1, 2*n1*(md2/nproc)*(nd3/nproc))
172 END IF
173 !output: I1,J2,j3,Jp2,(jp3)
174
175 !now each process perform complete convolution of its planes
176 DO j3 = 1, nd3/nproc
177 !this condition ensures that we manage only the interesting part for the FFT
178 IF (iproc*(nd3/nproc) + j3 .LE. n3/2 + 1) THEN
179 jp2stb = 1
180 j2stb = 1
181 jp2stf = 1
182 j2stf = 1
183
184 ! transform along x axis
185 lot = ncache/(4*n1)
186 IF (lot .LT. 1) THEN
187 WRITE (*, *) &
188 'convolxc_off:ncache has to be enlarged to be able to hold at'// &
189 'least one 1-d FFT of this size even though this will'// &
190 'reduce the performance for shorter transform lengths'
191 cpabort("")
192 END IF
193
194 DO j = 1, n2, lot
195 ma = j
196 mb = min(j + (lot - 1), n2)
197 nfft = mb - ma + 1
198
199 !reverse index ordering, leaving the planes to be transformed at the end
200 !input: I1,J2,j3,Jp2,(jp3)
201 IF (nproc .EQ. 1) THEN
202 CALL p_mpiswitch_upcorn(j3, nfft, jp2stb, j2stb, lot, n1, md2, nd3, nproc, zmpi2, zw(1, 1, 1))
203 ELSE
204 CALL p_mpiswitch_upcorn(j3, nfft, jp2stb, j2stb, lot, n1, md2, nd3, nproc, zmpi1, zw(1, 1, 1))
205 END IF
206 !output: J2,Jp2,I1,j3,(jp3)
207
208 !performing FFT
209 !input: I2,I1,j3,(jp3)
210 inzee = 1
211 DO i = 1, ic1 - 1
212 CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
213 btrig1, after1(i), now1(i), before1(i), 1)
214 inzee = 3 - inzee
215 END DO
216
217 !storing the last step into zt array
218 i = ic1
219 CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
220 btrig1, after1(i), now1(i), before1(i), 1)
221 !output: I2,i1,j3,(jp3)
222 END DO
223
224 !transform along y axis
225 lot = ncache/(4*n2)
226 IF (lot .LT. 1) THEN
227 WRITE (*, *) &
228 'convolxc_off:ncache has to be enlarged to be able to hold at'// &
229 'least one 1-d FFT of this size even though this will'// &
230 'reduce the performance for shorter transform lengths'
231 cpabort("")
232 END IF
233
234 DO j = 1, n1, lot
235 ma = j
236 mb = min(j + (lot - 1), n1)
237 nfft = mb - ma + 1
238
239 !reverse ordering
240 !input: I2,i1,j3,(jp3)
241 CALL p_switch_upcorn(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
242 !output: i1,I2,j3,(jp3)
243
244 !performing FFT
245 !input: i1,I2,j3,(jp3)
246 inzee = 1
247 DO i = 1, ic2
248 CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
249 btrig2, after2(i), now2(i), before2(i), 1)
250 inzee = 3 - inzee
251 END DO
252 !output: i1,i2,j3,(jp3)
253
254 !Multiply with kernel in fourier space
255 i3 = iproc*(nd3/nproc) + j3
256 CALL p_multkernel(n1, n2, n3, lot, nfft, j, i3, zw(1, 1, inzee), hx, hy, hz)
257
258 !TRANSFORM BACK IN REAL SPACE
259
260 !transform along y axis
261 !input: i1,i2,j3,(jp3)
262 DO i = 1, ic2
263 CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
264 ftrig2, after2(i), now2(i), before2(i), -1)
265 inzee = 3 - inzee
266 END DO
267
268 !reverse ordering
269 !input: i1,I2,j3,(jp3)
270 CALL p_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw(1, 1, inzee), zt(1, 1, j))
271 !output: I2,i1,j3,(jp3)
272 END DO
273
274 !transform along x axis
275 !input: I2,i1,j3,(jp3)
276 lot = ncache/(4*n1)
277 DO j = 1, n2, lot
278 ma = j
279 mb = min(j + (lot - 1), n2)
280 nfft = mb - ma + 1
281
282 !performing FFT
283 i = 1
284 CALL fftstp(lzt, nfft, n1, lot, n1, zt(1, j, 1), zw(1, 1, 1), &
285 ftrig1, after1(i), now1(i), before1(i), -1)
286
287 inzee = 1
288 DO i = 2, ic1
289 CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
290 ftrig1, after1(i), now1(i), before1(i), -1)
291 inzee = 3 - inzee
292 END DO
293 !output: I2,I1,j3,(jp3)
294
295 !reverse ordering
296 !input: J2,Jp2,I1,j3,(jp3)
297 IF (nproc .EQ. 1) THEN
298 CALL p_unmpiswitch_downcorn(j3, nfft, jp2stf, j2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi2)
299 ELSE
300 CALL p_unmpiswitch_downcorn(j3, nfft, jp2stf, j2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi1)
301 END IF
302 ! output: I1,J2,j3,Jp2,(jp3)
303 END DO
304 END IF
305 END DO
306
307 !Interprocessor data transposition
308 !input: I1,J2,j3,Jp2,(jp3)
309 IF (nproc .GT. 1) THEN
310 !communication scheduling
311 CALL mpi_group%alltoall(zmpi1, zmpi2, 2*n1*(md2/nproc)*(nd3/nproc))
312 END IF
313 !output: I1,J2,j3,jp3,(Jp2)
314 !transform along z axis
315 !input: I1,J2,i3,(Jp2)
316 lot = ncache/(4*n3)
317 DO j2 = 1, md2/nproc
318 !this condition ensures that we manage only the interesting part for the FFT
319 IF (iproc*(md2/nproc) + j2 .LE. n2) THEN
320 DO i1 = 1, n1, lot
321 ma = i1
322 mb = min(i1 + (lot - 1), n1)
323 nfft = mb - ma + 1
324
325 !reverse ordering
326 !input: I1,J2,i3,(Jp2)
327 CALL unscramble_p(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw(1, 1, 1))
328 !output: I1,i3,J2,(Jp2)
329
330 !performing FFT
331 !input: I1,i3,J2,(Jp2)
332 inzee = 1
333 DO i = 1, ic3
334 CALL fftstp(lot, nfft, n3, lot, n3, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
335 ftrig3, after3(i), now3(i), before3(i), -1)
336 inzee = 3 - inzee
337 END DO
338 !output: I1,I3,J2,(Jp2)
339
340 !rebuild the output array
341 CALL p_unfill_downcorn(md1, md3, lot, nfft, n3, zw(1, 1, inzee), zf(i1, 1, j2), scal)
342
343 END DO
344 END IF
345 END DO
346
347 !De-allocations
348 DEALLOCATE (btrig1)
349 DEALLOCATE (ftrig1)
350 DEALLOCATE (after1)
351 DEALLOCATE (now1)
352 DEALLOCATE (before1)
353 DEALLOCATE (btrig2)
354 DEALLOCATE (ftrig2)
355 DEALLOCATE (after2)
356 DEALLOCATE (now2)
357 DEALLOCATE (before2)
358 DEALLOCATE (btrig3)
359 DEALLOCATE (ftrig3)
360 DEALLOCATE (after3)
361 DEALLOCATE (now3)
362 DEALLOCATE (before3)
363 DEALLOCATE (zmpi2)
364 DEALLOCATE (zw)
365 DEALLOCATE (zt)
366 IF (nproc .GT. 1) DEALLOCATE (zmpi1)
367 ! call timing(iproc,'PSolv_comput ','OF')
368 END SUBROUTINE p_poissonsolver
369
370! **************************************************************************************************
371!> \brief ...
372!> \param j3 ...
373!> \param nfft ...
374!> \param Jp2stb ...
375!> \param J2stb ...
376!> \param lot ...
377!> \param n1 ...
378!> \param md2 ...
379!> \param nd3 ...
380!> \param nproc ...
381!> \param zmpi1 ...
382!> \param zw ...
383! **************************************************************************************************
384 SUBROUTINE p_mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw)
385 INTEGER, INTENT(in) :: j3, nfft
386 INTEGER, INTENT(inout) :: jp2stb, j2stb
387 INTEGER, INTENT(in) :: lot, n1, md2, nd3, nproc
388 REAL(kind=dp), &
389 DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
390 INTENT(in) :: zmpi1
391 REAL(kind=dp), DIMENSION(2, lot, n1), &
392 INTENT(inout) :: zw
393
394 INTEGER :: i1, j2, jp2, mfft
395
396 mfft = 0
397 DO jp2 = jp2stb, nproc
398 DO j2 = j2stb, md2/nproc
399 mfft = mfft + 1
400 IF (mfft .GT. nfft) THEN
401 jp2stb = jp2
402 j2stb = j2
403 RETURN
404 END IF
405 DO i1 = 1, n1
406 zw(1, mfft, i1) = zmpi1(1, i1, j2, j3, jp2)
407 zw(2, mfft, i1) = zmpi1(2, i1, j2, j3, jp2)
408 END DO
409 END DO
410 j2stb = 1
411 END DO
412 END SUBROUTINE p_mpiswitch_upcorn
413
414! **************************************************************************************************
415!> \brief ...
416!> \param nfft ...
417!> \param n2 ...
418!> \param lot ...
419!> \param n1 ...
420!> \param lzt ...
421!> \param zt ...
422!> \param zw ...
423! **************************************************************************************************
424 SUBROUTINE p_switch_upcorn(nfft, n2, lot, n1, lzt, zt, zw)
425 INTEGER, INTENT(in) :: nfft, n2, lot, n1, lzt
426 REAL(kind=dp), DIMENSION(2, lzt, n1), INTENT(in) :: zt
427 REAL(kind=dp), DIMENSION(2, lot, n2), &
428 INTENT(inout) :: zw
429
430 INTEGER :: i, j
431
432 DO j = 1, nfft
433 DO i = 1, n2
434 zw(1, j, i) = zt(1, i, j)
435 zw(2, j, i) = zt(2, i, j)
436 END DO
437 END DO
438
439 END SUBROUTINE p_switch_upcorn
440
441! **************************************************************************************************
442!> \brief ...
443!> \param nfft ...
444!> \param n2 ...
445!> \param lot ...
446!> \param n1 ...
447!> \param lzt ...
448!> \param zw ...
449!> \param zt ...
450! **************************************************************************************************
451 SUBROUTINE p_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw, zt)
452 INTEGER, INTENT(in) :: nfft, n2, lot, n1, lzt
453 REAL(kind=dp), DIMENSION(2, lot, n2), INTENT(in) :: zw
454 REAL(kind=dp), DIMENSION(2, lzt, n1), &
455 INTENT(inout) :: zt
456
457 INTEGER :: i, j
458
459 DO j = 1, nfft
460 DO i = 1, n2
461 zt(1, i, j) = zw(1, j, i)
462 zt(2, i, j) = zw(2, j, i)
463 END DO
464 END DO
465
466 END SUBROUTINE p_unswitch_downcorn
467
468! **************************************************************************************************
469!> \brief ...
470!> \param j3 ...
471!> \param nfft ...
472!> \param Jp2stf ...
473!> \param J2stf ...
474!> \param lot ...
475!> \param n1 ...
476!> \param md2 ...
477!> \param nd3 ...
478!> \param nproc ...
479!> \param zw ...
480!> \param zmpi1 ...
481! **************************************************************************************************
482 SUBROUTINE p_unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw, zmpi1)
483 INTEGER, INTENT(in) :: j3, nfft
484 INTEGER, INTENT(inout) :: jp2stf, j2stf
485 INTEGER, INTENT(in) :: lot, n1, md2, nd3, nproc
486 REAL(kind=dp), DIMENSION(2, lot, n1), INTENT(in) :: zw
487 REAL(kind=dp), &
488 DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
489 INTENT(inout) :: zmpi1
490
491 INTEGER :: i1, j2, jp2, mfft
492
493 mfft = 0
494 DO jp2 = jp2stf, nproc
495 DO j2 = j2stf, md2/nproc
496 mfft = mfft + 1
497 IF (mfft .GT. nfft) THEN
498 jp2stf = jp2
499 j2stf = j2
500 RETURN
501 END IF
502 DO i1 = 1, n1
503 zmpi1(1, i1, j2, j3, jp2) = zw(1, mfft, i1)
504 zmpi1(2, i1, j2, j3, jp2) = zw(2, mfft, i1)
505 END DO
506 END DO
507 j2stf = 1
508 END DO
509 END SUBROUTINE p_unmpiswitch_downcorn
510
511! **************************************************************************************************
512!> \brief (Based on suitable modifications of S.Goedecker routines)
513!> Restore data into output array
514!> \param md1 Dimensions of the undistributed part of the real grid
515!> \param md3 Dimensions of the undistributed part of the real grid
516!> \param lot ...
517!> \param nfft number of planes
518!> \param n3 (twice the) dimension of the last FFTtransform
519!> \param zw FFT work array
520!> \param zf Original distributed density as well as
521!> Distributed solution of the poisson equation (inout)
522!> \param scal Needed to achieve unitarity and correct dimensions
523!> \date February 2006
524!> \author S. Goedecker, L. Genovese
525!> \note Assuming that high frequencies are in the corners
526!> and that n3 is multiple of 4
527!>
528!> RESTRICTIONS on USAGE
529!> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
530!> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
531!> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
532!> This file is distributed under the terms of the
533!> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
534! **************************************************************************************************
535 SUBROUTINE p_unfill_downcorn(md1, md3, lot, nfft, n3, zw, zf, scal)
536 INTEGER, INTENT(in) :: md1, md3, lot, nfft, n3
537 REAL(kind=dp), DIMENSION(2, lot, n3), INTENT(in) :: zw
538 REAL(kind=dp), DIMENSION(md1, md3), INTENT(inout) :: zf
539 REAL(kind=dp), INTENT(in) :: scal
540
541 INTEGER :: i1, i3
542 REAL(kind=dp) :: pot1
543
544 DO i3 = 1, n3
545 DO i1 = 1, nfft
546 pot1 = scal*zw(1, i1, i3)
547 zf(i1, i3) = pot1
548 END DO
549 END DO
550
551 END SUBROUTINE p_unfill_downcorn
552
553! **************************************************************************************************
554!> \brief ...
555!> \param md1 ...
556!> \param md3 ...
557!> \param lot ...
558!> \param nfft ...
559!> \param n3 ...
560!> \param zf ...
561!> \param zw ...
562! **************************************************************************************************
563 SUBROUTINE p_fill_upcorn(md1, md3, lot, nfft, n3, zf, zw)
564 INTEGER, INTENT(in) :: md1, md3, lot, nfft, n3
565 REAL(kind=dp), DIMENSION(md1, md3), INTENT(in) :: zf
566 REAL(kind=dp), DIMENSION(2, lot, n3), &
567 INTENT(inout) :: zw
568
569 INTEGER :: i1, i3
570
571 DO i3 = 1, n3
572 DO i1 = 1, nfft
573 zw(1, i1, i3) = zf(i1, i3)
574 zw(2, i1, i3) = 0._dp
575 END DO
576 END DO
577
578 END SUBROUTINE p_fill_upcorn
579
580! **************************************************************************************************
581!> \brief (Based on suitable modifications of S.Goedecker routines)
582!> Assign the correct planes to the work array zmpi2
583!> in order to prepare for interprocessor data transposition.
584!> \param i1 Starting points of the plane and number of remaining lines
585!> \param j2 Starting points of the plane and number of remaining lines
586!> \param lot Starting points of the plane and number of remaining lines
587!> \param nfft Starting points of the plane and number of remaining lines
588!> \param n1 logical dimension of the FFT transform, reference for work arrays
589!> \param n3 logical dimension of the FFT transform, reference for work arrays
590!> \param md2 Dimensions of real grid
591!> \param nproc ...
592!> \param nd3 Dimensions of the kernel
593!> \param zw Work array (input)
594!> \param zmpi2 Work array for multiprocessor manipulation (output)
595!> \date February 2006
596!> \author S. Goedecker, L. Genovese
597!> \note
598!> RESTRICTIONS on USAGE
599!> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
600!> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
601!> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
602!> This file is distributed under the terms of the
603!> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
604! **************************************************************************************************
605 SUBROUTINE scramble_p(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw, zmpi2)
606 INTEGER, INTENT(in) :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
607 nd3
608 REAL(kind=dp), DIMENSION(2, lot, n3), INTENT(in) :: zw
609 REAL(kind=dp), DIMENSION(2, n1, md2/nproc, nd3), &
610 INTENT(inout) :: zmpi2
611
612 INTEGER :: i, i3
613
614 DO i3 = 1, n3/2 + 1
615 DO i = 0, nfft - 1
616 zmpi2(1, i1 + i, j2, i3) = zw(1, i + 1, i3)
617 zmpi2(2, i1 + i, j2, i3) = zw(2, i + 1, i3)
618 END DO
619 END DO
620
621 END SUBROUTINE scramble_p
622
623! **************************************************************************************************
624!> \brief (Based on suitable modifications of S.Goedecker routines)
625!> Insert the correct planes of the work array zmpi2
626!> in order to prepare for backward FFT transform
627!> \param i1 Starting points of the plane and number of remaining lines
628!> \param j2 Starting points of the plane and number of remaining lines
629!> \param lot Starting points of the plane and number of remaining lines
630!> \param nfft Starting points of the plane and number of remaining lines
631!> \param n1 logical dimension of the FFT transform, reference for work arrays
632!> \param n3 logical dimension of the FFT transform, reference for work arrays
633!> \param md2 Dimensions of real grid
634!> \param nproc ...
635!> \param nd3 Dimensions of the kernel
636!> \param zmpi2 Work array for multiprocessor manipulation (output)
637!> \param zw Work array (input)
638!> \date February 2006
639!> \author S. Goedecker, L. Genovese
640!> \note
641!> RESTRICTIONS on USAGE
642!> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
643!> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
644!> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
645!> This file is distributed under the terms of the
646!> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
647! **************************************************************************************************
648 SUBROUTINE unscramble_p(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw)
649 INTEGER, INTENT(in) :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
650 nd3
651 REAL(kind=dp), DIMENSION(2, n1, md2/nproc, nd3), &
652 INTENT(in) :: zmpi2
653 REAL(kind=dp), DIMENSION(2, lot, n3), &
654 INTENT(inout) :: zw
655
656 INTEGER :: i, i3, j3
657
658 i3 = 1
659 DO i = 0, nfft - 1
660 zw(1, i + 1, i3) = zmpi2(1, i1 + i, j2, i3)
661 zw(2, i + 1, i3) = zmpi2(2, i1 + i, j2, i3)
662 END DO
663
664 DO i3 = 2, n3/2 + 1
665 j3 = n3 + 2 - i3
666 DO i = 0, nfft - 1
667 zw(1, i + 1, j3) = zmpi2(1, i1 + i, j2, i3)
668 zw(2, i + 1, j3) = -zmpi2(2, i1 + i, j2, i3)
669 zw(1, i + 1, i3) = zmpi2(1, i1 + i, j2, i3)
670 zw(2, i + 1, i3) = zmpi2(2, i1 + i, j2, i3)
671 END DO
672 END DO
673
674 END SUBROUTINE unscramble_p
675
676! **************************************************************************************************
677!> \brief (Based on suitable modifications of S.Goedecker routines)
678!> Multiply with the kernel taking into account its symmetry
679!> Conceived to be used into convolution loops
680!> \param n1 ...
681!> \param n2 ...
682!> \param n3 ...
683!> \param lot ...
684!> \param nfft ...
685!> \param jS ...
686!> \param i3 ...
687!> \param zw Work array (input/output)
688!> n1,n2: logical dimension of the FFT transform, reference for zw
689!> nd1,nd2: Dimensions of POT
690!> jS,j3,nfft: starting point of the plane and number of remaining lines
691!>
692!> RESTRICTIONS on USAGE
693!> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
694!> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
695!> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
696!> This file is distributed under the terms of the
697!> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
698!> \param hx ...
699!> \param hy ...
700!> \param hz ...
701!> \date February 2006
702!> \author S. Goedecker, L. Genovese
703! **************************************************************************************************
704 SUBROUTINE p_multkernel(n1, n2, n3, lot, nfft, jS, i3, zw, hx, hy, hz)
705 INTEGER, INTENT(in) :: n1, n2, n3, lot, nfft, js, i3
706 REAL(kind=dp), DIMENSION(2, lot, n2), &
707 INTENT(inout) :: zw
708 REAL(kind=dp), INTENT(in) :: hx, hy, hz
709
710 INTEGER :: i1, i2, j1, j2, j3
711 REAL(kind=dp) :: fourpi2, ker, mu3, p1, p2, pi
712
713 pi = 4._dp*atan(1._dp)
714 fourpi2 = 4._dp*pi**2
715 j3 = i3 !n3/2+1-abs(n3/2+2-i3)
716 mu3 = real(j3 - 1, kind=dp)/real(n3, kind=dp)
717 mu3 = (mu3/hy)**2 !beware of the exchanged dimension
718 !Body
719 !generic case
720 DO i2 = 1, n2
721 DO i1 = 1, nfft
722 j1 = i1 + js - 1
723 j1 = j1 - (j1/(n1/2 + 2))*n1 !n1/2+1-abs(n1/2+2-jS-i1)
724 j2 = i2 - (i2/(n2/2 + 2))*n2 !n2/2+1-abs(n2/2+1-i2)
725 p1 = real(j1 - 1, kind=dp)/real(n1, kind=dp)
726 p2 = real(j2 - 1, kind=dp)/real(n2, kind=dp)
727 ker = -fourpi2*((p1/hx)**2 + (p2/hz)**2 + mu3) !beware of the exchanged dimension
728 IF (ker /= 0._dp) ker = 1._dp/ker
729 zw(1, i1, i2) = zw(1, i1, i2)*ker
730 zw(2, i1, i2) = zw(2, i1, i2)*ker
731 END DO
732 END DO
733
734 END SUBROUTINE p_multkernel
735
736! **************************************************************************************************
737!> \brief (Based on suitable modifications of S.Goedecker routines)
738!> Multiply with the kernel taking into account its symmetry
739!> Conceived to be used into convolution loops
740!> \param nd1 ...
741!> \param nd2 ...
742!> \param n1 ...
743!> \param n2 ...
744!> \param lot ...
745!> \param nfft ...
746!> \param jS ...
747!> \param pot Kernel, symmetric and real, half the length
748!> \param zw Work array (input/output)
749!> n1,n2: logical dimension of the FFT transform, reference for zw
750!> nd1,nd2: Dimensions of POT
751!> jS, nfft: starting point of the plane and number of remaining lines
752!>
753!> RESTRICTIONS on USAGE
754!> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
755!> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
756!> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
757!> This file is distributed under the terms of the
758!> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
759!> \date February 2006
760!> \author S. Goedecker, L. Genovese
761! **************************************************************************************************
762 SUBROUTINE multkernel(nd1, nd2, n1, n2, lot, nfft, jS, pot, zw)
763 INTEGER, INTENT(in) :: nd1, nd2, n1, n2, lot, nfft, js
764 REAL(kind=dp), DIMENSION(nd1, nd2), INTENT(in) :: pot
765 REAL(kind=dp), DIMENSION(2, lot, n2), &
766 INTENT(inout) :: zw
767
768 INTEGER :: i2, j, j1, j2
769
770 DO j = 1, nfft
771 j1 = j + js - 1
772 j1 = j1 + (j1/(n1/2 + 2))*(n1 + 2 - 2*j1)
773 zw(1, j, 1) = zw(1, j, 1)*pot(j1, 1)
774 zw(2, j, 1) = zw(2, j, 1)*pot(j1, 1)
775 END DO
776
777 !generic case
778 DO i2 = 2, n2/2
779 DO j = 1, nfft
780 j1 = j + js - 1
781 j1 = j1 + (j1/(n1/2 + 2))*(n1 + 2 - 2*j1)
782 j2 = n2 + 2 - i2
783 zw(1, j, i2) = zw(1, j, i2)*pot(j1, i2)
784 zw(2, j, i2) = zw(2, j, i2)*pot(j1, i2)
785 zw(1, j, j2) = zw(1, j, j2)*pot(j1, i2)
786 zw(2, j, j2) = zw(2, j, j2)*pot(j1, i2)
787 END DO
788 END DO
789
790 !case i2=n2/2+1
791 DO j = 1, nfft
792 j1 = j + js - 1
793 j1 = j1 + (j1/(n1/2 + 2))*(n1 + 2 - 2*j1)
794 j2 = n2/2 + 1
795 zw(1, j, j2) = zw(1, j, j2)*pot(j1, j2)
796 zw(2, j, j2) = zw(2, j, j2)*pot(j1, j2)
797 END DO
798
799 END SUBROUTINE multkernel
800
801! **************************************************************************************************
802!> \brief !HERE POT MUST BE THE KERNEL (BEWARE THE HALF DIMENSION)
803!> ****h* BigDFT/S_PoissonSolver
804!> (Based on suitable modifications of S.Goedecker routines)
805!> Applies the local FFT space Kernel to the density in Real space.
806!> Does NOT calculate the LDA exchange-correlation terms
807!> \param n1 logical dimension of the transform.
808!> \param n2 logical dimension of the transform.
809!> \param n3 logical dimension of the transform.
810!> \param nd1 Dimension of POT
811!> \param nd2 Dimension of POT
812!> \param nd3 Dimension of POT
813!> \param md1 Dimension of ZF
814!> \param md2 Dimension of ZF
815!> \param md3 Dimension of ZF
816!> \param nproc number of processors used as returned by MPI_COMM_SIZE
817!> \param iproc [0:nproc-1] number of processor as returned by MPI_COMM_RANK
818!> \param pot Kernel, only the distributed part (REAL)
819!> POT(i1,i2,i3)
820!> i1=1,nd1 , i2=1,nd2 , i3=1,nd3/nproc
821!> \param zf Density (input/output)
822!> ZF(i1,i3,i2)
823!> i1=1,md1 , i2=1,md2/nproc , i3=1,md3
824!> \param scal factor of renormalization of the FFT in order to acheve unitarity
825!> and the correct dimension
826!> \param mpi_group ...
827!> \date October 2006
828!> \author S. Goedecker, L. Genovese
829!> \note As transform lengths most products of the prime factors 2,3,5 are allowed.
830!> The detailed table with allowed transform lengths can
831!> be found in subroutine CTRIG
832!> \note
833!> RESTRICTIONS on USAGE
834!> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
835!> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
836!> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
837!> This file is distributed under the terms of the
838!> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
839! **************************************************************************************************
840 SUBROUTINE s_poissonsolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, pot, zf, &
841 scal, mpi_group)
842 INTEGER, INTENT(in) :: n1, n2, n3, nd1, nd2, nd3, md1, md2, &
843 md3, nproc, iproc
844 REAL(kind=dp), DIMENSION(nd1, nd2, nd3/nproc), &
845 INTENT(in) :: pot
846 REAL(kind=dp), DIMENSION(md1, md3, md2/nproc), &
847 INTENT(inout) :: zf
848 REAL(kind=dp), INTENT(in) :: scal
849
850 CLASS(mp_comm_type), INTENT(in) :: mpi_group
851
852 CHARACTER(len=*), PARAMETER :: routinen = 'S_PoissonSolver'
853 INTEGER, PARAMETER :: ncache_optimal = 8*1024
854
855 INTEGER :: handle, i, i1, i3, ic1, ic2, ic3, inzee, &
856 j, j2, j2stb, j2stf, j3, jp2stb, &
857 jp2stf, lot, lzt, ma, mb, ncache, nfft
858 INTEGER, ALLOCATABLE, DIMENSION(:) :: after1, after2, after3, before1, &
859 before2, before3, now1, now2, now3
860 REAL(kind=dp) :: twopion
861 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: btrig1, btrig2, btrig3, cosinarr, &
862 ftrig1, ftrig2, ftrig3
863 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: zt, zw
864 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :, :) :: zmpi2
865 REAL(kind=dp), ALLOCATABLE, &
866 DIMENSION(:, :, :, :, :) :: zmpi1
867
868 CALL timeset(routinen, handle)
869 ! check input
870 IF (mod(n3, 2) .NE. 0) cpabort("Parallel convolution:ERROR:n3")
871 IF (nd1 .LT. n1/2 + 1) cpabort("Parallel convolution:ERROR:nd1")
872 IF (nd2 .LT. n2/2 + 1) cpabort("Parallel convolution:ERROR:nd2")
873 IF (nd3 .LT. n3/2 + 1) cpabort("Parallel convolution:ERROR:nd3")
874 IF (md1 .LT. n1) cpabort("Parallel convolution:ERROR:md1")
875 IF (md2 .LT. n2) cpabort("Parallel convolution:ERROR:md2")
876 IF (md3 .LT. n3/2) cpabort("Parallel convolution:ERROR:md3")
877 IF (mod(nd3, nproc) .NE. 0) cpabort("Parallel convolution:ERROR:nd3")
878 IF (mod(md2, nproc) .NE. 0) cpabort("Parallel convolution:ERROR:md2")
879
880 !defining work arrays dimensions
881 ncache = ncache_optimal
882 IF (ncache <= max(n1, n2, n3/2)*4) ncache = max(n1, n2, n3/2)*4
883
884 ! if (timing_flag == 1 .and. iproc ==0) print *,'parallel ncache=',ncache
885
886 lzt = n2
887 IF (mod(n2, 2) .EQ. 0) lzt = lzt + 1
888 IF (mod(n2, 4) .EQ. 0) lzt = lzt + 1 !maybe this is useless
889
890 !Allocations
891 ALLOCATE (btrig1(2, ctrig_length))
892 ALLOCATE (ftrig1(2, ctrig_length))
893 ALLOCATE (after1(7))
894 ALLOCATE (now1(7))
895 ALLOCATE (before1(7))
896 ALLOCATE (btrig2(2, ctrig_length))
897 ALLOCATE (ftrig2(2, ctrig_length))
898 ALLOCATE (after2(7))
899 ALLOCATE (now2(7))
900 ALLOCATE (before2(7))
901 ALLOCATE (btrig3(2, ctrig_length))
902 ALLOCATE (ftrig3(2, ctrig_length))
903 ALLOCATE (after3(7))
904 ALLOCATE (now3(7))
905 ALLOCATE (before3(7))
906 ALLOCATE (zw(2, ncache/4, 2))
907 ALLOCATE (zt(2, lzt, n1))
908 ALLOCATE (zmpi2(2, n1, md2/nproc, nd3))
909 ALLOCATE (cosinarr(2, n3/2))
910 IF (nproc .GT. 1) THEN
911 ALLOCATE (zmpi1(2, n1, md2/nproc, nd3/nproc, nproc))
912 zmpi1 = 0.0_dp
913 END IF
914 zmpi2 = 0.0_dp
915 zw = 0.0_dp
916 zt = 0.0_dp
917
918 !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
919 CALL ctrig(n3/2, btrig3, after3, before3, now3, 1, ic3)
920 CALL ctrig(n1, btrig1, after1, before1, now1, 1, ic1)
921 CALL ctrig(n2, btrig2, after2, before2, now2, 1, ic2)
922 DO j = 1, n1
923 ftrig1(1, j) = btrig1(1, j)
924 ftrig1(2, j) = -btrig1(2, j)
925 END DO
926 DO j = 1, n2
927 ftrig2(1, j) = btrig2(1, j)
928 ftrig2(2, j) = -btrig2(2, j)
929 END DO
930 DO j = 1, n3/2
931 ftrig3(1, j) = btrig3(1, j)
932 ftrig3(2, j) = -btrig3(2, j)
933 END DO
934
935 !Calculating array of phases for HalFFT decoding
936 twopion = 8._dp*atan(1._dp)/real(n3, kind=dp)
937 DO i3 = 1, n3/2
938 cosinarr(1, i3) = cos(twopion*(i3 - 1))
939 cosinarr(2, i3) = -sin(twopion*(i3 - 1))
940 END DO
941
942 !initializing integral
943 !ehartree=0._dp
944
945 ! transform along z axis
946 lot = ncache/(2*n3)
947 IF (lot .LT. 1) THEN
948 WRITE (*, *) &
949 'convolxc_off:ncache has to be enlarged to be able to hold at'// &
950 'least one 1-d FFT of this size even though this will'// &
951 'reduce the performance for shorter transform lengths', n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc
952 cpabort("")
953 END IF
954
955 DO j2 = 1, md2/nproc
956 !this condition ensures that we manage only the interesting part for the FFT
957 IF (iproc*(md2/nproc) + j2 .LE. n2) THEN
958 DO i1 = 1, n1, lot
959 ma = i1
960 mb = min(i1 + (lot - 1), n1)
961 nfft = mb - ma + 1
962
963 !inserting real data into complex array of half length
964 CALL halfill_upcorn(md1, md3, lot, nfft, n3, zf(i1, 1, j2), zw(1, 1, 1))
965
966 !performing FFT
967 !input: I1,I3,J2,(Jp2)
968 inzee = 1
969 DO i = 1, ic3
970 CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
971 btrig3, after3(i), now3(i), before3(i), 1)
972 inzee = 3 - inzee
973 END DO
974 !output: I1,i3,J2,(Jp2)
975 !unpacking FFT in order to restore correct result,
976 !while exchanging components
977 !input: I1,i3,J2,(Jp2)
978 CALL scramble_unpack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw(1, 1, inzee), zmpi2, cosinarr)
979 !output: I1,J2,i3,(Jp2)
980 END DO
981 END IF
982 END DO
983 !Interprocessor data transposition
984 !input: I1,J2,j3,jp3,(Jp2)
985 IF (nproc .GT. 1) THEN
986 CALL mpi_group%alltoall(zmpi2, zmpi1, 2*n1*(md2/nproc)*(nd3/nproc))
987 END IF
988 !output: I1,J2,j3,Jp2,(jp3)
989
990 !now each process perform complete convolution of its planes
991 DO j3 = 1, nd3/nproc
992 !this condition ensures that we manage only the interesting part for the FFT
993 IF (iproc*(nd3/nproc) + j3 .LE. n3/2 + 1) THEN
994 jp2stb = 1
995 j2stb = 1
996 jp2stf = 1
997 j2stf = 1
998
999 ! transform along x axis
1000 lot = ncache/(4*n1)
1001 IF (lot .LT. 1) THEN
1002 WRITE (*, *) &
1003 'convolxc_off:ncache has to be enlarged to be able to hold at'// &
1004 'least one 1-d FFT of this size even though this will'// &
1005 'reduce the performance for shorter transform lengths'
1006 cpabort("")
1007 END IF
1008
1009 DO j = 1, n2, lot
1010 ma = j
1011 mb = min(j + (lot - 1), n2)
1012 nfft = mb - ma + 1
1013
1014 !reverse index ordering, leaving the planes to be transformed at the end
1015 !input: I1,J2,j3,Jp2,(jp3)
1016 IF (nproc .EQ. 1) THEN
1017 CALL s_mpiswitch_upcorn(j3, nfft, jp2stb, j2stb, lot, n1, md2, nd3, nproc, zmpi2, zw(1, 1, 1))
1018 ELSE
1019 CALL s_mpiswitch_upcorn(j3, nfft, jp2stb, j2stb, lot, n1, md2, nd3, nproc, zmpi1, zw(1, 1, 1))
1020 END IF
1021 !output: J2,Jp2,I1,j3,(jp3)
1022
1023 !performing FFT
1024 !input: I2,I1,j3,(jp3)
1025 inzee = 1
1026 DO i = 1, ic1 - 1
1027 CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1028 btrig1, after1(i), now1(i), before1(i), 1)
1029 inzee = 3 - inzee
1030 END DO
1031
1032 !storing the last step into zt array
1033 i = ic1
1034 CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
1035 btrig1, after1(i), now1(i), before1(i), 1)
1036 !output: I2,i1,j3,(jp3)
1037 END DO
1038
1039 !transform along y axis
1040 lot = ncache/(4*n2)
1041 IF (lot .LT. 1) THEN
1042 WRITE (*, *) &
1043 'convolxc_off:ncache has to be enlarged to be able to hold at'// &
1044 'least one 1-d FFT of this size even though this will'// &
1045 'reduce the performance for shorter transform lengths'
1046 cpabort("")
1047 END IF
1048
1049 DO j = 1, n1, lot
1050 ma = j
1051 mb = min(j + (lot - 1), n1)
1052 nfft = mb - ma + 1
1053
1054 !reverse ordering
1055 !input: I2,i1,j3,(jp3)
1056 CALL s_switch_upcorn(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
1057 !output: i1,I2,j3,(jp3)
1058
1059 !performing FFT
1060 !input: i1,I2,j3,(jp3)
1061 inzee = 1
1062 DO i = 1, ic2
1063 CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1064 btrig2, after2(i), now2(i), before2(i), 1)
1065 inzee = 3 - inzee
1066 END DO
1067 !output: i1,i2,j3,(jp3)
1068
1069 !Multiply with kernel in fourier space
1070 CALL multkernel(nd1, nd2, n1, n2, lot, nfft, j, pot(1, 1, j3), zw(1, 1, inzee))
1071
1072 !TRANSFORM BACK IN REAL SPACE
1073
1074 !transform along y axis
1075 !input: i1,i2,j3,(jp3)
1076 DO i = 1, ic2
1077 CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1078 ftrig2, after2(i), now2(i), before2(i), -1)
1079 inzee = 3 - inzee
1080 END DO
1081
1082 !reverse ordering
1083 !input: i1,I2,j3,(jp3)
1084 CALL s_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw(1, 1, inzee), zt(1, 1, j))
1085 !output: I2,i1,j3,(jp3)
1086 END DO
1087
1088 !transform along x axis
1089 !input: I2,i1,j3,(jp3)
1090 lot = ncache/(4*n1)
1091 DO j = 1, n2, lot
1092 ma = j
1093 mb = min(j + (lot - 1), n2)
1094 nfft = mb - ma + 1
1095
1096 !performing FFT
1097 i = 1
1098 CALL fftstp(lzt, nfft, n1, lot, n1, zt(1, j, 1), zw(1, 1, 1), &
1099 ftrig1, after1(i), now1(i), before1(i), -1)
1100
1101 inzee = 1
1102 DO i = 2, ic1
1103 CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1104 ftrig1, after1(i), now1(i), before1(i), -1)
1105 inzee = 3 - inzee
1106 END DO
1107 !output: I2,I1,j3,(jp3)
1108
1109 !reverse ordering
1110 !input: J2,Jp2,I1,j3,(jp3)
1111 IF (nproc .EQ. 1) THEN
1112 CALL s_unmpiswitch_downcorn(j3, nfft, jp2stf, j2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi2)
1113 ELSE
1114 CALL s_unmpiswitch_downcorn(j3, nfft, jp2stf, j2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi1)
1115 END IF
1116 ! output: I1,J2,j3,Jp2,(jp3)
1117 END DO
1118 END IF
1119 END DO
1120
1121 !Interprocessor data transposition
1122 !input: I1,J2,j3,Jp2,(jp3)
1123 IF (nproc .GT. 1) THEN
1124 !communication scheduling
1125 CALL mpi_group%alltoall(zmpi1, zmpi2, 2*n1*(md2/nproc)*(nd3/nproc))
1126 END IF
1127
1128 !output: I1,J2,j3,jp3,(Jp2)
1129
1130 !transform along z axis
1131 !input: I1,J2,i3,(Jp2)
1132 lot = ncache/(2*n3)
1133 DO j2 = 1, md2/nproc
1134 !this condition ensures that we manage only the interesting part for the FFT
1135 IF (iproc*(md2/nproc) + j2 .LE. n2) THEN
1136 DO i1 = 1, n1, lot
1137 ma = i1
1138 mb = min(i1 + (lot - 1), n1)
1139 nfft = mb - ma + 1
1140
1141 !reverse ordering and repack the FFT data in order to be backward HalFFT transformed
1142 !input: I1,J2,i3,(Jp2)
1143 CALL unscramble_pack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw(1, 1, 1), cosinarr)
1144 !output: I1,i3,J2,(Jp2)
1145
1146 !performing FFT
1147 !input: I1,i3,J2,(Jp2)
1148 inzee = 1
1149 DO i = 1, ic3
1150 CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1151 ftrig3, after3(i), now3(i), before3(i), -1)
1152 inzee = 3 - inzee
1153 END DO
1154 !output: I1,I3,J2,(Jp2)
1155
1156 !rebuild the output array
1157 CALL unfill_downcorn(md1, md3, lot, nfft, n3, zw(1, 1, inzee), zf(i1, 1, j2) &
1158 , scal)
1159
1160 !integrate local pieces together
1161 !ehartree=ehartree+0.5_dp*ehartreetmp*hx*hy*hz
1162 END DO
1163 END IF
1164 END DO
1165
1166 !De-allocations
1167 DEALLOCATE (btrig1)
1168 DEALLOCATE (ftrig1)
1169 DEALLOCATE (after1)
1170 DEALLOCATE (now1)
1171 DEALLOCATE (before1)
1172 DEALLOCATE (btrig2)
1173 DEALLOCATE (ftrig2)
1174 DEALLOCATE (after2)
1175 DEALLOCATE (now2)
1176 DEALLOCATE (before2)
1177 DEALLOCATE (btrig3)
1178 DEALLOCATE (ftrig3)
1179 DEALLOCATE (after3)
1180 DEALLOCATE (now3)
1181 DEALLOCATE (before3)
1182 DEALLOCATE (zmpi2)
1183 DEALLOCATE (zw)
1184 DEALLOCATE (zt)
1185 DEALLOCATE (cosinarr)
1186 IF (nproc .GT. 1) DEALLOCATE (zmpi1)
1187
1188 ! call timing(iproc,'PSolv_comput ','OF')
1189 CALL timestop(handle)
1190 END SUBROUTINE s_poissonsolver
1191
1192! **************************************************************************************************
1193!> \brief ...
1194!> \param j3 ...
1195!> \param nfft ...
1196!> \param Jp2stb ...
1197!> \param J2stb ...
1198!> \param lot ...
1199!> \param n1 ...
1200!> \param md2 ...
1201!> \param nd3 ...
1202!> \param nproc ...
1203!> \param zmpi1 ...
1204!> \param zw ...
1205! **************************************************************************************************
1206 SUBROUTINE s_mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw)
1207 INTEGER, INTENT(in) :: j3, nfft
1208 INTEGER, INTENT(inout) :: jp2stb, j2stb
1209 INTEGER, INTENT(in) :: lot, n1, md2, nd3, nproc
1210 REAL(kind=dp), &
1211 DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
1212 INTENT(in) :: zmpi1
1213 REAL(kind=dp), DIMENSION(2, lot, n1), &
1214 INTENT(inout) :: zw
1215
1216 INTEGER :: i1, j2, jp2, mfft
1217
1218 mfft = 0
1219 DO jp2 = jp2stb, nproc
1220 DO j2 = j2stb, md2/nproc
1221 mfft = mfft + 1
1222 IF (mfft .GT. nfft) THEN
1223 jp2stb = jp2
1224 j2stb = j2
1225 RETURN
1226 END IF
1227 DO i1 = 1, n1
1228 zw(1, mfft, i1) = zmpi1(1, i1, j2, j3, jp2)
1229 zw(2, mfft, i1) = zmpi1(2, i1, j2, j3, jp2)
1230 END DO
1231 END DO
1232 j2stb = 1
1233 END DO
1234 END SUBROUTINE s_mpiswitch_upcorn
1235
1236! **************************************************************************************************
1237!> \brief ...
1238!> \param nfft ...
1239!> \param n2 ...
1240!> \param lot ...
1241!> \param n1 ...
1242!> \param lzt ...
1243!> \param zt ...
1244!> \param zw ...
1245! **************************************************************************************************
1246 SUBROUTINE s_switch_upcorn(nfft, n2, lot, n1, lzt, zt, zw)
1247 INTEGER, INTENT(in) :: nfft, n2, lot, n1, lzt
1248 REAL(kind=dp), DIMENSION(2, lzt, n1), INTENT(in) :: zt
1249 REAL(kind=dp), DIMENSION(2, lot, n2), &
1250 INTENT(inout) :: zw
1251
1252 INTEGER :: i, j
1253
1254 DO j = 1, nfft
1255 DO i = 1, n2
1256 zw(1, j, i) = zt(1, i, j)
1257 zw(2, j, i) = zt(2, i, j)
1258 END DO
1259 END DO
1260 END SUBROUTINE s_switch_upcorn
1261
1262! **************************************************************************************************
1263!> \brief ...
1264!> \param nfft ...
1265!> \param n2 ...
1266!> \param lot ...
1267!> \param n1 ...
1268!> \param lzt ...
1269!> \param zw ...
1270!> \param zt ...
1271! **************************************************************************************************
1272 SUBROUTINE s_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw, zt)
1273 INTEGER, INTENT(in) :: nfft, n2, lot, n1, lzt
1274 REAL(kind=dp), DIMENSION(2, lot, n2), INTENT(in) :: zw
1275 REAL(kind=dp), DIMENSION(2, lzt, n1), &
1276 INTENT(inout) :: zt
1277
1278 INTEGER :: i, j
1279
1280 DO j = 1, nfft
1281 DO i = 1, n2
1282 zt(1, i, j) = zw(1, j, i)
1283 zt(2, i, j) = zw(2, j, i)
1284 END DO
1285 END DO
1286 END SUBROUTINE s_unswitch_downcorn
1287
1288! **************************************************************************************************
1289!> \brief ...
1290!> \param j3 ...
1291!> \param nfft ...
1292!> \param Jp2stf ...
1293!> \param J2stf ...
1294!> \param lot ...
1295!> \param n1 ...
1296!> \param md2 ...
1297!> \param nd3 ...
1298!> \param nproc ...
1299!> \param zw ...
1300!> \param zmpi1 ...
1301! **************************************************************************************************
1302 SUBROUTINE s_unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw, zmpi1)
1303 INTEGER, INTENT(in) :: j3, nfft
1304 INTEGER, INTENT(inout) :: jp2stf, j2stf
1305 INTEGER, INTENT(in) :: lot, n1, md2, nd3, nproc
1306 REAL(kind=dp), DIMENSION(2, lot, n1), INTENT(in) :: zw
1307 REAL(kind=dp), &
1308 DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
1309 INTENT(inout) :: zmpi1
1310
1311 INTEGER :: i1, j2, jp2, mfft
1312
1313 mfft = 0
1314 DO jp2 = jp2stf, nproc
1315 DO j2 = j2stf, md2/nproc
1316 mfft = mfft + 1
1317 IF (mfft .GT. nfft) THEN
1318 jp2stf = jp2
1319 j2stf = j2
1320 RETURN
1321 END IF
1322 DO i1 = 1, n1
1323 zmpi1(1, i1, j2, j3, jp2) = zw(1, mfft, i1)
1324 zmpi1(2, i1, j2, j3, jp2) = zw(2, mfft, i1)
1325 END DO
1326 END DO
1327 j2stf = 1
1328 END DO
1329 END SUBROUTINE s_unmpiswitch_downcorn
1330
1331! **************************************************************************************************
1332!> \brief (Based on suitable modifications of S.Goedecker routines)
1333!> Restore data into output array, calculating in the meanwhile
1334!> Hartree energy of the potential
1335!> \param md1 Dimensions of the undistributed part of the real grid
1336!> \param md3 Dimensions of the undistributed part of the real grid
1337!> \param lot ...
1338!> \param nfft number of planes
1339!> \param n3 (twice the) dimension of the last FFTtransform.
1340!> \param zw FFT work array
1341!> \param zf Original distributed density as well as
1342!> Distributed solution of the poisson equation (inout)
1343!> \param scal Needed to achieve unitarity and correct dimensions
1344!> \date February 2006
1345!> \author S. Goedecker, L. Genovese
1346!> \note Assuming that high frequencies are in the corners
1347!> and that n3 is multiple of 4
1348!>
1349!> RESTRICTIONS on USAGE
1350!> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
1351!> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
1352!> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
1353!> This file is distributed under the terms of the
1354!> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
1355! **************************************************************************************************
1356 SUBROUTINE unfill_downcorn(md1, md3, lot, nfft, n3, zw, zf, scal)
1357 INTEGER, INTENT(in) :: md1, md3, lot, nfft, n3
1358 REAL(kind=dp), DIMENSION(2, lot, n3/2), INTENT(in) :: zw
1359 REAL(kind=dp), DIMENSION(md1, md3), INTENT(inout) :: zf
1360 REAL(kind=dp), INTENT(in) :: scal
1361
1362 INTEGER :: i1, i3
1363 REAL(kind=dp) :: pot1
1364
1365 DO i3 = 1, n3/4
1366 DO i1 = 1, nfft
1367 pot1 = scal*zw(1, i1, i3)
1368 !ehartreetmp =ehartreetmp + pot1* zf(i1,2*i3-1)
1369 zf(i1, 2*i3 - 1) = pot1
1370 pot1 = scal*zw(2, i1, i3)
1371 !ehartreetmp =ehartreetmp + pot1* zf(i1,2*i3)
1372 zf(i1, 2*i3) = pot1
1373 END DO
1374 END DO
1375 END SUBROUTINE unfill_downcorn
1376
1377! **************************************************************************************************
1378!> \brief ...
1379!> \param md1 ...
1380!> \param md3 ...
1381!> \param lot ...
1382!> \param nfft ...
1383!> \param n3 ...
1384!> \param zf ...
1385!> \param zw ...
1386! **************************************************************************************************
1387 SUBROUTINE halfill_upcorn(md1, md3, lot, nfft, n3, zf, zw)
1388 INTEGER :: md1, md3, lot, nfft, n3
1389 REAL(kind=dp) :: zf(md1, md3), zw(2, lot, n3/2)
1390
1391 INTEGER :: i1, i3
1392
1393 DO i3 = 1, n3/4
1394 ! WARNING: Assuming that high frequencies are in the corners
1395 ! and that n3 is multiple of 4
1396 !in principle we can relax this condition
1397 DO i1 = 1, nfft
1398 zw(1, i1, i3) = 0._dp
1399 zw(2, i1, i3) = 0._dp
1400 END DO
1401 END DO
1402 DO i3 = n3/4 + 1, n3/2
1403 DO i1 = 1, nfft
1404 zw(1, i1, i3) = zf(i1, 2*i3 - 1 - n3/2)
1405 zw(2, i1, i3) = zf(i1, 2*i3 - n3/2)
1406 END DO
1407 END DO
1408
1409 END SUBROUTINE halfill_upcorn
1410
1411! **************************************************************************************************
1412!> \brief (Based on suitable modifications of S.Goedecker routines)
1413!> Assign the correct planes to the work array zmpi2
1414!> in order to prepare for interprocessor data transposition.
1415!> In the meanwhile, it unpacks the data of the HalFFT in order to prepare for
1416!> multiplication with the kernel
1417!> \param i1 Starting points of the plane and number of remaining lines
1418!> \param j2 Starting points of the plane and number of remaining lines
1419!> \param lot Starting points of the plane and number of remaining lines
1420!> \param nfft Starting points of the plane and number of remaining lines
1421!> \param n1 logical dimension of the FFT transform, reference for work arrays
1422!> \param n3 logical dimension of the FFT transform, reference for work arrays
1423!> \param md2 Dimensions of real grid
1424!> \param nproc ...
1425!> \param nd3 Dimensions of the kernel
1426!> \param zw Work array (input)
1427!> \param zmpi2 Work array for multiprocessor manipulation (output)
1428!> \param cosinarr Array of the phases needed for unpacking
1429!> \date February 2006
1430!> \author S. Goedecker, L. Genovese
1431!> \note
1432!> RESTRICTIONS on USAGE
1433!> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
1434!> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
1435!> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
1436!> This file is distributed under the terms of the
1437!> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
1438! **************************************************************************************************
1439 SUBROUTINE scramble_unpack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw, zmpi2, cosinarr)
1440 INTEGER, INTENT(in) :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
1441 nd3
1442 REAL(kind=dp), DIMENSION(2, lot, n3/2), INTENT(in) :: zw
1443 REAL(kind=dp), DIMENSION(2, n1, md2/nproc, nd3), &
1444 INTENT(inout) :: zmpi2
1445 REAL(kind=dp), DIMENSION(2, n3/2), INTENT(in) :: cosinarr
1446
1447 INTEGER :: i, i3, ind1, ind2
1448 REAL(kind=dp) :: a, b, c, cp, d, fei, fer, fi, foi, for, &
1449 fr, sp
1450
1451!case i3=1 and i3=n3/2+1
1452
1453 DO i = 0, nfft - 1
1454 a = zw(1, i + 1, 1)
1455 b = zw(2, i + 1, 1)
1456 zmpi2(1, i1 + i, j2, 1) = a + b
1457 zmpi2(2, i1 + i, j2, 1) = 0._dp
1458 zmpi2(1, i1 + i, j2, n3/2 + 1) = a - b
1459 zmpi2(2, i1 + i, j2, n3/2 + 1) = 0._dp
1460 END DO
1461 !case 2<=i3<=n3/2
1462 DO i3 = 2, n3/2
1463 ind1 = i3
1464 ind2 = n3/2 - i3 + 2
1465 cp = cosinarr(1, i3)
1466 sp = cosinarr(2, i3)
1467 DO i = 0, nfft - 1
1468 a = zw(1, i + 1, ind1)
1469 b = zw(2, i + 1, ind1)
1470 c = zw(1, i + 1, ind2)
1471 d = zw(2, i + 1, ind2)
1472 fer = .5_dp*(a + c)
1473 fei = .5_dp*(b - d)
1474 for = .5_dp*(a - c)
1475 foi = .5_dp*(b + d)
1476 fr = fer + cp*foi - sp*for
1477 fi = fei - cp*for - sp*foi
1478 zmpi2(1, i1 + i, j2, ind1) = fr
1479 zmpi2(2, i1 + i, j2, ind1) = fi
1480 END DO
1481 END DO
1482
1483 END SUBROUTINE scramble_unpack
1484
1485! **************************************************************************************************
1486!> \brief (Based on suitable modifications of S.Goedecker routines)
1487!> Insert the correct planes of the work array zmpi2
1488!> in order to prepare for backward FFT transform
1489!> In the meanwhile, it packs the data in order to be transformed with the HalFFT
1490!> procedure
1491!> \param i1 Starting points of the plane and number of remaining lines
1492!> \param j2 Starting points of the plane and number of remaining lines
1493!> \param lot Starting points of the plane and number of remaining lines
1494!> \param nfft Starting points of the plane and number of remaining lines
1495!> \param n1 logical dimension of the FFT transform, reference for work arrays
1496!> \param n3 logical dimension of the FFT transform, reference for work arrays
1497!> \param md2 Dimensions of real grid
1498!> \param nproc ...
1499!> \param nd3 Dimensions of the kernel
1500!> \param zmpi2 Work array for multiprocessor manipulation (output)
1501!> \param zw Work array (inout)
1502!> \param cosinarr Array of the phases needed for packing
1503!> \date February 2006
1504!> \author S. Goedecker, L. Genovese
1505!> \note
1506!> RESTRICTIONS on USAGE
1507!> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
1508!> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
1509!> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
1510!> This file is distributed under the terms of the
1511!> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
1512! **************************************************************************************************
1513 SUBROUTINE unscramble_pack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw, cosinarr)
1514 INTEGER, INTENT(in) :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
1515 nd3
1516 REAL(kind=dp), DIMENSION(2, n1, md2/nproc, nd3), &
1517 INTENT(in) :: zmpi2
1518 REAL(kind=dp), DIMENSION(2, lot, n3/2), &
1519 INTENT(inout) :: zw
1520 REAL(kind=dp), DIMENSION(2, n3/2), INTENT(in) :: cosinarr
1521
1522 INTEGER :: i, i3, inda, indb
1523 REAL(kind=dp) :: a, b, c, cp, d, ie, ih, io, re, rh, ro, &
1524 sp
1525
1526 DO i3 = 1, n3/2
1527 inda = i3
1528 indb = n3/2 + 2 - i3
1529 cp = cosinarr(1, i3)
1530 sp = cosinarr(2, i3)
1531 DO i = 0, nfft - 1
1532 a = zmpi2(1, i1 + i, j2, inda)
1533 b = zmpi2(2, i1 + i, j2, inda)
1534 c = zmpi2(1, i1 + i, j2, indb)
1535 d = -zmpi2(2, i1 + i, j2, indb)
1536 re = (a + c)
1537 ie = (b + d)
1538 ro = (a - c)*cp - (b - d)*sp
1539 io = (a - c)*sp + (b - d)*cp
1540 rh = re - io
1541 ih = ie + ro
1542 zw(1, i + 1, inda) = rh
1543 zw(2, i + 1, inda) = ih
1544 END DO
1545 END DO
1546
1547 END SUBROUTINE unscramble_pack
1548
1549! **************************************************************************************************
1550!> \brief (Based on suitable modifications of S.Goedecker routines)
1551!> Applies the local FFT space Kernel to the density in Real space.
1552!> Calculates also the LDA exchange-correlation terms
1553!> \param n1 logical dimension of the transform.
1554!> \param n2 logical dimension of the transform.
1555!> \param n3 logical dimension of the transform.
1556!> \param nd1 Dimension of POT
1557!> \param nd2 Dimension of POT
1558!> \param nd3 Dimension of POT
1559!> \param md1 Dimension of ZF
1560!> \param md2 Dimension of ZF
1561!> \param md3 Dimension of ZF
1562!> \param nproc number of processors used as returned by MPI_COMM_SIZE
1563!> \param iproc [0:nproc-1] number of processor as returned by MPI_COMM_RANK
1564!> \param pot Kernel, only the distributed part (REAL)
1565!> POT(i1,i2,i3)
1566!> i1=1,nd1 , i2=1,nd2 , i3=1,nd3/nproc
1567!> \param zf Density (input/output)
1568!> ZF(i1,i3,i2)
1569!> i1=1,md1 , i2=1,md2/nproc , i3=1,md3
1570!> \param scal factor of renormalization of the FFT in order to acheve unitarity
1571!> and the correct dimension
1572!> \param mpi_group ...
1573!> \date February 2006
1574!> \author S. Goedecker, L. Genovese
1575!> \note As transform lengths
1576!> most products of the prime factors 2,3,5 are allowed.
1577!> The detailed table with allowed transform lengths can
1578!> be found in subroutine CTRIG
1579!> \note
1580!> RESTRICTIONS on USAGE
1581!> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
1582!> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
1583!> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
1584!> This file is distributed under the terms of the
1585!> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
1586! **************************************************************************************************
1587 SUBROUTINE f_poissonsolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, pot, zf, &
1588 scal, mpi_group)
1589 INTEGER, INTENT(in) :: n1, n2, n3, nd1, nd2, nd3, md1, md2, &
1590 md3, nproc, iproc
1591 REAL(kind=dp), DIMENSION(nd1, nd2, nd3/nproc), &
1592 INTENT(in) :: pot
1593 REAL(kind=dp), DIMENSION(md1, md3, md2/nproc), &
1594 INTENT(inout) :: zf
1595 REAL(kind=dp), INTENT(in) :: scal
1596
1597 CLASS(mp_comm_type), INTENT(in) :: mpi_group
1598
1599 INTEGER, PARAMETER :: ncache_optimal = 8*1024
1600
1601 INTEGER :: i, i1, i3, ic1, ic2, ic3, inzee, j, j2, &
1602 j2stb, j2stf, j3, jp2stb, jp2stf, lot, &
1603 lzt, ma, mb, ncache, nfft
1604 INTEGER, ALLOCATABLE, DIMENSION(:) :: after1, after2, after3, before1, &
1605 before2, before3, now1, now2, now3
1606 REAL(kind=dp) :: twopion
1607 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: btrig1, btrig2, btrig3, cosinarr, &
1608 ftrig1, ftrig2, ftrig3
1609 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: zt, zw
1610 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :, :) :: zmpi2
1611 REAL(kind=dp), ALLOCATABLE, &
1612 DIMENSION(:, :, :, :, :) :: zmpi1
1613
1614 IF (mod(n1, 2) .NE. 0) cpabort("Parallel convolution:ERROR:n1")
1615 IF (mod(n2, 2) .NE. 0) cpabort("Parallel convolution:ERROR:n2")
1616 IF (mod(n3, 2) .NE. 0) cpabort("Parallel convolution:ERROR:n3")
1617 IF (nd1 .LT. n1/2 + 1) cpabort("Parallel convolution:ERROR:nd1")
1618 IF (nd2 .LT. n2/2 + 1) cpabort("Parallel convolution:ERROR:nd2")
1619 IF (nd3 .LT. n3/2 + 1) cpabort("Parallel convolution:ERROR:nd3")
1620 IF (md1 .LT. n1/2) cpabort("Parallel convolution:ERROR:md1")
1621 IF (md2 .LT. n2/2) cpabort("Parallel convolution:ERROR:md2")
1622 IF (md3 .LT. n3/2) cpabort("Parallel convolution:ERROR:md3")
1623 IF (mod(nd3, nproc) .NE. 0) cpabort("Parallel convolution:ERROR:nd3")
1624 IF (mod(md2, nproc) .NE. 0) cpabort("Parallel convolution:ERROR:md2")
1625
1626 !defining work arrays dimensions
1627
1628 ncache = ncache_optimal
1629 IF (ncache <= max(n1, n2, n3/2)*4) ncache = max(n1, n2, n3/2)*4
1630 lzt = n2/2
1631 IF (mod(n2/2, 2) .EQ. 0) lzt = lzt + 1
1632 IF (mod(n2/2, 4) .EQ. 0) lzt = lzt + 1
1633
1634 !Allocations
1635 ALLOCATE (btrig1(2, ctrig_length))
1636 ALLOCATE (ftrig1(2, ctrig_length))
1637 ALLOCATE (after1(7))
1638 ALLOCATE (now1(7))
1639 ALLOCATE (before1(7))
1640 ALLOCATE (btrig2(2, ctrig_length))
1641 ALLOCATE (ftrig2(2, ctrig_length))
1642 ALLOCATE (after2(7))
1643 ALLOCATE (now2(7))
1644 ALLOCATE (before2(7))
1645 ALLOCATE (btrig3(2, ctrig_length))
1646 ALLOCATE (ftrig3(2, ctrig_length))
1647 ALLOCATE (after3(7))
1648 ALLOCATE (now3(7))
1649 ALLOCATE (before3(7))
1650 ALLOCATE (zw(2, ncache/4, 2))
1651 ALLOCATE (zt(2, lzt, n1))
1652 ALLOCATE (zmpi2(2, n1, md2/nproc, nd3))
1653 zmpi2 = 1.0e99_dp ! init mpi'ed buffers to silence warnings under valgrind
1654 ALLOCATE (cosinarr(2, n3/2))
1655 IF (nproc .GT. 1) ALLOCATE (zmpi1(2, n1, md2/nproc, nd3/nproc, nproc))
1656
1657 !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
1658 CALL ctrig(n3/2, btrig3, after3, before3, now3, 1, ic3)
1659 CALL ctrig(n1, btrig1, after1, before1, now1, 1, ic1)
1660 CALL ctrig(n2, btrig2, after2, before2, now2, 1, ic2)
1661 DO j = 1, n1
1662 ftrig1(1, j) = btrig1(1, j)
1663 ftrig1(2, j) = -btrig1(2, j)
1664 END DO
1665 DO j = 1, n2
1666 ftrig2(1, j) = btrig2(1, j)
1667 ftrig2(2, j) = -btrig2(2, j)
1668 END DO
1669 DO j = 1, n3
1670 ftrig3(1, j) = btrig3(1, j)
1671 ftrig3(2, j) = -btrig3(2, j)
1672 END DO
1673
1674 !Calculating array of phases for HalFFT decoding
1675 twopion = 8._dp*atan(1._dp)/real(n3, kind=dp)
1676 DO i3 = 1, n3/2
1677 cosinarr(1, i3) = cos(twopion*(i3 - 1))
1678 cosinarr(2, i3) = -sin(twopion*(i3 - 1))
1679 END DO
1680
1681 ! transform along z axis
1682 lot = ncache/(2*n3)
1683 IF (lot .LT. 1) THEN
1684 WRITE (*, *) &
1685 'convolxc_on:ncache has to be enlarged to be able to hold at'// &
1686 'least one 1-d FFT of this size even though this will'// &
1687 'reduce the performance for shorter transform lengths'
1688 cpabort("")
1689 END IF
1690
1691 DO j2 = 1, md2/nproc
1692 !this condition ensures that we manage only the interesting part for the FFT
1693 IF (iproc*(md2/nproc) + j2 .LE. n2/2) THEN
1694 DO i1 = 1, (n1/2), lot
1695 ma = i1
1696 mb = min(i1 + (lot - 1), (n1/2))
1697 nfft = mb - ma + 1
1698
1699 !inserting real data into complex array of half length
1700 CALL halfill_upcorn(md1, md3, lot, nfft, n3, zf(i1, 1, j2), zw(1, 1, 1))
1701
1702 !performing FFT
1703 !input: I1,I3,J2,(Jp2)
1704 inzee = 1
1705 DO i = 1, ic3
1706 CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1707 btrig3, after3(i), now3(i), before3(i), 1)
1708 inzee = 3 - inzee
1709 END DO
1710 !output: I1,i3,J2,(Jp2)
1711
1712 !unpacking FFT in order to restore correct result,
1713 !while exchanging components
1714 !input: I1,i3,J2,(Jp2)
1715 CALL scramble_unpack(i1, j2, lot, nfft, n1/2, n3, md2, nproc, nd3, zw(1, 1, inzee), zmpi2, cosinarr)
1716 !output: I1,J2,i3,(Jp2)
1717 END DO
1718 END IF
1719 END DO
1720
1721 !Interprocessor data transposition
1722 !input: I1,J2,j3,jp3,(Jp2)
1723 IF (nproc .GT. 1) THEN
1724 !communication scheduling
1725 CALL mpi_group%alltoall(zmpi2, zmpi1, n1*(md2/nproc)*(nd3/nproc))
1726 END IF
1727 !output: I1,J2,j3,Jp2,(jp3)
1728
1729 !now each process perform complete convolution of its planes
1730 DO j3 = 1, nd3/nproc
1731 !this condition ensures that we manage only the interesting part for the FFT
1732 IF (iproc*(nd3/nproc) + j3 .LE. n3/2 + 1) THEN
1733 jp2stb = 1
1734 j2stb = 1
1735 jp2stf = 1
1736 j2stf = 1
1737
1738 ! transform along x axis
1739 lot = ncache/(4*n1)
1740 IF (lot .LT. 1) THEN
1741 WRITE (*, *) &
1742 'convolxc_on:ncache has to be enlarged to be able to hold at'// &
1743 'least one 1-d FFT of this size even though this will'// &
1744 'reduce the performance for shorter transform lengths'
1745 cpabort("")
1746 END IF
1747
1748 DO j = 1, n2/2, lot
1749 ma = j
1750 mb = min(j + (lot - 1), n2/2)
1751 nfft = mb - ma + 1
1752
1753 !reverse index ordering, leaving the planes to be transformed at the end
1754 !input: I1,J2,j3,Jp2,(jp3)
1755 IF (nproc .EQ. 1) THEN
1756 CALL mpiswitch_upcorn(j3, nfft, jp2stb, j2stb, lot, n1, md2, nd3, nproc, zmpi2, zw(1, 1, 1))
1757 ELSE
1758 CALL mpiswitch_upcorn(j3, nfft, jp2stb, j2stb, lot, n1, md2, nd3, nproc, zmpi1, zw(1, 1, 1))
1759 END IF
1760 !output: J2,Jp2,I1,j3,(jp3)
1761
1762 !performing FFT
1763 !input: I2,I1,j3,(jp3)
1764 inzee = 1
1765 DO i = 1, ic1 - 1
1766 CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1767 btrig1, after1(i), now1(i), before1(i), 1)
1768 inzee = 3 - inzee
1769 END DO
1770
1771 !storing the last step into zt array
1772 i = ic1
1773 CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
1774 btrig1, after1(i), now1(i), before1(i), 1)
1775 !output: I2,i1,j3,(jp3)
1776 END DO
1777
1778 !transform along y axis
1779 lot = ncache/(4*n2)
1780 IF (lot .LT. 1) THEN
1781 WRITE (*, *) &
1782 'convolxc_on:ncache has to be enlarged to be able to hold at'// &
1783 'least one 1-d FFT of this size even though this will'// &
1784 'reduce the performance for shorter transform lengths'
1785 cpabort("")
1786 END IF
1787
1788 DO j = 1, n1, lot
1789 ma = j
1790 mb = min(j + (lot - 1), n1)
1791 nfft = mb - ma + 1
1792
1793 !reverse ordering
1794 !input: I2,i1,j3,(jp3)
1795 CALL switch_upcorn(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
1796 !output: i1,I2,j3,(jp3)
1797
1798 !performing FFT
1799 !input: i1,I2,j3,(jp3)
1800 inzee = 1
1801 DO i = 1, ic2
1802 CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1803 btrig2, after2(i), now2(i), before2(i), 1)
1804 inzee = 3 - inzee
1805 END DO
1806 !output: i1,i2,j3,(jp3)
1807
1808 !Multiply with kernel in fourier space
1809 CALL multkernel(nd1, nd2, n1, n2, lot, nfft, j, pot(1, 1, j3), zw(1, 1, inzee))
1810
1811 !TRANSFORM BACK IN REAL SPACE
1812
1813 !transform along y axis
1814 !input: i1,i2,j3,(jp3)
1815 DO i = 1, ic2
1816 CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1817 ftrig2, after2(i), now2(i), before2(i), -1)
1818 inzee = 3 - inzee
1819 END DO
1820
1821 !reverse ordering
1822 !input: i1,I2,j3,(jp3)
1823 CALL unswitch_downcorn(nfft, n2, lot, n1, lzt, zw(1, 1, inzee), zt(1, 1, j))
1824 !output: I2,i1,j3,(jp3)
1825 END DO
1826
1827 !transform along x axis
1828 !input: I2,i1,j3,(jp3)
1829 lot = ncache/(4*n1)
1830 DO j = 1, n2/2, lot
1831 ma = j
1832 mb = min(j + (lot - 1), n2/2)
1833 nfft = mb - ma + 1
1834
1835 !performing FFT
1836 i = 1
1837 CALL fftstp(lzt, nfft, n1, lot, n1, zt(1, j, 1), zw(1, 1, 1), &
1838 ftrig1, after1(i), now1(i), before1(i), -1)
1839
1840 inzee = 1
1841 DO i = 2, ic1
1842 CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1843 ftrig1, after1(i), now1(i), before1(i), -1)
1844 inzee = 3 - inzee
1845 END DO
1846 !output: I2,I1,j3,(jp3)
1847
1848 !reverse ordering
1849 !input: J2,Jp2,I1,j3,(jp3)
1850 IF (nproc .EQ. 1) THEN
1851 CALL unmpiswitch_downcorn(j3, nfft, jp2stf, j2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi2)
1852 ELSE
1853 CALL unmpiswitch_downcorn(j3, nfft, jp2stf, j2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi1)
1854 END IF
1855 ! output: I1,J2,j3,Jp2,(jp3)
1856 END DO
1857 END IF
1858 END DO
1859
1860 !Interprocessor data transposition
1861 !input: I1,J2,j3,Jp2,(jp3)
1862 IF (nproc .GT. 1) THEN
1863 !communication scheduling
1864 CALL mpi_group%alltoall(zmpi1, zmpi2, n1*(md2/nproc)*(nd3/nproc))
1865 !output: I1,J2,j3,jp3,(Jp2)
1866 END IF
1867
1868 !transform along z axis
1869 !input: I1,J2,i3,(Jp2)
1870 lot = ncache/(2*n3)
1871 DO j2 = 1, md2/nproc
1872 !this condition ensures that we manage only the interesting part for the FFT
1873 IF (iproc*(md2/nproc) + j2 .LE. n2/2) THEN
1874 DO i1 = 1, (n1/2), lot
1875 ma = i1
1876 mb = min(i1 + (lot - 1), (n1/2))
1877 nfft = mb - ma + 1
1878
1879 !reverse ordering and repack the FFT data in order to be backward HalFFT transformed
1880 !input: I1,J2,i3,(Jp2)
1881 CALL unscramble_pack(i1, j2, lot, nfft, n1/2, n3, md2, nproc, nd3, zmpi2, zw(1, 1, 1), cosinarr)
1882 !output: I1,i3,J2,(Jp2)
1883
1884 !performing FFT
1885 !input: I1,i3,J2,(Jp2)
1886 inzee = 1
1887 DO i = 1, ic3
1888 CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
1889 ftrig3, after3(i), now3(i), before3(i), -1)
1890 inzee = 3 - inzee
1891 END DO
1892 !output: I1,I3,J2,(Jp2)
1893
1894 !calculates the exchange correlation terms locally and rebuild the output array
1895 CALL unfill_downcorn(md1, md3, lot, nfft, n3, zw(1, 1, inzee), zf(i1, 1, j2) &
1896 , scal) !,ehartreetmp)
1897
1898 !integrate local pieces together
1899 !ehartree=ehartree+0.5_dp*ehartreetmp*hgrid**3
1900 END DO
1901 END IF
1902 END DO
1903
1904 !De-allocations
1905 DEALLOCATE (btrig1)
1906 DEALLOCATE (ftrig1)
1907 DEALLOCATE (after1)
1908 DEALLOCATE (now1)
1909 DEALLOCATE (before1)
1910 DEALLOCATE (btrig2)
1911 DEALLOCATE (ftrig2)
1912 DEALLOCATE (after2)
1913 DEALLOCATE (now2)
1914 DEALLOCATE (before2)
1915 DEALLOCATE (btrig3)
1916 DEALLOCATE (ftrig3)
1917 DEALLOCATE (after3)
1918 DEALLOCATE (now3)
1919 DEALLOCATE (before3)
1920 DEALLOCATE (zmpi2)
1921 DEALLOCATE (zw)
1922 DEALLOCATE (zt)
1923 DEALLOCATE (cosinarr)
1924 IF (nproc .GT. 1) DEALLOCATE (zmpi1)
1925
1926 END SUBROUTINE f_poissonsolver
1927
1928! **************************************************************************************************
1929!> \brief ...
1930!> \param nfft ...
1931!> \param n2 ...
1932!> \param lot ...
1933!> \param n1 ...
1934!> \param lzt ...
1935!> \param zt ...
1936!> \param zw ...
1937! **************************************************************************************************
1938 SUBROUTINE switch_upcorn(nfft, n2, lot, n1, lzt, zt, zw)
1939 INTEGER :: nfft, n2, lot, n1, lzt
1940 REAL(kind=dp) :: zt(2, lzt, n1), zw(2, lot, n2)
1941
1942 INTEGER :: i, j
1943
1944! WARNING: Assuming that high frequencies are in the corners
1945! and that n2 is multiple of 2
1946! Low frequencies
1947
1948 DO j = 1, nfft
1949 DO i = n2/2 + 1, n2
1950 zw(1, j, i) = zt(1, i - n2/2, j)
1951 zw(2, j, i) = zt(2, i - n2/2, j)
1952 END DO
1953 END DO
1954 ! High frequencies
1955 DO i = 1, n2/2
1956 DO j = 1, nfft
1957 zw(1, j, i) = 0._dp
1958 zw(2, j, i) = 0._dp
1959 END DO
1960 END DO
1961 END SUBROUTINE switch_upcorn
1962
1963! **************************************************************************************************
1964!> \brief ...
1965!> \param j3 ...
1966!> \param nfft ...
1967!> \param Jp2stb ...
1968!> \param J2stb ...
1969!> \param lot ...
1970!> \param n1 ...
1971!> \param md2 ...
1972!> \param nd3 ...
1973!> \param nproc ...
1974!> \param zmpi1 ...
1975!> \param zw ...
1976! **************************************************************************************************
1977 SUBROUTINE mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw)
1978 INTEGER :: j3, nfft, jp2stb, j2stb, lot, n1, md2, &
1979 nd3, nproc
1980 REAL(kind=dp) :: zmpi1(2, n1/2, md2/nproc, nd3/nproc, nproc), zw(2, lot, n1)
1981
1982 INTEGER :: i1, j2, jp2, mfft
1983
1984! WARNING: Assuming that high frequencies are in the corners
1985! and that n1 is multiple of 2
1986
1987 mfft = 0
1988 main: DO jp2 = jp2stb, nproc
1989 DO j2 = j2stb, md2/nproc
1990 mfft = mfft + 1
1991 IF (mfft .GT. nfft) THEN
1992 jp2stb = jp2
1993 j2stb = j2
1994 EXIT main
1995 END IF
1996 DO i1 = 1, n1/2
1997 zw(1, mfft, i1) = 0._dp
1998 zw(2, mfft, i1) = 0._dp
1999 END DO
2000 DO i1 = n1/2 + 1, n1
2001 zw(1, mfft, i1) = zmpi1(1, i1 - n1/2, j2, j3, jp2)
2002 zw(2, mfft, i1) = zmpi1(2, i1 - n1/2, j2, j3, jp2)
2003 END DO
2004 END DO
2005 j2stb = 1
2006 END DO main
2007 END SUBROUTINE mpiswitch_upcorn
2008
2009! **************************************************************************************************
2010!> \brief ...
2011!> \param nfft ...
2012!> \param n2 ...
2013!> \param lot ...
2014!> \param n1 ...
2015!> \param lzt ...
2016!> \param zw ...
2017!> \param zt ...
2018! **************************************************************************************************
2019 SUBROUTINE unswitch_downcorn(nfft, n2, lot, n1, lzt, zw, zt)
2020 INTEGER :: nfft, n2, lot, n1, lzt
2021 REAL(kind=dp) :: zw(2, lot, n2), zt(2, lzt, n1)
2022
2023 INTEGER :: i, j
2024
2025! WARNING: Assuming that high frequencies are in the corners
2026! and that n2 is multiple of 2
2027! Low frequencies
2028
2029 DO j = 1, nfft
2030 DO i = 1, n2/2
2031 zt(1, i, j) = zw(1, j, i)
2032 zt(2, i, j) = zw(2, j, i)
2033 END DO
2034 END DO
2035 RETURN
2036 END SUBROUTINE unswitch_downcorn
2037
2038! **************************************************************************************************
2039!> \brief ...
2040!> \param j3 ...
2041!> \param nfft ...
2042!> \param Jp2stf ...
2043!> \param J2stf ...
2044!> \param lot ...
2045!> \param n1 ...
2046!> \param md2 ...
2047!> \param nd3 ...
2048!> \param nproc ...
2049!> \param zw ...
2050!> \param zmpi1 ...
2051! **************************************************************************************************
2052 SUBROUTINE unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw, zmpi1)
2053 INTEGER :: j3, nfft, jp2stf, j2stf, lot, n1, md2, &
2054 nd3, nproc
2055 REAL(kind=dp) :: zw(2, lot, n1), zmpi1(2, n1/2, md2/nproc, nd3/nproc, nproc)
2056
2057 INTEGER :: i1, j2, jp2, mfft
2058
2059! WARNING: Assuming that high frequencies are in the corners
2060! and that n1 is multiple of 2
2061
2062 mfft = 0
2063 main: DO jp2 = jp2stf, nproc
2064 DO j2 = j2stf, md2/nproc
2065 mfft = mfft + 1
2066 IF (mfft .GT. nfft) THEN
2067 jp2stf = jp2
2068 j2stf = j2
2069 EXIT main
2070 END IF
2071 DO i1 = 1, n1/2
2072 zmpi1(1, i1, j2, j3, jp2) = zw(1, mfft, i1)
2073 zmpi1(2, i1, j2, j3, jp2) = zw(2, mfft, i1)
2074 END DO
2075 END DO
2076 j2stf = 1
2077 END DO main
2078 END SUBROUTINE unmpiswitch_downcorn
2079
2080! **************************************************************************************************
2081!> \brief (Based on suitable modifications of S.Goedecker routines)
2082!> Restore data into output array, calculating in the meanwhile
2083!> Hartree energy of the potential
2084!> \param md1 Dimensions of the undistributed part of the real grid
2085!> \param md3 Dimensions of the undistributed part of the real grid
2086!> \param lot ...
2087!> \param nfft number of planes
2088!> \param n3 (twice the) dimension of the last FFTtransform.
2089!> \param zw FFT work array
2090!> \param zf Original distributed density as well as
2091!> Distributed solution of the poisson equation (inout)
2092!> \param scal Needed to achieve unitarity and correct dimensions
2093!> \param ehartreetmp Hartree energy
2094!> \date February 2006
2095!> \author S. Goedecker, L. Genovese
2096!> \note Assuming that high frequencies are in the corners
2097!> and that n3 is multiple of 4
2098!>
2099!> RESTRICTIONS on USAGE
2100!> Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
2101!> Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
2102!> Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
2103!> This file is distributed under the terms of the
2104!> GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
2105! **************************************************************************************************
2106 SUBROUTINE f_unfill_downcorn(md1, md3, lot, nfft, n3, zw, zf, scal, ehartreetmp)
2107 INTEGER, INTENT(in) :: md1, md3, lot, nfft, n3
2108 REAL(kind=dp), DIMENSION(2, lot, n3/2), INTENT(in) :: zw
2109 REAL(kind=dp), DIMENSION(md1, md3), INTENT(inout) :: zf
2110 REAL(kind=dp), INTENT(in) :: scal
2111 REAL(kind=dp), INTENT(out) :: ehartreetmp
2112
2113 INTEGER :: i1, i3
2114 REAL(kind=dp) :: pot1
2115
2116 ehartreetmp = 0._dp
2117 DO i3 = 1, n3/4
2118 DO i1 = 1, nfft
2119 pot1 = scal*zw(1, i1, i3)
2120 ehartreetmp = ehartreetmp + pot1*zf(i1, 2*i3 - 1)
2121 zf(i1, 2*i3 - 1) = pot1
2122 pot1 = scal*zw(2, i1, i3)
2123 ehartreetmp = ehartreetmp + pot1*zf(i1, 2*i3)
2124 zf(i1, 2*i3) = pot1
2125 END DO
2126 END DO
2127 END SUBROUTINE f_unfill_downcorn
2128
2129END MODULE ps_wavelet_base
int main(int argc, char *argv[])
Stand-alone miniapp for smoke-testing and benchmarking dbm_multiply.
for(int lxp=0;lxp<=lp;lxp++)
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public sp
Definition kinds.F:33
Interface to the message passing library MPI.
Creates the wavelet kernel for the wavelet based poisson solver.
subroutine, public s_poissonsolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, pot, zf, scal, mpi_group)
!HERE POT MUST BE THE KERNEL (BEWARE THE HALF DIMENSION) ****h* BigDFT/S_PoissonSolver (Based on suit...
subroutine, public scramble_unpack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw, zmpi2, cosinarr)
(Based on suitable modifications of S.Goedecker routines) Assign the correct planes to the work array...
subroutine, public f_poissonsolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, pot, zf, scal, mpi_group)
(Based on suitable modifications of S.Goedecker routines) Applies the local FFT space Kernel to the d...
subroutine, public p_poissonsolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, zf, scal, hx, hy, hz, mpi_group)
...
integer, parameter, public ctrig_length
subroutine, public fftstp(mm, nfft, m, nn, n, zin, zout, trig, after, now, before, isign)
...
subroutine, public ctrig(n, trig, after, before, now, isign, ic)
...