(git:33f85d8)
Loading...
Searching...
No Matches
fft_tools.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \par History
10!> JGH (30-Nov-2000): ESSL FFT Library added
11!> JGH (05-Jan-2001): Added SGI library FFT
12!> JGH (14-Jan-2001): Added parallel 3d FFT
13!> JGH (10-Feb-2006): New interface type
14!> JGH (31-Mar-2008): Remove local allocates and reshapes (performance)
15!> Possible problems can be related with setting arrays
16!> not to zero
17!> Some interfaces could be further simplified by avoiding
18!> an initial copy. However, this assumes contiguous arrays
19!> IAB (15-Oct-2008): Moved mp_cart_sub calls out of cube_tranpose_* and into
20!> fft_scratch type, reducing number of calls dramatically
21!> IAB (05-Dec-2008): Moved all other non-essential MPI calls into scratch type
22!> IAB (09-Jan-2009): Added fft_plan_type to store FFT data, including cached FFTW plans
23!> IAB (13-Feb-2009): Extended plan caching to serial 3D FFT (fft3d_s)
24!> IAB (09-Oct-2009): Added OpenMP directives to parallel 3D FFT
25!> (c) The Numerical Algorithms Group (NAG) Ltd, 2008-2009 on behalf of the HECToR project
26!> HFP (17-Oct-2024): Thread-safety insurance (CPASSERT), and OMP ATOMIC (tick_fft_pool)
27!> \author JGH
28! **************************************************************************************************
30 USE iso_c_binding, ONLY: c_f_pointer,&
31 c_loc,&
32 c_ptr,&
33 c_size_t
35 USE fft_lib, ONLY: &
36 fft_1dm, fft_3d, fft_alloc, fft_create_plan_1dm, fft_create_plan_3d, fft_dealloc, &
38 USE fft_plan, ONLY: fft_plan_type
39 USE kinds, ONLY: dp,&
40 dp_size,&
41 sp
42 USE mathconstants, ONLY: z_zero
43 USE message_passing, ONLY: mp_cart_type,&
50
51!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_in_parallel
52
53#include "../base/base_uses.f90"
54
55 IMPLICIT NONE
56
57 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'fft_tools'
58
59 ! Types for the pool of scratch data needed in FFT routines
60 ! keep the subroutine "is_equal" up-to-date
61 ! needs a default initialization
63 INTEGER :: nx = 0, ny = 0, nz = 0
64 INTEGER :: lmax = 0, mmax = 0, nmax = 0
65 INTEGER :: mx1 = 0, mx2 = 0, mx3 = 0
66 INTEGER :: my1 = 0, my2 = 0, my3 = 0
67 INTEGER :: mz1 = 0, mz2 = 0, mz3 = 0
68 INTEGER :: mcz1 = 0, mcz2 = 0, mcy3 = 0, mcx2 = 0
69 INTEGER :: lg = 0, mg = 0
70 INTEGER :: nbx = 0, nbz = 0
71 INTEGER :: nmray = 0, nyzray = 0
72 TYPE(mp_cart_type) :: rs_group = mp_cart_type()
73 INTEGER, DIMENSION(2) :: g_pos = 0, r_pos = 0, r_dim = 0
74 INTEGER :: numtask = 0
75 END TYPE fft_scratch_sizes
76
78 INTEGER :: fft_scratch_id = 0
79 INTEGER :: tf_type = -1
80 LOGICAL :: in_use = .true.
81 TYPE(mp_comm_type) :: group = mp_comm_type()
82 INTEGER, DIMENSION(3) :: nfft = -1
83 ! to be used in cube_transpose_* routines
84 TYPE(mp_cart_type), DIMENSION(2) :: cart_sub_comm = mp_cart_type()
85 INTEGER, DIMENSION(2) :: dim = -1, pos = -1
86 ! to be used in fft3d_s
87 COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER, CONTIGUOUS &
88 :: ziptr => null(), zoptr => null()
89 ! to be used in fft3d_ps : block distribution
90 COMPLEX(KIND=dp), DIMENSION(:, :), CONTIGUOUS, POINTER &
91 :: p1buf => null(), p2buf => null(), p3buf => null(), p4buf => null(), &
92 p5buf => null(), p6buf => null(), p7buf => null()
93 ! to be used in fft3d_ps : plane distribution
94 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
95 :: r1buf => null(), r2buf => null()
96 COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER, CONTIGUOUS &
97 :: tbuf => null()
98 ! to be used in fft3d_pb
99 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
100 :: a1buf => null(), a2buf => null(), a3buf => null(), &
101 a4buf => null(), a5buf => null(), a6buf => null()
102 ! to be used in communication routines
103 INTEGER, DIMENSION(:), CONTIGUOUS, POINTER :: scount => null(), rcount => null(), sdispl => null(), rdispl => null()
104 INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER :: pgcube => null()
105 INTEGER, DIMENSION(:), CONTIGUOUS, POINTER :: xzcount => null(), yzcount => null(), xzdispl => null(), yzdispl => null()
106 INTEGER :: in = 0, mip = -1
107 REAL(kind=dp) :: rsratio = 1.0_dp
108 COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS &
109 :: xzbuf => null(), yzbuf => null()
110 COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS &
111 :: xzbuf_sgl => null(), yzbuf_sgl => null()
112 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
113 :: rbuf1 => null(), rbuf2 => null(), rbuf3 => null(), rbuf4 => null(), &
114 rbuf5 => null(), rbuf6 => null(), rr => null()
115 COMPLEX(KIND=sp), DIMENSION(:, :), POINTER, CONTIGUOUS &
116 :: ss => null(), tt => null()
117 INTEGER, DIMENSION(:, :), POINTER, CONTIGUOUS :: pgrid => null()
118 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: xcor => null(), zcor => null(), pzcoord => null()
120 TYPE(fft_plan_type), DIMENSION(6) :: fft_plan = fft_plan_type()
121 INTEGER :: last_tick = -1
122 END TYPE fft_scratch_type
123
125 TYPE(fft_scratch_type), POINTER :: fft_scratch => null()
126 TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_next => null()
127 END TYPE fft_scratch_pool_type
128
129 INTEGER, SAVE :: init_fft_pool = 0
130 ! the clock for fft pool. Allows to identify the least recently used scratch
131 INTEGER, SAVE :: tick_fft_pool = 0
132 ! limit the number of scratch pools to fft_pool_scratch_limit.
133 INTEGER, SAVE :: fft_pool_scratch_limit = 15
135 ! END of types for the pool of scratch data needed in FFT routines
136
137 PRIVATE
138 PUBLIC :: init_fft, fft3d, finalize_fft
139 PUBLIC :: fft_alloc, fft_dealloc
140 PUBLIC :: init_fft_scratch_pool
142 PUBLIC :: fwfft, bwfft
144 PUBLIC :: fft_radix_next_odd
145
146 INTEGER, PARAMETER :: fwfft = +1, bwfft = -1
147 INTEGER, PARAMETER :: fft_radix_closest = 493, fft_radix_next = 494
148 INTEGER, PARAMETER :: fft_radix_allowed = 495, fft_radix_disallowed = 496
149 INTEGER, PARAMETER :: fft_radix_next_odd = 497
150
151 REAL(kind=dp), PARAMETER :: ratio_sparse_alltoall = 0.5_dp
152
153 ! these saved variables are FFT globals
154 INTEGER, SAVE :: fft_type = 0
155 LOGICAL, SAVE :: alltoall_sgl = .false.
156 LOGICAL, SAVE :: use_fftsg_sizes = .true.
157 INTEGER, SAVE :: fft_plan_style = 1
158
159 ! these are only needed for pw_gpu (-D__OFFLOAD)
162 PUBLIC :: yz_to_x, x_to_yz, xz_to_yz, yz_to_xz
164 PUBLIC :: fft_type, fft_plan_style
165
166 INTERFACE fft3d
167 MODULE PROCEDURE fft3d_s, fft3d_ps, fft3d_pb
168 END INTERFACE
169
170! **************************************************************************************************
171
172CONTAINS
173
174! **************************************************************************************************
175!> \brief ...
176!> \param fftlib ...
177!> \param alltoall ...
178!> \param fftsg_sizes ...
179!> \param pool_limit ...
180!> \param wisdom_file ...
181!> \param plan_style ...
182!> \author JGH
183! **************************************************************************************************
184 SUBROUTINE init_fft(fftlib, alltoall, fftsg_sizes, pool_limit, wisdom_file, &
185 plan_style)
186
187 CHARACTER(LEN=*), INTENT(IN) :: fftlib
188 LOGICAL, INTENT(IN) :: alltoall, fftsg_sizes
189 INTEGER, INTENT(IN) :: pool_limit
190 CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
191 INTEGER, INTENT(IN) :: plan_style
192
193 use_fftsg_sizes = fftsg_sizes
194 alltoall_sgl = alltoall
195 fft_pool_scratch_limit = pool_limit
196 fft_type = fft_library(fftlib)
197 fft_plan_style = plan_style
198
199 IF (fft_type <= 0) cpabort("Unknown FFT library: "//trim(fftlib))
200
201 CALL fft_do_init(fft_type, wisdom_file)
202
203 ! setup the FFT scratch pool, if one is associated, clear first
204 CALL init_fft_scratch_pool() ! CALLs release_fft_scratch_pool()
205
206 END SUBROUTINE init_fft
207
208! **************************************************************************************************
209!> \brief does whatever is needed to finalize the current fft setup
210!> \param para_env ...
211!> \param wisdom_file ...
212!> \par History
213!> 10.2007 created [Joost VandeVondele]
214! **************************************************************************************************
215 SUBROUTINE finalize_fft(para_env, wisdom_file)
216 CLASS(mp_comm_type) :: para_env
217 CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
218
219! release the FFT scratch pool
220
221 CALL release_fft_scratch_pool()
222
223 ! finalize fft libs
224
225 CALL fft_do_cleanup(fft_type, wisdom_file, para_env%is_source())
226
227 END SUBROUTINE finalize_fft
228
229! **************************************************************************************************
230!> \brief Determine the allowed lengths of FFT's '''
231!> \param radix_in ...
232!> \param radix_out ...
233!> \param operation ...
234!> \par History
235!> new library structure (JGH)
236!> \author Ari Seitsonen
237! **************************************************************************************************
238 SUBROUTINE fft_radix_operations(radix_in, radix_out, operation)
239
240 INTEGER, INTENT(IN) :: radix_in
241 INTEGER, INTENT(OUT) :: radix_out
242 INTEGER, INTENT(IN) :: operation
243
244 INTEGER, PARAMETER :: fft_type_sg = 1
245
246 INTEGER :: i, iloc, ldata
247 INTEGER, ALLOCATABLE, DIMENSION(:) :: data
248
249 ldata = 1024
250 ALLOCATE (DATA(ldata))
251 DATA = -1
252
253 ! if the user wants to use fftsg sizes go for it
254 IF (use_fftsg_sizes) THEN
255 CALL fft_get_lengths(fft_type_sg, DATA, ldata)
256 ELSE
257 CALL fft_get_lengths(fft_type, DATA, ldata)
258 END IF
259
260 iloc = 0
261 DO i = 1, ldata
262 IF (DATA(i) == radix_in) THEN
263 iloc = i
264 EXIT
265 ELSE
266 IF (operation == fft_radix_allowed) THEN
267 cycle
268 ELSE IF (DATA(i) > radix_in) THEN
269 iloc = i
270 EXIT
271 END IF
272 END IF
273 END DO
274
275 IF (iloc == 0) THEN
276 cpabort("Index to radix array not found.")
277 END IF
278
279 IF (operation == fft_radix_allowed) THEN
280 IF (DATA(iloc) == radix_in) THEN
281 radix_out = fft_radix_allowed
282 ELSE
283 radix_out = fft_radix_disallowed
284 END IF
285
286 ELSE IF (operation == fft_radix_closest) THEN
287 IF (DATA(iloc) == radix_in) THEN
288 radix_out = DATA(iloc)
289 ELSE
290 IF (abs(DATA(iloc - 1) - radix_in) <= &
291 abs(DATA(iloc) - radix_in)) THEN
292 radix_out = DATA(iloc - 1)
293 ELSE
294 radix_out = DATA(iloc)
295 END IF
296 END IF
297
298 ELSE IF (operation == fft_radix_next) THEN
299 radix_out = DATA(iloc)
300
301 ELSE IF (operation == fft_radix_next_odd) THEN
302 DO i = iloc, ldata
303 IF (mod(DATA(i), 2) == 1) THEN
304 radix_out = DATA(i)
305 EXIT
306 END IF
307 END DO
308 IF (mod(radix_out, 2) == 0) THEN
309 cpabort("No odd radix found.")
310 END IF
311
312 ELSE
313 cpabort("Disallowed radix operation.")
314 END IF
315
316 DEALLOCATE (data)
317
318 END SUBROUTINE fft_radix_operations
319
320! **************************************************************************************************
321!> \brief Performs m 1-D forward FFT-s of size n.
322!> \param n size of the FFT
323!> \param m number of FFT-s
324!> \param trans shape of input and output arrays: [n x m] if it is FALSE, [m x n] if it is TRUE
325!> \param zin input array
326!> \param zout output array
327!> \param scale scaling factor
328!> \param stat status of the operation, non-zero code indicates an error
329! **************************************************************************************************
330 SUBROUTINE fft_fw1d(n, m, trans, zin, zout, scale, stat)
331 INTEGER, INTENT(in) :: n, m
332 LOGICAL, INTENT(in) :: trans
333 COMPLEX(kind=dp), DIMENSION(*), INTENT(inout) :: zin, zout
334 REAL(kind=dp), INTENT(in) :: scale
335 INTEGER, INTENT(out) :: stat
336
337 CHARACTER(len=*), PARAMETER :: routinen = 'fft_fw1d'
338
339 INTEGER :: handle
340 TYPE(fft_plan_type) :: fft_plan
341
342 CALL timeset(routinen, handle)
343
344 IF (fft_type == 3) THEN
345 CALL fft_create_plan_1dm(fft_plan, fft_type, fwfft, trans, n, m, zin, zout, fft_plan_style)
346 CALL fft_1dm(fft_plan, zin, zout, scale, stat)
348 ELSE
349 CALL cp_warn(__location__, &
350 "FFT library in use cannot handle transformation of an arbitrary length.")
351 stat = 1
352 END IF
353
354 CALL timestop(handle)
355 END SUBROUTINE fft_fw1d
356
357! **************************************************************************************************
358!> \brief Calls the 3D-FFT function from the initialized library
359!> \param fsign ...
360!> \param n ...
361!> \param zin ...
362!> \param zout ...
363!> \param status ...
364!> \param debug ...
365!> \par History
366!> none
367!> \author JGH
368! **************************************************************************************************
369 SUBROUTINE fft3d_s(fsign, n, zin, zout, status, debug)
370
371 INTEGER, INTENT(IN) :: fsign
372 INTEGER, DIMENSION(:), INTENT(INOUT) :: n
373 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
374 INTENT(INOUT) :: zin
375 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
376 INTENT(INOUT), OPTIONAL, TARGET :: zout
377 INTEGER, INTENT(OUT), OPTIONAL :: status
378 LOGICAL, INTENT(IN), OPTIONAL :: debug
379
380 CHARACTER(len=*), PARAMETER :: routineN = 'fft3d_s'
381
382 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
383 POINTER :: zoptr
384 COMPLEX(KIND=dp), DIMENSION(1, 1, 1), TARGET :: zdum
385 INTEGER :: handle, ld(3), lo(3), output_unit, sign, &
386 stat
387 LOGICAL :: fft_in_place, test
388 REAL(KIND=dp) :: in_sum, norm, out_sum
389 TYPE(fft_scratch_type), POINTER :: fft_scratch
390
391 CALL timeset(routinen, handle)
392 output_unit = cp_logger_get_default_io_unit()
393
394 IF (fsign == fwfft) THEN
395 norm = 1.0_dp/real(product(n), kind=dp)
396 ELSE IF (fsign == bwfft) THEN
397 norm = 1.0_dp
398 ELSE
399 cpabort("Unknown FFT direction!")
400 END IF
401
402 IF (PRESENT(debug)) THEN
403 test = debug
404 ELSE
405 test = .false.
406 END IF
407
408 IF (PRESENT(zout)) THEN
409 fft_in_place = .false.
410 ELSE
411 fft_in_place = .true.
412 END IF
413
414 IF (test) THEN
415 in_sum = sum(abs(zin))
416 END IF
417
418 ld(1) = SIZE(zin, 1)
419 ld(2) = SIZE(zin, 2)
420 ld(3) = SIZE(zin, 3)
421
422 IF (n(1) /= ld(1) .OR. n(2) /= ld(2) .OR. n(3) /= ld(3)) THEN
423 cpabort("Size and dimension (zin) have to be the same.")
424 END IF
425
426 sign = fsign
427 CALL get_fft_scratch(fft_scratch, tf_type=400, n=n)
428
429 IF (fft_in_place) THEN
430 zoptr => zdum
431 IF (fsign == fwfft) THEN
432 CALL fft_3d(fft_scratch%fft_plan(1), norm, zin, zoptr, stat)
433 ELSE
434 CALL fft_3d(fft_scratch%fft_plan(2), norm, zin, zoptr, stat)
435 END IF
436 ELSE
437 IF (fsign == fwfft) THEN
438 CALL fft_3d(fft_scratch%fft_plan(3), norm, zin, zout, stat)
439 ELSE
440 CALL fft_3d(fft_scratch%fft_plan(4), norm, zin, zout, stat)
441 END IF
442 END IF
443
444 CALL release_fft_scratch(fft_scratch)
445
446 IF (PRESENT(zout)) THEN
447 lo(1) = SIZE(zout, 1)
448 lo(2) = SIZE(zout, 2)
449 lo(3) = SIZE(zout, 3)
450 IF (n(1) /= lo(1) .OR. n(2) /= lo(2) .OR. n(3) /= lo(3)) THEN
451 cpabort("Size and dimension (zout) have to be the same.")
452 END IF
453 END IF
454
455 IF (PRESENT(status)) THEN
456 status = stat
457 END IF
458
459 IF (test .AND. output_unit > 0) THEN
460 IF (PRESENT(zout)) THEN
461 out_sum = sum(abs(zout))
462 WRITE (output_unit, '(A)') " Out of place 3D FFT (local) : fft3d_s"
463 WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
464 WRITE (output_unit, '(A,T60,3I7)') " Input array dimensions ", ld
465 WRITE (output_unit, '(A,T60,3I7)') " Output array dimensions ", lo
466 WRITE (output_unit, '(A,T61,E20.14)') " Sum of input data ", in_sum
467 WRITE (output_unit, '(A,T61,E20.14)') " Sum of output data ", out_sum
468 ELSE
469 out_sum = sum(abs(zin))
470 WRITE (output_unit, '(A)') " In place 3D FFT (local) : fft3d_s"
471 WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
472 WRITE (output_unit, '(A,T60,3I7)') " Input/output array dimensions ", ld
473 WRITE (output_unit, '(A,T61,E20.14)') " Sum of input data ", in_sum
474 WRITE (output_unit, '(A,T61,E20.14)') " Sum of output data ", out_sum
475 END IF
476 END IF
477
478 CALL timestop(handle)
479
480 END SUBROUTINE fft3d_s
481
482! **************************************************************************************************
483!> \brief ...
484!> \param fsign ...
485!> \param n ...
486!> \param cin ...
487!> \param gin ...
488!> \param rs_group ...
489!> \param yzp ...
490!> \param nyzray ...
491!> \param bo ...
492!> \param status ...
493!> \param debug ...
494! **************************************************************************************************
495 SUBROUTINE fft3d_ps(fsign, n, cin, gin, rs_group, yzp, nyzray, &
496 bo, status, debug)
497
498 INTEGER, INTENT(IN) :: fsign
499 INTEGER, DIMENSION(:), INTENT(IN) :: n
500 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
501 INTENT(INOUT) :: cin
502 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
503 INTENT(INOUT) :: gin
504 TYPE(mp_cart_type), INTENT(IN) :: rs_group
505 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
506 INTENT(IN) :: yzp
507 INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nyzray
508 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:, :), &
509 INTENT(IN) :: bo
510 INTEGER, INTENT(OUT), OPTIONAL :: status
511 LOGICAL, INTENT(IN), OPTIONAL :: debug
512
513 CHARACTER(len=*), PARAMETER :: routineN = 'fft3d_ps'
514
515 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
516 POINTER :: pbuf, qbuf, rbuf, sbuf
517 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
518 POINTER :: tbuf
519 INTEGER :: g_pos, handle, lg, lmax, mcx2, mcz1, mcz2, mg, mmax, mx1, mx2, my1, mz2, n1, n2, &
520 nmax, numtask, nx, ny, nz, output_unit, r_dim(2), r_pos(2), rp, sign, stat
521 INTEGER, ALLOCATABLE, DIMENSION(:) :: p2p
522 LOGICAL :: test
523 REAL(KIND=dp) :: norm, sum_data
524 TYPE(fft_scratch_sizes) :: fft_scratch_size
525 TYPE(fft_scratch_type), POINTER :: fft_scratch
526
527 CALL timeset(routinen, handle)
528 output_unit = cp_logger_get_default_io_unit()
529
530 IF (PRESENT(debug)) THEN
531 test = debug
532 ELSE
533 test = .false.
534 END IF
535
536 g_pos = rs_group%mepos
537 numtask = rs_group%num_pe
538 r_dim = rs_group%num_pe_cart
539 r_pos = rs_group%mepos_cart
540
541 IF (fsign == fwfft) THEN
542 norm = 1.0_dp/real(product(n), kind=dp)
543 ELSE IF (fsign == bwfft) THEN
544 norm = 1.0_dp
545 ELSE
546 cpabort("Unknown FFT direction!")
547 END IF
548
549 sign = fsign
550
551 lg = SIZE(gin, 1)
552 mg = SIZE(gin, 2)
553
554 nx = SIZE(cin, 1)
555 ny = SIZE(cin, 2)
556 nz = SIZE(cin, 3)
557
558 IF (mg == 0) THEN
559 mmax = 1
560 ELSE
561 mmax = mg
562 END IF
563 lmax = max(lg, (nx*ny*nz)/mmax + 1)
564
565 ALLOCATE (p2p(0:numtask - 1))
566
567 CALL rs_group%rank_compare(rs_group, p2p)
568
569 rp = p2p(g_pos)
570 mx1 = bo(2, 1, rp, 1) - bo(1, 1, rp, 1) + 1
571 my1 = bo(2, 2, rp, 1) - bo(1, 2, rp, 1) + 1
572 mx2 = bo(2, 1, rp, 2) - bo(1, 1, rp, 2) + 1
573 mz2 = bo(2, 3, rp, 2) - bo(1, 3, rp, 2) + 1
574
575 n1 = maxval(bo(2, 1, :, 1) - bo(1, 1, :, 1) + 1)
576 n2 = maxval(bo(2, 2, :, 1) - bo(1, 2, :, 1) + 1)
577 nmax = max((2*n2)/numtask, 2)*mx2*mz2
578 nmax = max(nmax, n1*maxval(nyzray))
579 n1 = maxval(bo(2, 1, :, 2))
580 n2 = maxval(bo(2, 3, :, 2))
581
582 fft_scratch_size%nx = nx
583 fft_scratch_size%ny = ny
584 fft_scratch_size%nz = nz
585 fft_scratch_size%lmax = lmax
586 fft_scratch_size%mmax = mmax
587 fft_scratch_size%mx1 = mx1
588 fft_scratch_size%mx2 = mx2
589 fft_scratch_size%my1 = my1
590 fft_scratch_size%mz2 = mz2
591 fft_scratch_size%lg = lg
592 fft_scratch_size%mg = mg
593 fft_scratch_size%nbx = n1
594 fft_scratch_size%nbz = n2
595 mcz1 = maxval(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
596 mcx2 = maxval(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
597 mcz2 = maxval(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
598 fft_scratch_size%mcz1 = mcz1
599 fft_scratch_size%mcx2 = mcx2
600 fft_scratch_size%mcz2 = mcz2
601 fft_scratch_size%nmax = nmax
602 fft_scratch_size%nmray = maxval(nyzray)
603 fft_scratch_size%nyzray = nyzray(g_pos)
604 fft_scratch_size%rs_group = rs_group
605 fft_scratch_size%g_pos = g_pos
606 fft_scratch_size%r_pos = r_pos
607 fft_scratch_size%r_dim = r_dim
608 fft_scratch_size%numtask = numtask
609
610 IF (test) THEN
611 IF (g_pos == 0 .AND. output_unit > 0) THEN
612 WRITE (output_unit, '(A)') " Parallel 3D FFT : fft3d_ps"
613 WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
614 WRITE (output_unit, '(A,T67,2I7)') " Array dimensions (gin) ", lg, mg
615 WRITE (output_unit, '(A,T60,3I7)') " Array dimensions (cin) ", nx, ny, nz
616 END IF
617 END IF
618
619 IF (r_dim(2) > 1) THEN
620
621 !
622 ! real space is distributed over x and y coordinate
623 ! we have two stages of communication
624 !
625
626 IF (r_dim(1) == 1) THEN
627 cpabort("This processor distribution is not supported.")
628 END IF
629 CALL get_fft_scratch(fft_scratch, tf_type=300, n=n, fft_sizes=fft_scratch_size)
630
631 IF (sign == fwfft) THEN
632 ! cin -> gin
633
634 IF (test) THEN
635 sum_data = abs(sum(cin))
636 CALL rs_group%sum(sum_data)
637 IF (g_pos == 0 .AND. output_unit > 0) THEN
638 WRITE (output_unit, '(A)') " Two step communication algorithm "
639 WRITE (output_unit, '(A,T60,3I7)') " Transform Z ", n(3), mx1*my1
640 WRITE (output_unit, '(A,T60,3I7)') " Transform Y ", n(2), mx2*mz2
641 WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
642 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
643 END IF
644 END IF
645
646 pbuf => fft_scratch%p1buf
647 qbuf => fft_scratch%p2buf
648
649 ! FFT along z
650 CALL fft_1dm(fft_scratch%fft_plan(1), cin, qbuf, norm, stat)
651
652 rbuf => fft_scratch%p3buf
653
654 IF (test) THEN
655 sum_data = abs(sum(qbuf))
656 CALL rs_group%sum(sum_data)
657 IF (g_pos == 0 .AND. output_unit > 0) THEN
658 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
659 END IF
660 END IF
661
662 ! Exchange data ( transpose of matrix )
663 CALL cube_transpose_2(qbuf, bo(:, :, :, 1), bo(:, :, :, 2), rbuf, fft_scratch)
664
665 IF (test) THEN
666 sum_data = abs(sum(rbuf))
667 CALL rs_group%sum(sum_data)
668 IF (g_pos == 0 .AND. output_unit > 0) THEN
669 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) T", sum_data
670 END IF
671 END IF
672
673 pbuf => fft_scratch%p4buf
674
675 ! FFT along y
676 CALL fft_1dm(fft_scratch%fft_plan(2), rbuf, pbuf, 1.0_dp, stat)
677
678 qbuf => fft_scratch%p5buf
679
680 IF (test) THEN
681 sum_data = abs(sum(pbuf))
682 CALL rs_group%sum(sum_data)
683 IF (g_pos == 0 .AND. output_unit > 0) THEN
684 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) TS", sum_data
685 END IF
686 END IF
687
688 ! Exchange data ( transpose of matrix ) and sort
689 CALL xz_to_yz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
690 bo(:, :, :, 2), qbuf, fft_scratch)
691
692 IF (test) THEN
693 sum_data = abs(sum(qbuf))
694 CALL rs_group%sum(sum_data)
695 IF (g_pos == 0 .AND. output_unit > 0) THEN
696 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) TS", sum_data
697 END IF
698 END IF
699
700 ! FFT along x
701 CALL fft_1dm(fft_scratch%fft_plan(3), qbuf, gin, 1.0_dp, stat)
702
703 IF (test) THEN
704 sum_data = abs(sum(gin))
705 CALL rs_group%sum(sum_data)
706 IF (g_pos == 0 .AND. output_unit > 0) THEN
707 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
708 END IF
709 END IF
710
711 ELSE IF (sign == bwfft) THEN
712 ! gin -> cin
713
714 IF (test) THEN
715 sum_data = abs(sum(gin))
716 CALL rs_group%sum(sum_data)
717 IF (g_pos == 0 .AND. output_unit > 0) THEN
718 WRITE (output_unit, '(A)') " Two step communication algorithm "
719 WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
720 WRITE (output_unit, '(A,T60,3I7)') " Transform Y ", n(2), mx2*mz2
721 WRITE (output_unit, '(A,T60,3I7)') " Transform Z ", n(3), mx1*my1
722 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
723 END IF
724 END IF
725
726 pbuf => fft_scratch%p7buf
727
728 ! FFT along x
729 CALL fft_1dm(fft_scratch%fft_plan(4), gin, pbuf, norm, stat)
730
731 qbuf => fft_scratch%p4buf
732
733 IF (test) THEN
734 sum_data = abs(sum(pbuf))
735 CALL rs_group%sum(sum_data)
736 IF (g_pos == 0 .AND. output_unit > 0) THEN
737 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) TS", sum_data
738 END IF
739 END IF
740
741 ! Exchange data ( transpose of matrix ) and sort
742 CALL yz_to_xz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
743 bo(:, :, :, 2), qbuf, fft_scratch)
744
745 IF (test) THEN
746 sum_data = abs(sum(qbuf))
747 CALL rs_group%sum(sum_data)
748 IF (g_pos == 0 .AND. output_unit > 0) THEN
749 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) TS", sum_data
750 END IF
751 END IF
752
753 rbuf => fft_scratch%p3buf
754
755 ! FFT along y
756 CALL fft_1dm(fft_scratch%fft_plan(5), qbuf, rbuf, 1.0_dp, stat)
757
758 pbuf => fft_scratch%p2buf
759
760 IF (test) THEN
761 sum_data = abs(sum(rbuf))
762 CALL rs_group%sum(sum_data)
763 IF (g_pos == 0 .AND. output_unit > 0) THEN
764 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) T", sum_data
765 END IF
766 END IF
767
768 ! Exchange data ( transpose of matrix )
769 CALL cube_transpose_1(rbuf, bo(:, :, :, 2), bo(:, :, :, 1), pbuf, fft_scratch)
770
771 IF (test) THEN
772 sum_data = abs(sum(pbuf))
773 CALL rs_group%sum(sum_data)
774 IF (g_pos == 0 .AND. output_unit > 0) THEN
775 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) T", sum_data
776 END IF
777 END IF
778
779 qbuf => fft_scratch%p1buf
780
781 ! FFT along z
782 CALL fft_1dm(fft_scratch%fft_plan(6), pbuf, cin, 1.0_dp, stat)
783
784 IF (test) THEN
785 sum_data = abs(sum(cin))
786 CALL rs_group%sum(sum_data)
787 IF (g_pos == 0 .AND. output_unit > 0) THEN
788 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
789 END IF
790 END IF
791
792 ELSE
793
794 cpabort("Illegal fsign parameter.")
795
796 END IF
797
798 CALL release_fft_scratch(fft_scratch)
799
800 ELSE
801
802 !
803 ! real space is only distributed over x coordinate
804 ! we have one stage of communication, after the transform of
805 ! direction x
806 !
807
808 CALL get_fft_scratch(fft_scratch, tf_type=200, n=n, fft_sizes=fft_scratch_size)
809
810 sbuf => fft_scratch%r1buf
811 tbuf => fft_scratch%tbuf
812
813 sbuf = z_zero
814 tbuf = z_zero
815
816 IF (sign == fwfft) THEN
817 ! cin -> gin
818
819 IF (test) THEN
820 sum_data = abs(sum(cin))
821 CALL rs_group%sum(sum_data)
822 IF (g_pos == 0 .AND. output_unit > 0) THEN
823 WRITE (output_unit, '(A)') " One step communication algorithm "
824 WRITE (output_unit, '(A,T60,3I7)') " Transform YZ ", n(2), n(3), nx
825 WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
826 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
827 END IF
828 END IF
829
830 ! FFT along y and z
831 CALL fft_1dm(fft_scratch%fft_plan(1), cin, sbuf, 1._dp, stat)
832 CALL fft_1dm(fft_scratch%fft_plan(2), sbuf, tbuf, 1._dp, stat)
833
834 IF (test) THEN
835 sum_data = abs(sum(tbuf))
836 CALL rs_group%sum(sum_data)
837 IF (g_pos == 0 .AND. output_unit > 0) THEN
838 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) TS", sum_data
839 END IF
840 END IF
841
842 ! Exchange data ( transpose of matrix ) and sort
843 CALL yz_to_x(tbuf, rs_group, g_pos, p2p, yzp, nyzray, &
844 bo(:, :, :, 2), sbuf, fft_scratch)
845
846 IF (test) THEN
847 sum_data = abs(sum(sbuf))
848 CALL rs_group%sum(sum_data)
849 IF (g_pos == 0 .AND. output_unit > 0) THEN
850 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) TS", sum_data
851 END IF
852 END IF
853 ! FFT along x
854 CALL fft_1dm(fft_scratch%fft_plan(3), sbuf, gin, norm, stat)
855
856 IF (test) THEN
857 sum_data = abs(sum(gin))
858 CALL rs_group%sum(sum_data)
859 IF (g_pos == 0 .AND. output_unit > 0) THEN
860 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
861 END IF
862 END IF
863
864 ELSE IF (sign == bwfft) THEN
865 ! gin -> cin
866
867 IF (test) THEN
868 sum_data = abs(sum(gin))
869 CALL rs_group%sum(sum_data)
870 IF (g_pos == 0 .AND. output_unit > 0) THEN
871 WRITE (output_unit, '(A)') " One step communication algorithm "
872 WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
873 WRITE (output_unit, '(A,T60,3I7)') " Transform YZ ", n(2), n(3), nx
874 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
875 END IF
876 END IF
877
878 ! FFT along x
879 CALL fft_1dm(fft_scratch%fft_plan(4), gin, sbuf, norm, stat)
880
881 IF (test) THEN
882 sum_data = abs(sum(sbuf))
883 CALL rs_group%sum(sum_data)
884 IF (g_pos == 0 .AND. output_unit > 0) THEN
885 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) TS", sum_data
886 END IF
887 END IF
888
889 ! Exchange data ( transpose of matrix ) and sort
890 CALL x_to_yz(sbuf, rs_group, g_pos, p2p, yzp, nyzray, &
891 bo(:, :, :, 2), tbuf, fft_scratch)
892
893 IF (test) THEN
894 sum_data = abs(sum(tbuf))
895 CALL rs_group%sum(sum_data)
896 IF (g_pos == 0 .AND. output_unit > 0) THEN
897 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) TS", sum_data
898 END IF
899 END IF
900
901 ! FFT along y and z
902 CALL fft_1dm(fft_scratch%fft_plan(5), tbuf, sbuf, 1._dp, stat)
903 CALL fft_1dm(fft_scratch%fft_plan(6), sbuf, cin, 1._dp, stat)
904
905 IF (test) THEN
906 sum_data = abs(sum(cin))
907 CALL rs_group%sum(sum_data)
908 IF (g_pos == 0 .AND. output_unit > 0) THEN
909 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
910 END IF
911 END IF
912 ELSE
913 cpabort("Illegal fsign parameter.")
914 END IF
915
916 CALL release_fft_scratch(fft_scratch)
917
918 END IF
919
920 DEALLOCATE (p2p)
921
922 IF (PRESENT(status)) THEN
923 status = stat
924 END IF
925 CALL timestop(handle)
926
927 END SUBROUTINE fft3d_ps
928
929! **************************************************************************************************
930!> \brief ...
931!> \param fsign ...
932!> \param n ...
933!> \param zin ...
934!> \param gin ...
935!> \param group ...
936!> \param bo ...
937!> \param status ...
938!> \param debug ...
939! **************************************************************************************************
940 SUBROUTINE fft3d_pb(fsign, n, zin, gin, group, bo, status, debug)
941
942 INTEGER, INTENT(IN) :: fsign
943 INTEGER, DIMENSION(3), INTENT(IN) :: n
944 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
945 INTENT(INOUT) :: zin
946 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
947 INTENT(INOUT) :: gin
948 TYPE(mp_cart_type), INTENT(IN) :: group
949 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:, :), &
950 INTENT(IN) :: bo
951 INTEGER, INTENT(OUT), OPTIONAL :: status
952 LOGICAL, INTENT(IN), OPTIONAL :: debug
953
954 CHARACTER(len=*), PARAMETER :: routineN = 'fft3d_pb'
955
956 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
957 POINTER :: abuf, bbuf
958 INTEGER :: handle, lg(2), lz(3), mcx2, mcy3, mcz1, &
959 mcz2, mx1, mx2, mx3, my1, my2, my3, &
960 my_pos, mz1, mz2, mz3, output_unit, &
961 sign, stat
962 INTEGER, DIMENSION(2) :: dim
963 LOGICAL :: test
964 REAL(KIND=dp) :: norm, sum_data
965 TYPE(fft_scratch_sizes) :: fft_scratch_size
966 TYPE(fft_scratch_type), POINTER :: fft_scratch
967
968!------------------------------------------------------------------------------
969! "Real Space" 1) xyZ or 1) xYZ
970! 2) xYz or not used
971! "G Space" 3) Xyz or 3) XYz
972!
973! There is one communicator (2-dimensional) for all distributions
974! np = n1 * n2, where np is the total number of processors
975! If n2 = 1, we have the second case and only one transpose step is needed
976!
977! Assignment of dimensions to axis for different steps
978! First case: 1) n1=x; n2=y
979! 2) n1=x; n2=z
980! 3) n1=y; n2=z
981! Second case 1) n1=x
982! 3) n1=z
983!
984! The more general case with two communicators for the initial and final
985! distribution is not covered.
986!------------------------------------------------------------------------------
987
988 CALL timeset(routinen, handle)
989 output_unit = cp_logger_get_default_io_unit()
990
991 dim = group%num_pe_cart
992 my_pos = group%mepos
993
994 IF (PRESENT(debug)) THEN
995 test = debug
996 ELSE
997 test = .false.
998 END IF
999
1000 IF (fsign == fwfft) THEN
1001 norm = 1.0_dp/real(product(n), kind=dp)
1002 ELSE IF (fsign == bwfft) THEN
1003 norm = 1.0_dp
1004 ELSE
1005 cpabort("Unknown FFT direction!")
1006 END IF
1007
1008 sign = fsign
1009
1010 IF (test) THEN
1011 lg(1) = SIZE(gin, 1)
1012 lg(2) = SIZE(gin, 2)
1013 lz(1) = SIZE(zin, 1)
1014 lz(2) = SIZE(zin, 2)
1015 lz(3) = SIZE(zin, 3)
1016 IF (my_pos == 0 .AND. output_unit > 0) THEN
1017 WRITE (output_unit, '(A)') " Parallel 3D FFT : fft3d_pb"
1018 WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
1019 WRITE (output_unit, '(A,T67,2I7)') " Array dimensions (gin) ", lg
1020 WRITE (output_unit, '(A,T60,3I7)') " Array dimensions (cin) ", lz
1021 END IF
1022 END IF
1023
1024 mx1 = bo(2, 1, my_pos, 1) - bo(1, 1, my_pos, 1) + 1
1025 my1 = bo(2, 2, my_pos, 1) - bo(1, 2, my_pos, 1) + 1
1026 mz1 = bo(2, 3, my_pos, 1) - bo(1, 3, my_pos, 1) + 1
1027 mx2 = bo(2, 1, my_pos, 2) - bo(1, 1, my_pos, 2) + 1
1028 my2 = bo(2, 2, my_pos, 2) - bo(1, 2, my_pos, 2) + 1
1029 mz2 = bo(2, 3, my_pos, 2) - bo(1, 3, my_pos, 2) + 1
1030 mx3 = bo(2, 1, my_pos, 3) - bo(1, 1, my_pos, 3) + 1
1031 my3 = bo(2, 2, my_pos, 3) - bo(1, 2, my_pos, 3) + 1
1032 mz3 = bo(2, 3, my_pos, 3) - bo(1, 3, my_pos, 3) + 1
1033 fft_scratch_size%mx1 = mx1
1034 fft_scratch_size%mx2 = mx2
1035 fft_scratch_size%mx3 = mx3
1036 fft_scratch_size%my1 = my1
1037 fft_scratch_size%my2 = my2
1038 fft_scratch_size%my3 = my3
1039 fft_scratch_size%mz1 = mz1
1040 fft_scratch_size%mz2 = mz2
1041 fft_scratch_size%mz3 = mz3
1042 mcz1 = maxval(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
1043 mcx2 = maxval(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
1044 mcz2 = maxval(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
1045 mcy3 = maxval(bo(2, 2, :, 3) - bo(1, 2, :, 3) + 1)
1046 fft_scratch_size%mcz1 = mcz1
1047 fft_scratch_size%mcx2 = mcx2
1048 fft_scratch_size%mcz2 = mcz2
1049 fft_scratch_size%mcy3 = mcy3
1050 fft_scratch_size%rs_group = group
1051 fft_scratch_size%g_pos = my_pos
1052 fft_scratch_size%numtask = dim(1)*dim(2)
1053
1054 IF (dim(1) > 1 .AND. dim(2) > 1) THEN
1055
1056 !
1057 ! First case; two stages of communication
1058 !
1059
1060 CALL get_fft_scratch(fft_scratch, tf_type=100, n=n, fft_sizes=fft_scratch_size)
1061
1062 IF (sign == fwfft) THEN
1063 ! Stage 1 -> 3
1064
1065 bbuf => fft_scratch%a2buf
1066
1067 IF (test) THEN
1068 sum_data = abs(sum(zin))
1069 CALL group%sum(sum_data)
1070 IF (my_pos == 0 .AND. output_unit > 0) THEN
1071 WRITE (output_unit, '(A)') " Two step communication algorithm "
1072 WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1073 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1074 END IF
1075 END IF
1076
1077 ! FFT along z
1078 CALL fft_1dm(fft_scratch%fft_plan(1), zin, bbuf, norm, stat)
1079
1080 abuf => fft_scratch%a3buf
1081
1082 IF (test) THEN
1083 sum_data = abs(sum(bbuf))
1084 CALL group%sum(sum_data)
1085 IF (my_pos == 0 .AND. output_unit > 0) THEN
1086 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1087 END IF
1088 END IF
1089
1090 CALL cube_transpose_2(bbuf, bo(:, :, :, 1), bo(:, :, :, 2), abuf, fft_scratch)
1091
1092 bbuf => fft_scratch%a4buf
1093
1094 IF (test) THEN
1095 sum_data = abs(sum(abuf))
1096 CALL group%sum(sum_data)
1097 IF (my_pos == 0 .AND. output_unit > 0) THEN
1098 WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx2*mz2
1099 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1100 END IF
1101 END IF
1102
1103 ! FFT along y
1104 CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
1105
1106 abuf => fft_scratch%a5buf
1107
1108 IF (test) THEN
1109 sum_data = abs(sum(bbuf))
1110 CALL group%sum(sum_data)
1111 IF (my_pos == 0 .AND. output_unit > 0) THEN
1112 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) T", sum_data
1113 END IF
1114 END IF
1115
1116 CALL cube_transpose_4(bbuf, bo(:, :, :, 2), bo(:, :, :, 3), abuf, fft_scratch)
1117
1118 IF (test) THEN
1119 sum_data = abs(sum(abuf))
1120 CALL group%sum(sum_data)
1121 IF (my_pos == 0 .AND. output_unit > 0) THEN
1122 WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1123 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) ", sum_data
1124 END IF
1125 END IF
1126
1127 ! FFT along x
1128 CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
1129
1130 IF (test) THEN
1131 sum_data = abs(sum(gin))
1132 CALL group%sum(sum_data)
1133 IF (my_pos == 0 .AND. output_unit > 0) THEN
1134 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
1135 END IF
1136 END IF
1137
1138 ELSEIF (sign == bwfft) THEN
1139 ! Stage 3 -> 1
1140
1141 bbuf => fft_scratch%a5buf
1142
1143 IF (test) THEN
1144 sum_data = abs(sum(gin))
1145 CALL group%sum(sum_data)
1146 IF (my_pos == 0 .AND. output_unit > 0) THEN
1147 WRITE (output_unit, '(A)') " Two step communication algorithm "
1148 WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1149 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1150 END IF
1151 END IF
1152
1153 ! FFT along x
1154 CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
1155
1156 abuf => fft_scratch%a4buf
1157
1158 IF (test) THEN
1159 sum_data = abs(sum(bbuf))
1160 CALL group%sum(sum_data)
1161 IF (my_pos == 0 .AND. output_unit > 0) THEN
1162 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1163 END IF
1164 END IF
1165
1166 CALL cube_transpose_3(bbuf, bo(:, :, :, 3), bo(:, :, :, 2), abuf, fft_scratch)
1167
1168 bbuf => fft_scratch%a3buf
1169
1170 IF (test) THEN
1171 sum_data = abs(sum(abuf))
1172 CALL group%sum(sum_data)
1173 IF (my_pos == 0 .AND. output_unit > 0) THEN
1174 WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx2*mz2
1175 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1176 END IF
1177 END IF
1178
1179 ! FFT along y
1180 CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
1181
1182 abuf => fft_scratch%a2buf
1183
1184 IF (test) THEN
1185 sum_data = abs(sum(bbuf))
1186 CALL group%sum(sum_data)
1187 IF (my_pos == 0 .AND. output_unit > 0) THEN
1188 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) T", sum_data
1189 END IF
1190 END IF
1191
1192 CALL cube_transpose_1(bbuf, bo(:, :, :, 2), bo(:, :, :, 1), abuf, fft_scratch)
1193
1194 IF (test) THEN
1195 sum_data = abs(sum(abuf))
1196 CALL group%sum(sum_data)
1197 IF (my_pos == 0 .AND. output_unit > 0) THEN
1198 WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1199 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) ", sum_data
1200 END IF
1201 END IF
1202
1203 ! FFT along z
1204 CALL fft_1dm(fft_scratch%fft_plan(6), abuf, zin, norm, stat)
1205
1206 IF (test) THEN
1207 sum_data = abs(sum(zin))
1208 CALL group%sum(sum_data)
1209 IF (my_pos == 0 .AND. output_unit > 0) THEN
1210 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
1211 END IF
1212 END IF
1213
1214 ELSE
1215 cpabort("Illegal fsign parameter.")
1216 END IF
1217
1218 CALL release_fft_scratch(fft_scratch)
1219
1220 ELSEIF (dim(2) == 1) THEN
1221
1222 !
1223 ! Second case; one stage of communication
1224 !
1225
1226 CALL get_fft_scratch(fft_scratch, tf_type=101, n=n, fft_sizes=fft_scratch_size)
1227
1228 IF (sign == fwfft) THEN
1229 ! Stage 1 -> 3
1230
1231 IF (test) THEN
1232 sum_data = abs(sum(zin))
1233 CALL group%sum(sum_data)
1234 IF (my_pos == 0 .AND. output_unit > 0) THEN
1235 WRITE (output_unit, '(A)') " one step communication algorithm "
1236 WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1237 WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx1*mz1
1238 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1239 END IF
1240 END IF
1241
1242 abuf => fft_scratch%a3buf
1243 bbuf => fft_scratch%a4buf
1244 ! FFT along z and y
1245 CALL fft_1dm(fft_scratch%fft_plan(1), zin, abuf, norm, stat)
1246 CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
1247
1248 abuf => fft_scratch%a5buf
1249
1250 IF (test) THEN
1251 sum_data = abs(sum(bbuf))
1252 CALL group%sum(sum_data)
1253 IF (my_pos == 0 .AND. output_unit > 0) THEN
1254 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1255 END IF
1256 END IF
1257
1258 CALL cube_transpose_6(bbuf, group, bo(:, :, :, 1), bo(:, :, :, 3), abuf, fft_scratch)
1259
1260 IF (test) THEN
1261 sum_data = abs(sum(abuf))
1262 CALL group%sum(sum_data)
1263 IF (my_pos == 0 .AND. output_unit > 0) THEN
1264 WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1265 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1266 END IF
1267 END IF
1268
1269 ! FFT along x
1270 CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
1271
1272 IF (test) THEN
1273 sum_data = abs(sum(gin))
1274 CALL group%sum(sum_data)
1275 IF (my_pos == 0 .AND. output_unit > 0) THEN
1276 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
1277 END IF
1278 END IF
1279
1280 ELSEIF (sign == bwfft) THEN
1281 ! Stage 3 -> 1
1282
1283 IF (test) THEN
1284 sum_data = abs(sum(gin))
1285 CALL group%sum(sum_data)
1286 IF (my_pos == 0 .AND. output_unit > 0) THEN
1287 WRITE (output_unit, '(A)') " one step communication algorithm "
1288 WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1289 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1290 END IF
1291 END IF
1292
1293 bbuf => fft_scratch%a5buf
1294
1295 ! FFT along x
1296 CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
1297
1298 abuf => fft_scratch%a4buf
1299
1300 IF (test) THEN
1301 sum_data = abs(sum(bbuf))
1302 CALL group%sum(sum_data)
1303 IF (my_pos == 0 .AND. output_unit > 0) THEN
1304 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1305 END IF
1306 END IF
1307
1308 CALL cube_transpose_5(bbuf, group, bo(:, :, :, 3), bo(:, :, :, 1), abuf, fft_scratch)
1309
1310 bbuf => fft_scratch%a3buf
1311
1312 IF (test) THEN
1313 sum_data = abs(sum(abuf))
1314 CALL group%sum(sum_data)
1315 IF (my_pos == 0 .AND. output_unit > 0) THEN
1316 WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx1*mz1
1317 WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1318 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1319 END IF
1320 END IF
1321
1322 ! FFT along y
1323 CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
1324
1325 ! FFT along z
1326 CALL fft_1dm(fft_scratch%fft_plan(6), bbuf, zin, norm, stat)
1327
1328 IF (test) THEN
1329 sum_data = abs(sum(zin))
1330 CALL group%sum(sum_data)
1331 IF (my_pos == 0 .AND. output_unit > 0) THEN
1332 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
1333 END IF
1334 END IF
1335
1336 ELSE
1337 cpabort("Illegal fsign parameter.")
1338 END IF
1339
1340 CALL release_fft_scratch(fft_scratch)
1341
1342 ELSE
1343
1344 cpabort("Partition not implemented.")
1345
1346 END IF
1347
1348 IF (PRESENT(status)) THEN
1349 status = stat
1350 END IF
1351
1352 CALL timestop(handle)
1353
1354 END SUBROUTINE fft3d_pb
1355
1356! **************************************************************************************************
1357!> \brief ...
1358!> \param sb ...
1359!> \param group ...
1360!> \param my_pos ...
1361!> \param p2p ...
1362!> \param yzp ...
1363!> \param nray ...
1364!> \param bo ...
1365!> \param tb ...
1366!> \param fft_scratch ...
1367!> \par History
1368!> 15. Feb. 2006 : single precision all_to_all
1369!> \author JGH (14-Jan-2001)
1370! **************************************************************************************************
1371 SUBROUTINE x_to_yz(sb, group, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1372
1373 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1374 INTENT(IN) :: sb
1375
1376 CLASS(mp_comm_type), INTENT(IN) :: group
1377 INTEGER, INTENT(IN) :: my_pos
1378 INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1379 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
1380 INTENT(IN) :: yzp
1381 INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1382 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
1383 INTENT(IN) :: bo
1384 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
1385 INTENT(INOUT) :: tb
1386 TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
1387
1388 CHARACTER(len=*), PARAMETER :: routinen = 'x_to_yz'
1389
1390 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1391 POINTER :: rr
1392 COMPLEX(KIND=sp), CONTIGUOUS, DIMENSION(:, :), &
1393 POINTER :: ss, tt
1394 INTEGER :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
1395 nm, np, nr, nx
1396 INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
1397
1398 CALL timeset(routinen, handle)
1399
1400 np = SIZE(p2p)
1401 scount => fft_scratch%scount
1402 rcount => fft_scratch%rcount
1403 sdispl => fft_scratch%sdispl
1404 rdispl => fft_scratch%rdispl
1405
1406 IF (alltoall_sgl) THEN
1407 ss => fft_scratch%ss
1408 tt => fft_scratch%tt
1409 ss(:, :) = cmplx(sb(:, :), kind=sp)
1410 tt(:, :) = 0._sp
1411 ELSE
1412 rr => fft_scratch%rr
1413 END IF
1414
1415 mpr = p2p(my_pos)
1416 nm = maxval(nray(0:np - 1))
1417 nr = nray(my_pos)
1418!$OMP PARALLEL DO DEFAULT(NONE), &
1419!$OMP PRIVATE(ix,nx), &
1420!$OMP SHARED(np,p2p,bo,nr,scount,sdispl)
1421 DO ip = 0, np - 1
1422 ix = p2p(ip)
1423 nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
1424 scount(ip) = nr*nx
1425 sdispl(ip) = nr*(bo(1, 1, ix) - 1)
1426 END DO
1427!$OMP END PARALLEL DO
1428 nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1429!$OMP PARALLEL DO DEFAULT(NONE), &
1430!$OMP PRIVATE(nr), &
1431!$OMP SHARED(np,nray,nx,rcount,rdispl,nm)
1432 DO ip = 0, np - 1
1433 nr = nray(ip)
1434 rcount(ip) = nr*nx
1435 rdispl(ip) = nm*nx*ip
1436 END DO
1437!$OMP END PARALLEL DO
1438 IF (alltoall_sgl) THEN
1439 CALL group%alltoall(ss, scount, sdispl, tt, rcount, rdispl)
1440 ELSE
1441 CALL group%alltoall(sb, scount, sdispl, rr, rcount, rdispl)
1442 END IF
1443
1444 nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1445!$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
1446!$OMP PRIVATE(ixx,ir,iy,iz,ix) &
1447!$OMP SHARED(np,nray,nx,alltoall_sgl,yzp,tt,rr,tb)
1448 DO ip = 0, np - 1
1449 DO ix = 1, nx
1450 ixx = nray(ip)*(ix - 1)
1451 IF (alltoall_sgl) THEN
1452 DO ir = 1, nray(ip)
1453 iy = yzp(1, ir, ip)
1454 iz = yzp(2, ir, ip)
1455 tb(iy, iz, ix) = tt(ir + ixx, ip)
1456 END DO
1457 ELSE
1458 DO ir = 1, nray(ip)
1459 iy = yzp(1, ir, ip)
1460 iz = yzp(2, ir, ip)
1461 tb(iy, iz, ix) = rr(ir + ixx, ip)
1462 END DO
1463 END IF
1464 END DO
1465 END DO
1466!$OMP END PARALLEL DO
1467
1468 CALL timestop(handle)
1469
1470 END SUBROUTINE x_to_yz
1471
1472! **************************************************************************************************
1473!> \brief ...
1474!> \param tb ...
1475!> \param group ...
1476!> \param my_pos ...
1477!> \param p2p ...
1478!> \param yzp ...
1479!> \param nray ...
1480!> \param bo ...
1481!> \param sb ...
1482!> \param fft_scratch ...
1483!> \par History
1484!> 15. Feb. 2006 : single precision all_to_all
1485!> \author JGH (14-Jan-2001)
1486! **************************************************************************************************
1487 SUBROUTINE yz_to_x(tb, group, my_pos, p2p, yzp, nray, bo, sb, fft_scratch)
1488
1489 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
1490 INTENT(IN) :: tb
1491
1492 CLASS(mp_comm_type), INTENT(IN) :: group
1493 INTEGER, INTENT(IN) :: my_pos
1494 INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1495 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
1496 INTENT(IN) :: yzp
1497 INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1498 INTEGER, DIMENSION(:, :, 0:), INTENT(IN) :: bo
1499 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1500 INTENT(INOUT) :: sb
1501 TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
1502
1503 CHARACTER(len=*), PARAMETER :: routinen = 'yz_to_x'
1504
1505 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1506 POINTER :: rr
1507 COMPLEX(KIND=sp), CONTIGUOUS, DIMENSION(:, :), &
1508 POINTER :: ss, tt
1509 INTEGER :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
1510 nm, np, nr, nx
1511 INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
1512
1513 CALL timeset(routinen, handle)
1514
1515 np = SIZE(p2p)
1516 mpr = p2p(my_pos)
1517 scount => fft_scratch%scount
1518 rcount => fft_scratch%rcount
1519 sdispl => fft_scratch%sdispl
1520 rdispl => fft_scratch%rdispl
1521
1522 IF (alltoall_sgl) THEN
1523 ss => fft_scratch%ss
1524 tt => fft_scratch%tt
1525 ss = 0._sp
1526 tt = 0._sp
1527 ELSE
1528 rr => fft_scratch%rr
1529 END IF
1530
1531 nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1532!$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
1533!$OMP PRIVATE(ip, ixx, ir, iy, iz, ix) &
1534!$OMP SHARED(np,nray,nx,alltoall_sgl,yzp,tb,tt,rr)
1535 DO ip = 0, np - 1
1536 DO ix = 1, nx
1537 ixx = nray(ip)*(ix - 1)
1538 IF (alltoall_sgl) THEN
1539 DO ir = 1, nray(ip)
1540 iy = yzp(1, ir, ip)
1541 iz = yzp(2, ir, ip)
1542 tt(ir + ixx, ip) = cmplx(tb(iy, iz, ix), kind=sp)
1543 END DO
1544 ELSE
1545 DO ir = 1, nray(ip)
1546 iy = yzp(1, ir, ip)
1547 iz = yzp(2, ir, ip)
1548 rr(ir + ixx, ip) = tb(iy, iz, ix)
1549 END DO
1550 END IF
1551 END DO
1552 END DO
1553!$OMP END PARALLEL DO
1554 nm = maxval(nray(0:np - 1))
1555 nr = nray(my_pos)
1556!$OMP PARALLEL DO DEFAULT(NONE), &
1557!$OMP PRIVATE(ix,nx), &
1558!$OMP SHARED(np,p2p,bo,rcount,rdispl,nr)
1559 DO ip = 0, np - 1
1560 ix = p2p(ip)
1561 nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
1562 rcount(ip) = nr*nx
1563 rdispl(ip) = nr*(bo(1, 1, ix) - 1)
1564 END DO
1565!$OMP END PARALLEL DO
1566 nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1567!$OMP PARALLEL DO DEFAULT(NONE), &
1568!$OMP PRIVATE(nr), &
1569!$OMP SHARED(np,nray,scount,sdispl,nx,nm)
1570 DO ip = 0, np - 1
1571 nr = nray(ip)
1572 scount(ip) = nr*nx
1573 sdispl(ip) = nm*nx*ip
1574 END DO
1575!$OMP END PARALLEL DO
1576
1577 IF (alltoall_sgl) THEN
1578 CALL group%alltoall(tt, scount, sdispl, ss, rcount, rdispl)
1579 sb = ss
1580 ELSE
1581 CALL group%alltoall(rr, scount, sdispl, sb, rcount, rdispl)
1582 END IF
1583
1584 CALL timestop(handle)
1585
1586 END SUBROUTINE yz_to_x
1587
1588! **************************************************************************************************
1589!> \brief ...
1590!> \param sb ...
1591!> \param group ...
1592!> \param dims ...
1593!> \param my_pos ...
1594!> \param p2p ...
1595!> \param yzp ...
1596!> \param nray ...
1597!> \param bo ...
1598!> \param tb ...
1599!> \param fft_scratch ...
1600!> \par History
1601!> 15. Feb. 2006 : single precision all_to_all
1602!> \author JGH (18-Jan-2001)
1603! **************************************************************************************************
1604 SUBROUTINE yz_to_xz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1605
1606 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1607 INTENT(IN) :: sb
1608
1609 CLASS(mp_comm_type), INTENT(IN) :: group
1610 INTEGER, DIMENSION(2), INTENT(IN) :: dims
1611 INTEGER, INTENT(IN) :: my_pos
1612 INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1613 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: yzp
1614 INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1615 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: bo
1616 COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT), CONTIGUOUS :: tb
1617 TYPE(fft_scratch_type), INTENT(INOUT) :: fft_scratch
1618
1619 CHARACTER(len=*), PARAMETER :: routinen = 'yz_to_xz'
1620
1621 COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf, yzbuf
1622 COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf_sgl, yzbuf_sgl
1623 INTEGER :: handle, icrs, ip, ipl, ipr, ir, ix, iz, &
1624 jj, jx, jy, jz, myx, myz, np, npx, &
1625 npz, nx, nz, rs_pos
1626 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: pzcoord, rcount, rdispl, scount, sdispl, &
1627 xcor, zcor
1628 INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER :: pgrid
1629
1630 CALL timeset(routinen, handle)
1631
1632 np = SIZE(p2p)
1633
1634 rs_pos = p2p(my_pos)
1635
1636 IF (alltoall_sgl) THEN
1637 yzbuf_sgl => fft_scratch%yzbuf_sgl
1638 xzbuf_sgl => fft_scratch%xzbuf_sgl
1639 ELSE
1640 yzbuf => fft_scratch%yzbuf
1641 xzbuf => fft_scratch%xzbuf
1642 END IF
1643 npx = dims(1)
1644 npz = dims(2)
1645 pgrid => fft_scratch%pgrid
1646 xcor => fft_scratch%xcor
1647 zcor => fft_scratch%zcor
1648 pzcoord => fft_scratch%pzcoord
1649 scount => fft_scratch%scount
1650 rcount => fft_scratch%rcount
1651 sdispl => fft_scratch%sdispl
1652 rdispl => fft_scratch%rdispl
1653
1654 nx = SIZE(sb, 2)
1655
1656! If the send and recv counts are not already cached, then
1657! calculate and store them
1658 IF (fft_scratch%in == 0) THEN
1659
1660 scount = 0
1661
1662 DO ix = 0, npx - 1
1663 ip = pgrid(ix, 0)
1664 xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
1665 END DO
1666 DO iz = 0, npz - 1
1667 ip = pgrid(0, iz)
1668 zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
1669 END DO
1670 DO jx = 1, nx
1671 IF (alltoall_sgl) THEN
1672 DO ir = 1, nray(my_pos)
1673 jy = yzp(1, ir, my_pos)
1674 jz = yzp(2, ir, my_pos)
1675 ip = pgrid(xcor(jx), zcor(jz))
1676 scount(ip) = scount(ip) + 1
1677 END DO
1678 ELSE
1679 DO ir = 1, nray(my_pos)
1680 jy = yzp(1, ir, my_pos)
1681 jz = yzp(2, ir, my_pos)
1682 ip = pgrid(xcor(jx), zcor(jz))
1683 scount(ip) = scount(ip) + 1
1684 END DO
1685 END IF
1686 END DO
1687
1688 CALL group%alltoall(scount, rcount, 1)
1689 fft_scratch%yzcount = scount
1690 fft_scratch%xzcount = rcount
1691
1692 ! Work out the correct displacements in the buffers
1693 sdispl(0) = 0
1694 rdispl(0) = 0
1695 DO ip = 1, np - 1
1696 sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
1697 rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
1698 END DO
1699
1700 fft_scratch%yzdispl = sdispl
1701 fft_scratch%xzdispl = rdispl
1702
1703 icrs = 0
1704 DO ip = 0, np - 1
1705 IF (scount(ip) /= 0) icrs = icrs + 1
1706 IF (rcount(ip) /= 0) icrs = icrs + 1
1707 END DO
1708 CALL group%sum(icrs)
1709 fft_scratch%rsratio = real(icrs, kind=dp)/(real(2*np, kind=dp)*real(np, kind=dp))
1710
1711 fft_scratch%in = 1
1712 ELSE
1713 scount = fft_scratch%yzcount
1714 rcount = fft_scratch%xzcount
1715 sdispl = fft_scratch%yzdispl
1716 rdispl = fft_scratch%xzdispl
1717 END IF
1718
1719! Do the actual packing
1720!$OMP PARALLEL DO DEFAULT(NONE), &
1721!$OMP PRIVATE(ipl,jj,nx,ir,jx,jy,jz),&
1722!$OMP SHARED(np,p2p,pzcoord,bo,nray,yzp,zcor),&
1723!$OMP SHARED(yzbuf,sb,scount,sdispl,my_pos),&
1724!$OMP SHARED(yzbuf_sgl,alltoall_sgl)
1725 DO ip = 0, np - 1
1726 IF (scount(ip) == 0) cycle
1727 ipl = p2p(ip)
1728 jj = 0
1729 nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
1730 DO ir = 1, nray(my_pos)
1731 jz = yzp(2, ir, my_pos)
1732 IF (zcor(jz) == pzcoord(ipl)) THEN
1733 jj = jj + 1
1734 jy = yzp(1, ir, my_pos)
1735 IF (alltoall_sgl) THEN
1736 DO jx = 0, nx - 1
1737 yzbuf_sgl(sdispl(ip) + jj + jx*scount(ip)/nx) = cmplx(sb(ir, jx + bo(1, 1, ipl)), kind=sp)
1738 END DO
1739 ELSE
1740 DO jx = 0, nx - 1
1741 yzbuf(sdispl(ip) + jj + jx*scount(ip)/nx) = sb(ir, jx + bo(1, 1, ipl))
1742 END DO
1743 END IF
1744 END IF
1745 END DO
1746 END DO
1747!$OMP END PARALLEL DO
1748
1749 IF (alltoall_sgl) THEN
1750 CALL group%alltoall(yzbuf_sgl, scount, sdispl, xzbuf_sgl, rcount, rdispl)
1751 ELSE
1752 IF (fft_scratch%rsratio < ratio_sparse_alltoall) THEN
1753 CALL sparse_alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl, group)
1754 ELSE
1755 CALL group%alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl)
1756 END IF
1757 END IF
1758
1759 myx = fft_scratch%sizes%r_pos(1)
1760 myz = fft_scratch%sizes%r_pos(2)
1761 nz = bo(2, 3, rs_pos) - bo(1, 3, rs_pos) + 1
1762
1763!$OMP PARALLEL DO DEFAULT(NONE), &
1764!$OMP PRIVATE(ipr,jj,ir,jx,jy,jz),&
1765!$OMP SHARED(tb,np,p2p,bo,rs_pos,nray),&
1766!$OMP SHARED(yzp,alltoall_sgl,zcor,myz),&
1767!$OMP SHARED(xzbuf,xzbuf_sgl,nz,rdispl)
1768 DO ip = 0, np - 1
1769 ipr = p2p(ip)
1770 jj = 0
1771 DO jx = 0, bo(2, 1, rs_pos) - bo(1, 1, rs_pos)
1772 DO ir = 1, nray(ip)
1773 jz = yzp(2, ir, ip)
1774 IF (alltoall_sgl) THEN
1775 IF (zcor(jz) == myz) THEN
1776 jj = jj + 1
1777 jy = yzp(1, ir, ip)
1778 jz = jz - bo(1, 3, rs_pos) + 1
1779 tb(jy, jz + jx*nz) = xzbuf_sgl(jj + rdispl(ipr))
1780 END IF
1781 ELSE
1782 IF (zcor(jz) == myz) THEN
1783 jj = jj + 1
1784 jy = yzp(1, ir, ip)
1785 jz = jz - bo(1, 3, rs_pos) + 1
1786 tb(jy, jz + jx*nz) = xzbuf(jj + rdispl(ipr))
1787 END IF
1788 END IF
1789 END DO
1790 END DO
1791 END DO
1792!$OMP END PARALLEL DO
1793
1794 CALL timestop(handle)
1795
1796 END SUBROUTINE yz_to_xz
1797
1798! **************************************************************************************************
1799!> \brief ...
1800!> \param sb ...
1801!> \param group ...
1802!> \param dims ...
1803!> \param my_pos ...
1804!> \param p2p ...
1805!> \param yzp ...
1806!> \param nray ...
1807!> \param bo ...
1808!> \param tb ...
1809!> \param fft_scratch ...
1810!> \par History
1811!> 15. Feb. 2006 : single precision all_to_all
1812!> \author JGH (19-Jan-2001)
1813! **************************************************************************************************
1814 SUBROUTINE xz_to_yz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1815
1816 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1817 INTENT(IN) :: sb
1818
1819 CLASS(mp_comm_type), INTENT(IN) :: group
1820 INTEGER, DIMENSION(2), INTENT(IN) :: dims
1821 INTEGER, INTENT(IN) :: my_pos
1822 INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1823 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: yzp
1824 INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1825 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: bo
1826 COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT), CONTIGUOUS :: tb
1827 TYPE(fft_scratch_type), INTENT(INOUT) :: fft_scratch
1828
1829 CHARACTER(len=*), PARAMETER :: routinen = 'xz_to_yz'
1830
1831 COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf, yzbuf
1832 COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf_sgl, yzbuf_sgl
1833 INTEGER :: handle, icrs, ip, ipl, ir, ix, ixx, iz, &
1834 jj, jx, jy, jz, mp, myx, myz, np, npx, &
1835 npz, nx, nz
1836 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: pzcoord, rcount, rdispl, scount, sdispl, &
1837 xcor, zcor
1838 INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER :: pgrid
1839
1840 CALL timeset(routinen, handle)
1841
1842 np = SIZE(p2p)
1843
1844 IF (alltoall_sgl) THEN
1845 yzbuf_sgl => fft_scratch%yzbuf_sgl
1846 xzbuf_sgl => fft_scratch%xzbuf_sgl
1847 ELSE
1848 yzbuf => fft_scratch%yzbuf
1849 xzbuf => fft_scratch%xzbuf
1850 END IF
1851 npx = dims(1)
1852 npz = dims(2)
1853 pgrid => fft_scratch%pgrid
1854 xcor => fft_scratch%xcor
1855 zcor => fft_scratch%zcor
1856 pzcoord => fft_scratch%pzcoord
1857 scount => fft_scratch%scount
1858 rcount => fft_scratch%rcount
1859 sdispl => fft_scratch%sdispl
1860 rdispl => fft_scratch%rdispl
1861
1862! If the send and recv counts are not already cached, then
1863! calculate and store them
1864 IF (fft_scratch%in == 0) THEN
1865
1866 rcount = 0
1867 nx = maxval(bo(2, 1, :))
1868
1869 DO ix = 0, npx - 1
1870 ip = pgrid(ix, 0)
1871 xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
1872 END DO
1873 DO iz = 0, npz - 1
1874 ip = pgrid(0, iz)
1875 zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
1876 END DO
1877 DO jx = 1, nx
1878 DO ir = 1, nray(my_pos)
1879 jy = yzp(1, ir, my_pos)
1880 jz = yzp(2, ir, my_pos)
1881 ip = pgrid(xcor(jx), zcor(jz))
1882 rcount(ip) = rcount(ip) + 1
1883 END DO
1884 END DO
1885
1886 CALL group%alltoall(rcount, scount, 1)
1887 fft_scratch%xzcount = scount
1888 fft_scratch%yzcount = rcount
1889
1890 ! Work out the correct displacements in the buffers
1891 sdispl(0) = 0
1892 rdispl(0) = 0
1893 DO ip = 1, np - 1
1894 sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
1895 rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
1896 END DO
1897
1898 fft_scratch%xzdispl = sdispl
1899 fft_scratch%yzdispl = rdispl
1900
1901 icrs = 0
1902 DO ip = 0, np - 1
1903 IF (scount(ip) /= 0) icrs = icrs + 1
1904 IF (rcount(ip) /= 0) icrs = icrs + 1
1905 END DO
1906 CALL group%sum(icrs)
1907 fft_scratch%rsratio = real(icrs, kind=dp)/(real(2*np, kind=dp)*real(np, kind=dp))
1908
1909 fft_scratch%in = 1
1910 ELSE
1911 scount = fft_scratch%xzcount
1912 rcount = fft_scratch%yzcount
1913 sdispl = fft_scratch%xzdispl
1914 rdispl = fft_scratch%yzdispl
1915 END IF
1916
1917! Now do the actual packing
1918 myx = fft_scratch%sizes%r_pos(1)
1919 myz = fft_scratch%sizes%r_pos(2)
1920 mp = p2p(my_pos)
1921 nz = bo(2, 3, mp) - bo(1, 3, mp) + 1
1922 nx = bo(2, 1, mp) - bo(1, 1, mp) + 1
1923
1924!$OMP PARALLEL DO DEFAULT(NONE), &
1925!$OMP PRIVATE(jj,ipl,ir,jx,jy,jz,ixx),&
1926!$OMP SHARED(np,p2p,nray,yzp,zcor,myz,bo,mp),&
1927!$OMP SHARED(alltoall_sgl,nx,scount,sdispl),&
1928!$OMP SHARED(xzbuf,xzbuf_sgl,sb,nz)
1929 DO ip = 0, np - 1
1930 jj = 0
1931 ipl = p2p(ip)
1932 DO ir = 1, nray(ip)
1933 jz = yzp(2, ir, ip)
1934 IF (zcor(jz) == myz) THEN
1935 jj = jj + 1
1936 jy = yzp(1, ir, ip)
1937 jz = yzp(2, ir, ip) - bo(1, 3, mp) + 1
1938 IF (alltoall_sgl) THEN
1939 DO jx = 0, nx - 1
1940 ixx = jj + jx*scount(ipl)/nx
1941 xzbuf_sgl(ixx + sdispl(ipl)) = cmplx(sb(jy, jz + jx*nz), kind=sp)
1942 END DO
1943 ELSE
1944 DO jx = 0, nx - 1
1945 ixx = jj + jx*scount(ipl)/nx
1946 xzbuf(ixx + sdispl(ipl)) = sb(jy, jz + jx*nz)
1947 END DO
1948 END IF
1949 END IF
1950 END DO
1951 END DO
1952!$OMP END PARALLEL DO
1953
1954 IF (alltoall_sgl) THEN
1955 CALL group%alltoall(xzbuf_sgl, scount, sdispl, yzbuf_sgl, rcount, rdispl)
1956 ELSE
1957 IF (fft_scratch%rsratio < ratio_sparse_alltoall) THEN
1958 CALL sparse_alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl, group)
1959 ELSE
1960 CALL group%alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl)
1961 END IF
1962 END IF
1963
1964!$OMP PARALLEL DO DEFAULT(NONE), &
1965!$OMP PRIVATE(ipl,jj,nx,ir,jx,jy,jz),&
1966!$OMP SHARED(p2p,pzcoord,bo,nray,my_pos,yzp),&
1967!$OMP SHARED(rcount,rdispl,tb,yzbuf,zcor),&
1968!$OMP SHARED(yzbuf_sgl,alltoall_sgl,np)
1969 DO ip = 0, np - 1
1970 IF (rcount(ip) == 0) cycle
1971 ipl = p2p(ip)
1972 jj = 0
1973 nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
1974 DO ir = 1, nray(my_pos)
1975 jz = yzp(2, ir, my_pos)
1976 IF (zcor(jz) == pzcoord(ipl)) THEN
1977 jj = jj + 1
1978 jy = yzp(1, ir, my_pos)
1979 IF (alltoall_sgl) THEN
1980 DO jx = 0, nx - 1
1981 tb(ir, jx + bo(1, 1, ipl)) = yzbuf_sgl(rdispl(ip) + jj + jx*rcount(ip)/nx)
1982 END DO
1983 ELSE
1984 DO jx = 0, nx - 1
1985 tb(ir, jx + bo(1, 1, ipl)) = yzbuf(rdispl(ip) + jj + jx*rcount(ip)/nx)
1986 END DO
1987 END IF
1988 END IF
1989 END DO
1990 END DO
1991!$OMP END PARALLEL DO
1992
1993 CALL timestop(handle)
1994
1995 END SUBROUTINE xz_to_yz
1996
1997! **************************************************************************************************
1998!> \brief ...
1999!> \param cin ...
2000!> \param boin ...
2001!> \param boout ...
2002!> \param sout ...
2003!> \param fft_scratch ...
2004!> \par History
2005!> none
2006!> \author JGH (20-Jan-2001)
2007! **************************************************************************************************
2008 SUBROUTINE cube_transpose_1(cin, boin, boout, sout, fft_scratch)
2009
2010 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2011 INTENT(IN) :: cin
2012 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2013 INTENT(IN) :: boin, boout
2014 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2015 INTENT(OUT) :: sout
2016 TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2017
2018 CHARACTER(len=*), PARAMETER :: routinen = 'cube_transpose_1'
2019
2020 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2021 POINTER :: rbuf
2022 INTEGER :: handle, ip, ipl, ir, is, ixy, iz, mip, &
2023 mz, np, nx, ny, nz
2024 INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2025 INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2026 INTEGER, DIMENSION(2) :: dim, pos
2027
2028 CALL timeset(routinen, handle)
2029
2030 mip = fft_scratch%mip
2031 dim = fft_scratch%dim
2032 pos = fft_scratch%pos
2033 scount => fft_scratch%scount
2034 rcount => fft_scratch%rcount
2035 sdispl => fft_scratch%sdispl
2036 rdispl => fft_scratch%rdispl
2037 pgrid => fft_scratch%pgcube
2038 np = dim(2)
2039
2040 nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2041 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2042
2043!$OMP PARALLEL DO DEFAULT(NONE), &
2044!$OMP PRIVATE(ipl,ny), &
2045!$OMP SHARED(np,pgrid,boout,scount,sdispl,nx,nz)
2046 DO ip = 0, np - 1
2047 ipl = pgrid(ip, 2)
2048 ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2049 scount(ip) = nx*nz*ny
2050 sdispl(ip) = nx*nz*(boout(1, 2, ipl) - 1)
2051 END DO
2052!$OMP END PARALLEL DO
2053 ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2054 mz = maxval(boin(2, 3, :) - boin(1, 3, :) + 1)
2055!$OMP PARALLEL DO DEFAULT(NONE), &
2056!$OMP PRIVATE(ipl,nz), &
2057!$OMP SHARED(np,pgrid,boin,nx,ny,rcount,rdispl,mz)
2058 DO ip = 0, np - 1
2059 ipl = pgrid(ip, 2)
2060 nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
2061 rcount(ip) = nx*nz*ny
2062 rdispl(ip) = nx*ny*mz*ip
2063 END DO
2064!$OMP END PARALLEL DO
2065
2066 rbuf => fft_scratch%rbuf1
2067
2068 CALL fft_scratch%cart_sub_comm(2)%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2069
2070!$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
2071!$OMP PRIVATE(ip,ipl,nz,iz,is,ir) &
2072!$OMP SHARED(nx,ny,np,pgrid,boin,sout,rbuf)
2073 DO ixy = 1, nx*ny
2074 DO ip = 0, np - 1
2075 ipl = pgrid(ip, 2)
2076 nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
2077 DO iz = 1, nz
2078 is = boin(1, 3, ipl) + iz - 1
2079 ir = iz + nz*(ixy - 1)
2080 sout(is, ixy) = rbuf(ir, ip)
2081 END DO
2082 END DO
2083 END DO
2084!$OMP END PARALLEL DO
2085
2086 CALL timestop(handle)
2087
2088 END SUBROUTINE cube_transpose_1
2089
2090! **************************************************************************************************
2091!> \brief ...
2092!> \param cin ...
2093!> \param boin ...
2094!> \param boout ...
2095!> \param sout ...
2096!> \param fft_scratch ...
2097! **************************************************************************************************
2098 SUBROUTINE cube_transpose_2(cin, boin, boout, sout, fft_scratch)
2099
2100 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2101 INTENT(IN) :: cin
2102 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2103 INTENT(IN) :: boin, boout
2104 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2105 INTENT(OUT) :: sout
2106 TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2107
2108 CHARACTER(len=*), PARAMETER :: routinen = 'cube_transpose_2'
2109
2110 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2111 POINTER :: rbuf
2112 INTEGER :: handle, ip, ipl, ir, ixy, iz, mip, mz, &
2113 np, nx, ny, nz
2114 INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2115 INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2116 INTEGER, DIMENSION(2) :: dim, pos
2117 TYPE(mp_comm_type) :: sub_group
2118
2119 CALL timeset(routinen, handle)
2120
2121 sub_group = fft_scratch%cart_sub_comm(2)
2122 mip = fft_scratch%mip
2123 dim = fft_scratch%dim
2124 pos = fft_scratch%pos
2125 scount => fft_scratch%scount
2126 rcount => fft_scratch%rcount
2127 sdispl => fft_scratch%sdispl
2128 rdispl => fft_scratch%rdispl
2129 pgrid => fft_scratch%pgcube
2130 np = dim(2)
2131
2132 nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2133 ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2134 mz = maxval(boout(2, 3, :) - boout(1, 3, :) + 1)
2135
2136 rbuf => fft_scratch%rbuf2
2137
2138!$OMP PARALLEL DEFAULT(NONE), &
2139!$OMP PRIVATE(ip,ipl,nz,iz,ir), &
2140!$OMP SHARED(nx,ny,np,pgrid,boout,rbuf,cin,scount,sdispl,mz)
2141!$OMP DO COLLAPSE(2)
2142 DO ixy = 1, nx*ny
2143 DO ip = 0, np - 1
2144 ipl = pgrid(ip, 2)
2145 nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
2146 DO iz = boout(1, 3, ipl), boout(2, 3, ipl)
2147 ir = iz - boout(1, 3, ipl) + 1 + (ixy - 1)*nz
2148 rbuf(ir, ip) = cin(iz, ixy)
2149 END DO
2150 END DO
2151 END DO
2152!$OMP END DO
2153!$OMP DO
2154 DO ip = 0, np - 1
2155 ipl = pgrid(ip, 2)
2156 nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
2157 scount(ip) = nx*ny*nz
2158 sdispl(ip) = nx*ny*mz*ip
2159 END DO
2160!$OMP END DO
2161!$OMP END PARALLEL
2162 nz = boout(2, 3, mip) - boout(1, 3, mip) + 1
2163!$OMP PARALLEL DO DEFAULT(NONE), &
2164!$OMP PRIVATE(ipl,ny), &
2165!$OMP SHARED(np,pgrid,boin,nx,nz,rcount,rdispl)
2166 DO ip = 0, np - 1
2167 ipl = pgrid(ip, 2)
2168 ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2169 rcount(ip) = nx*ny*nz
2170 rdispl(ip) = nx*nz*(boin(1, 2, ipl) - 1)
2171 END DO
2172!$OMP END PARALLEL DO
2173
2174 CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2175
2176 CALL timestop(handle)
2177
2178 END SUBROUTINE cube_transpose_2
2179
2180! **************************************************************************************************
2181!> \brief ...
2182!> \param cin ...
2183!> \param boin ...
2184!> \param boout ...
2185!> \param sout ...
2186!> \param fft_scratch ...
2187! **************************************************************************************************
2188 SUBROUTINE cube_transpose_3(cin, boin, boout, sout, fft_scratch)
2189
2190 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2191 INTENT(IN) :: cin
2192 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2193 INTENT(IN) :: boin, boout
2194 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2195 INTENT(OUT) :: sout
2196 TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2197
2198 CHARACTER(len=*), PARAMETER :: routinen = 'cube_transpose_3'
2199
2200 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2201 POINTER :: rbuf
2202 INTEGER :: handle, ip, ipl, ir, is, ixz, iy, lb, &
2203 mip, my, my_id, np, num_threads, nx, &
2204 ny, nz, ub
2205 INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2206 INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2207 INTEGER, DIMENSION(2) :: dim, pos
2208 TYPE(mp_comm_type) :: sub_group
2209
2210 CALL timeset(routinen, handle)
2211
2212 sub_group = fft_scratch%cart_sub_comm(1)
2213 mip = fft_scratch%mip
2214 dim = fft_scratch%dim
2215 pos = fft_scratch%pos
2216 np = dim(1)
2217 scount => fft_scratch%scount
2218 rcount => fft_scratch%rcount
2219 sdispl => fft_scratch%sdispl
2220 rdispl => fft_scratch%rdispl
2221 pgrid => fft_scratch%pgcube
2222
2223 ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2224 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2225!$OMP PARALLEL DO DEFAULT(NONE), &
2226!$OMP PRIVATE(ipl, nx), &
2227!$OMP SHARED(np,pgrid,boout,ny,nz,scount,sdispl)
2228 DO ip = 0, np - 1
2229 ipl = pgrid(ip, 1)
2230 nx = boout(2, 1, ipl) - boout(1, 1, ipl) + 1
2231 scount(ip) = nx*nz*ny
2232 sdispl(ip) = ny*nz*(boout(1, 1, ipl) - 1)
2233 END DO
2234!$OMP END PARALLEL DO
2235 nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
2236 my = maxval(boin(2, 2, :) - boin(1, 2, :) + 1)
2237!$OMP PARALLEL DO DEFAULT(NONE), &
2238!$OMP PRIVATE(ipl, ny), &
2239!$OMP SHARED(np,pgrid,boin,nx,nz,my,rcount,rdispl)
2240 DO ip = 0, np - 1
2241 ipl = pgrid(ip, 1)
2242 ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2243 rcount(ip) = nx*nz*ny
2244 rdispl(ip) = nx*my*nz*ip
2245 END DO
2246!$OMP END PARALLEL DO
2247
2248 rbuf => fft_scratch%rbuf3
2249 num_threads = 1
2250 my_id = 0
2251!$OMP PARALLEL DEFAULT(NONE), &
2252!$OMP PRIVATE(NUM_THREADS, my_id, lb, ub) &
2253!$OMP SHARED(rbuf)
2254!$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2255!$ my_id = omp_get_thread_num()
2256 IF (my_id < num_threads) THEN
2257 lb = (SIZE(rbuf, 2)*my_id)/num_threads
2258 ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2259 rbuf(:, lb:ub) = 0.0_dp
2260 END IF
2261!$OMP END PARALLEL
2262
2263 CALL sub_group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2264
2265!$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
2266!$OMP PRIVATE(ip,ipl,ny,iy,is,ir) &
2267!$OMP SHARED(nx,nz,np,pgrid,boin,rbuf,sout)
2268 DO ixz = 1, nx*nz
2269 DO ip = 0, np - 1
2270 ipl = pgrid(ip, 1)
2271 ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2272 DO iy = 1, ny
2273 is = boin(1, 2, ipl) + iy - 1
2274 ir = iy + ny*(ixz - 1)
2275 sout(is, ixz) = rbuf(ir, ip)
2276 END DO
2277 END DO
2278 END DO
2279!$OMP END PARALLEL DO
2280
2281 CALL timestop(handle)
2282
2283 END SUBROUTINE cube_transpose_3
2284
2285! **************************************************************************************************
2286!> \brief ...
2287!> \param cin ...
2288!> \param boin ...
2289!> \param boout ...
2290!> \param sout ...
2291!> \param fft_scratch ...
2292! **************************************************************************************************
2293 SUBROUTINE cube_transpose_4(cin, boin, boout, sout, fft_scratch)
2294
2295 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2296 INTENT(IN) :: cin
2297 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2298 INTENT(IN) :: boin, boout
2299 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2300 INTENT(OUT) :: sout
2301 TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2302
2303 CHARACTER(len=*), PARAMETER :: routinen = 'cube_transpose_4'
2304
2305 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2306 POINTER :: rbuf
2307 INTEGER :: handle, ip, ipl, ir, iy, izx, lb, mip, &
2308 my, my_id, np, num_threads, nx, ny, &
2309 nz, ub
2310 INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2311 INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2312 INTEGER, DIMENSION(2) :: dim, pos
2313 TYPE(mp_comm_type) :: sub_group
2314
2315 CALL timeset(routinen, handle)
2316
2317 sub_group = fft_scratch%cart_sub_comm(1)
2318 mip = fft_scratch%mip
2319 dim = fft_scratch%dim
2320 pos = fft_scratch%pos
2321 np = dim(1)
2322 scount => fft_scratch%scount
2323 rcount => fft_scratch%rcount
2324 sdispl => fft_scratch%sdispl
2325 rdispl => fft_scratch%rdispl
2326 pgrid => fft_scratch%pgcube
2327
2328 nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2329 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2330 my = maxval(boout(2, 2, :) - boout(1, 2, :) + 1)
2331
2332 rbuf => fft_scratch%rbuf4
2333 num_threads = 1
2334 my_id = 0
2335!$OMP PARALLEL DEFAULT(NONE), &
2336!$OMP PRIVATE(NUM_THREADS,my_id,lb,ub,ip,ipl,ny,iy,ir), &
2337!$OMP SHARED(rbuf,nz,nx,np,pgrid,boout,cin,my,scount,sdispl)
2338!$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2339!$ my_id = omp_get_thread_num()
2340 IF (my_id < num_threads) THEN
2341 lb = (SIZE(rbuf, 2)*my_id)/num_threads
2342 ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2343 rbuf(:, lb:ub) = 0.0_dp
2344 END IF
2345!$OMP BARRIER
2346
2347!$OMP DO COLLAPSE(2)
2348 DO izx = 1, nz*nx
2349 DO ip = 0, np - 1
2350 ipl = pgrid(ip, 1)
2351 ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2352 DO iy = boout(1, 2, ipl), boout(2, 2, ipl)
2353 ir = iy - boout(1, 2, ipl) + 1 + (izx - 1)*ny
2354 rbuf(ir, ip) = cin(iy, izx)
2355 END DO
2356 END DO
2357 END DO
2358!$OMP END DO
2359!$OMP DO
2360 DO ip = 0, np - 1
2361 ipl = pgrid(ip, 1)
2362 ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2363 scount(ip) = nx*ny*nz
2364 sdispl(ip) = nx*nz*my*ip
2365 END DO
2366!$OMP END DO
2367!$OMP END PARALLEL
2368 ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2369!$OMP PARALLEL DO DEFAULT(NONE), &
2370!$OMP PRIVATE(ipl,nx), &
2371!$OMP SHARED(np,pgrid,boin,rcount,rdispl,ny,nz)
2372 DO ip = 0, np - 1
2373 ipl = pgrid(ip, 1)
2374 nx = boin(2, 1, ipl) - boin(1, 1, ipl) + 1
2375 rcount(ip) = nx*ny*nz
2376 rdispl(ip) = ny*nz*(boin(1, 1, ipl) - 1)
2377 END DO
2378!$OMP END PARALLEL DO
2379
2380 CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2381
2382 CALL timestop(handle)
2383
2384 END SUBROUTINE cube_transpose_4
2385
2386! **************************************************************************************************
2387!> \brief ...
2388!> \param cin ...
2389!> \param group ...
2390!> \param boin ...
2391!> \param boout ...
2392!> \param sout ...
2393!> \param fft_scratch ...
2394! **************************************************************************************************
2395 SUBROUTINE cube_transpose_5(cin, group, boin, boout, sout, fft_scratch)
2396
2397 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2398 INTENT(IN) :: cin
2399
2400 CLASS(mp_comm_type), INTENT(IN) :: group
2401 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: boin, boout
2402 COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT), CONTIGUOUS :: sout
2403 TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2404
2405 CHARACTER(len=*), PARAMETER :: routinen = 'cube_transpose_5'
2406
2407 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: rbuf
2408 INTEGER :: handle, ip, ir, is, ixz, iy, lb, mip, &
2409 my, my_id, np, num_threads, nx, ny, &
2410 nz, ub
2411 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: rcount, rdispl, scount, sdispl
2412
2413 CALL timeset(routinen, handle)
2414
2415 np = fft_scratch%sizes%numtask
2416 mip = fft_scratch%mip
2417 scount => fft_scratch%scount
2418 rcount => fft_scratch%rcount
2419 sdispl => fft_scratch%sdispl
2420 rdispl => fft_scratch%rdispl
2421
2422 ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2423 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2424!$OMP PARALLEL DO DEFAULT(NONE), &
2425!$OMP PRIVATE(nx), &
2426!$OMP SHARED(np,boout,ny,nz,scount,sdispl)
2427 DO ip = 0, np - 1
2428 nx = boout(2, 1, ip) - boout(1, 1, ip) + 1
2429 scount(ip) = nx*nz*ny
2430 sdispl(ip) = ny*nz*(boout(1, 1, ip) - 1)
2431 END DO
2432!$OMP END PARALLEL DO
2433 nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
2434 my = maxval(boin(2, 2, :) - boin(1, 2, :) + 1)
2435!$OMP PARALLEL DO DEFAULT(NONE), &
2436!$OMP PRIVATE(ny), &
2437!$OMP SHARED(np,boin,nx,nz,rcount,rdispl,my)
2438 DO ip = 0, np - 1
2439 ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
2440 rcount(ip) = nx*nz*ny
2441 rdispl(ip) = nx*my*nz*ip
2442 END DO
2443!$OMP END PARALLEL DO
2444
2445 rbuf => fft_scratch%rbuf5
2446 num_threads = 1
2447 my_id = 0
2448!$OMP PARALLEL DEFAULT(NONE), &
2449!$OMP PRIVATE(NUM_THREADS, my_id, lb, ub), &
2450!$OMP SHARED(rbuf)
2451!$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2452!$ my_id = omp_get_thread_num()
2453 IF (my_id < num_threads) THEN
2454 lb = (SIZE(rbuf, 2)*my_id)/num_threads
2455 ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2456 rbuf(:, lb:ub) = 0.0_dp
2457 END IF
2458!$OMP END PARALLEL
2459
2460 CALL group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2461
2462!$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
2463!$OMP PRIVATE(ip,ny,iy,is,ir) &
2464!$OMP SHARED(nx,nz,np,boin,sout,rbuf)
2465 DO ixz = 1, nx*nz
2466 DO ip = 0, np - 1
2467 ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
2468 DO iy = 1, ny
2469 is = boin(1, 2, ip) + iy - 1
2470 ir = iy + ny*(ixz - 1)
2471 sout(is, ixz) = rbuf(ir, ip)
2472 END DO
2473 END DO
2474 END DO
2475!$OMP END PARALLEL DO
2476
2477 CALL timestop(handle)
2478
2479 END SUBROUTINE cube_transpose_5
2480
2481! **************************************************************************************************
2482!> \brief ...
2483!> \param cin ...
2484!> \param group ...
2485!> \param boin ...
2486!> \param boout ...
2487!> \param sout ...
2488!> \param fft_scratch ...
2489! **************************************************************************************************
2490 SUBROUTINE cube_transpose_6(cin, group, boin, boout, sout, fft_scratch)
2491
2492 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2493 INTENT(IN) :: cin
2494
2495 CLASS(mp_comm_type), INTENT(IN) :: group
2496 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: boin, boout
2497 COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT), CONTIGUOUS :: sout
2498 TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2499
2500 CHARACTER(len=*), PARAMETER :: routinen = 'cube_transpose_6'
2501
2502 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: rbuf
2503 INTEGER :: handle, ip, ir, iy, izx, lb, mip, my, &
2504 my_id, np, num_threads, nx, ny, nz, ub
2505 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: rcount, rdispl, scount, sdispl
2506
2507 CALL timeset(routinen, handle)
2508
2509 np = fft_scratch%sizes%numtask
2510 mip = fft_scratch%mip
2511 scount => fft_scratch%scount
2512 rcount => fft_scratch%rcount
2513 sdispl => fft_scratch%sdispl
2514 rdispl => fft_scratch%rdispl
2515
2516 nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2517 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2518 my = maxval(boout(2, 2, :) - boout(1, 2, :) + 1)
2519
2520 rbuf => fft_scratch%rbuf5
2521 num_threads = 1
2522 my_id = 0
2523!$OMP PARALLEL DEFAULT(NONE), &
2524!$OMP PRIVATE(NUM_THREADS,my_id,lb,ub,ip,ny,iy,ir), &
2525!$OMP SHARED(rbuf,nx,nz,np,boout,cin,my,scount,sdispl)
2526!$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2527!$ my_id = omp_get_thread_num()
2528 IF (my_id < num_threads) THEN
2529 lb = (SIZE(rbuf, 2)*my_id)/num_threads
2530 ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2531 rbuf(:, lb:ub) = 0.0_dp
2532 END IF
2533!$OMP BARRIER
2534
2535!$OMP DO COLLAPSE(2)
2536 DO izx = 1, nz*nx
2537 DO ip = 0, np - 1
2538 ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
2539 DO iy = boout(1, 2, ip), boout(2, 2, ip)
2540 ir = iy - boout(1, 2, ip) + 1 + (izx - 1)*ny
2541 rbuf(ir, ip) = cin(iy, izx)
2542 END DO
2543 END DO
2544 END DO
2545!$OMP END DO
2546!$OMP DO
2547 DO ip = 0, np - 1
2548 ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
2549 scount(ip) = nx*ny*nz
2550 sdispl(ip) = nx*nz*my*ip
2551 END DO
2552!$OMP END DO
2553!$OMP END PARALLEL
2554 ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2555!$OMP PARALLEL DO DEFAULT(NONE), &
2556!$OMP PRIVATE(nx), &
2557!$OMP SHARED(np,boin,rcount,rdispl,nz,ny)
2558 DO ip = 0, np - 1
2559 nx = boin(2, 1, ip) - boin(1, 1, ip) + 1
2560 rcount(ip) = nx*ny*nz
2561 rdispl(ip) = ny*nz*(boin(1, 1, ip) - 1)
2562 END DO
2563!$OMP END PARALLEL DO
2564
2565 CALL group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2566
2567 CALL timestop(handle)
2568
2569 END SUBROUTINE cube_transpose_6
2570
2571! **************************************************************************************************
2572!> \brief ...
2573! **************************************************************************************************
2575
2576 CALL release_fft_scratch_pool()
2577
2578 ! Allocate first scratch and mark it as used
2579 ALLOCATE (fft_scratch_first)
2580 ALLOCATE (fft_scratch_first%fft_scratch)
2581 ! this is a very special scratch, it seems, we always keep it 'most - recent' so we will never delete it
2582 fft_scratch_first%fft_scratch%last_tick = huge(fft_scratch_first%fft_scratch%last_tick)
2583
2585
2586 END SUBROUTINE init_fft_scratch_pool
2587
2588! **************************************************************************************************
2589!> \brief ...
2590!> \param fft_scratch ...
2591! **************************************************************************************************
2592 SUBROUTINE deallocate_fft_scratch_type(fft_scratch)
2593 TYPE(fft_scratch_type), INTENT(INOUT) :: fft_scratch
2594
2595#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2596 INTEGER :: ierr
2597 COMPLEX(KIND=dp), POINTER :: dummy_ptr_z
2598#endif
2599
2600 ! deallocate structures
2601 IF (ASSOCIATED(fft_scratch%ziptr)) THEN
2602 CALL fft_dealloc(fft_scratch%ziptr)
2603 END IF
2604 IF (ASSOCIATED(fft_scratch%zoptr)) THEN
2605 CALL fft_dealloc(fft_scratch%zoptr)
2606 END IF
2607 IF (ASSOCIATED(fft_scratch%p1buf)) THEN
2608 CALL fft_dealloc(fft_scratch%p1buf)
2609 END IF
2610 IF (ASSOCIATED(fft_scratch%p2buf)) THEN
2611#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2612 dummy_ptr_z => fft_scratch%p2buf(1, 1)
2613 ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2614#else
2615 CALL fft_dealloc(fft_scratch%p2buf)
2616#endif
2617 END IF
2618 IF (ASSOCIATED(fft_scratch%p3buf)) THEN
2619#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2620 dummy_ptr_z => fft_scratch%p3buf(1, 1)
2621 ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2622#else
2623 CALL fft_dealloc(fft_scratch%p3buf)
2624#endif
2625 END IF
2626 IF (ASSOCIATED(fft_scratch%p4buf)) THEN
2627#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2628 dummy_ptr_z => fft_scratch%p4buf(1, 1)
2629 ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2630#else
2631 CALL fft_dealloc(fft_scratch%p4buf)
2632#endif
2633 END IF
2634 IF (ASSOCIATED(fft_scratch%p5buf)) THEN
2635#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2636 dummy_ptr_z => fft_scratch%p5buf(1, 1)
2637 ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2638#else
2639 CALL fft_dealloc(fft_scratch%p5buf)
2640#endif
2641 END IF
2642 IF (ASSOCIATED(fft_scratch%p6buf)) THEN
2643 CALL fft_dealloc(fft_scratch%p6buf)
2644 END IF
2645 IF (ASSOCIATED(fft_scratch%p7buf)) THEN
2646#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2647 dummy_ptr_z => fft_scratch%p7buf(1, 1)
2648 ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2649#else
2650 CALL fft_dealloc(fft_scratch%p7buf)
2651#endif
2652 END IF
2653 IF (ASSOCIATED(fft_scratch%r1buf)) THEN
2654#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2655 dummy_ptr_z => fft_scratch%r1buf(1, 1)
2656 ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2657#else
2658 CALL fft_dealloc(fft_scratch%r1buf)
2659#endif
2660 END IF
2661 IF (ASSOCIATED(fft_scratch%r2buf)) THEN
2662 CALL fft_dealloc(fft_scratch%r2buf)
2663 END IF
2664 IF (ASSOCIATED(fft_scratch%tbuf)) THEN
2665#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2666 dummy_ptr_z => fft_scratch%tbuf(1, 1, 1)
2667 ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2668#else
2669 CALL fft_dealloc(fft_scratch%tbuf)
2670#endif
2671 END IF
2672 IF (ASSOCIATED(fft_scratch%a1buf)) THEN
2673 CALL fft_dealloc(fft_scratch%a1buf)
2674 END IF
2675 IF (ASSOCIATED(fft_scratch%a2buf)) THEN
2676 CALL fft_dealloc(fft_scratch%a2buf)
2677 END IF
2678 IF (ASSOCIATED(fft_scratch%a3buf)) THEN
2679 CALL fft_dealloc(fft_scratch%a3buf)
2680 END IF
2681 IF (ASSOCIATED(fft_scratch%a4buf)) THEN
2682 CALL fft_dealloc(fft_scratch%a4buf)
2683 END IF
2684 IF (ASSOCIATED(fft_scratch%a5buf)) THEN
2685 CALL fft_dealloc(fft_scratch%a5buf)
2686 END IF
2687 IF (ASSOCIATED(fft_scratch%a6buf)) THEN
2688 CALL fft_dealloc(fft_scratch%a6buf)
2689 END IF
2690 IF (ASSOCIATED(fft_scratch%scount)) THEN
2691 DEALLOCATE (fft_scratch%scount, fft_scratch%rcount, &
2692 fft_scratch%sdispl, fft_scratch%rdispl)
2693 END IF
2694 IF (ASSOCIATED(fft_scratch%rr)) THEN
2695 DEALLOCATE (fft_scratch%rr)
2696 END IF
2697 IF (ASSOCIATED(fft_scratch%xzbuf)) THEN
2698 DEALLOCATE (fft_scratch%xzbuf)
2699 END IF
2700 IF (ASSOCIATED(fft_scratch%yzbuf)) THEN
2701 DEALLOCATE (fft_scratch%yzbuf)
2702 END IF
2703 IF (ASSOCIATED(fft_scratch%xzbuf_sgl)) THEN
2704 DEALLOCATE (fft_scratch%xzbuf_sgl)
2705 END IF
2706 IF (ASSOCIATED(fft_scratch%yzbuf_sgl)) THEN
2707 DEALLOCATE (fft_scratch%yzbuf_sgl)
2708 END IF
2709 IF (ASSOCIATED(fft_scratch%ss)) THEN
2710 DEALLOCATE (fft_scratch%ss)
2711 END IF
2712 IF (ASSOCIATED(fft_scratch%tt)) THEN
2713 DEALLOCATE (fft_scratch%tt)
2714 END IF
2715 IF (ASSOCIATED(fft_scratch%pgrid)) THEN
2716 DEALLOCATE (fft_scratch%pgrid)
2717 END IF
2718 IF (ASSOCIATED(fft_scratch%pgcube)) THEN
2719 DEALLOCATE (fft_scratch%pgcube)
2720 END IF
2721 IF (ASSOCIATED(fft_scratch%xcor)) THEN
2722 DEALLOCATE (fft_scratch%xcor, fft_scratch%zcor)
2723 END IF
2724 IF (ASSOCIATED(fft_scratch%pzcoord)) THEN
2725 DEALLOCATE (fft_scratch%pzcoord)
2726 END IF
2727 IF (ASSOCIATED(fft_scratch%xzcount)) THEN
2728 DEALLOCATE (fft_scratch%xzcount, fft_scratch%yzcount)
2729 DEALLOCATE (fft_scratch%xzdispl, fft_scratch%yzdispl)
2730 fft_scratch%in = 0
2731 fft_scratch%rsratio = 1._dp
2732 END IF
2733 IF (ASSOCIATED(fft_scratch%rbuf1)) THEN
2734 DEALLOCATE (fft_scratch%rbuf1)
2735 END IF
2736 IF (ASSOCIATED(fft_scratch%rbuf2)) THEN
2737 DEALLOCATE (fft_scratch%rbuf2)
2738 END IF
2739 IF (ASSOCIATED(fft_scratch%rbuf3)) THEN
2740 DEALLOCATE (fft_scratch%rbuf3)
2741 END IF
2742 IF (ASSOCIATED(fft_scratch%rbuf4)) THEN
2743 DEALLOCATE (fft_scratch%rbuf4)
2744 END IF
2745 IF (ASSOCIATED(fft_scratch%rbuf5)) THEN
2746 DEALLOCATE (fft_scratch%rbuf5)
2747 END IF
2748 IF (ASSOCIATED(fft_scratch%rbuf6)) THEN
2749 DEALLOCATE (fft_scratch%rbuf6)
2750 END IF
2751
2752 IF (fft_scratch%cart_sub_comm(1) /= mp_comm_null) THEN
2753 CALL fft_scratch%cart_sub_comm(1)%free()
2754 END IF
2755 IF (fft_scratch%cart_sub_comm(2) /= mp_comm_null) THEN
2756 CALL fft_scratch%cart_sub_comm(2)%free()
2757 END IF
2758
2759 CALL fft_destroy_plan(fft_scratch%fft_plan(1))
2760 CALL fft_destroy_plan(fft_scratch%fft_plan(2))
2761 CALL fft_destroy_plan(fft_scratch%fft_plan(3))
2762 CALL fft_destroy_plan(fft_scratch%fft_plan(4))
2763 CALL fft_destroy_plan(fft_scratch%fft_plan(5))
2764 CALL fft_destroy_plan(fft_scratch%fft_plan(6))
2765
2766 END SUBROUTINE deallocate_fft_scratch_type
2767
2768! **************************************************************************************************
2769!> \brief ...
2770! **************************************************************************************************
2771 SUBROUTINE release_fft_scratch_pool()
2772 TYPE(fft_scratch_pool_type), POINTER :: fft_scratch, fft_scratch_current
2773
2774!$ CPASSERT(.NOT. omp_in_parallel() .OR. 0 == omp_get_thread_num())
2775
2776 IF (init_fft_pool == 0) NULLIFY (fft_scratch_first)
2777
2778 fft_scratch => fft_scratch_first
2779 DO
2780 IF (ASSOCIATED(fft_scratch)) THEN
2781 fft_scratch_current => fft_scratch
2782 fft_scratch => fft_scratch_current%fft_scratch_next
2783 NULLIFY (fft_scratch_current%fft_scratch_next)
2784
2785 CALL deallocate_fft_scratch_type(fft_scratch_current%fft_scratch)
2786
2787 DEALLOCATE (fft_scratch_current%fft_scratch)
2788 DEALLOCATE (fft_scratch_current)
2789 ELSE
2790 EXIT
2791 END IF
2792 END DO
2793
2794 init_fft_pool = 0
2795
2796 END SUBROUTINE release_fft_scratch_pool
2797
2798! **************************************************************************************************
2799!> \brief ...
2800! **************************************************************************************************
2801 SUBROUTINE resize_fft_scratch_pool()
2802
2803 INTEGER :: last_tick, nscratch
2804 TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_current, fft_scratch_old
2805
2806 nscratch = 0
2807
2808 last_tick = huge(last_tick)
2809 NULLIFY (fft_scratch_old)
2810
2811 ! start at the global pool, count, and find a deletion candidate
2812 fft_scratch_current => fft_scratch_first
2813 DO
2814 IF (ASSOCIATED(fft_scratch_current)) THEN
2815 nscratch = nscratch + 1
2816 ! is this a candidate for deletion (i.e. least recently used, and not in use)
2817 IF (.NOT. fft_scratch_current%fft_scratch%in_use) THEN
2818 IF (fft_scratch_current%fft_scratch%last_tick < last_tick) THEN
2819 last_tick = fft_scratch_current%fft_scratch%last_tick
2820 fft_scratch_old => fft_scratch_current
2821 END IF
2822 END IF
2823 fft_scratch_current => fft_scratch_current%fft_scratch_next
2824 ELSE
2825 EXIT
2826 END IF
2827 END DO
2828
2829 ! we should delete a scratch
2830 IF (nscratch > fft_pool_scratch_limit) THEN
2831 ! note that we never deallocate the first (special) element of the list
2832 IF (ASSOCIATED(fft_scratch_old)) THEN
2833 fft_scratch_current => fft_scratch_first
2834 DO
2835 IF (ASSOCIATED(fft_scratch_current)) THEN
2836 ! should we delete the next in the list?
2837 IF (ASSOCIATED(fft_scratch_current%fft_scratch_next, fft_scratch_old)) THEN
2838 ! fix the linked list
2839 fft_scratch_current%fft_scratch_next => fft_scratch_old%fft_scratch_next
2840
2841 ! deallocate the element
2842 CALL deallocate_fft_scratch_type(fft_scratch_old%fft_scratch)
2843 DEALLOCATE (fft_scratch_old%fft_scratch)
2844 DEALLOCATE (fft_scratch_old)
2845
2846 ELSE
2847 fft_scratch_current => fft_scratch_current%fft_scratch_next
2848 END IF
2849 ELSE
2850 EXIT
2851 END IF
2852 END DO
2853
2854 ELSE
2855 cpwarn("The number of the scratches exceeded the limit, but none could be deallocated")
2856 END IF
2857 END IF
2858
2859 END SUBROUTINE resize_fft_scratch_pool
2860
2861! **************************************************************************************************
2862!> \brief ...
2863!> \param fft_scratch ...
2864!> \param tf_type ...
2865!> \param n ...
2866!> \param fft_sizes ...
2867! **************************************************************************************************
2868 SUBROUTINE get_fft_scratch(fft_scratch, tf_type, n, fft_sizes)
2869 TYPE(fft_scratch_type), POINTER :: fft_scratch
2870 INTEGER, INTENT(IN) :: tf_type
2871 INTEGER, DIMENSION(:), INTENT(IN) :: n
2872 TYPE(fft_scratch_sizes), INTENT(IN), &
2873 OPTIONAL :: fft_sizes
2874
2875 CHARACTER(len=*), PARAMETER :: routinen = 'get_fft_scratch'
2876
2877 INTEGER :: coord(2), dim(2), handle, i, ix, iz, lg, lmax, m1, m2, &
2878 mcx2, mcy3, mcz1, mcz2, mg, mmax, mx1, mx2, my1, my3, mz1, mz2, mz3, &
2879 nbx, nbz, nm, nmax, nmray, np, nx, ny, nyzray, nz, pos(2)
2880 INTEGER, DIMENSION(3) :: pcoord
2881 LOGICAL :: equal
2882 LOGICAL, DIMENSION(2) :: dims
2883 TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_current, &
2884 fft_scratch_last, &
2885 fft_scratch_new
2886#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2887 INTEGER :: ierr
2888 INTEGER(KIND=C_SIZE_T) :: length
2889 TYPE(c_ptr) :: cptr_r1buf, cptr_tbuf, &
2890 cptr_p2buf, cptr_p3buf, cptr_p4buf, cptr_p5buf, cptr_p7buf
2891#endif
2892 CALL timeset(routinen, handle)
2893
2894 ! this is the place to check that the scratch_pool does not grow without limits
2895 ! before we add a new scratch check the size of the pool and release some of the list if needed
2896 CALL resize_fft_scratch_pool()
2897
2898 ! get the required scratch
2899!$OMP ATOMIC
2901 fft_scratch_current => fft_scratch_first
2902 DO
2903 IF (ASSOCIATED(fft_scratch_current)) THEN
2904 IF (fft_scratch_current%fft_scratch%in_use) THEN
2905 fft_scratch_last => fft_scratch_current
2906 fft_scratch_current => fft_scratch_current%fft_scratch_next
2907 cycle
2908 END IF
2909 IF (tf_type /= fft_scratch_current%fft_scratch%tf_type) THEN
2910 fft_scratch_last => fft_scratch_current
2911 fft_scratch_current => fft_scratch_current%fft_scratch_next
2912 cycle
2913 END IF
2914 IF (.NOT. all(n == fft_scratch_current%fft_scratch%nfft)) THEN
2915 fft_scratch_last => fft_scratch_current
2916 fft_scratch_current => fft_scratch_current%fft_scratch_next
2917 cycle
2918 END IF
2919 IF (PRESENT(fft_sizes)) THEN
2920 IF (fft_sizes%rs_group /= fft_scratch_current%fft_scratch%group) THEN
2921 fft_scratch_last => fft_scratch_current
2922 fft_scratch_current => fft_scratch_current%fft_scratch_next
2923 cycle
2924 END IF
2925 CALL is_equal(fft_sizes, fft_scratch_current%fft_scratch%sizes, equal)
2926 IF (.NOT. equal) THEN
2927 fft_scratch_last => fft_scratch_current
2928 fft_scratch_current => fft_scratch_current%fft_scratch_next
2929 cycle
2930 END IF
2931 END IF
2932 ! Success
2933 fft_scratch => fft_scratch_current%fft_scratch
2934 fft_scratch_current%fft_scratch%in_use = .true.
2935 EXIT
2936 ELSE
2937 ! We cannot find the scratch type in this pool
2938 ! Generate a new scratch set
2939 ALLOCATE (fft_scratch_new)
2940 ALLOCATE (fft_scratch_new%fft_scratch)
2941
2942 IF (tf_type .NE. 400) THEN
2943 fft_scratch_new%fft_scratch%sizes = fft_sizes
2944 np = fft_sizes%numtask
2945 ALLOCATE (fft_scratch_new%fft_scratch%scount(0:np - 1), fft_scratch_new%fft_scratch%rcount(0:np - 1), &
2946 fft_scratch_new%fft_scratch%sdispl(0:np - 1), fft_scratch_new%fft_scratch%rdispl(0:np - 1), &
2947 fft_scratch_new%fft_scratch%pgcube(0:np - 1, 2))
2948 END IF
2949
2950 SELECT CASE (tf_type)
2951 CASE DEFAULT
2952 cpabort("Invalid scratch type.")
2953 CASE (100) ! fft3d_pb: full cube distribution
2954 cpassert(PRESENT(fft_sizes))
2955 mx1 = fft_sizes%mx1
2956 my1 = fft_sizes%my1
2957 mx2 = fft_sizes%mx2
2958 mz2 = fft_sizes%mz2
2959 my3 = fft_sizes%my3
2960 mz3 = fft_sizes%mz3
2961 CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
2962 CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
2963 CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx2*mz2, n(2)])
2964 CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx2*mz2])
2965 CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
2966 CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
2967 fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
2968
2969 dim = fft_sizes%rs_group%num_pe_cart
2970 pos = fft_sizes%rs_group%mepos_cart
2971 fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
2972 fft_scratch_new%fft_scratch%dim = dim
2973 fft_scratch_new%fft_scratch%pos = pos
2974 mcz1 = fft_sizes%mcz1
2975 mcx2 = fft_sizes%mcx2
2976 mcz2 = fft_sizes%mcz2
2977 mcy3 = fft_sizes%mcy3
2978 ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:dim(2) - 1))
2979 ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:dim(2) - 1))
2980 ALLOCATE (fft_scratch_new%fft_scratch%rbuf3(mx2*mz3*mcy3, 0:dim(1) - 1))
2981 ALLOCATE (fft_scratch_new%fft_scratch%rbuf4(mx2*mz2*mcy3, 0:dim(1) - 1))
2982
2983 dims = (/.true., .false./)
2984 CALL fft_scratch_new%fft_scratch%cart_sub_comm(1)%from_sub(fft_sizes%rs_group, dims)
2985 dims = (/.false., .true./)
2986 CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
2987
2988 !initialise pgcube
2989 DO i = 0, dim(1) - 1
2990 coord = (/i, pos(2)/)
2991 CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 1))
2992 END DO
2993 DO i = 0, dim(2) - 1
2994 coord = (/pos(1), i/)
2995 CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
2996 END DO
2997
2998 !set up fft plans
2999 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, fwfft, .true., n(3), mx1*my1, &
3000 fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a2buf, fft_plan_style)
3001 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, fwfft, .true., n(2), mx2*mz2, &
3002 fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf, fft_plan_style)
3003 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, fwfft, .true., n(1), my3*mz3, &
3004 fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf, fft_plan_style)
3005 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, bwfft, .true., n(1), my3*mz3, &
3006 fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf, fft_plan_style)
3007 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, bwfft, .true., n(2), mx2*mz2, &
3008 fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
3009 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, bwfft, .true., n(3), mx1*my1, &
3010 fft_scratch_new%fft_scratch%a2buf, fft_scratch_new%fft_scratch%a1buf, fft_plan_style)
3011
3012 CASE (101) ! fft3d_pb: full cube distribution (dim 1)
3013 cpassert(PRESENT(fft_sizes))
3014 mx1 = fft_sizes%mx1
3015 my1 = fft_sizes%my1
3016 mz1 = fft_sizes%mz1
3017 my3 = fft_sizes%my3
3018 mz3 = fft_sizes%mz3
3019 CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
3020 CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
3021 fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
3022 CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx1*mz1, n(2)])
3023 CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx1*mz1])
3024 CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
3025 CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
3026
3027 dim = fft_sizes%rs_group%num_pe_cart
3028 pos = fft_sizes%rs_group%mepos_cart
3029 fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
3030 fft_scratch_new%fft_scratch%dim = dim
3031 fft_scratch_new%fft_scratch%pos = pos
3032 mcy3 = fft_sizes%mcy3
3033 ALLOCATE (fft_scratch_new%fft_scratch%rbuf5(mx1*mz3*mcy3, 0:dim(1) - 1))
3034 ALLOCATE (fft_scratch_new%fft_scratch%rbuf6(mx1*mz1*mcy3, 0:dim(1) - 1))
3035
3036 !set up fft plans
3037 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, fwfft, .true., n(3), mx1*my1, &
3038 fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
3039 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, fwfft, .true., n(2), mx1*mz1, &
3040 fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf, fft_plan_style)
3041 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, fwfft, .true., n(1), my3*mz3, &
3042 fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf, fft_plan_style)
3043 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, bwfft, .true., n(1), my3*mz3, &
3044 fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf, fft_plan_style)
3045 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, bwfft, .true., n(2), mx1*mz1, &
3046 fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
3047 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, bwfft, .true., n(3), mx1*my1, &
3048 fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a1buf, fft_plan_style)
3049
3050 CASE (200) ! fft3d_ps: plane distribution
3051 cpassert(PRESENT(fft_sizes))
3052 nx = fft_sizes%nx
3053 ny = fft_sizes%ny
3054 nz = fft_sizes%nz
3055 mx2 = fft_sizes%mx2
3056 lmax = fft_sizes%lmax
3057 mmax = fft_sizes%mmax
3058 lg = fft_sizes%lg
3059 mg = fft_sizes%mg
3060 np = fft_sizes%numtask
3061 nmray = fft_sizes%nmray
3062 nyzray = fft_sizes%nyzray
3063#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
3064 length = int(2*dp_size*max(mmax, 1)*max(lmax, 1), kind=c_size_t)
3065 ierr = offload_malloc_pinned_mem(cptr_r1buf, length)
3066 cpassert(ierr == 0)
3067 CALL c_f_pointer(cptr_r1buf, fft_scratch_new%fft_scratch%r1buf, (/max(mmax, 1), max(lmax, 1)/))
3068 length = int(2*dp_size*max(ny, 1)*max(nz, 1)*max(nx, 1), kind=c_size_t)
3069 ierr = offload_malloc_pinned_mem(cptr_tbuf, length)
3070 cpassert(ierr == 0)
3071 CALL c_f_pointer(cptr_tbuf, fft_scratch_new%fft_scratch%tbuf, (/max(ny, 1), max(nz, 1), max(nx, 1)/))
3072#else
3073 CALL fft_alloc(fft_scratch_new%fft_scratch%r1buf, [mmax, lmax])
3074 CALL fft_alloc(fft_scratch_new%fft_scratch%tbuf, [ny, nz, nx])
3075#endif
3076 fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
3077 CALL fft_alloc(fft_scratch_new%fft_scratch%r2buf, [lg, mg])
3078 nm = nmray*mx2
3079 IF (alltoall_sgl) THEN
3080 ALLOCATE (fft_scratch_new%fft_scratch%ss(mmax, lmax))
3081 ALLOCATE (fft_scratch_new%fft_scratch%tt(nm, 0:np - 1))
3082 ELSE
3083 ALLOCATE (fft_scratch_new%fft_scratch%rr(nm, 0:np - 1))
3084 END IF
3085
3086 !set up fft plans
3087 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, fwfft, .true., nz, nx*ny, &
3088 fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
3089 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, fwfft, .true., ny, nx*nz, &
3090 fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf, fft_plan_style)
3091 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, fwfft, .true., n(1), nyzray, &
3092 fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%r2buf, fft_plan_style)
3093 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, bwfft, .true., n(1), nyzray, &
3094 fft_scratch_new%fft_scratch%r2buf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
3095 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, bwfft, .true., ny, nx*nz, &
3096 fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
3097 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, bwfft, .true., nz, nx*ny, &
3098 fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf, fft_plan_style)
3099
3100 CASE (300) ! fft3d_ps: block distribution
3101 cpassert(PRESENT(fft_sizes))
3102 mx1 = fft_sizes%mx1
3103 mx2 = fft_sizes%mx2
3104 my1 = fft_sizes%my1
3105 mz2 = fft_sizes%mz2
3106 mcx2 = fft_sizes%mcx2
3107 lg = fft_sizes%lg
3108 mg = fft_sizes%mg
3109 nmax = fft_sizes%nmax
3110 nmray = fft_sizes%nmray
3111 nyzray = fft_sizes%nyzray
3112 m1 = fft_sizes%r_dim(1)
3113 m2 = fft_sizes%r_dim(2)
3114 nbx = fft_sizes%nbx
3115 nbz = fft_sizes%nbz
3116 CALL fft_alloc(fft_scratch_new%fft_scratch%p1buf, [mx1*my1, n(3)])
3117 CALL fft_alloc(fft_scratch_new%fft_scratch%p6buf, [lg, mg])
3118#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
3119 length = int(2*dp_size*max(n(3), 1)*max(mx1*my1, 1), kind=c_size_t)
3120 ierr = offload_malloc_pinned_mem(cptr_p2buf, length)
3121 cpassert(ierr == 0)
3122 CALL c_f_pointer(cptr_p2buf, fft_scratch_new%fft_scratch%p2buf, (/max(n(3), 1), max(mx1*my1, 1)/))
3123 length = int(2*dp_size*max(mx2*mz2, 1)*max(n(2), 1), kind=c_size_t)
3124 ierr = offload_malloc_pinned_mem(cptr_p3buf, length)
3125 cpassert(ierr == 0)
3126 CALL c_f_pointer(cptr_p3buf, fft_scratch_new%fft_scratch%p3buf, (/max(mx2*mz2, 1), max(n(2), 1)/))
3127 length = int(2*dp_size*max(n(2), 1)*max(mx2*mz2, 1), kind=c_size_t)
3128 ierr = offload_malloc_pinned_mem(cptr_p4buf, length)
3129 cpassert(ierr == 0)
3130 CALL c_f_pointer(cptr_p4buf, fft_scratch_new%fft_scratch%p4buf, (/max(n(2), 1), max(mx2*mz2, 1)/))
3131 length = int(2*dp_size*max(nyzray, 1)*max(n(1), 1), kind=c_size_t)
3132 ierr = offload_malloc_pinned_mem(cptr_p5buf, length)
3133 cpassert(ierr == 0)
3134 CALL c_f_pointer(cptr_p5buf, fft_scratch_new%fft_scratch%p5buf, (/max(nyzray, 1), max(n(1), 1)/))
3135 length = int(2*dp_size*max(mg, 1)*max(lg, 1), kind=c_size_t)
3136 ierr = offload_malloc_pinned_mem(cptr_p7buf, length)
3137 cpassert(ierr == 0)
3138 CALL c_f_pointer(cptr_p7buf, fft_scratch_new%fft_scratch%p7buf, (/max(mg, 1), max(lg, 1)/))
3139#else
3140 CALL fft_alloc(fft_scratch_new%fft_scratch%p2buf, [n(3), mx1*my1])
3141 CALL fft_alloc(fft_scratch_new%fft_scratch%p3buf, [mx2*mz2, n(2)])
3142 CALL fft_alloc(fft_scratch_new%fft_scratch%p4buf, [n(2), mx2*mz2])
3143 CALL fft_alloc(fft_scratch_new%fft_scratch%p5buf, [nyzray, n(1)])
3144 CALL fft_alloc(fft_scratch_new%fft_scratch%p7buf, [mg, lg])
3145#endif
3146 IF (alltoall_sgl) THEN
3147 ALLOCATE (fft_scratch_new%fft_scratch%yzbuf_sgl(mg*lg))
3148 ALLOCATE (fft_scratch_new%fft_scratch%xzbuf_sgl(n(2)*mx2*mz2))
3149 ELSE
3150 ALLOCATE (fft_scratch_new%fft_scratch%yzbuf(mg*lg))
3151 ALLOCATE (fft_scratch_new%fft_scratch%xzbuf(n(2)*mx2*mz2))
3152 END IF
3153 ALLOCATE (fft_scratch_new%fft_scratch%pgrid(0:m1 - 1, 0:m2 - 1))
3154 ALLOCATE (fft_scratch_new%fft_scratch%xcor(nbx))
3155 ALLOCATE (fft_scratch_new%fft_scratch%zcor(nbz))
3156 ALLOCATE (fft_scratch_new%fft_scratch%pzcoord(0:np - 1))
3157 ALLOCATE (fft_scratch_new%fft_scratch%xzcount(0:np - 1), &
3158 fft_scratch_new%fft_scratch%yzcount(0:np - 1))
3159 ALLOCATE (fft_scratch_new%fft_scratch%xzdispl(0:np - 1), &
3160 fft_scratch_new%fft_scratch%yzdispl(0:np - 1))
3161 fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
3162
3163 dim = fft_sizes%rs_group%num_pe_cart
3164 pos = fft_sizes%rs_group%mepos_cart
3165 fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
3166 fft_scratch_new%fft_scratch%dim = dim
3167 fft_scratch_new%fft_scratch%pos = pos
3168 mcz1 = fft_sizes%mcz1
3169 mcz2 = fft_sizes%mcz2
3170 ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:dim(2) - 1))
3171 ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:dim(2) - 1))
3172
3173 dims = (/.false., .true./)
3174 CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
3175
3176 !initialise pgcube
3177 DO i = 0, dim(2) - 1
3178 coord = (/pos(1), i/)
3179 CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
3180 END DO
3181
3182 !initialise pgrid
3183 DO ix = 0, m1 - 1
3184 DO iz = 0, m2 - 1
3185 coord = (/ix, iz/)
3186 CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgrid(ix, iz))
3187 END DO
3188 END DO
3189
3190 !initialise pzcoord
3191 DO i = 0, np - 1
3192 CALL fft_sizes%rs_group%coords(i, pcoord)
3193 fft_scratch_new%fft_scratch%pzcoord(i) = pcoord(2)
3194 END DO
3195
3196 !set up fft plans
3197 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, fwfft, .true., n(3), mx1*my1, &
3198 fft_scratch_new%fft_scratch%p1buf, fft_scratch_new%fft_scratch%p2buf, fft_plan_style)
3199 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, fwfft, .true., n(2), mx2*mz2, &
3200 fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p4buf, fft_plan_style)
3201 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, fwfft, .true., n(1), nyzray, &
3202 fft_scratch_new%fft_scratch%p5buf, fft_scratch_new%fft_scratch%p6buf, fft_plan_style)
3203 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, bwfft, .true., n(1), nyzray, &
3204 fft_scratch_new%fft_scratch%p6buf, fft_scratch_new%fft_scratch%p7buf, fft_plan_style)
3205 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, bwfft, .true., n(2), mx2*mz2, &
3206 fft_scratch_new%fft_scratch%p4buf, fft_scratch_new%fft_scratch%p3buf, fft_plan_style)
3207 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, bwfft, .true., n(3), mx1*my1, &
3208 fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p1buf, fft_plan_style)
3209
3210 CASE (400) ! serial FFT
3211 np = 0
3212 CALL fft_alloc(fft_scratch_new%fft_scratch%ziptr, n)
3213 CALL fft_alloc(fft_scratch_new%fft_scratch%zoptr, n)
3214
3215 !in place plans
3216 CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, .true., fwfft, n, &
3217 fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr, fft_plan_style)
3218 CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, .true., bwfft, n, &
3219 fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr, fft_plan_style)
3220 ! out of place plans
3221 CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, .false., fwfft, n, &
3222 fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr, fft_plan_style)
3223 CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, .false., bwfft, n, &
3224 fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr, fft_plan_style)
3225
3226 END SELECT
3227
3228 NULLIFY (fft_scratch_new%fft_scratch_next)
3229 fft_scratch_new%fft_scratch%fft_scratch_id = &
3230 fft_scratch_last%fft_scratch%fft_scratch_id + 1
3231 fft_scratch_new%fft_scratch%in_use = .true.
3232 fft_scratch_new%fft_scratch%nfft = n
3233 fft_scratch_last%fft_scratch_next => fft_scratch_new
3234 fft_scratch_new%fft_scratch%tf_type = tf_type
3235 fft_scratch => fft_scratch_new%fft_scratch
3236 EXIT
3237
3238 END IF
3239 END DO
3240
3241!$OMP ATOMIC READ
3242 fft_scratch%last_tick = tick_fft_pool
3243
3244 CALL timestop(handle)
3245
3246 END SUBROUTINE get_fft_scratch
3247
3248! **************************************************************************************************
3249!> \brief ...
3250!> \param fft_scratch ...
3251! **************************************************************************************************
3252 SUBROUTINE release_fft_scratch(fft_scratch)
3253
3254 TYPE(fft_scratch_type), POINTER :: fft_scratch
3255
3256 INTEGER :: scratch_id
3257 TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_current
3258
3259 scratch_id = fft_scratch%fft_scratch_id
3260
3261 fft_scratch_current => fft_scratch_first
3262 DO
3263 IF (ASSOCIATED(fft_scratch_current)) THEN
3264 IF (scratch_id == fft_scratch_current%fft_scratch%fft_scratch_id) THEN
3265 fft_scratch%in_use = .false.
3266 NULLIFY (fft_scratch)
3267 EXIT
3268 END IF
3269 fft_scratch_current => fft_scratch_current%fft_scratch_next
3270 ELSE
3271 ! We cannot find the scratch type in this pool
3272 cpabort("Invalid scratch type.")
3273 EXIT
3274 END IF
3275 END DO
3276
3277 END SUBROUTINE release_fft_scratch
3278
3279! **************************************************************************************************
3280!> \brief ...
3281!> \param rs ...
3282!> \param scount ...
3283!> \param sdispl ...
3284!> \param rq ...
3285!> \param rcount ...
3286!> \param rdispl ...
3287!> \param group ...
3288! **************************************************************************************************
3289 SUBROUTINE sparse_alltoall(rs, scount, sdispl, rq, rcount, rdispl, group)
3290 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: rs
3291 INTEGER, DIMENSION(:), POINTER :: scount, sdispl
3292 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: rq
3293 INTEGER, DIMENSION(:), POINTER :: rcount, rdispl
3294
3295 CLASS(mp_comm_type), INTENT(IN) :: group
3296
3297 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: msgin, msgout
3298 INTEGER :: ip, n, nr, ns, pos
3299 TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: rreq, sreq
3300
3301 CALL group%sync()
3302 n = group%num_pe
3303 pos = group%mepos
3304 ALLOCATE (sreq(0:n - 1))
3305 ALLOCATE (rreq(0:n - 1))
3306 nr = 0
3307 DO ip = 0, n - 1
3308 IF (rcount(ip) == 0) cycle
3309 IF (ip == pos) cycle
3310 msgout => rq(rdispl(ip) + 1:rdispl(ip) + rcount(ip))
3311 CALL group%irecv(msgout, ip, rreq(nr))
3312 nr = nr + 1
3313 END DO
3314 ns = 0
3315 DO ip = 0, n - 1
3316 IF (scount(ip) == 0) cycle
3317 IF (ip == pos) cycle
3318 msgin => rs(sdispl(ip) + 1:sdispl(ip) + scount(ip))
3319 CALL group%isend(msgin, ip, sreq(ns))
3320 ns = ns + 1
3321 END DO
3322 IF (rcount(pos) /= 0) THEN
3323 IF (rcount(pos) /= scount(pos)) cpabort("Invalid count.")
3324 rq(rdispl(pos) + 1:rdispl(pos) + rcount(pos)) = rs(sdispl(pos) + 1:sdispl(pos) + scount(pos))
3325 END IF
3326 CALL mp_waitall(sreq(0:ns - 1))
3327 CALL mp_waitall(rreq(0:nr - 1))
3328 DEALLOCATE (sreq)
3329 DEALLOCATE (rreq)
3330 CALL group%sync()
3331
3332 END SUBROUTINE sparse_alltoall
3333
3334! **************************************************************************************************
3335!> \brief test data structures for equality. It is assumed that if they are
3336!> different for one mpi task they are different for all (??)
3337!> \param fft_size_1 ...
3338!> \param fft_size_2 ...
3339!> \param equal ...
3340! **************************************************************************************************
3341 SUBROUTINE is_equal(fft_size_1, fft_size_2, equal)
3342 TYPE(fft_scratch_sizes) :: fft_size_1, fft_size_2
3343 LOGICAL :: equal
3344
3345 equal = .true.
3346
3347 equal = equal .AND. fft_size_1%nx == fft_size_2%nx
3348 equal = equal .AND. fft_size_1%ny == fft_size_2%ny
3349 equal = equal .AND. fft_size_1%nz == fft_size_2%nz
3350
3351 equal = equal .AND. fft_size_1%lmax == fft_size_2%lmax
3352 equal = equal .AND. fft_size_1%mmax == fft_size_2%mmax
3353 equal = equal .AND. fft_size_1%nmax == fft_size_2%nmax
3354
3355 equal = equal .AND. fft_size_1%mx1 == fft_size_2%mx1
3356 equal = equal .AND. fft_size_1%mx2 == fft_size_2%mx2
3357 equal = equal .AND. fft_size_1%mx3 == fft_size_2%mx3
3358
3359 equal = equal .AND. fft_size_1%my1 == fft_size_2%my1
3360 equal = equal .AND. fft_size_1%my2 == fft_size_2%my2
3361 equal = equal .AND. fft_size_1%my3 == fft_size_2%my3
3362
3363 equal = equal .AND. fft_size_1%mcz1 == fft_size_2%mcz1
3364 equal = equal .AND. fft_size_1%mcx2 == fft_size_2%mcx2
3365 equal = equal .AND. fft_size_1%mcz2 == fft_size_2%mcz2
3366 equal = equal .AND. fft_size_1%mcy3 == fft_size_2%mcy3
3367
3368 equal = equal .AND. fft_size_1%lg == fft_size_2%lg
3369 equal = equal .AND. fft_size_1%mg == fft_size_2%mg
3370
3371 equal = equal .AND. fft_size_1%nbx == fft_size_2%nbx
3372 equal = equal .AND. fft_size_1%nbz == fft_size_2%nbz
3373
3374 equal = equal .AND. fft_size_1%nmray == fft_size_2%nmray
3375 equal = equal .AND. fft_size_1%nyzray == fft_size_2%nyzray
3376
3377 equal = equal .AND. fft_size_1%rs_group == fft_size_2%rs_group
3378
3379 equal = equal .AND. all(fft_size_1%g_pos == fft_size_2%g_pos)
3380 equal = equal .AND. all(fft_size_1%r_pos == fft_size_2%r_pos)
3381 equal = equal .AND. all(fft_size_1%r_dim == fft_size_2%r_dim)
3382
3383 equal = equal .AND. fft_size_1%numtask == fft_size_2%numtask
3384
3385 END SUBROUTINE is_equal
3386
3387END MODULE fft_tools
various routines to log and control the output. The idea is that decisions about where to log should ...
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
subroutine, public fft_do_init(fft_type, wisdom_file)
...
Definition fft_lib.F:63
subroutine, public fft_3d(plan, scale, zin, zout, stat)
...
Definition fft_lib.F:172
subroutine, public fft_create_plan_1dm(plan, fft_type, fsign, trans, n, m, zin, zout, plan_style)
...
Definition fft_lib.F:212
subroutine, public fft_destroy_plan(plan)
...
Definition fft_lib.F:241
integer function, public fft_library(fftlib)
Interface to FFT libraries.
Definition fft_lib.F:42
subroutine, public fft_1dm(plan, zin, zout, scale, stat)
...
Definition fft_lib.F:261
subroutine, public fft_get_lengths(fft_type, data, max_length)
...
Definition fft_lib.F:106
subroutine, public fft_create_plan_3d(plan, fft_type, fft_in_place, fsign, n, zin, zout, plan_style)
...
Definition fft_lib.F:136
subroutine, public fft_do_cleanup(fft_type, wisdom_file, ionode)
...
Definition fft_lib.F:84
Type to store data about a (1D or 3D) FFT, including FFTW plan.
Definition fft_plan.F:18
subroutine, public fft_radix_operations(radix_in, radix_out, operation)
Determine the allowed lengths of FFT's '''.
Definition fft_tools.F:239
integer, save init_fft_pool
Definition fft_tools.F:129
subroutine, public yz_to_x(tb, group, my_pos, p2p, yzp, nray, bo, sb, fft_scratch)
...
Definition fft_tools.F:1488
integer, parameter, public bwfft
Definition fft_tools.F:146
subroutine, public yz_to_xz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
...
Definition fft_tools.F:1605
subroutine, public init_fft_scratch_pool()
...
Definition fft_tools.F:2575
integer, save, public fft_plan_style
Definition fft_tools.F:157
subroutine, public cube_transpose_1(cin, boin, boout, sout, fft_scratch)
...
Definition fft_tools.F:2009
integer, parameter, public fft_radix_closest
Definition fft_tools.F:147
subroutine, public fft_fw1d(n, m, trans, zin, zout, scale, stat)
Performs m 1-D forward FFT-s of size n.
Definition fft_tools.F:331
subroutine, public release_fft_scratch(fft_scratch)
...
Definition fft_tools.F:3253
subroutine, public x_to_yz(sb, group, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
...
Definition fft_tools.F:1372
integer, save tick_fft_pool
Definition fft_tools.F:131
subroutine, public init_fft(fftlib, alltoall, fftsg_sizes, pool_limit, wisdom_file, plan_style)
...
Definition fft_tools.F:186
subroutine, public get_fft_scratch(fft_scratch, tf_type, n, fft_sizes)
...
Definition fft_tools.F:2869
integer, parameter, public fwfft
Definition fft_tools.F:146
integer, save, public fft_type
Definition fft_tools.F:154
integer, parameter, public fft_radix_next_odd
Definition fft_tools.F:149
subroutine, public xz_to_yz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
...
Definition fft_tools.F:1815
subroutine, public cube_transpose_2(cin, boin, boout, sout, fft_scratch)
...
Definition fft_tools.F:2099
type(fft_scratch_pool_type), pointer, save fft_scratch_first
Definition fft_tools.F:134
integer, save fft_pool_scratch_limit
Definition fft_tools.F:133
integer, parameter, public fft_radix_next
Definition fft_tools.F:147
subroutine, public finalize_fft(para_env, wisdom_file)
does whatever is needed to finalize the current fft setup
Definition fft_tools.F:216
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp_size
Definition kinds.F:36
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public sp
Definition kinds.F:33
Definition of mathematical constants and functions.
complex(kind=dp), parameter, public z_zero
Interface to the message passing library MPI.
type(mp_comm_type), parameter, public mp_comm_null
Fortran API for the offload package, which is written in C.
Definition offload_api.F:12
integer function, public offload_free_pinned_mem(buffer)
free pinned memory
Definition offload_api.F:75
integer function, public offload_malloc_pinned_mem(buffer, length)
allocate pinned memory.
Definition offload_api.F:52