(git:374b731)
Loading...
Searching...
No Matches
fft_tools.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \par History
10!> JGH (30-Nov-2000): ESSL FFT Library added
11!> JGH (05-Jan-2001): Added SGI library FFT
12!> JGH (14-Jan-2001): Added parallel 3d FFT
13!> JGH (10-Feb-2006): New interface type
14!> JGH (31-Mar-2008): Remove local allocates and reshapes (performance)
15!> Possible problems can be related with setting arrays
16!> not to zero
17!> Some interfaces could be further simplified by avoiding
18!> an initial copy. However, this assumes contiguous arrays
19!> IAB (15-Oct-2008): Moved mp_cart_sub calls out of cube_tranpose_* and into
20!> fft_scratch type, reducing number of calls dramatically
21!> IAB (05-Dec-2008): Moved all other non-essential MPI calls into scratch type
22!> IAB (09-Jan-2009): Added fft_plan_type to store FFT data, including cached FFTW plans
23!> IAB (13-Feb-2009): Extended plan caching to serial 3D FFT (fft3d_s)
24!> IAB (09-Oct-2009): Added OpenMP directives to parallel 3D FFT
25!> (c) The Numerical Algorithms Group (NAG) Ltd, 2008-2009 on behalf of the HECToR project
26!> \author JGH
27! **************************************************************************************************
29 USE iso_c_binding, ONLY: c_f_pointer,&
30 c_loc,&
31 c_ptr,&
32 c_size_t
34 USE fft_lib, ONLY: &
35 fft_1dm, fft_3d, fft_alloc, fft_create_plan_1dm, fft_create_plan_3d, fft_dealloc, &
37 USE fft_plan, ONLY: fft_plan_type
38 USE kinds, ONLY: dp,&
39 dp_size,&
40 sp
41 USE mathconstants, ONLY: z_zero
42 USE message_passing, ONLY: mp_cart_type,&
49
50!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
51
52#include "../base/base_uses.f90"
53
54 IMPLICIT NONE
55
56 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'fft_tools'
57
58 ! Types for the pool of scratch data needed in FFT routines
59 ! keep the subroutine "is_equal" up-to-date
60 ! needs a default initialization
62 INTEGER :: nx = 0, ny = 0, nz = 0
63 INTEGER :: lmax = 0, mmax = 0, nmax = 0
64 INTEGER :: mx1 = 0, mx2 = 0, mx3 = 0
65 INTEGER :: my1 = 0, my2 = 0, my3 = 0
66 INTEGER :: mz1 = 0, mz2 = 0, mz3 = 0
67 INTEGER :: mcz1 = 0, mcz2 = 0, mcy3 = 0, mcx2 = 0
68 INTEGER :: lg = 0, mg = 0
69 INTEGER :: nbx = 0, nbz = 0
70 INTEGER :: nmray = 0, nyzray = 0
71 TYPE(mp_comm_type) :: gs_group = mp_comm_type()
72 TYPE(mp_cart_type) :: rs_group = mp_cart_type()
73 INTEGER, DIMENSION(2) :: g_pos = 0, r_pos = 0, r_dim = 0
74 INTEGER :: numtask = 0
75 END TYPE fft_scratch_sizes
76
78 INTEGER :: fft_scratch_id = 0
79 INTEGER :: tf_type = -1
80 LOGICAL :: in_use = .true.
81 TYPE(mp_comm_type) :: group = mp_comm_type()
82 INTEGER, DIMENSION(3) :: nfft = -1
83 ! to be used in cube_transpose_* routines
84 TYPE(mp_cart_type), DIMENSION(2) :: cart_sub_comm = mp_cart_type()
85 INTEGER, DIMENSION(2) :: dim = -1, pos = -1
86 ! to be used in fft3d_s
87 COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER, CONTIGUOUS &
88 :: ziptr => null(), zoptr => null()
89 ! to be used in fft3d_ps : block distribution
90 COMPLEX(KIND=dp), DIMENSION(:, :), CONTIGUOUS, POINTER &
91 :: p1buf => null(), p2buf => null(), p3buf => null(), p4buf => null(), &
92 p5buf => null(), p6buf => null(), p7buf => null()
93 ! to be used in fft3d_ps : plane distribution
94 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
95 :: r1buf => null(), r2buf => null()
96 COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER, CONTIGUOUS &
97 :: tbuf => null()
98 ! to be used in fft3d_pb
99 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
100 :: a1buf => null(), a2buf => null(), a3buf => null(), &
101 a4buf => null(), a5buf => null(), a6buf => null()
102 ! to be used in communication routines
103 INTEGER, DIMENSION(:), CONTIGUOUS, POINTER :: scount => null(), rcount => null(), sdispl => null(), rdispl => null()
104 INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER :: pgcube => null()
105 INTEGER, DIMENSION(:), CONTIGUOUS, POINTER :: xzcount => null(), yzcount => null(), xzdispl => null(), yzdispl => null()
106 INTEGER :: in = 0, mip = -1
107 REAL(kind=dp) :: rsratio = 1.0_dp
108 COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS &
109 :: xzbuf => null(), yzbuf => null()
110 COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS &
111 :: xzbuf_sgl => null(), yzbuf_sgl => null()
112 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
113 :: rbuf1 => null(), rbuf2 => null(), rbuf3 => null(), rbuf4 => null(), &
114 rbuf5 => null(), rbuf6 => null(), rr => null()
115 COMPLEX(KIND=sp), DIMENSION(:, :), POINTER, CONTIGUOUS &
116 :: ss => null(), tt => null()
117 INTEGER, DIMENSION(:, :), POINTER, CONTIGUOUS :: pgrid => null()
118 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: xcor => null(), zcor => null(), pzcoord => null()
120 TYPE(fft_plan_type), DIMENSION(6) :: fft_plan = fft_plan_type()
121 INTEGER :: last_tick = -1
122 END TYPE fft_scratch_type
123
125 TYPE(fft_scratch_type), POINTER :: fft_scratch => null()
126 TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_next => null()
127 END TYPE fft_scratch_pool_type
128
129 INTEGER, SAVE :: init_fft_pool = 0
130 ! the clock for fft pool. Allows to identify the least recently used scratch
131 INTEGER, SAVE :: tick_fft_pool = 0
132 ! limit the number of scratch pools to fft_pool_scratch_limit.
133 INTEGER, SAVE :: fft_pool_scratch_limit = 15
135 ! END of types for the pool of scratch data needed in FFT routines
136
137 PRIVATE
138 PUBLIC :: init_fft, fft3d, finalize_fft
139 PUBLIC :: fft_alloc, fft_dealloc
141 PUBLIC :: fwfft, bwfft
143 PUBLIC :: fft_radix_next_odd
144
145 INTEGER, PARAMETER :: fwfft = +1, bwfft = -1
146 INTEGER, PARAMETER :: fft_radix_closest = 493, fft_radix_next = 494
147 INTEGER, PARAMETER :: fft_radix_allowed = 495, fft_radix_disallowed = 496
148 INTEGER, PARAMETER :: fft_radix_next_odd = 497
149
150 REAL(kind=dp), PARAMETER :: ratio_sparse_alltoall = 0.5_dp
151
152 ! these saved variables are FFT globals
153 INTEGER, SAVE :: fft_type = 0
154 LOGICAL, SAVE :: alltoall_sgl = .false.
155 LOGICAL, SAVE :: use_fftsg_sizes = .true.
156 INTEGER, SAVE :: fft_plan_style = 1
157
158 ! these are only needed for pw_gpu (-D__OFFLOAD)
161 PUBLIC :: yz_to_x, x_to_yz, xz_to_yz, yz_to_xz
163 PUBLIC :: fft_type, fft_plan_style
164
165 INTERFACE fft3d
166 MODULE PROCEDURE fft3d_s, fft3d_ps, fft3d_pb
167 END INTERFACE
168
169! **************************************************************************************************
170
171CONTAINS
172
173! **************************************************************************************************
174!> \brief ...
175!> \param fftlib ...
176!> \param alltoall ...
177!> \param fftsg_sizes ...
178!> \param pool_limit ...
179!> \param wisdom_file ...
180!> \param plan_style ...
181!> \author JGH
182! **************************************************************************************************
183 SUBROUTINE init_fft(fftlib, alltoall, fftsg_sizes, pool_limit, wisdom_file, &
184 plan_style)
185
186 CHARACTER(LEN=*), INTENT(IN) :: fftlib
187 LOGICAL, INTENT(IN) :: alltoall, fftsg_sizes
188 INTEGER, INTENT(IN) :: pool_limit
189 CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
190 INTEGER, INTENT(IN) :: plan_style
191
192 use_fftsg_sizes = fftsg_sizes
193 alltoall_sgl = alltoall
194 fft_pool_scratch_limit = pool_limit
195 fft_type = fft_library(fftlib)
196 fft_plan_style = plan_style
197
198 IF (fft_type <= 0) cpabort("Unknown FFT library: "//trim(fftlib))
199
200 CALL fft_do_init(fft_type, wisdom_file)
201
202 ! setup the FFT scratch pool, if one is associated, clear first
203 CALL release_fft_scratch_pool()
204 CALL init_fft_scratch_pool()
205
206 END SUBROUTINE init_fft
207
208! **************************************************************************************************
209!> \brief does whatever is needed to finalize the current fft setup
210!> \param para_env ...
211!> \param wisdom_file ...
212!> \par History
213!> 10.2007 created [Joost VandeVondele]
214! **************************************************************************************************
215 SUBROUTINE finalize_fft(para_env, wisdom_file)
216 CLASS(mp_comm_type) :: para_env
217 CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
218
219! release the FFT scratch pool
220
221 CALL release_fft_scratch_pool()
222
223 ! finalize fft libs
224
225 CALL fft_do_cleanup(fft_type, wisdom_file, para_env%is_source())
226
227 END SUBROUTINE finalize_fft
228
229! **************************************************************************************************
230!> \brief Determine the allowed lengths of FFT's '''
231!> \param radix_in ...
232!> \param radix_out ...
233!> \param operation ...
234!> \par History
235!> new library structure (JGH)
236!> \author Ari Seitsonen
237! **************************************************************************************************
238 SUBROUTINE fft_radix_operations(radix_in, radix_out, operation)
239
240 INTEGER, INTENT(IN) :: radix_in
241 INTEGER, INTENT(OUT) :: radix_out
242 INTEGER, INTENT(IN) :: operation
243
244 INTEGER, PARAMETER :: fft_type_sg = 1
245
246 INTEGER :: i, iloc, ldata
247 INTEGER, ALLOCATABLE, DIMENSION(:) :: data
248
249 ldata = 1024
250 ALLOCATE (DATA(ldata))
251 DATA = -1
252
253 ! if the user wants to use fftsg sizes go for it
254 IF (use_fftsg_sizes) THEN
255 CALL fft_get_lengths(fft_type_sg, DATA, ldata)
256 ELSE
257 CALL fft_get_lengths(fft_type, DATA, ldata)
258 END IF
259
260 iloc = 0
261 DO i = 1, ldata
262 IF (DATA(i) == radix_in) THEN
263 iloc = i
264 EXIT
265 ELSE
266 IF (operation == fft_radix_allowed) THEN
267 cycle
268 ELSE IF (DATA(i) > radix_in) THEN
269 iloc = i
270 EXIT
271 END IF
272 END IF
273 END DO
274
275 IF (iloc == 0) THEN
276 cpabort("Index to radix array not found.")
277 END IF
278
279 IF (operation == fft_radix_allowed) THEN
280 IF (DATA(iloc) == radix_in) THEN
281 radix_out = fft_radix_allowed
282 ELSE
283 radix_out = fft_radix_disallowed
284 END IF
285
286 ELSE IF (operation == fft_radix_closest) THEN
287 IF (DATA(iloc) == radix_in) THEN
288 radix_out = DATA(iloc)
289 ELSE
290 IF (abs(DATA(iloc - 1) - radix_in) <= &
291 abs(DATA(iloc) - radix_in)) THEN
292 radix_out = DATA(iloc - 1)
293 ELSE
294 radix_out = DATA(iloc)
295 END IF
296 END IF
297
298 ELSE IF (operation == fft_radix_next) THEN
299 radix_out = DATA(iloc)
300
301 ELSE IF (operation == fft_radix_next_odd) THEN
302 DO i = iloc, ldata
303 IF (mod(DATA(i), 2) == 1) THEN
304 radix_out = DATA(i)
305 EXIT
306 END IF
307 END DO
308 IF (mod(radix_out, 2) == 0) THEN
309 cpabort("No odd radix found.")
310 END IF
311
312 ELSE
313 cpabort("Disallowed radix operation.")
314 END IF
315
316 DEALLOCATE (data)
317
318 END SUBROUTINE fft_radix_operations
319
320! **************************************************************************************************
321!> \brief Performs m 1-D forward FFT-s of size n.
322!> \param n size of the FFT
323!> \param m number of FFT-s
324!> \param trans shape of input and output arrays: [n x m] if it is FALSE, [m x n] if it is TRUE
325!> \param zin input array
326!> \param zout output array
327!> \param scale scaling factor
328!> \param stat status of the operation, non-zero code indicates an error
329! **************************************************************************************************
330 SUBROUTINE fft_fw1d(n, m, trans, zin, zout, scale, stat)
331 INTEGER, INTENT(in) :: n, m
332 LOGICAL, INTENT(in) :: trans
333 COMPLEX(kind=dp), DIMENSION(*), INTENT(inout) :: zin, zout
334 REAL(kind=dp), INTENT(in) :: scale
335 INTEGER, INTENT(out) :: stat
336
337 CHARACTER(len=*), PARAMETER :: routinen = 'fft_fw1d'
338
339 INTEGER :: handle
340 TYPE(fft_plan_type) :: fft_plan
341
342 CALL timeset(routinen, handle)
343
344 IF (fft_type == 3) THEN
345 CALL fft_create_plan_1dm(fft_plan, fft_type, fwfft, trans, n, m, zin, zout, fft_plan_style)
346 CALL fft_1dm(fft_plan, zin, zout, scale, stat)
348 ELSE
349 CALL cp_warn(__location__, &
350 "FFT library in use cannot handle transformation of an arbitrary length.")
351 stat = 1
352 END IF
353
354 CALL timestop(handle)
355 END SUBROUTINE fft_fw1d
356
357! **************************************************************************************************
358!> \brief Calls the 3D-FFT function from the initialized library
359!> \param fsign ...
360!> \param n ...
361!> \param zin ...
362!> \param zout ...
363!> \param scale ...
364!> \param status ...
365!> \param debug ...
366!> \par History
367!> none
368!> \author JGH
369! **************************************************************************************************
370 SUBROUTINE fft3d_s(fsign, n, zin, zout, scale, status, debug)
371
372 INTEGER, INTENT(IN) :: fsign
373 INTEGER, DIMENSION(:), INTENT(INOUT) :: n
374 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
375 INTENT(INOUT) :: zin
376 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
377 INTENT(INOUT), OPTIONAL, TARGET :: zout
378 REAL(KIND=dp), INTENT(IN), OPTIONAL :: scale
379 INTEGER, INTENT(OUT), OPTIONAL :: status
380 LOGICAL, INTENT(IN), OPTIONAL :: debug
381
382 CHARACTER(len=*), PARAMETER :: routineN = 'fft3d_s'
383
384 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
385 POINTER :: zoptr
386 COMPLEX(KIND=dp), DIMENSION(1, 1, 1), TARGET :: zdum
387 INTEGER :: handle, ld(3), lo(3), output_unit, sign, &
388 stat
389 LOGICAL :: fft_in_place, test
390 REAL(KIND=dp) :: in_sum, norm, out_sum
391 TYPE(fft_scratch_type), POINTER :: fft_scratch
392
393 CALL timeset(routinen, handle)
394 output_unit = cp_logger_get_default_io_unit()
395
396 IF (PRESENT(scale)) THEN
397 norm = scale
398 ELSE
399 norm = 1.0_dp
400 END IF
401
402 IF (PRESENT(debug)) THEN
403 test = debug
404 ELSE
405 test = .false.
406 END IF
407
408 IF (PRESENT(zout)) THEN
409 fft_in_place = .false.
410 ELSE
411 fft_in_place = .true.
412 END IF
413
414 IF (test) THEN
415 in_sum = sum(abs(zin))
416 END IF
417
418 ld(1) = SIZE(zin, 1)
419 ld(2) = SIZE(zin, 2)
420 ld(3) = SIZE(zin, 3)
421
422 IF (n(1) /= ld(1) .OR. n(2) /= ld(2) .OR. n(3) /= ld(3)) THEN
423 cpabort("Size and dimension (zin) have to be the same.")
424 END IF
425
426 sign = fsign
427 CALL get_fft_scratch(fft_scratch, tf_type=400, n=n)
428
429 IF (fft_in_place) THEN
430 zoptr => zdum
431 IF (fsign == fwfft) THEN
432 CALL fft_3d(fft_scratch%fft_plan(1), norm, zin, zoptr, stat)
433 ELSE
434 CALL fft_3d(fft_scratch%fft_plan(2), norm, zin, zoptr, stat)
435 END IF
436 ELSE
437 IF (fsign == fwfft) THEN
438 CALL fft_3d(fft_scratch%fft_plan(3), norm, zin, zout, stat)
439 ELSE
440 CALL fft_3d(fft_scratch%fft_plan(4), norm, zin, zout, stat)
441 END IF
442 END IF
443
444 CALL release_fft_scratch(fft_scratch)
445
446 IF (PRESENT(zout)) THEN
447 lo(1) = SIZE(zout, 1)
448 lo(2) = SIZE(zout, 2)
449 lo(3) = SIZE(zout, 3)
450 IF (n(1) /= lo(1) .OR. n(2) /= lo(2) .OR. n(3) /= lo(3)) THEN
451 cpabort("Size and dimension (zout) have to be the same.")
452 END IF
453 END IF
454
455 IF (PRESENT(status)) THEN
456 status = stat
457 END IF
458
459 IF (test .AND. output_unit > 0) THEN
460 IF (PRESENT(zout)) THEN
461 out_sum = sum(abs(zout))
462 WRITE (output_unit, '(A)') " Out of place 3D FFT (local) : fft3d_s"
463 WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
464 WRITE (output_unit, '(A,T60,3I7)') " Input array dimensions ", ld
465 WRITE (output_unit, '(A,T60,3I7)') " Output array dimensions ", lo
466 WRITE (output_unit, '(A,T61,E20.14)') " Sum of input data ", in_sum
467 WRITE (output_unit, '(A,T61,E20.14)') " Sum of output data ", out_sum
468 ELSE
469 out_sum = sum(abs(zin))
470 WRITE (output_unit, '(A)') " In place 3D FFT (local) : fft3d_s"
471 WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
472 WRITE (output_unit, '(A,T60,3I7)') " Input/output array dimensions ", ld
473 WRITE (output_unit, '(A,T61,E20.14)') " Sum of input data ", in_sum
474 WRITE (output_unit, '(A,T61,E20.14)') " Sum of output data ", out_sum
475 END IF
476 END IF
477
478 CALL timestop(handle)
479
480 END SUBROUTINE fft3d_s
481
482! **************************************************************************************************
483!> \brief ...
484!> \param fsign ...
485!> \param n ...
486!> \param cin ...
487!> \param gin ...
488!> \param gs_group ...
489!> \param rs_group ...
490!> \param yzp ...
491!> \param nyzray ...
492!> \param bo ...
493!> \param scale ...
494!> \param status ...
495!> \param debug ...
496! **************************************************************************************************
497 SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, &
498 bo, scale, status, debug)
499
500 INTEGER, INTENT(IN) :: fsign
501 INTEGER, DIMENSION(:), INTENT(IN) :: n
502 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
503 INTENT(INOUT) :: cin
504 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
505 INTENT(INOUT) :: gin
506 TYPE(mp_comm_type), INTENT(IN) :: gs_group
507 TYPE(mp_cart_type), INTENT(IN) :: rs_group
508 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
509 INTENT(IN) :: yzp
510 INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nyzray
511 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:, :), &
512 INTENT(IN) :: bo
513 REAL(KIND=dp), INTENT(IN), OPTIONAL :: scale
514 INTEGER, INTENT(OUT), OPTIONAL :: status
515 LOGICAL, INTENT(IN), OPTIONAL :: debug
516
517 CHARACTER(len=*), PARAMETER :: routineN = 'fft3d_ps'
518
519 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
520 POINTER :: pbuf, qbuf, rbuf, sbuf
521 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
522 POINTER :: tbuf
523 INTEGER :: g_pos, handle, lg, lmax, mcx2, mcz1, mcz2, mg, mmax, mx1, mx2, my1, mz2, n1, n2, &
524 nmax, numtask, numtask_g, numtask_r, nx, ny, nz, output_unit, r_dim(2), r_pos(2), rp, &
525 sign, stat
526 INTEGER, ALLOCATABLE, DIMENSION(:) :: p2p
527 LOGICAL :: test
528 REAL(KIND=dp) :: norm, sum_data
529 TYPE(fft_scratch_sizes) :: fft_scratch_size
530 TYPE(fft_scratch_type), POINTER :: fft_scratch
531
532 CALL timeset(routinen, handle)
533 output_unit = cp_logger_get_default_io_unit()
534
535 IF (PRESENT(debug)) THEN
536 test = debug
537 ELSE
538 test = .false.
539 END IF
540
541 numtask_g = gs_group%num_pe
542 g_pos = gs_group%mepos
543 numtask_r = rs_group%num_pe
544 r_dim = rs_group%num_pe_cart
545 r_pos = rs_group%mepos_cart
546 IF (numtask_g /= numtask_r) THEN
547 cpabort("Real space and G space groups are different.")
548 END IF
549 numtask = numtask_r
550
551 IF (PRESENT(scale)) THEN
552 norm = scale
553 ELSE
554 norm = 1.0_dp
555 END IF
556
557 sign = fsign
558
559 lg = SIZE(gin, 1)
560 mg = SIZE(gin, 2)
561
562 nx = SIZE(cin, 1)
563 ny = SIZE(cin, 2)
564 nz = SIZE(cin, 3)
565
566 IF (mg == 0) THEN
567 mmax = 1
568 ELSE
569 mmax = mg
570 END IF
571 lmax = max(lg, (nx*ny*nz)/mmax + 1)
572
573 ALLOCATE (p2p(0:numtask - 1))
574
575 CALL gs_group%rank_compare(rs_group, p2p)
576
577 rp = p2p(g_pos)
578 mx1 = bo(2, 1, rp, 1) - bo(1, 1, rp, 1) + 1
579 my1 = bo(2, 2, rp, 1) - bo(1, 2, rp, 1) + 1
580 mx2 = bo(2, 1, rp, 2) - bo(1, 1, rp, 2) + 1
581 mz2 = bo(2, 3, rp, 2) - bo(1, 3, rp, 2) + 1
582
583 n1 = maxval(bo(2, 1, :, 1) - bo(1, 1, :, 1) + 1)
584 n2 = maxval(bo(2, 2, :, 1) - bo(1, 2, :, 1) + 1)
585 nmax = max((2*n2)/numtask, 2)*mx2*mz2
586 nmax = max(nmax, n1*maxval(nyzray))
587 n1 = maxval(bo(2, 1, :, 2))
588 n2 = maxval(bo(2, 3, :, 2))
589
590 fft_scratch_size%nx = nx
591 fft_scratch_size%ny = ny
592 fft_scratch_size%nz = nz
593 fft_scratch_size%lmax = lmax
594 fft_scratch_size%mmax = mmax
595 fft_scratch_size%mx1 = mx1
596 fft_scratch_size%mx2 = mx2
597 fft_scratch_size%my1 = my1
598 fft_scratch_size%mz2 = mz2
599 fft_scratch_size%lg = lg
600 fft_scratch_size%mg = mg
601 fft_scratch_size%nbx = n1
602 fft_scratch_size%nbz = n2
603 mcz1 = maxval(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
604 mcx2 = maxval(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
605 mcz2 = maxval(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
606 fft_scratch_size%mcz1 = mcz1
607 fft_scratch_size%mcx2 = mcx2
608 fft_scratch_size%mcz2 = mcz2
609 fft_scratch_size%nmax = nmax
610 fft_scratch_size%nmray = maxval(nyzray)
611 fft_scratch_size%nyzray = nyzray(g_pos)
612 fft_scratch_size%gs_group = gs_group
613 fft_scratch_size%rs_group = rs_group
614 fft_scratch_size%g_pos = g_pos
615 fft_scratch_size%r_pos = r_pos
616 fft_scratch_size%r_dim = r_dim
617 fft_scratch_size%numtask = numtask
618
619 IF (test) THEN
620 IF (g_pos == 0 .AND. output_unit > 0) THEN
621 WRITE (output_unit, '(A)') " Parallel 3D FFT : fft3d_ps"
622 WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
623 WRITE (output_unit, '(A,T67,2I7)') " Array dimensions (gin) ", lg, mg
624 WRITE (output_unit, '(A,T60,3I7)') " Array dimensions (cin) ", nx, ny, nz
625 END IF
626 END IF
627
628 IF (r_dim(2) > 1) THEN
629
630 !
631 ! real space is distributed over x and y coordinate
632 ! we have two stages of communication
633 !
634
635 IF (r_dim(1) == 1) THEN
636 cpabort("This processor distribution is not supported.")
637 END IF
638 CALL get_fft_scratch(fft_scratch, tf_type=300, n=n, fft_sizes=fft_scratch_size)
639
640 IF (sign == fwfft) THEN
641 ! cin -> gin
642
643 IF (test) THEN
644 sum_data = abs(sum(cin))
645 CALL gs_group%sum(sum_data)
646 IF (g_pos == 0 .AND. output_unit > 0) THEN
647 WRITE (output_unit, '(A)') " Two step communication algorithm "
648 WRITE (output_unit, '(A,T60,3I7)') " Transform Z ", n(3), mx1*my1
649 WRITE (output_unit, '(A,T60,3I7)') " Transform Y ", n(2), mx2*mz2
650 WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
651 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
652 END IF
653 END IF
654
655 pbuf => fft_scratch%p1buf
656 qbuf => fft_scratch%p2buf
657
658 ! FFT along z
659 CALL fft_1dm(fft_scratch%fft_plan(1), cin, qbuf, norm, stat)
660
661 rbuf => fft_scratch%p3buf
662
663 IF (test) THEN
664 sum_data = abs(sum(qbuf))
665 CALL gs_group%sum(sum_data)
666 IF (g_pos == 0 .AND. output_unit > 0) THEN
667 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
668 END IF
669 END IF
670
671 ! Exchange data ( transpose of matrix )
672 CALL cube_transpose_2(qbuf, bo(:, :, :, 1), bo(:, :, :, 2), rbuf, fft_scratch)
673
674 IF (test) THEN
675 sum_data = abs(sum(rbuf))
676 CALL gs_group%sum(sum_data)
677 IF (g_pos == 0 .AND. output_unit > 0) THEN
678 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) T", sum_data
679 END IF
680 END IF
681
682 pbuf => fft_scratch%p4buf
683
684 ! FFT along y
685 CALL fft_1dm(fft_scratch%fft_plan(2), rbuf, pbuf, 1.0_dp, stat)
686
687 qbuf => fft_scratch%p5buf
688
689 IF (test) THEN
690 sum_data = abs(sum(pbuf))
691 CALL gs_group%sum(sum_data)
692 IF (g_pos == 0 .AND. output_unit > 0) THEN
693 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) TS", sum_data
694 END IF
695 END IF
696
697 ! Exchange data ( transpose of matrix ) and sort
698 CALL xz_to_yz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
699 bo(:, :, :, 2), qbuf, fft_scratch)
700
701 IF (test) THEN
702 sum_data = abs(sum(qbuf))
703 CALL gs_group%sum(sum_data)
704 IF (g_pos == 0 .AND. output_unit > 0) THEN
705 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) TS", sum_data
706 END IF
707 END IF
708
709 ! FFT along x
710 CALL fft_1dm(fft_scratch%fft_plan(3), qbuf, gin, 1.0_dp, stat)
711
712 IF (test) THEN
713 sum_data = abs(sum(gin))
714 CALL gs_group%sum(sum_data)
715 IF (g_pos == 0 .AND. output_unit > 0) THEN
716 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
717 END IF
718 END IF
719
720 ELSE IF (sign == bwfft) THEN
721 ! gin -> cin
722
723 IF (test) THEN
724 sum_data = abs(sum(gin))
725 CALL gs_group%sum(sum_data)
726 IF (g_pos == 0 .AND. output_unit > 0) THEN
727 WRITE (output_unit, '(A)') " Two step communication algorithm "
728 WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
729 WRITE (output_unit, '(A,T60,3I7)') " Transform Y ", n(2), mx2*mz2
730 WRITE (output_unit, '(A,T60,3I7)') " Transform Z ", n(3), mx1*my1
731 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
732 END IF
733 END IF
734
735 pbuf => fft_scratch%p7buf
736
737 ! FFT along x
738 CALL fft_1dm(fft_scratch%fft_plan(4), gin, pbuf, norm, stat)
739
740 qbuf => fft_scratch%p4buf
741
742 IF (test) THEN
743 sum_data = abs(sum(pbuf))
744 CALL gs_group%sum(sum_data)
745 IF (g_pos == 0 .AND. output_unit > 0) THEN
746 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) TS", sum_data
747 END IF
748 END IF
749
750 ! Exchange data ( transpose of matrix ) and sort
751 CALL yz_to_xz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
752 bo(:, :, :, 2), qbuf, fft_scratch)
753
754 IF (test) THEN
755 sum_data = abs(sum(qbuf))
756 CALL gs_group%sum(sum_data)
757 IF (g_pos == 0 .AND. output_unit > 0) THEN
758 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) TS", sum_data
759 END IF
760 END IF
761
762 rbuf => fft_scratch%p3buf
763
764 ! FFT along y
765 CALL fft_1dm(fft_scratch%fft_plan(5), qbuf, rbuf, 1.0_dp, stat)
766
767 pbuf => fft_scratch%p2buf
768
769 IF (test) THEN
770 sum_data = abs(sum(rbuf))
771 CALL gs_group%sum(sum_data)
772 IF (g_pos == 0 .AND. output_unit > 0) THEN
773 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) T", sum_data
774 END IF
775 END IF
776
777 ! Exchange data ( transpose of matrix )
778 CALL cube_transpose_1(rbuf, bo(:, :, :, 2), bo(:, :, :, 1), pbuf, fft_scratch)
779
780 IF (test) THEN
781 sum_data = abs(sum(pbuf))
782 CALL gs_group%sum(sum_data)
783 IF (g_pos == 0 .AND. output_unit > 0) THEN
784 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) T", sum_data
785 END IF
786 END IF
787
788 qbuf => fft_scratch%p1buf
789
790 ! FFT along z
791 CALL fft_1dm(fft_scratch%fft_plan(6), pbuf, cin, 1.0_dp, stat)
792
793 IF (test) THEN
794 sum_data = abs(sum(cin))
795 CALL gs_group%sum(sum_data)
796 IF (g_pos == 0 .AND. output_unit > 0) THEN
797 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
798 END IF
799 END IF
800
801 ELSE
802
803 cpabort("Illegal fsign parameter.")
804
805 END IF
806
807 CALL release_fft_scratch(fft_scratch)
808
809 ELSE
810
811 !
812 ! real space is only distributed over x coordinate
813 ! we have one stage of communication, after the transform of
814 ! direction x
815 !
816
817 CALL get_fft_scratch(fft_scratch, tf_type=200, n=n, fft_sizes=fft_scratch_size)
818
819 sbuf => fft_scratch%r1buf
820 tbuf => fft_scratch%tbuf
821
822 sbuf = z_zero
823 tbuf = z_zero
824
825 IF (sign == fwfft) THEN
826 ! cin -> gin
827
828 IF (test) THEN
829 sum_data = abs(sum(cin))
830 CALL gs_group%sum(sum_data)
831 IF (g_pos == 0 .AND. output_unit > 0) THEN
832 WRITE (output_unit, '(A)') " One step communication algorithm "
833 WRITE (output_unit, '(A,T60,3I7)') " Transform YZ ", n(2), n(3), nx
834 WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
835 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
836 END IF
837 END IF
838
839 ! FFT along y and z
840 CALL fft_1dm(fft_scratch%fft_plan(1), cin, sbuf, 1._dp, stat)
841 CALL fft_1dm(fft_scratch%fft_plan(2), sbuf, tbuf, 1._dp, stat)
842
843 IF (test) THEN
844 sum_data = abs(sum(tbuf))
845 CALL gs_group%sum(sum_data)
846 IF (g_pos == 0 .AND. output_unit > 0) THEN
847 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) TS", sum_data
848 END IF
849 END IF
850
851 ! Exchange data ( transpose of matrix ) and sort
852 CALL yz_to_x(tbuf, gs_group, g_pos, p2p, yzp, nyzray, &
853 bo(:, :, :, 2), sbuf, fft_scratch)
854
855 IF (test) THEN
856 sum_data = abs(sum(sbuf))
857 CALL gs_group%sum(sum_data)
858 IF (g_pos == 0 .AND. output_unit > 0) THEN
859 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) TS", sum_data
860 END IF
861 END IF
862 ! FFT along x
863 CALL fft_1dm(fft_scratch%fft_plan(3), sbuf, gin, norm, stat)
864
865 IF (test) THEN
866 sum_data = abs(sum(gin))
867 CALL gs_group%sum(sum_data)
868 IF (g_pos == 0 .AND. output_unit > 0) THEN
869 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
870 END IF
871 END IF
872
873 ELSE IF (sign == bwfft) THEN
874 ! gin -> cin
875
876 IF (test) THEN
877 sum_data = abs(sum(gin))
878 CALL gs_group%sum(sum_data)
879 IF (g_pos == 0 .AND. output_unit > 0) THEN
880 WRITE (output_unit, '(A)') " One step communication algorithm "
881 WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
882 WRITE (output_unit, '(A,T60,3I7)') " Transform YZ ", n(2), n(3), nx
883 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
884 END IF
885 END IF
886
887 ! FFT along x
888 CALL fft_1dm(fft_scratch%fft_plan(4), gin, sbuf, norm, stat)
889
890 IF (test) THEN
891 sum_data = abs(sum(sbuf))
892 CALL gs_group%sum(sum_data)
893 IF (g_pos == 0 .AND. output_unit > 0) THEN
894 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) TS", sum_data
895 END IF
896 END IF
897
898 ! Exchange data ( transpose of matrix ) and sort
899 CALL x_to_yz(sbuf, gs_group, g_pos, p2p, yzp, nyzray, &
900 bo(:, :, :, 2), tbuf, fft_scratch)
901
902 IF (test) THEN
903 sum_data = abs(sum(tbuf))
904 CALL gs_group%sum(sum_data)
905 IF (g_pos == 0 .AND. output_unit > 0) THEN
906 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) TS", sum_data
907 END IF
908 END IF
909
910 ! FFT along y and z
911 CALL fft_1dm(fft_scratch%fft_plan(5), tbuf, sbuf, 1._dp, stat)
912 CALL fft_1dm(fft_scratch%fft_plan(6), sbuf, cin, 1._dp, stat)
913
914 IF (test) THEN
915 sum_data = abs(sum(cin))
916 CALL gs_group%sum(sum_data)
917 IF (g_pos == 0 .AND. output_unit > 0) THEN
918 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
919 END IF
920 END IF
921 ELSE
922 cpabort("Illegal fsign parameter.")
923 END IF
924
925 CALL release_fft_scratch(fft_scratch)
926
927 END IF
928
929 DEALLOCATE (p2p)
930
931 IF (PRESENT(status)) THEN
932 status = stat
933 END IF
934 CALL timestop(handle)
935
936 END SUBROUTINE fft3d_ps
937
938! **************************************************************************************************
939!> \brief ...
940!> \param fsign ...
941!> \param n ...
942!> \param zin ...
943!> \param gin ...
944!> \param group ...
945!> \param bo ...
946!> \param scale ...
947!> \param status ...
948!> \param debug ...
949! **************************************************************************************************
950 SUBROUTINE fft3d_pb(fsign, n, zin, gin, group, bo, scale, status, debug)
951
952 INTEGER, INTENT(IN) :: fsign
953 INTEGER, DIMENSION(3), INTENT(IN) :: n
954 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
955 INTENT(INOUT) :: zin
956 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
957 INTENT(INOUT) :: gin
958 TYPE(mp_cart_type), INTENT(IN) :: group
959 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:, :), &
960 INTENT(IN) :: bo
961 REAL(KIND=dp), INTENT(IN), OPTIONAL :: scale
962 INTEGER, INTENT(OUT), OPTIONAL :: status
963 LOGICAL, INTENT(IN), OPTIONAL :: debug
964
965 CHARACTER(len=*), PARAMETER :: routineN = 'fft3d_pb'
966
967 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
968 POINTER :: abuf, bbuf
969 INTEGER :: handle, lg(2), lz(3), mcx2, mcy3, mcz1, &
970 mcz2, mx1, mx2, mx3, my1, my2, my3, &
971 my_pos, mz1, mz2, mz3, output_unit, &
972 sign, stat
973 INTEGER, DIMENSION(2) :: dim
974 LOGICAL :: test
975 REAL(KIND=dp) :: norm, sum_data
976 TYPE(fft_scratch_sizes) :: fft_scratch_size
977 TYPE(fft_scratch_type), POINTER :: fft_scratch
978
979!------------------------------------------------------------------------------
980! "Real Space" 1) xyZ or 1) xYZ
981! 2) xYz or not used
982! "G Space" 3) Xyz or 3) XYz
983!
984! There is one communicator (2-dimensional) for all distributions
985! np = n1 * n2, where np is the total number of processors
986! If n2 = 1, we have the second case and only one transpose step is needed
987!
988! Assignment of dimensions to axis for different steps
989! First case: 1) n1=x; n2=y
990! 2) n1=x; n2=z
991! 3) n1=y; n2=z
992! Second case 1) n1=x
993! 3) n1=z
994!
995! The more general case with two communicators for the initial and final
996! distribution is not covered.
997!------------------------------------------------------------------------------
998
999 CALL timeset(routinen, handle)
1000 output_unit = cp_logger_get_default_io_unit()
1001
1002 dim = group%num_pe_cart
1003 my_pos = group%mepos
1004
1005 IF (PRESENT(debug)) THEN
1006 test = debug
1007 ELSE
1008 test = .false.
1009 END IF
1010
1011 IF (PRESENT(scale)) THEN
1012 norm = scale
1013 ELSE
1014 norm = 1.0_dp
1015 END IF
1016
1017 sign = fsign
1018
1019 IF (test) THEN
1020 lg(1) = SIZE(gin, 1)
1021 lg(2) = SIZE(gin, 2)
1022 lz(1) = SIZE(zin, 1)
1023 lz(2) = SIZE(zin, 2)
1024 lz(3) = SIZE(zin, 3)
1025 IF (my_pos == 0 .AND. output_unit > 0) THEN
1026 WRITE (output_unit, '(A)') " Parallel 3D FFT : fft3d_pb"
1027 WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
1028 WRITE (output_unit, '(A,T67,2I7)') " Array dimensions (gin) ", lg
1029 WRITE (output_unit, '(A,T60,3I7)') " Array dimensions (cin) ", lz
1030 END IF
1031 END IF
1032
1033 mx1 = bo(2, 1, my_pos, 1) - bo(1, 1, my_pos, 1) + 1
1034 my1 = bo(2, 2, my_pos, 1) - bo(1, 2, my_pos, 1) + 1
1035 mz1 = bo(2, 3, my_pos, 1) - bo(1, 3, my_pos, 1) + 1
1036 mx2 = bo(2, 1, my_pos, 2) - bo(1, 1, my_pos, 2) + 1
1037 my2 = bo(2, 2, my_pos, 2) - bo(1, 2, my_pos, 2) + 1
1038 mz2 = bo(2, 3, my_pos, 2) - bo(1, 3, my_pos, 2) + 1
1039 mx3 = bo(2, 1, my_pos, 3) - bo(1, 1, my_pos, 3) + 1
1040 my3 = bo(2, 2, my_pos, 3) - bo(1, 2, my_pos, 3) + 1
1041 mz3 = bo(2, 3, my_pos, 3) - bo(1, 3, my_pos, 3) + 1
1042 fft_scratch_size%mx1 = mx1
1043 fft_scratch_size%mx2 = mx2
1044 fft_scratch_size%mx3 = mx3
1045 fft_scratch_size%my1 = my1
1046 fft_scratch_size%my2 = my2
1047 fft_scratch_size%my3 = my3
1048 fft_scratch_size%mz1 = mz1
1049 fft_scratch_size%mz2 = mz2
1050 fft_scratch_size%mz3 = mz3
1051 mcz1 = maxval(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
1052 mcx2 = maxval(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
1053 mcz2 = maxval(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
1054 mcy3 = maxval(bo(2, 2, :, 3) - bo(1, 2, :, 3) + 1)
1055 fft_scratch_size%mcz1 = mcz1
1056 fft_scratch_size%mcx2 = mcx2
1057 fft_scratch_size%mcz2 = mcz2
1058 fft_scratch_size%mcy3 = mcy3
1059 fft_scratch_size%gs_group = group
1060 fft_scratch_size%rs_group = group
1061 fft_scratch_size%g_pos = my_pos
1062 fft_scratch_size%numtask = dim(1)*dim(2)
1063
1064 IF (dim(1) > 1 .AND. dim(2) > 1) THEN
1065
1066 !
1067 ! First case; two stages of communication
1068 !
1069
1070 CALL get_fft_scratch(fft_scratch, tf_type=100, n=n, fft_sizes=fft_scratch_size)
1071
1072 IF (sign == fwfft) THEN
1073 ! Stage 1 -> 3
1074
1075 bbuf => fft_scratch%a2buf
1076
1077 IF (test) THEN
1078 sum_data = abs(sum(zin))
1079 CALL group%sum(sum_data)
1080 IF (my_pos == 0 .AND. output_unit > 0) THEN
1081 WRITE (output_unit, '(A)') " Two step communication algorithm "
1082 WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1083 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1084 END IF
1085 END IF
1086
1087 ! FFT along z
1088 CALL fft_1dm(fft_scratch%fft_plan(1), zin, bbuf, norm, stat)
1089
1090 abuf => fft_scratch%a3buf
1091
1092 IF (test) THEN
1093 sum_data = abs(sum(bbuf))
1094 CALL group%sum(sum_data)
1095 IF (my_pos == 0 .AND. output_unit > 0) THEN
1096 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1097 END IF
1098 END IF
1099
1100 CALL cube_transpose_2(bbuf, bo(:, :, :, 1), bo(:, :, :, 2), abuf, fft_scratch)
1101
1102 bbuf => fft_scratch%a4buf
1103
1104 IF (test) THEN
1105 sum_data = abs(sum(abuf))
1106 CALL group%sum(sum_data)
1107 IF (my_pos == 0 .AND. output_unit > 0) THEN
1108 WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx2*mz2
1109 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1110 END IF
1111 END IF
1112
1113 ! FFT along y
1114 CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
1115
1116 abuf => fft_scratch%a5buf
1117
1118 IF (test) THEN
1119 sum_data = abs(sum(bbuf))
1120 CALL group%sum(sum_data)
1121 IF (my_pos == 0 .AND. output_unit > 0) THEN
1122 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) T", sum_data
1123 END IF
1124 END IF
1125
1126 CALL cube_transpose_4(bbuf, bo(:, :, :, 2), bo(:, :, :, 3), abuf, fft_scratch)
1127
1128 IF (test) THEN
1129 sum_data = abs(sum(abuf))
1130 CALL group%sum(sum_data)
1131 IF (my_pos == 0 .AND. output_unit > 0) THEN
1132 WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1133 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) ", sum_data
1134 END IF
1135 END IF
1136
1137 ! FFT along x
1138 CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
1139
1140 IF (test) THEN
1141 sum_data = abs(sum(gin))
1142 CALL group%sum(sum_data)
1143 IF (my_pos == 0 .AND. output_unit > 0) THEN
1144 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
1145 END IF
1146 END IF
1147
1148 ELSEIF (sign == bwfft) THEN
1149 ! Stage 3 -> 1
1150
1151 bbuf => fft_scratch%a5buf
1152
1153 IF (test) THEN
1154 sum_data = abs(sum(gin))
1155 CALL group%sum(sum_data)
1156 IF (my_pos == 0 .AND. output_unit > 0) THEN
1157 WRITE (output_unit, '(A)') " Two step communication algorithm "
1158 WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1159 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1160 END IF
1161 END IF
1162
1163 ! FFT along x
1164 CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
1165
1166 abuf => fft_scratch%a4buf
1167
1168 IF (test) THEN
1169 sum_data = abs(sum(bbuf))
1170 CALL group%sum(sum_data)
1171 IF (my_pos == 0 .AND. output_unit > 0) THEN
1172 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1173 END IF
1174 END IF
1175
1176 CALL cube_transpose_3(bbuf, bo(:, :, :, 3), bo(:, :, :, 2), abuf, fft_scratch)
1177
1178 bbuf => fft_scratch%a3buf
1179
1180 IF (test) THEN
1181 sum_data = abs(sum(abuf))
1182 CALL group%sum(sum_data)
1183 IF (my_pos == 0 .AND. output_unit > 0) THEN
1184 WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx2*mz2
1185 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1186 END IF
1187 END IF
1188
1189 ! FFT along y
1190 CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
1191
1192 abuf => fft_scratch%a2buf
1193
1194 IF (test) THEN
1195 sum_data = abs(sum(bbuf))
1196 CALL group%sum(sum_data)
1197 IF (my_pos == 0 .AND. output_unit > 0) THEN
1198 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) T", sum_data
1199 END IF
1200 END IF
1201
1202 CALL cube_transpose_1(bbuf, bo(:, :, :, 2), bo(:, :, :, 1), abuf, fft_scratch)
1203
1204 IF (test) THEN
1205 sum_data = abs(sum(abuf))
1206 CALL group%sum(sum_data)
1207 IF (my_pos == 0 .AND. output_unit > 0) THEN
1208 WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1209 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) ", sum_data
1210 END IF
1211 END IF
1212
1213 ! FFT along z
1214 CALL fft_1dm(fft_scratch%fft_plan(6), abuf, zin, norm, stat)
1215
1216 IF (test) THEN
1217 sum_data = abs(sum(zin))
1218 CALL group%sum(sum_data)
1219 IF (my_pos == 0 .AND. output_unit > 0) THEN
1220 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
1221 END IF
1222 END IF
1223
1224 ELSE
1225 cpabort("Illegal fsign parameter.")
1226 END IF
1227
1228 CALL release_fft_scratch(fft_scratch)
1229
1230 ELSEIF (dim(2) == 1) THEN
1231
1232 !
1233 ! Second case; one stage of communication
1234 !
1235
1236 CALL get_fft_scratch(fft_scratch, tf_type=101, n=n, fft_sizes=fft_scratch_size)
1237
1238 IF (sign == fwfft) THEN
1239 ! Stage 1 -> 3
1240
1241 IF (test) THEN
1242 sum_data = abs(sum(zin))
1243 CALL group%sum(sum_data)
1244 IF (my_pos == 0 .AND. output_unit > 0) THEN
1245 WRITE (output_unit, '(A)') " one step communication algorithm "
1246 WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1247 WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx1*mz1
1248 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1249 END IF
1250 END IF
1251
1252 abuf => fft_scratch%a3buf
1253 bbuf => fft_scratch%a4buf
1254 ! FFT along z and y
1255 CALL fft_1dm(fft_scratch%fft_plan(1), zin, abuf, norm, stat)
1256 CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
1257
1258 abuf => fft_scratch%a5buf
1259
1260 IF (test) THEN
1261 sum_data = abs(sum(bbuf))
1262 CALL group%sum(sum_data)
1263 IF (my_pos == 0 .AND. output_unit > 0) THEN
1264 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1265 END IF
1266 END IF
1267
1268 CALL cube_transpose_6(bbuf, group, bo(:, :, :, 1), bo(:, :, :, 3), abuf, fft_scratch)
1269
1270 IF (test) THEN
1271 sum_data = abs(sum(abuf))
1272 CALL group%sum(sum_data)
1273 IF (my_pos == 0 .AND. output_unit > 0) THEN
1274 WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1275 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1276 END IF
1277 END IF
1278
1279 ! FFT along x
1280 CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
1281
1282 IF (test) THEN
1283 sum_data = abs(sum(gin))
1284 CALL group%sum(sum_data)
1285 IF (my_pos == 0 .AND. output_unit > 0) THEN
1286 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
1287 END IF
1288 END IF
1289
1290 ELSEIF (sign == bwfft) THEN
1291 ! Stage 3 -> 1
1292
1293 IF (test) THEN
1294 sum_data = abs(sum(gin))
1295 CALL group%sum(sum_data)
1296 IF (my_pos == 0 .AND. output_unit > 0) THEN
1297 WRITE (output_unit, '(A)') " one step communication algorithm "
1298 WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1299 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1300 END IF
1301 END IF
1302
1303 bbuf => fft_scratch%a5buf
1304
1305 ! FFT along x
1306 CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
1307
1308 abuf => fft_scratch%a4buf
1309
1310 IF (test) THEN
1311 sum_data = abs(sum(bbuf))
1312 CALL group%sum(sum_data)
1313 IF (my_pos == 0 .AND. output_unit > 0) THEN
1314 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1315 END IF
1316 END IF
1317
1318 CALL cube_transpose_5(bbuf, group, bo(:, :, :, 3), bo(:, :, :, 1), abuf, fft_scratch)
1319
1320 bbuf => fft_scratch%a3buf
1321
1322 IF (test) THEN
1323 sum_data = abs(sum(abuf))
1324 CALL group%sum(sum_data)
1325 IF (my_pos == 0 .AND. output_unit > 0) THEN
1326 WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx1*mz1
1327 WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1328 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1329 END IF
1330 END IF
1331
1332 ! FFT along y
1333 CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
1334
1335 ! FFT along z
1336 CALL fft_1dm(fft_scratch%fft_plan(6), bbuf, zin, norm, stat)
1337
1338 IF (test) THEN
1339 sum_data = abs(sum(zin))
1340 CALL group%sum(sum_data)
1341 IF (my_pos == 0 .AND. output_unit > 0) THEN
1342 WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
1343 END IF
1344 END IF
1345
1346 ELSE
1347 cpabort("Illegal fsign parameter.")
1348 END IF
1349
1350 CALL release_fft_scratch(fft_scratch)
1351
1352 ELSE
1353
1354 cpabort("This partition not implemented.")
1355
1356 END IF
1357
1358 IF (PRESENT(status)) THEN
1359 status = stat
1360 END IF
1361
1362 CALL timestop(handle)
1363
1364 END SUBROUTINE fft3d_pb
1365
1366! **************************************************************************************************
1367!> \brief ...
1368!> \param sb ...
1369!> \param group ...
1370!> \param my_pos ...
1371!> \param p2p ...
1372!> \param yzp ...
1373!> \param nray ...
1374!> \param bo ...
1375!> \param tb ...
1376!> \param fft_scratch ...
1377!> \par History
1378!> 15. Feb. 2006 : single precision all_to_all
1379!> \author JGH (14-Jan-2001)
1380! **************************************************************************************************
1381 SUBROUTINE x_to_yz(sb, group, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1382
1383 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1384 INTENT(IN) :: sb
1385 TYPE(mp_comm_type), INTENT(IN) :: group
1386 INTEGER, INTENT(IN) :: my_pos
1387 INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1388 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
1389 INTENT(IN) :: yzp
1390 INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1391 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
1392 INTENT(IN) :: bo
1393 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
1394 INTENT(INOUT) :: tb
1395 TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
1396
1397 CHARACTER(len=*), PARAMETER :: routinen = 'x_to_yz'
1398
1399 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1400 POINTER :: rr
1401 COMPLEX(KIND=sp), CONTIGUOUS, DIMENSION(:, :), &
1402 POINTER :: ss, tt
1403 INTEGER :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
1404 nm, np, nr, nx
1405 INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
1406
1407 CALL timeset(routinen, handle)
1408
1409 np = SIZE(p2p)
1410 scount => fft_scratch%scount
1411 rcount => fft_scratch%rcount
1412 sdispl => fft_scratch%sdispl
1413 rdispl => fft_scratch%rdispl
1414
1415 IF (alltoall_sgl) THEN
1416 ss => fft_scratch%ss
1417 tt => fft_scratch%tt
1418 ss(:, :) = cmplx(sb(:, :), kind=sp)
1419 tt(:, :) = 0._sp
1420 ELSE
1421 rr => fft_scratch%rr
1422 END IF
1423
1424 mpr = p2p(my_pos)
1425 nm = maxval(nray(0:np - 1))
1426 nr = nray(my_pos)
1427!$OMP PARALLEL DO DEFAULT(NONE), &
1428!$OMP PRIVATE(ix,nx), &
1429!$OMP SHARED(np,p2p,bo,nr,scount,sdispl)
1430 DO ip = 0, np - 1
1431 ix = p2p(ip)
1432 nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
1433 scount(ip) = nr*nx
1434 sdispl(ip) = nr*(bo(1, 1, ix) - 1)
1435 END DO
1436!$OMP END PARALLEL DO
1437 nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1438!$OMP PARALLEL DO DEFAULT(NONE), &
1439!$OMP PRIVATE(nr), &
1440!$OMP SHARED(np,nray,nx,rcount,rdispl,nm)
1441 DO ip = 0, np - 1
1442 nr = nray(ip)
1443 rcount(ip) = nr*nx
1444 rdispl(ip) = nm*nx*ip
1445 END DO
1446!$OMP END PARALLEL DO
1447 IF (alltoall_sgl) THEN
1448 CALL group%alltoall(ss, scount, sdispl, tt, rcount, rdispl)
1449 ELSE
1450 CALL group%alltoall(sb, scount, sdispl, rr, rcount, rdispl)
1451 END IF
1452
1453 nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1454!$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
1455!$OMP PRIVATE(ixx,ir,iy,iz,ix) &
1456!$OMP SHARED(np,nray,nx,alltoall_sgl,yzp,tt,rr,tb)
1457 DO ip = 0, np - 1
1458 DO ix = 1, nx
1459 ixx = nray(ip)*(ix - 1)
1460 IF (alltoall_sgl) THEN
1461 DO ir = 1, nray(ip)
1462 iy = yzp(1, ir, ip)
1463 iz = yzp(2, ir, ip)
1464 tb(iy, iz, ix) = tt(ir + ixx, ip)
1465 END DO
1466 ELSE
1467 DO ir = 1, nray(ip)
1468 iy = yzp(1, ir, ip)
1469 iz = yzp(2, ir, ip)
1470 tb(iy, iz, ix) = rr(ir + ixx, ip)
1471 END DO
1472 END IF
1473 END DO
1474 END DO
1475!$OMP END PARALLEL DO
1476
1477 CALL timestop(handle)
1478
1479 END SUBROUTINE x_to_yz
1480
1481! **************************************************************************************************
1482!> \brief ...
1483!> \param tb ...
1484!> \param group ...
1485!> \param my_pos ...
1486!> \param p2p ...
1487!> \param yzp ...
1488!> \param nray ...
1489!> \param bo ...
1490!> \param sb ...
1491!> \param fft_scratch ...
1492!> \par History
1493!> 15. Feb. 2006 : single precision all_to_all
1494!> \author JGH (14-Jan-2001)
1495! **************************************************************************************************
1496 SUBROUTINE yz_to_x(tb, group, my_pos, p2p, yzp, nray, bo, sb, fft_scratch)
1497
1498 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
1499 INTENT(IN) :: tb
1500 TYPE(mp_comm_type), INTENT(IN) :: group
1501 INTEGER, INTENT(IN) :: my_pos
1502 INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1503 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
1504 INTENT(IN) :: yzp
1505 INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1506 INTEGER, DIMENSION(:, :, 0:), INTENT(IN) :: bo
1507 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1508 INTENT(INOUT) :: sb
1509 TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
1510
1511 CHARACTER(len=*), PARAMETER :: routinen = 'yz_to_x'
1512
1513 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1514 POINTER :: rr
1515 COMPLEX(KIND=sp), CONTIGUOUS, DIMENSION(:, :), &
1516 POINTER :: ss, tt
1517 INTEGER :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
1518 nm, np, nr, nx
1519 INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
1520
1521 CALL timeset(routinen, handle)
1522
1523 np = SIZE(p2p)
1524 mpr = p2p(my_pos)
1525 scount => fft_scratch%scount
1526 rcount => fft_scratch%rcount
1527 sdispl => fft_scratch%sdispl
1528 rdispl => fft_scratch%rdispl
1529
1530 IF (alltoall_sgl) THEN
1531 ss => fft_scratch%ss
1532 tt => fft_scratch%tt
1533 ss = 0._sp
1534 tt = 0._sp
1535 ELSE
1536 rr => fft_scratch%rr
1537 END IF
1538
1539 nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1540!$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
1541!$OMP PRIVATE(ip, ixx, ir, iy, iz, ix) &
1542!$OMP SHARED(np,nray,nx,alltoall_sgl,yzp,tb,tt,rr)
1543 DO ip = 0, np - 1
1544 DO ix = 1, nx
1545 ixx = nray(ip)*(ix - 1)
1546 IF (alltoall_sgl) THEN
1547 DO ir = 1, nray(ip)
1548 iy = yzp(1, ir, ip)
1549 iz = yzp(2, ir, ip)
1550 tt(ir + ixx, ip) = cmplx(tb(iy, iz, ix), kind=sp)
1551 END DO
1552 ELSE
1553 DO ir = 1, nray(ip)
1554 iy = yzp(1, ir, ip)
1555 iz = yzp(2, ir, ip)
1556 rr(ir + ixx, ip) = tb(iy, iz, ix)
1557 END DO
1558 END IF
1559 END DO
1560 END DO
1561!$OMP END PARALLEL DO
1562 nm = maxval(nray(0:np - 1))
1563 nr = nray(my_pos)
1564!$OMP PARALLEL DO DEFAULT(NONE), &
1565!$OMP PRIVATE(ix,nx), &
1566!$OMP SHARED(np,p2p,bo,rcount,rdispl,nr)
1567 DO ip = 0, np - 1
1568 ix = p2p(ip)
1569 nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
1570 rcount(ip) = nr*nx
1571 rdispl(ip) = nr*(bo(1, 1, ix) - 1)
1572 END DO
1573!$OMP END PARALLEL DO
1574 nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1575!$OMP PARALLEL DO DEFAULT(NONE), &
1576!$OMP PRIVATE(nr), &
1577!$OMP SHARED(np,nray,scount,sdispl,nx,nm)
1578 DO ip = 0, np - 1
1579 nr = nray(ip)
1580 scount(ip) = nr*nx
1581 sdispl(ip) = nm*nx*ip
1582 END DO
1583!$OMP END PARALLEL DO
1584
1585 IF (alltoall_sgl) THEN
1586 CALL group%alltoall(tt, scount, sdispl, ss, rcount, rdispl)
1587 sb = ss
1588 ELSE
1589 CALL group%alltoall(rr, scount, sdispl, sb, rcount, rdispl)
1590 END IF
1591
1592 CALL timestop(handle)
1593
1594 END SUBROUTINE yz_to_x
1595
1596! **************************************************************************************************
1597!> \brief ...
1598!> \param sb ...
1599!> \param group ...
1600!> \param dims ...
1601!> \param my_pos ...
1602!> \param p2p ...
1603!> \param yzp ...
1604!> \param nray ...
1605!> \param bo ...
1606!> \param tb ...
1607!> \param fft_scratch ...
1608!> \par History
1609!> 15. Feb. 2006 : single precision all_to_all
1610!> \author JGH (18-Jan-2001)
1611! **************************************************************************************************
1612 SUBROUTINE yz_to_xz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1613
1614 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1615 INTENT(IN) :: sb
1616
1617 CLASS(mp_comm_type), INTENT(IN) :: group
1618 INTEGER, DIMENSION(2), INTENT(IN) :: dims
1619 INTEGER, INTENT(IN) :: my_pos
1620 INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1621 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: yzp
1622 INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1623 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: bo
1624 COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT), CONTIGUOUS :: tb
1625 TYPE(fft_scratch_type), INTENT(INOUT) :: fft_scratch
1626
1627 CHARACTER(len=*), PARAMETER :: routinen = 'yz_to_xz'
1628
1629 COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf, yzbuf
1630 COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf_sgl, yzbuf_sgl
1631 INTEGER :: handle, icrs, ip, ipl, ipr, ir, ix, iz, &
1632 jj, jx, jy, jz, myx, myz, np, npx, &
1633 npz, nx, nz, rs_pos
1634 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: pzcoord, rcount, rdispl, scount, sdispl, &
1635 xcor, zcor
1636 INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER :: pgrid
1637
1638 CALL timeset(routinen, handle)
1639
1640 np = SIZE(p2p)
1641
1642 rs_pos = p2p(my_pos)
1643
1644 IF (alltoall_sgl) THEN
1645 yzbuf_sgl => fft_scratch%yzbuf_sgl
1646 xzbuf_sgl => fft_scratch%xzbuf_sgl
1647 ELSE
1648 yzbuf => fft_scratch%yzbuf
1649 xzbuf => fft_scratch%xzbuf
1650 END IF
1651 npx = dims(1)
1652 npz = dims(2)
1653 pgrid => fft_scratch%pgrid
1654 xcor => fft_scratch%xcor
1655 zcor => fft_scratch%zcor
1656 pzcoord => fft_scratch%pzcoord
1657 scount => fft_scratch%scount
1658 rcount => fft_scratch%rcount
1659 sdispl => fft_scratch%sdispl
1660 rdispl => fft_scratch%rdispl
1661
1662 nx = SIZE(sb, 2)
1663
1664! If the send and recv counts are not already cached, then
1665! calculate and store them
1666 IF (fft_scratch%in == 0) THEN
1667
1668 scount = 0
1669
1670 DO ix = 0, npx - 1
1671 ip = pgrid(ix, 0)
1672 xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
1673 END DO
1674 DO iz = 0, npz - 1
1675 ip = pgrid(0, iz)
1676 zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
1677 END DO
1678 DO jx = 1, nx
1679 IF (alltoall_sgl) THEN
1680 DO ir = 1, nray(my_pos)
1681 jy = yzp(1, ir, my_pos)
1682 jz = yzp(2, ir, my_pos)
1683 ip = pgrid(xcor(jx), zcor(jz))
1684 scount(ip) = scount(ip) + 1
1685 END DO
1686 ELSE
1687 DO ir = 1, nray(my_pos)
1688 jy = yzp(1, ir, my_pos)
1689 jz = yzp(2, ir, my_pos)
1690 ip = pgrid(xcor(jx), zcor(jz))
1691 scount(ip) = scount(ip) + 1
1692 END DO
1693 END IF
1694 END DO
1695
1696 CALL group%alltoall(scount, rcount, 1)
1697 fft_scratch%yzcount = scount
1698 fft_scratch%xzcount = rcount
1699
1700 ! Work out the correct displacements in the buffers
1701 sdispl(0) = 0
1702 rdispl(0) = 0
1703 DO ip = 1, np - 1
1704 sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
1705 rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
1706 END DO
1707
1708 fft_scratch%yzdispl = sdispl
1709 fft_scratch%xzdispl = rdispl
1710
1711 icrs = 0
1712 DO ip = 0, np - 1
1713 IF (scount(ip) /= 0) icrs = icrs + 1
1714 IF (rcount(ip) /= 0) icrs = icrs + 1
1715 END DO
1716 CALL group%sum(icrs)
1717 fft_scratch%rsratio = real(icrs, kind=dp)/(real(2*np, kind=dp)*real(np, kind=dp))
1718
1719 fft_scratch%in = 1
1720 ELSE
1721 scount = fft_scratch%yzcount
1722 rcount = fft_scratch%xzcount
1723 sdispl = fft_scratch%yzdispl
1724 rdispl = fft_scratch%xzdispl
1725 END IF
1726
1727! Do the actual packing
1728!$OMP PARALLEL DO DEFAULT(NONE), &
1729!$OMP PRIVATE(ipl,jj,nx,ir,jx,jy,jz),&
1730!$OMP SHARED(np,p2p,pzcoord,bo,nray,yzp,zcor),&
1731!$OMP SHARED(yzbuf,sb,scount,sdispl,my_pos),&
1732!$OMP SHARED(yzbuf_sgl,alltoall_sgl)
1733 DO ip = 0, np - 1
1734 IF (scount(ip) == 0) cycle
1735 ipl = p2p(ip)
1736 jj = 0
1737 nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
1738 DO ir = 1, nray(my_pos)
1739 jz = yzp(2, ir, my_pos)
1740 IF (zcor(jz) == pzcoord(ipl)) THEN
1741 jj = jj + 1
1742 jy = yzp(1, ir, my_pos)
1743 IF (alltoall_sgl) THEN
1744 DO jx = 0, nx - 1
1745 yzbuf_sgl(sdispl(ip) + jj + jx*scount(ip)/nx) = cmplx(sb(ir, jx + bo(1, 1, ipl)), kind=sp)
1746 END DO
1747 ELSE
1748 DO jx = 0, nx - 1
1749 yzbuf(sdispl(ip) + jj + jx*scount(ip)/nx) = sb(ir, jx + bo(1, 1, ipl))
1750 END DO
1751 END IF
1752 END IF
1753 END DO
1754 END DO
1755!$OMP END PARALLEL DO
1756
1757 IF (alltoall_sgl) THEN
1758 CALL group%alltoall(yzbuf_sgl, scount, sdispl, xzbuf_sgl, rcount, rdispl)
1759 ELSE
1760 IF (fft_scratch%rsratio < ratio_sparse_alltoall) THEN
1761 CALL sparse_alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl, group)
1762 ELSE
1763 CALL group%alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl)
1764 END IF
1765 END IF
1766
1767 myx = fft_scratch%sizes%r_pos(1)
1768 myz = fft_scratch%sizes%r_pos(2)
1769 nz = bo(2, 3, rs_pos) - bo(1, 3, rs_pos) + 1
1770
1771!$OMP PARALLEL DO DEFAULT(NONE), &
1772!$OMP PRIVATE(ipr,jj,ir,jx,jy,jz),&
1773!$OMP SHARED(tb,np,p2p,bo,rs_pos,nray),&
1774!$OMP SHARED(yzp,alltoall_sgl,zcor,myz),&
1775!$OMP SHARED(xzbuf,xzbuf_sgl,nz,rdispl)
1776 DO ip = 0, np - 1
1777 ipr = p2p(ip)
1778 jj = 0
1779 DO jx = 0, bo(2, 1, rs_pos) - bo(1, 1, rs_pos)
1780 DO ir = 1, nray(ip)
1781 jz = yzp(2, ir, ip)
1782 IF (alltoall_sgl) THEN
1783 IF (zcor(jz) == myz) THEN
1784 jj = jj + 1
1785 jy = yzp(1, ir, ip)
1786 jz = jz - bo(1, 3, rs_pos) + 1
1787 tb(jy, jz + jx*nz) = xzbuf_sgl(jj + rdispl(ipr))
1788 END IF
1789 ELSE
1790 IF (zcor(jz) == myz) THEN
1791 jj = jj + 1
1792 jy = yzp(1, ir, ip)
1793 jz = jz - bo(1, 3, rs_pos) + 1
1794 tb(jy, jz + jx*nz) = xzbuf(jj + rdispl(ipr))
1795 END IF
1796 END IF
1797 END DO
1798 END DO
1799 END DO
1800!$OMP END PARALLEL DO
1801
1802 CALL timestop(handle)
1803
1804 END SUBROUTINE yz_to_xz
1805
1806! **************************************************************************************************
1807!> \brief ...
1808!> \param sb ...
1809!> \param group ...
1810!> \param dims ...
1811!> \param my_pos ...
1812!> \param p2p ...
1813!> \param yzp ...
1814!> \param nray ...
1815!> \param bo ...
1816!> \param tb ...
1817!> \param fft_scratch ...
1818!> \par History
1819!> 15. Feb. 2006 : single precision all_to_all
1820!> \author JGH (19-Jan-2001)
1821! **************************************************************************************************
1822 SUBROUTINE xz_to_yz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1823
1824 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1825 INTENT(IN) :: sb
1826
1827 CLASS(mp_comm_type), INTENT(IN) :: group
1828 INTEGER, DIMENSION(2), INTENT(IN) :: dims
1829 INTEGER, INTENT(IN) :: my_pos
1830 INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1831 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: yzp
1832 INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1833 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: bo
1834 COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT), CONTIGUOUS :: tb
1835 TYPE(fft_scratch_type), INTENT(INOUT) :: fft_scratch
1836
1837 CHARACTER(len=*), PARAMETER :: routinen = 'xz_to_yz'
1838
1839 COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf, yzbuf
1840 COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf_sgl, yzbuf_sgl
1841 INTEGER :: handle, icrs, ip, ipl, ir, ix, ixx, iz, &
1842 jj, jx, jy, jz, mp, myx, myz, np, npx, &
1843 npz, nx, nz
1844 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: pzcoord, rcount, rdispl, scount, sdispl, &
1845 xcor, zcor
1846 INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER :: pgrid
1847
1848 CALL timeset(routinen, handle)
1849
1850 np = SIZE(p2p)
1851
1852 IF (alltoall_sgl) THEN
1853 yzbuf_sgl => fft_scratch%yzbuf_sgl
1854 xzbuf_sgl => fft_scratch%xzbuf_sgl
1855 ELSE
1856 yzbuf => fft_scratch%yzbuf
1857 xzbuf => fft_scratch%xzbuf
1858 END IF
1859 npx = dims(1)
1860 npz = dims(2)
1861 pgrid => fft_scratch%pgrid
1862 xcor => fft_scratch%xcor
1863 zcor => fft_scratch%zcor
1864 pzcoord => fft_scratch%pzcoord
1865 scount => fft_scratch%scount
1866 rcount => fft_scratch%rcount
1867 sdispl => fft_scratch%sdispl
1868 rdispl => fft_scratch%rdispl
1869
1870! If the send and recv counts are not already cached, then
1871! calculate and store them
1872 IF (fft_scratch%in == 0) THEN
1873
1874 rcount = 0
1875 nx = maxval(bo(2, 1, :))
1876
1877 DO ix = 0, npx - 1
1878 ip = pgrid(ix, 0)
1879 xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
1880 END DO
1881 DO iz = 0, npz - 1
1882 ip = pgrid(0, iz)
1883 zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
1884 END DO
1885 DO jx = 1, nx
1886 DO ir = 1, nray(my_pos)
1887 jy = yzp(1, ir, my_pos)
1888 jz = yzp(2, ir, my_pos)
1889 ip = pgrid(xcor(jx), zcor(jz))
1890 rcount(ip) = rcount(ip) + 1
1891 END DO
1892 END DO
1893
1894 CALL group%alltoall(rcount, scount, 1)
1895 fft_scratch%xzcount = scount
1896 fft_scratch%yzcount = rcount
1897
1898 ! Work out the correct displacements in the buffers
1899 sdispl(0) = 0
1900 rdispl(0) = 0
1901 DO ip = 1, np - 1
1902 sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
1903 rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
1904 END DO
1905
1906 fft_scratch%xzdispl = sdispl
1907 fft_scratch%yzdispl = rdispl
1908
1909 icrs = 0
1910 DO ip = 0, np - 1
1911 IF (scount(ip) /= 0) icrs = icrs + 1
1912 IF (rcount(ip) /= 0) icrs = icrs + 1
1913 END DO
1914 CALL group%sum(icrs)
1915 fft_scratch%rsratio = real(icrs, kind=dp)/(real(2*np, kind=dp)*real(np, kind=dp))
1916
1917 fft_scratch%in = 1
1918 ELSE
1919 scount = fft_scratch%xzcount
1920 rcount = fft_scratch%yzcount
1921 sdispl = fft_scratch%xzdispl
1922 rdispl = fft_scratch%yzdispl
1923 END IF
1924
1925! Now do the actual packing
1926 myx = fft_scratch%sizes%r_pos(1)
1927 myz = fft_scratch%sizes%r_pos(2)
1928 mp = p2p(my_pos)
1929 nz = bo(2, 3, mp) - bo(1, 3, mp) + 1
1930 nx = bo(2, 1, mp) - bo(1, 1, mp) + 1
1931
1932!$OMP PARALLEL DO DEFAULT(NONE), &
1933!$OMP PRIVATE(jj,ipl,ir,jx,jy,jz,ixx),&
1934!$OMP SHARED(np,p2p,nray,yzp,zcor,myz,bo,mp),&
1935!$OMP SHARED(alltoall_sgl,nx,scount,sdispl),&
1936!$OMP SHARED(xzbuf,xzbuf_sgl,sb,nz)
1937 DO ip = 0, np - 1
1938 jj = 0
1939 ipl = p2p(ip)
1940 DO ir = 1, nray(ip)
1941 jz = yzp(2, ir, ip)
1942 IF (zcor(jz) == myz) THEN
1943 jj = jj + 1
1944 jy = yzp(1, ir, ip)
1945 jz = yzp(2, ir, ip) - bo(1, 3, mp) + 1
1946 IF (alltoall_sgl) THEN
1947 DO jx = 0, nx - 1
1948 ixx = jj + jx*scount(ipl)/nx
1949 xzbuf_sgl(ixx + sdispl(ipl)) = cmplx(sb(jy, jz + jx*nz), kind=sp)
1950 END DO
1951 ELSE
1952 DO jx = 0, nx - 1
1953 ixx = jj + jx*scount(ipl)/nx
1954 xzbuf(ixx + sdispl(ipl)) = sb(jy, jz + jx*nz)
1955 END DO
1956 END IF
1957 END IF
1958 END DO
1959 END DO
1960!$OMP END PARALLEL DO
1961
1962 IF (alltoall_sgl) THEN
1963 CALL group%alltoall(xzbuf_sgl, scount, sdispl, yzbuf_sgl, rcount, rdispl)
1964 ELSE
1965 IF (fft_scratch%rsratio < ratio_sparse_alltoall) THEN
1966 CALL sparse_alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl, group)
1967 ELSE
1968 CALL group%alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl)
1969 END IF
1970 END IF
1971
1972!$OMP PARALLEL DO DEFAULT(NONE), &
1973!$OMP PRIVATE(ipl,jj,nx,ir,jx,jy,jz),&
1974!$OMP SHARED(p2p,pzcoord,bo,nray,my_pos,yzp),&
1975!$OMP SHARED(rcount,rdispl,tb,yzbuf,zcor),&
1976!$OMP SHARED(yzbuf_sgl,alltoall_sgl,np)
1977 DO ip = 0, np - 1
1978 IF (rcount(ip) == 0) cycle
1979 ipl = p2p(ip)
1980 jj = 0
1981 nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
1982 DO ir = 1, nray(my_pos)
1983 jz = yzp(2, ir, my_pos)
1984 IF (zcor(jz) == pzcoord(ipl)) THEN
1985 jj = jj + 1
1986 jy = yzp(1, ir, my_pos)
1987 IF (alltoall_sgl) THEN
1988 DO jx = 0, nx - 1
1989 tb(ir, jx + bo(1, 1, ipl)) = yzbuf_sgl(rdispl(ip) + jj + jx*rcount(ip)/nx)
1990 END DO
1991 ELSE
1992 DO jx = 0, nx - 1
1993 tb(ir, jx + bo(1, 1, ipl)) = yzbuf(rdispl(ip) + jj + jx*rcount(ip)/nx)
1994 END DO
1995 END IF
1996 END IF
1997 END DO
1998 END DO
1999!$OMP END PARALLEL DO
2000
2001 CALL timestop(handle)
2002
2003 END SUBROUTINE xz_to_yz
2004
2005! **************************************************************************************************
2006!> \brief ...
2007!> \param cin ...
2008!> \param boin ...
2009!> \param boout ...
2010!> \param sout ...
2011!> \param fft_scratch ...
2012!> \par History
2013!> none
2014!> \author JGH (20-Jan-2001)
2015! **************************************************************************************************
2016 SUBROUTINE cube_transpose_1(cin, boin, boout, sout, fft_scratch)
2017
2018 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2019 INTENT(IN) :: cin
2020 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2021 INTENT(IN) :: boin, boout
2022 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2023 INTENT(OUT) :: sout
2024 TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2025
2026 CHARACTER(len=*), PARAMETER :: routinen = 'cube_transpose_1'
2027
2028 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2029 POINTER :: rbuf
2030 INTEGER :: handle, ip, ipl, ir, is, ixy, iz, mip, &
2031 mz, np, nx, ny, nz
2032 INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2033 INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2034 INTEGER, DIMENSION(2) :: dim, pos
2035
2036 CALL timeset(routinen, handle)
2037
2038 mip = fft_scratch%mip
2039 dim = fft_scratch%dim
2040 pos = fft_scratch%pos
2041 scount => fft_scratch%scount
2042 rcount => fft_scratch%rcount
2043 sdispl => fft_scratch%sdispl
2044 rdispl => fft_scratch%rdispl
2045 pgrid => fft_scratch%pgcube
2046 np = dim(2)
2047
2048 nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2049 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2050
2051!$OMP PARALLEL DO DEFAULT(NONE), &
2052!$OMP PRIVATE(ipl,ny), &
2053!$OMP SHARED(np,pgrid,boout,scount,sdispl,nx,nz)
2054 DO ip = 0, np - 1
2055 ipl = pgrid(ip, 2)
2056 ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2057 scount(ip) = nx*nz*ny
2058 sdispl(ip) = nx*nz*(boout(1, 2, ipl) - 1)
2059 END DO
2060!$OMP END PARALLEL DO
2061 ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2062 mz = maxval(boin(2, 3, :) - boin(1, 3, :) + 1)
2063!$OMP PARALLEL DO DEFAULT(NONE), &
2064!$OMP PRIVATE(ipl,nz), &
2065!$OMP SHARED(np,pgrid,boin,nx,ny,rcount,rdispl,mz)
2066 DO ip = 0, np - 1
2067 ipl = pgrid(ip, 2)
2068 nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
2069 rcount(ip) = nx*nz*ny
2070 rdispl(ip) = nx*ny*mz*ip
2071 END DO
2072!$OMP END PARALLEL DO
2073
2074 rbuf => fft_scratch%rbuf1
2075
2076 CALL fft_scratch%cart_sub_comm(2)%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2077
2078!$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
2079!$OMP PRIVATE(ip,ipl,nz,iz,is,ir) &
2080!$OMP SHARED(nx,ny,np,pgrid,boin,sout,rbuf)
2081 DO ixy = 1, nx*ny
2082 DO ip = 0, np - 1
2083 ipl = pgrid(ip, 2)
2084 nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
2085 DO iz = 1, nz
2086 is = boin(1, 3, ipl) + iz - 1
2087 ir = iz + nz*(ixy - 1)
2088 sout(is, ixy) = rbuf(ir, ip)
2089 END DO
2090 END DO
2091 END DO
2092!$OMP END PARALLEL DO
2093
2094 CALL timestop(handle)
2095
2096 END SUBROUTINE cube_transpose_1
2097
2098! **************************************************************************************************
2099!> \brief ...
2100!> \param cin ...
2101!> \param boin ...
2102!> \param boout ...
2103!> \param sout ...
2104!> \param fft_scratch ...
2105! **************************************************************************************************
2106 SUBROUTINE cube_transpose_2(cin, boin, boout, sout, fft_scratch)
2107
2108 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2109 INTENT(IN) :: cin
2110 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2111 INTENT(IN) :: boin, boout
2112 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2113 INTENT(OUT) :: sout
2114 TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2115
2116 CHARACTER(len=*), PARAMETER :: routinen = 'cube_transpose_2'
2117
2118 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2119 POINTER :: rbuf
2120 INTEGER :: handle, ip, ipl, ir, ixy, iz, mip, mz, &
2121 np, nx, ny, nz
2122 INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2123 INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2124 INTEGER, DIMENSION(2) :: dim, pos
2125 TYPE(mp_comm_type) :: sub_group
2126
2127 CALL timeset(routinen, handle)
2128
2129 sub_group = fft_scratch%cart_sub_comm(2)
2130 mip = fft_scratch%mip
2131 dim = fft_scratch%dim
2132 pos = fft_scratch%pos
2133 scount => fft_scratch%scount
2134 rcount => fft_scratch%rcount
2135 sdispl => fft_scratch%sdispl
2136 rdispl => fft_scratch%rdispl
2137 pgrid => fft_scratch%pgcube
2138 np = dim(2)
2139
2140 nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2141 ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2142 mz = maxval(boout(2, 3, :) - boout(1, 3, :) + 1)
2143
2144 rbuf => fft_scratch%rbuf2
2145
2146!$OMP PARALLEL DEFAULT(NONE), &
2147!$OMP PRIVATE(ip,ipl,nz,iz,ir), &
2148!$OMP SHARED(nx,ny,np,pgrid,boout,rbuf,cin,scount,sdispl,mz)
2149!$OMP DO COLLAPSE(2)
2150 DO ixy = 1, nx*ny
2151 DO ip = 0, np - 1
2152 ipl = pgrid(ip, 2)
2153 nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
2154 DO iz = boout(1, 3, ipl), boout(2, 3, ipl)
2155 ir = iz - boout(1, 3, ipl) + 1 + (ixy - 1)*nz
2156 rbuf(ir, ip) = cin(iz, ixy)
2157 END DO
2158 END DO
2159 END DO
2160!$OMP END DO
2161!$OMP DO
2162 DO ip = 0, np - 1
2163 ipl = pgrid(ip, 2)
2164 nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
2165 scount(ip) = nx*ny*nz
2166 sdispl(ip) = nx*ny*mz*ip
2167 END DO
2168!$OMP END DO
2169!$OMP END PARALLEL
2170 nz = boout(2, 3, mip) - boout(1, 3, mip) + 1
2171!$OMP PARALLEL DO DEFAULT(NONE), &
2172!$OMP PRIVATE(ipl,ny), &
2173!$OMP SHARED(np,pgrid,boin,nx,nz,rcount,rdispl)
2174 DO ip = 0, np - 1
2175 ipl = pgrid(ip, 2)
2176 ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2177 rcount(ip) = nx*ny*nz
2178 rdispl(ip) = nx*nz*(boin(1, 2, ipl) - 1)
2179 END DO
2180!$OMP END PARALLEL DO
2181
2182 CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2183
2184 CALL timestop(handle)
2185
2186 END SUBROUTINE cube_transpose_2
2187
2188! **************************************************************************************************
2189!> \brief ...
2190!> \param cin ...
2191!> \param boin ...
2192!> \param boout ...
2193!> \param sout ...
2194!> \param fft_scratch ...
2195! **************************************************************************************************
2196 SUBROUTINE cube_transpose_3(cin, boin, boout, sout, fft_scratch)
2197
2198 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2199 INTENT(IN) :: cin
2200 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2201 INTENT(IN) :: boin, boout
2202 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2203 INTENT(OUT) :: sout
2204 TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2205
2206 CHARACTER(len=*), PARAMETER :: routinen = 'cube_transpose_3'
2207
2208 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2209 POINTER :: rbuf
2210 INTEGER :: handle, ip, ipl, ir, is, ixz, iy, lb, &
2211 mip, my, my_id, np, num_threads, nx, &
2212 ny, nz, ub
2213 INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2214 INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2215 INTEGER, DIMENSION(2) :: dim, pos
2216 TYPE(mp_comm_type) :: sub_group
2217
2218 CALL timeset(routinen, handle)
2219
2220 sub_group = fft_scratch%cart_sub_comm(1)
2221 mip = fft_scratch%mip
2222 dim = fft_scratch%dim
2223 pos = fft_scratch%pos
2224 np = dim(1)
2225 scount => fft_scratch%scount
2226 rcount => fft_scratch%rcount
2227 sdispl => fft_scratch%sdispl
2228 rdispl => fft_scratch%rdispl
2229 pgrid => fft_scratch%pgcube
2230
2231 ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2232 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2233!$OMP PARALLEL DO DEFAULT(NONE), &
2234!$OMP PRIVATE(ipl, nx), &
2235!$OMP SHARED(np,pgrid,boout,ny,nz,scount,sdispl)
2236 DO ip = 0, np - 1
2237 ipl = pgrid(ip, 1)
2238 nx = boout(2, 1, ipl) - boout(1, 1, ipl) + 1
2239 scount(ip) = nx*nz*ny
2240 sdispl(ip) = ny*nz*(boout(1, 1, ipl) - 1)
2241 END DO
2242!$OMP END PARALLEL DO
2243 nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
2244 my = maxval(boin(2, 2, :) - boin(1, 2, :) + 1)
2245!$OMP PARALLEL DO DEFAULT(NONE), &
2246!$OMP PRIVATE(ipl, ny), &
2247!$OMP SHARED(np,pgrid,boin,nx,nz,my,rcount,rdispl)
2248 DO ip = 0, np - 1
2249 ipl = pgrid(ip, 1)
2250 ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2251 rcount(ip) = nx*nz*ny
2252 rdispl(ip) = nx*my*nz*ip
2253 END DO
2254!$OMP END PARALLEL DO
2255
2256 rbuf => fft_scratch%rbuf3
2257 num_threads = 1
2258 my_id = 0
2259!$OMP PARALLEL DEFAULT(NONE), &
2260!$OMP PRIVATE(NUM_THREADS, my_id, lb, ub) &
2261!$OMP SHARED(rbuf)
2262!$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2263!$ my_id = omp_get_thread_num()
2264 IF (my_id < num_threads) THEN
2265 lb = (SIZE(rbuf, 2)*my_id)/num_threads
2266 ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2267 rbuf(:, lb:ub) = 0.0_dp
2268 END IF
2269!$OMP END PARALLEL
2270
2271 CALL sub_group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2272
2273!$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
2274!$OMP PRIVATE(ip,ipl,ny,iy,is,ir) &
2275!$OMP SHARED(nx,nz,np,pgrid,boin,rbuf,sout)
2276 DO ixz = 1, nx*nz
2277 DO ip = 0, np - 1
2278 ipl = pgrid(ip, 1)
2279 ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2280 DO iy = 1, ny
2281 is = boin(1, 2, ipl) + iy - 1
2282 ir = iy + ny*(ixz - 1)
2283 sout(is, ixz) = rbuf(ir, ip)
2284 END DO
2285 END DO
2286 END DO
2287!$OMP END PARALLEL DO
2288
2289 CALL timestop(handle)
2290
2291 END SUBROUTINE cube_transpose_3
2292
2293! **************************************************************************************************
2294!> \brief ...
2295!> \param cin ...
2296!> \param boin ...
2297!> \param boout ...
2298!> \param sout ...
2299!> \param fft_scratch ...
2300! **************************************************************************************************
2301 SUBROUTINE cube_transpose_4(cin, boin, boout, sout, fft_scratch)
2302
2303 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2304 INTENT(IN) :: cin
2305 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2306 INTENT(IN) :: boin, boout
2307 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2308 INTENT(OUT) :: sout
2309 TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2310
2311 CHARACTER(len=*), PARAMETER :: routinen = 'cube_transpose_4'
2312
2313 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2314 POINTER :: rbuf
2315 INTEGER :: handle, ip, ipl, ir, iy, izx, lb, mip, &
2316 my, my_id, np, num_threads, nx, ny, &
2317 nz, ub
2318 INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2319 INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2320 INTEGER, DIMENSION(2) :: dim, pos
2321 TYPE(mp_comm_type) :: sub_group
2322
2323 CALL timeset(routinen, handle)
2324
2325 sub_group = fft_scratch%cart_sub_comm(1)
2326 mip = fft_scratch%mip
2327 dim = fft_scratch%dim
2328 pos = fft_scratch%pos
2329 np = dim(1)
2330 scount => fft_scratch%scount
2331 rcount => fft_scratch%rcount
2332 sdispl => fft_scratch%sdispl
2333 rdispl => fft_scratch%rdispl
2334 pgrid => fft_scratch%pgcube
2335
2336 nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2337 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2338 my = maxval(boout(2, 2, :) - boout(1, 2, :) + 1)
2339
2340 rbuf => fft_scratch%rbuf4
2341 num_threads = 1
2342 my_id = 0
2343!$OMP PARALLEL DEFAULT(NONE), &
2344!$OMP PRIVATE(NUM_THREADS,my_id,lb,ub,ip,ipl,ny,iy,ir), &
2345!$OMP SHARED(rbuf,nz,nx,np,pgrid,boout,cin,my,scount,sdispl)
2346!$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2347!$ my_id = omp_get_thread_num()
2348 IF (my_id < num_threads) THEN
2349 lb = (SIZE(rbuf, 2)*my_id)/num_threads
2350 ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2351 rbuf(:, lb:ub) = 0.0_dp
2352 END IF
2353!$OMP BARRIER
2354
2355!$OMP DO COLLAPSE(2)
2356 DO izx = 1, nz*nx
2357 DO ip = 0, np - 1
2358 ipl = pgrid(ip, 1)
2359 ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2360 DO iy = boout(1, 2, ipl), boout(2, 2, ipl)
2361 ir = iy - boout(1, 2, ipl) + 1 + (izx - 1)*ny
2362 rbuf(ir, ip) = cin(iy, izx)
2363 END DO
2364 END DO
2365 END DO
2366!$OMP END DO
2367!$OMP DO
2368 DO ip = 0, np - 1
2369 ipl = pgrid(ip, 1)
2370 ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2371 scount(ip) = nx*ny*nz
2372 sdispl(ip) = nx*nz*my*ip
2373 END DO
2374!$OMP END DO
2375!$OMP END PARALLEL
2376 ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2377!$OMP PARALLEL DO DEFAULT(NONE), &
2378!$OMP PRIVATE(ipl,nx), &
2379!$OMP SHARED(np,pgrid,boin,rcount,rdispl,ny,nz)
2380 DO ip = 0, np - 1
2381 ipl = pgrid(ip, 1)
2382 nx = boin(2, 1, ipl) - boin(1, 1, ipl) + 1
2383 rcount(ip) = nx*ny*nz
2384 rdispl(ip) = ny*nz*(boin(1, 1, ipl) - 1)
2385 END DO
2386!$OMP END PARALLEL DO
2387
2388 CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2389
2390 CALL timestop(handle)
2391
2392 END SUBROUTINE cube_transpose_4
2393
2394! **************************************************************************************************
2395!> \brief ...
2396!> \param cin ...
2397!> \param group ...
2398!> \param boin ...
2399!> \param boout ...
2400!> \param sout ...
2401!> \param fft_scratch ...
2402! **************************************************************************************************
2403 SUBROUTINE cube_transpose_5(cin, group, boin, boout, sout, fft_scratch)
2404
2405 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2406 INTENT(IN) :: cin
2407
2408 CLASS(mp_comm_type), INTENT(IN) :: group
2409 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: boin, boout
2410 COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT), CONTIGUOUS :: sout
2411 TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2412
2413 CHARACTER(len=*), PARAMETER :: routinen = 'cube_transpose_5'
2414
2415 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: rbuf
2416 INTEGER :: handle, ip, ir, is, ixz, iy, lb, mip, &
2417 my, my_id, np, num_threads, nx, ny, &
2418 nz, ub
2419 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: rcount, rdispl, scount, sdispl
2420
2421 CALL timeset(routinen, handle)
2422
2423 np = fft_scratch%sizes%numtask
2424 mip = fft_scratch%mip
2425 scount => fft_scratch%scount
2426 rcount => fft_scratch%rcount
2427 sdispl => fft_scratch%sdispl
2428 rdispl => fft_scratch%rdispl
2429
2430 ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2431 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2432!$OMP PARALLEL DO DEFAULT(NONE), &
2433!$OMP PRIVATE(nx), &
2434!$OMP SHARED(np,boout,ny,nz,scount,sdispl)
2435 DO ip = 0, np - 1
2436 nx = boout(2, 1, ip) - boout(1, 1, ip) + 1
2437 scount(ip) = nx*nz*ny
2438 sdispl(ip) = ny*nz*(boout(1, 1, ip) - 1)
2439 END DO
2440!$OMP END PARALLEL DO
2441 nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
2442 my = maxval(boin(2, 2, :) - boin(1, 2, :) + 1)
2443!$OMP PARALLEL DO DEFAULT(NONE), &
2444!$OMP PRIVATE(ny), &
2445!$OMP SHARED(np,boin,nx,nz,rcount,rdispl,my)
2446 DO ip = 0, np - 1
2447 ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
2448 rcount(ip) = nx*nz*ny
2449 rdispl(ip) = nx*my*nz*ip
2450 END DO
2451!$OMP END PARALLEL DO
2452
2453 rbuf => fft_scratch%rbuf5
2454 num_threads = 1
2455 my_id = 0
2456!$OMP PARALLEL DEFAULT(NONE), &
2457!$OMP PRIVATE(NUM_THREADS, my_id, lb, ub), &
2458!$OMP SHARED(rbuf)
2459!$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2460!$ my_id = omp_get_thread_num()
2461 IF (my_id < num_threads) THEN
2462 lb = (SIZE(rbuf, 2)*my_id)/num_threads
2463 ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2464 rbuf(:, lb:ub) = 0.0_dp
2465 END IF
2466!$OMP END PARALLEL
2467
2468 CALL group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2469
2470!$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
2471!$OMP PRIVATE(ip,ny,iy,is,ir) &
2472!$OMP SHARED(nx,nz,np,boin,sout,rbuf)
2473 DO ixz = 1, nx*nz
2474 DO ip = 0, np - 1
2475 ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
2476 DO iy = 1, ny
2477 is = boin(1, 2, ip) + iy - 1
2478 ir = iy + ny*(ixz - 1)
2479 sout(is, ixz) = rbuf(ir, ip)
2480 END DO
2481 END DO
2482 END DO
2483!$OMP END PARALLEL DO
2484
2485 CALL timestop(handle)
2486
2487 END SUBROUTINE cube_transpose_5
2488
2489! **************************************************************************************************
2490!> \brief ...
2491!> \param cin ...
2492!> \param group ...
2493!> \param boin ...
2494!> \param boout ...
2495!> \param sout ...
2496!> \param fft_scratch ...
2497! **************************************************************************************************
2498 SUBROUTINE cube_transpose_6(cin, group, boin, boout, sout, fft_scratch)
2499
2500 COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2501 INTENT(IN) :: cin
2502
2503 CLASS(mp_comm_type), INTENT(IN) :: group
2504 INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: boin, boout
2505 COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT), CONTIGUOUS :: sout
2506 TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2507
2508 CHARACTER(len=*), PARAMETER :: routinen = 'cube_transpose_6'
2509
2510 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: rbuf
2511 INTEGER :: handle, ip, ir, iy, izx, lb, mip, my, &
2512 my_id, np, num_threads, nx, ny, nz, ub
2513 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: rcount, rdispl, scount, sdispl
2514
2515 CALL timeset(routinen, handle)
2516
2517 np = fft_scratch%sizes%numtask
2518 mip = fft_scratch%mip
2519 scount => fft_scratch%scount
2520 rcount => fft_scratch%rcount
2521 sdispl => fft_scratch%sdispl
2522 rdispl => fft_scratch%rdispl
2523
2524 nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2525 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2526 my = maxval(boout(2, 2, :) - boout(1, 2, :) + 1)
2527
2528 rbuf => fft_scratch%rbuf5
2529 num_threads = 1
2530 my_id = 0
2531!$OMP PARALLEL DEFAULT(NONE), &
2532!$OMP PRIVATE(NUM_THREADS,my_id,lb,ub,ip,ny,iy,ir), &
2533!$OMP SHARED(rbuf,nx,nz,np,boout,cin,my,scount,sdispl)
2534!$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2535!$ my_id = omp_get_thread_num()
2536 IF (my_id < num_threads) THEN
2537 lb = (SIZE(rbuf, 2)*my_id)/num_threads
2538 ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2539 rbuf(:, lb:ub) = 0.0_dp
2540 END IF
2541!$OMP BARRIER
2542
2543!$OMP DO COLLAPSE(2)
2544 DO izx = 1, nz*nx
2545 DO ip = 0, np - 1
2546 ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
2547 DO iy = boout(1, 2, ip), boout(2, 2, ip)
2548 ir = iy - boout(1, 2, ip) + 1 + (izx - 1)*ny
2549 rbuf(ir, ip) = cin(iy, izx)
2550 END DO
2551 END DO
2552 END DO
2553!$OMP END DO
2554!$OMP DO
2555 DO ip = 0, np - 1
2556 ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
2557 scount(ip) = nx*ny*nz
2558 sdispl(ip) = nx*nz*my*ip
2559 END DO
2560!$OMP END DO
2561!$OMP END PARALLEL
2562 ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2563!$OMP PARALLEL DO DEFAULT(NONE), &
2564!$OMP PRIVATE(nx), &
2565!$OMP SHARED(np,boin,rcount,rdispl,nz,ny)
2566 DO ip = 0, np - 1
2567 nx = boin(2, 1, ip) - boin(1, 1, ip) + 1
2568 rcount(ip) = nx*ny*nz
2569 rdispl(ip) = ny*nz*(boin(1, 1, ip) - 1)
2570 END DO
2571!$OMP END PARALLEL DO
2572
2573 CALL group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2574
2575 CALL timestop(handle)
2576
2577 END SUBROUTINE cube_transpose_6
2578
2579! **************************************************************************************************
2580!> \brief ...
2581! **************************************************************************************************
2582 SUBROUTINE init_fft_scratch_pool()
2583
2584 CALL release_fft_scratch_pool()
2585
2586 ! Allocate first scratch and mark it as used
2587 ALLOCATE (fft_scratch_first)
2588 ALLOCATE (fft_scratch_first%fft_scratch)
2589 ! this is a very special scratch, it seems, we always keep it 'most - recent' so we will never delete it
2590 fft_scratch_first%fft_scratch%last_tick = huge(fft_scratch_first%fft_scratch%last_tick)
2591
2593
2594 END SUBROUTINE init_fft_scratch_pool
2595
2596! **************************************************************************************************
2597!> \brief ...
2598!> \param fft_scratch ...
2599! **************************************************************************************************
2600 SUBROUTINE deallocate_fft_scratch_type(fft_scratch)
2601 TYPE(fft_scratch_type), INTENT(INOUT) :: fft_scratch
2602
2603#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2604 INTEGER :: ierr
2605 COMPLEX(KIND=dp), POINTER :: dummy_ptr_z
2606#endif
2607
2608 ! deallocate structures
2609 IF (ASSOCIATED(fft_scratch%ziptr)) THEN
2610 CALL fft_dealloc(fft_scratch%ziptr)
2611 END IF
2612 IF (ASSOCIATED(fft_scratch%zoptr)) THEN
2613 CALL fft_dealloc(fft_scratch%zoptr)
2614 END IF
2615 IF (ASSOCIATED(fft_scratch%p1buf)) THEN
2616 CALL fft_dealloc(fft_scratch%p1buf)
2617 END IF
2618 IF (ASSOCIATED(fft_scratch%p2buf)) THEN
2619#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2620 dummy_ptr_z => fft_scratch%p2buf(1, 1)
2621 ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2622#else
2623 CALL fft_dealloc(fft_scratch%p2buf)
2624#endif
2625 END IF
2626 IF (ASSOCIATED(fft_scratch%p3buf)) THEN
2627#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2628 dummy_ptr_z => fft_scratch%p3buf(1, 1)
2629 ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2630#else
2631 CALL fft_dealloc(fft_scratch%p3buf)
2632#endif
2633 END IF
2634 IF (ASSOCIATED(fft_scratch%p4buf)) THEN
2635#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2636 dummy_ptr_z => fft_scratch%p4buf(1, 1)
2637 ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2638#else
2639 CALL fft_dealloc(fft_scratch%p4buf)
2640#endif
2641 END IF
2642 IF (ASSOCIATED(fft_scratch%p5buf)) THEN
2643#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2644 dummy_ptr_z => fft_scratch%p5buf(1, 1)
2645 ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2646#else
2647 CALL fft_dealloc(fft_scratch%p5buf)
2648#endif
2649 END IF
2650 IF (ASSOCIATED(fft_scratch%p6buf)) THEN
2651 CALL fft_dealloc(fft_scratch%p6buf)
2652 END IF
2653 IF (ASSOCIATED(fft_scratch%p7buf)) THEN
2654#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2655 dummy_ptr_z => fft_scratch%p7buf(1, 1)
2656 ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2657#else
2658 CALL fft_dealloc(fft_scratch%p7buf)
2659#endif
2660 END IF
2661 IF (ASSOCIATED(fft_scratch%r1buf)) THEN
2662#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2663 dummy_ptr_z => fft_scratch%r1buf(1, 1)
2664 ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2665#else
2666 CALL fft_dealloc(fft_scratch%r1buf)
2667#endif
2668 END IF
2669 IF (ASSOCIATED(fft_scratch%r2buf)) THEN
2670 CALL fft_dealloc(fft_scratch%r2buf)
2671 END IF
2672 IF (ASSOCIATED(fft_scratch%tbuf)) THEN
2673#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2674 dummy_ptr_z => fft_scratch%tbuf(1, 1, 1)
2675 ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2676#else
2677 CALL fft_dealloc(fft_scratch%tbuf)
2678#endif
2679 END IF
2680 IF (ASSOCIATED(fft_scratch%a1buf)) THEN
2681 CALL fft_dealloc(fft_scratch%a1buf)
2682 END IF
2683 IF (ASSOCIATED(fft_scratch%a2buf)) THEN
2684 CALL fft_dealloc(fft_scratch%a2buf)
2685 END IF
2686 IF (ASSOCIATED(fft_scratch%a3buf)) THEN
2687 CALL fft_dealloc(fft_scratch%a3buf)
2688 END IF
2689 IF (ASSOCIATED(fft_scratch%a4buf)) THEN
2690 CALL fft_dealloc(fft_scratch%a4buf)
2691 END IF
2692 IF (ASSOCIATED(fft_scratch%a5buf)) THEN
2693 CALL fft_dealloc(fft_scratch%a5buf)
2694 END IF
2695 IF (ASSOCIATED(fft_scratch%a6buf)) THEN
2696 CALL fft_dealloc(fft_scratch%a6buf)
2697 END IF
2698 IF (ASSOCIATED(fft_scratch%scount)) THEN
2699 DEALLOCATE (fft_scratch%scount, fft_scratch%rcount, &
2700 fft_scratch%sdispl, fft_scratch%rdispl)
2701 END IF
2702 IF (ASSOCIATED(fft_scratch%rr)) THEN
2703 DEALLOCATE (fft_scratch%rr)
2704 END IF
2705 IF (ASSOCIATED(fft_scratch%xzbuf)) THEN
2706 DEALLOCATE (fft_scratch%xzbuf)
2707 END IF
2708 IF (ASSOCIATED(fft_scratch%yzbuf)) THEN
2709 DEALLOCATE (fft_scratch%yzbuf)
2710 END IF
2711 IF (ASSOCIATED(fft_scratch%xzbuf_sgl)) THEN
2712 DEALLOCATE (fft_scratch%xzbuf_sgl)
2713 END IF
2714 IF (ASSOCIATED(fft_scratch%yzbuf_sgl)) THEN
2715 DEALLOCATE (fft_scratch%yzbuf_sgl)
2716 END IF
2717 IF (ASSOCIATED(fft_scratch%ss)) THEN
2718 DEALLOCATE (fft_scratch%ss)
2719 END IF
2720 IF (ASSOCIATED(fft_scratch%tt)) THEN
2721 DEALLOCATE (fft_scratch%tt)
2722 END IF
2723 IF (ASSOCIATED(fft_scratch%pgrid)) THEN
2724 DEALLOCATE (fft_scratch%pgrid)
2725 END IF
2726 IF (ASSOCIATED(fft_scratch%pgcube)) THEN
2727 DEALLOCATE (fft_scratch%pgcube)
2728 END IF
2729 IF (ASSOCIATED(fft_scratch%xcor)) THEN
2730 DEALLOCATE (fft_scratch%xcor, fft_scratch%zcor)
2731 END IF
2732 IF (ASSOCIATED(fft_scratch%pzcoord)) THEN
2733 DEALLOCATE (fft_scratch%pzcoord)
2734 END IF
2735 IF (ASSOCIATED(fft_scratch%xzcount)) THEN
2736 DEALLOCATE (fft_scratch%xzcount, fft_scratch%yzcount)
2737 DEALLOCATE (fft_scratch%xzdispl, fft_scratch%yzdispl)
2738 fft_scratch%in = 0
2739 fft_scratch%rsratio = 1._dp
2740 END IF
2741 IF (ASSOCIATED(fft_scratch%rbuf1)) THEN
2742 DEALLOCATE (fft_scratch%rbuf1)
2743 END IF
2744 IF (ASSOCIATED(fft_scratch%rbuf2)) THEN
2745 DEALLOCATE (fft_scratch%rbuf2)
2746 END IF
2747 IF (ASSOCIATED(fft_scratch%rbuf3)) THEN
2748 DEALLOCATE (fft_scratch%rbuf3)
2749 END IF
2750 IF (ASSOCIATED(fft_scratch%rbuf4)) THEN
2751 DEALLOCATE (fft_scratch%rbuf4)
2752 END IF
2753 IF (ASSOCIATED(fft_scratch%rbuf5)) THEN
2754 DEALLOCATE (fft_scratch%rbuf5)
2755 END IF
2756 IF (ASSOCIATED(fft_scratch%rbuf6)) THEN
2757 DEALLOCATE (fft_scratch%rbuf6)
2758 END IF
2759
2760 IF (fft_scratch%cart_sub_comm(1) /= mp_comm_null) THEN
2761 CALL fft_scratch%cart_sub_comm(1)%free()
2762 END IF
2763 IF (fft_scratch%cart_sub_comm(2) /= mp_comm_null) THEN
2764 CALL fft_scratch%cart_sub_comm(2)%free()
2765 END IF
2766
2767 CALL fft_destroy_plan(fft_scratch%fft_plan(1))
2768 CALL fft_destroy_plan(fft_scratch%fft_plan(2))
2769 CALL fft_destroy_plan(fft_scratch%fft_plan(3))
2770 CALL fft_destroy_plan(fft_scratch%fft_plan(4))
2771 CALL fft_destroy_plan(fft_scratch%fft_plan(5))
2772 CALL fft_destroy_plan(fft_scratch%fft_plan(6))
2773
2774 END SUBROUTINE deallocate_fft_scratch_type
2775
2776! **************************************************************************************************
2777!> \brief ...
2778! **************************************************************************************************
2779 SUBROUTINE release_fft_scratch_pool()
2780
2781 TYPE(fft_scratch_pool_type), POINTER :: fft_scratch, fft_scratch_current
2782
2783 IF (init_fft_pool == 0) NULLIFY (fft_scratch_first)
2784
2785 fft_scratch => fft_scratch_first
2786 DO
2787 IF (ASSOCIATED(fft_scratch)) THEN
2788 fft_scratch_current => fft_scratch
2789 fft_scratch => fft_scratch_current%fft_scratch_next
2790 NULLIFY (fft_scratch_current%fft_scratch_next)
2791
2792 CALL deallocate_fft_scratch_type(fft_scratch_current%fft_scratch)
2793
2794 DEALLOCATE (fft_scratch_current%fft_scratch)
2795 DEALLOCATE (fft_scratch_current)
2796 ELSE
2797 EXIT
2798 END IF
2799 END DO
2800
2801 init_fft_pool = 0
2802
2803 END SUBROUTINE release_fft_scratch_pool
2804
2805! **************************************************************************************************
2806!> \brief ...
2807! **************************************************************************************************
2808 SUBROUTINE resize_fft_scratch_pool()
2809
2810 INTEGER :: last_tick, nscratch
2811 TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_current, fft_scratch_old
2812
2813 nscratch = 0
2814
2815 last_tick = huge(last_tick)
2816 NULLIFY (fft_scratch_old)
2817
2818 ! start at the global pool, count, and find a deletion candidate
2819 fft_scratch_current => fft_scratch_first
2820 DO
2821 IF (ASSOCIATED(fft_scratch_current)) THEN
2822 nscratch = nscratch + 1
2823 ! is this a candidate for deletion (i.e. least recently used, and not in use)
2824 IF (.NOT. fft_scratch_current%fft_scratch%in_use) THEN
2825 IF (fft_scratch_current%fft_scratch%last_tick < last_tick) THEN
2826 last_tick = fft_scratch_current%fft_scratch%last_tick
2827 fft_scratch_old => fft_scratch_current
2828 END IF
2829 END IF
2830 fft_scratch_current => fft_scratch_current%fft_scratch_next
2831 ELSE
2832 EXIT
2833 END IF
2834 END DO
2835
2836 ! we should delete a scratch
2837 IF (nscratch > fft_pool_scratch_limit) THEN
2838 ! note that we never deallocate the first (special) element of the list
2839 IF (ASSOCIATED(fft_scratch_old)) THEN
2840 fft_scratch_current => fft_scratch_first
2841 DO
2842 IF (ASSOCIATED(fft_scratch_current)) THEN
2843 ! should we delete the next in the list?
2844 IF (ASSOCIATED(fft_scratch_current%fft_scratch_next, fft_scratch_old)) THEN
2845 ! fix the linked list
2846 fft_scratch_current%fft_scratch_next => fft_scratch_old%fft_scratch_next
2847
2848 ! deallocate the element
2849 CALL deallocate_fft_scratch_type(fft_scratch_old%fft_scratch)
2850 DEALLOCATE (fft_scratch_old%fft_scratch)
2851 DEALLOCATE (fft_scratch_old)
2852
2853 ELSE
2854 fft_scratch_current => fft_scratch_current%fft_scratch_next
2855 END IF
2856 ELSE
2857 EXIT
2858 END IF
2859 END DO
2860
2861 ELSE
2862 cpwarn("The number of the scratches exceeded the limit, but none could be deallocated")
2863 END IF
2864 END IF
2865
2866 END SUBROUTINE resize_fft_scratch_pool
2867
2868! **************************************************************************************************
2869!> \brief ...
2870!> \param fft_scratch ...
2871!> \param tf_type ...
2872!> \param n ...
2873!> \param fft_sizes ...
2874! **************************************************************************************************
2875 SUBROUTINE get_fft_scratch(fft_scratch, tf_type, n, fft_sizes)
2876 TYPE(fft_scratch_type), POINTER :: fft_scratch
2877 INTEGER, INTENT(IN) :: tf_type
2878 INTEGER, DIMENSION(:), INTENT(IN) :: n
2879 TYPE(fft_scratch_sizes), INTENT(IN), &
2880 OPTIONAL :: fft_sizes
2881
2882 CHARACTER(len=*), PARAMETER :: routinen = 'get_fft_scratch'
2883
2884 INTEGER :: coord(2), dim(2), handle, i, ix, iz, lg, lmax, m1, m2, &
2885 mcx2, mcy3, mcz1, mcz2, mg, mmax, mx1, mx2, my1, my3, mz1, mz2, mz3, &
2886 nbx, nbz, nm, nmax, nmray, np, nx, ny, nyzray, nz, pos(2)
2887 INTEGER, DIMENSION(3) :: pcoord
2888 LOGICAL :: equal
2889 LOGICAL, DIMENSION(2) :: dims
2890 TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_current, &
2891 fft_scratch_last, &
2892 fft_scratch_new
2893#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2894 INTEGER :: ierr
2895 INTEGER(KIND=C_SIZE_T) :: length
2896 TYPE(c_ptr) :: cptr_r1buf, cptr_tbuf, &
2897 cptr_p2buf, cptr_p3buf, cptr_p4buf, cptr_p5buf, cptr_p7buf
2898#endif
2899 CALL timeset(routinen, handle)
2900
2901 ! this is the place to check that the scratch_pool does not grow without limits
2902 ! before we add a new scratch check the size of the pool and release some of the list if needed
2903 CALL resize_fft_scratch_pool()
2904
2905 ! get the required scratch
2907 fft_scratch_current => fft_scratch_first
2908 DO
2909 IF (ASSOCIATED(fft_scratch_current)) THEN
2910 IF (fft_scratch_current%fft_scratch%in_use) THEN
2911 fft_scratch_last => fft_scratch_current
2912 fft_scratch_current => fft_scratch_current%fft_scratch_next
2913 cycle
2914 END IF
2915 IF (tf_type /= fft_scratch_current%fft_scratch%tf_type) THEN
2916 fft_scratch_last => fft_scratch_current
2917 fft_scratch_current => fft_scratch_current%fft_scratch_next
2918 cycle
2919 END IF
2920 IF (.NOT. all(n == fft_scratch_current%fft_scratch%nfft)) THEN
2921 fft_scratch_last => fft_scratch_current
2922 fft_scratch_current => fft_scratch_current%fft_scratch_next
2923 cycle
2924 END IF
2925 IF (PRESENT(fft_sizes)) THEN
2926 IF (fft_sizes%gs_group /= fft_scratch_current%fft_scratch%group) THEN
2927 fft_scratch_last => fft_scratch_current
2928 fft_scratch_current => fft_scratch_current%fft_scratch_next
2929 cycle
2930 END IF
2931 CALL is_equal(fft_sizes, fft_scratch_current%fft_scratch%sizes, equal)
2932 IF (.NOT. equal) THEN
2933 fft_scratch_last => fft_scratch_current
2934 fft_scratch_current => fft_scratch_current%fft_scratch_next
2935 cycle
2936 END IF
2937 END IF
2938 ! Success
2939 fft_scratch => fft_scratch_current%fft_scratch
2940 fft_scratch_current%fft_scratch%in_use = .true.
2941 EXIT
2942 ELSE
2943 ! We cannot find the scratch type in this pool
2944 ! Generate a new scratch set
2945 ALLOCATE (fft_scratch_new)
2946 ALLOCATE (fft_scratch_new%fft_scratch)
2947
2948 IF (tf_type .NE. 400) THEN
2949 fft_scratch_new%fft_scratch%sizes = fft_sizes
2950 np = fft_sizes%numtask
2951 ALLOCATE (fft_scratch_new%fft_scratch%scount(0:np - 1), fft_scratch_new%fft_scratch%rcount(0:np - 1), &
2952 fft_scratch_new%fft_scratch%sdispl(0:np - 1), fft_scratch_new%fft_scratch%rdispl(0:np - 1), &
2953 fft_scratch_new%fft_scratch%pgcube(0:np - 1, 2))
2954 END IF
2955
2956 SELECT CASE (tf_type)
2957 CASE DEFAULT
2958 cpabort("")
2959 CASE (100) ! fft3d_pb: full cube distribution
2960 mx1 = fft_sizes%mx1
2961 my1 = fft_sizes%my1
2962 mx2 = fft_sizes%mx2
2963 mz2 = fft_sizes%mz2
2964 my3 = fft_sizes%my3
2965 mz3 = fft_sizes%mz3
2966 CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
2967 CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
2968 CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx2*mz2, n(2)])
2969 CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx2*mz2])
2970 CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
2971 CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
2972 fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
2973
2974 dim = fft_sizes%rs_group%num_pe_cart
2975 pos = fft_sizes%rs_group%mepos_cart
2976 fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
2977 fft_scratch_new%fft_scratch%dim = dim
2978 fft_scratch_new%fft_scratch%pos = pos
2979 mcz1 = fft_sizes%mcz1
2980 mcx2 = fft_sizes%mcx2
2981 mcz2 = fft_sizes%mcz2
2982 mcy3 = fft_sizes%mcy3
2983 ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:dim(2) - 1))
2984 ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:dim(2) - 1))
2985 ALLOCATE (fft_scratch_new%fft_scratch%rbuf3(mx2*mz3*mcy3, 0:dim(1) - 1))
2986 ALLOCATE (fft_scratch_new%fft_scratch%rbuf4(mx2*mz2*mcy3, 0:dim(1) - 1))
2987
2988 dims = (/.true., .false./)
2989 CALL fft_scratch_new%fft_scratch%cart_sub_comm(1)%from_sub(fft_sizes%rs_group, dims)
2990 dims = (/.false., .true./)
2991 CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
2992
2993 !initialise pgcube
2994 DO i = 0, dim(1) - 1
2995 coord = (/i, pos(2)/)
2996 CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 1))
2997 END DO
2998 DO i = 0, dim(2) - 1
2999 coord = (/pos(1), i/)
3000 CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
3001 END DO
3002
3003 !set up fft plans
3004 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, fwfft, .true., n(3), mx1*my1, &
3005 fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a2buf, fft_plan_style)
3006 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, fwfft, .true., n(2), mx2*mz2, &
3007 fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf, fft_plan_style)
3008 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, fwfft, .true., n(1), my3*mz3, &
3009 fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf, fft_plan_style)
3010 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, bwfft, .true., n(1), my3*mz3, &
3011 fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf, fft_plan_style)
3012 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, bwfft, .true., n(2), mx2*mz2, &
3013 fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
3014 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, bwfft, .true., n(3), mx1*my1, &
3015 fft_scratch_new%fft_scratch%a2buf, fft_scratch_new%fft_scratch%a1buf, fft_plan_style)
3016
3017 CASE (101) ! fft3d_pb: full cube distribution (dim 1)
3018 mx1 = fft_sizes%mx1
3019 my1 = fft_sizes%my1
3020 mz1 = fft_sizes%mz1
3021 my3 = fft_sizes%my3
3022 mz3 = fft_sizes%mz3
3023 CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
3024 CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
3025 fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
3026 CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx1*mz1, n(2)])
3027 CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx1*mz1])
3028 CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
3029 CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
3030
3031 dim = fft_sizes%rs_group%num_pe_cart
3032 pos = fft_sizes%rs_group%mepos_cart
3033 fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
3034 fft_scratch_new%fft_scratch%dim = dim
3035 fft_scratch_new%fft_scratch%pos = pos
3036 mcy3 = fft_sizes%mcy3
3037 ALLOCATE (fft_scratch_new%fft_scratch%rbuf5(mx1*mz3*mcy3, 0:dim(1) - 1))
3038 ALLOCATE (fft_scratch_new%fft_scratch%rbuf6(mx1*mz1*mcy3, 0:dim(1) - 1))
3039
3040 !set up fft plans
3041 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, fwfft, .true., n(3), mx1*my1, &
3042 fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
3043 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, fwfft, .true., n(2), mx1*mz1, &
3044 fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf, fft_plan_style)
3045 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, fwfft, .true., n(1), my3*mz3, &
3046 fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf, fft_plan_style)
3047 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, bwfft, .true., n(1), my3*mz3, &
3048 fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf, fft_plan_style)
3049 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, bwfft, .true., n(2), mx1*mz1, &
3050 fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
3051 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, bwfft, .true., n(3), mx1*my1, &
3052 fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a1buf, fft_plan_style)
3053
3054 CASE (200) ! fft3d_ps: plane distribution
3055 nx = fft_sizes%nx
3056 ny = fft_sizes%ny
3057 nz = fft_sizes%nz
3058 mx2 = fft_sizes%mx2
3059 lmax = fft_sizes%lmax
3060 mmax = fft_sizes%mmax
3061 lg = fft_sizes%lg
3062 mg = fft_sizes%mg
3063 np = fft_sizes%numtask
3064 nmray = fft_sizes%nmray
3065 nyzray = fft_sizes%nyzray
3066#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
3067 length = int(2*dp_size*max(mmax, 1)*max(lmax, 1), kind=c_size_t)
3068 ierr = offload_malloc_pinned_mem(cptr_r1buf, length)
3069 cpassert(ierr == 0)
3070 CALL c_f_pointer(cptr_r1buf, fft_scratch_new%fft_scratch%r1buf, (/max(mmax, 1), max(lmax, 1)/))
3071 length = int(2*dp_size*max(ny, 1)*max(nz, 1)*max(nx, 1), kind=c_size_t)
3072 ierr = offload_malloc_pinned_mem(cptr_tbuf, length)
3073 cpassert(ierr == 0)
3074 CALL c_f_pointer(cptr_tbuf, fft_scratch_new%fft_scratch%tbuf, (/max(ny, 1), max(nz, 1), max(nx, 1)/))
3075#else
3076 CALL fft_alloc(fft_scratch_new%fft_scratch%r1buf, [mmax, lmax])
3077 CALL fft_alloc(fft_scratch_new%fft_scratch%tbuf, [ny, nz, nx])
3078#endif
3079 fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
3080 CALL fft_alloc(fft_scratch_new%fft_scratch%r2buf, [lg, mg])
3081 nm = nmray*mx2
3082 IF (alltoall_sgl) THEN
3083 ALLOCATE (fft_scratch_new%fft_scratch%ss(mmax, lmax))
3084 ALLOCATE (fft_scratch_new%fft_scratch%tt(nm, 0:np - 1))
3085 ELSE
3086 ALLOCATE (fft_scratch_new%fft_scratch%rr(nm, 0:np - 1))
3087 END IF
3088
3089 !set up fft plans
3090 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, fwfft, .true., nz, nx*ny, &
3091 fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
3092 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, fwfft, .true., ny, nx*nz, &
3093 fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf, fft_plan_style)
3094 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, fwfft, .true., n(1), nyzray, &
3095 fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%r2buf, fft_plan_style)
3096 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, bwfft, .true., n(1), nyzray, &
3097 fft_scratch_new%fft_scratch%r2buf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
3098 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, bwfft, .true., ny, nx*nz, &
3099 fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
3100 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, bwfft, .true., nz, nx*ny, &
3101 fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf, fft_plan_style)
3102
3103 CASE (300) ! fft3d_ps: block distribution
3104 mx1 = fft_sizes%mx1
3105 mx2 = fft_sizes%mx2
3106 my1 = fft_sizes%my1
3107 mz2 = fft_sizes%mz2
3108 mcx2 = fft_sizes%mcx2
3109 lg = fft_sizes%lg
3110 mg = fft_sizes%mg
3111 nmax = fft_sizes%nmax
3112 nmray = fft_sizes%nmray
3113 nyzray = fft_sizes%nyzray
3114 m1 = fft_sizes%r_dim(1)
3115 m2 = fft_sizes%r_dim(2)
3116 nbx = fft_sizes%nbx
3117 nbz = fft_sizes%nbz
3118 CALL fft_alloc(fft_scratch_new%fft_scratch%p1buf, [mx1*my1, n(3)])
3119 CALL fft_alloc(fft_scratch_new%fft_scratch%p6buf, [lg, mg])
3120#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
3121 length = int(2*dp_size*max(n(3), 1)*max(mx1*my1, 1), kind=c_size_t)
3122 ierr = offload_malloc_pinned_mem(cptr_p2buf, length)
3123 cpassert(ierr == 0)
3124 CALL c_f_pointer(cptr_p2buf, fft_scratch_new%fft_scratch%p2buf, (/max(n(3), 1), max(mx1*my1, 1)/))
3125 length = int(2*dp_size*max(mx2*mz2, 1)*max(n(2), 1), kind=c_size_t)
3126 ierr = offload_malloc_pinned_mem(cptr_p3buf, length)
3127 cpassert(ierr == 0)
3128 CALL c_f_pointer(cptr_p3buf, fft_scratch_new%fft_scratch%p3buf, (/max(mx2*mz2, 1), max(n(2), 1)/))
3129 length = int(2*dp_size*max(n(2), 1)*max(mx2*mz2, 1), kind=c_size_t)
3130 ierr = offload_malloc_pinned_mem(cptr_p4buf, length)
3131 cpassert(ierr == 0)
3132 CALL c_f_pointer(cptr_p4buf, fft_scratch_new%fft_scratch%p4buf, (/max(n(2), 1), max(mx2*mz2, 1)/))
3133 length = int(2*dp_size*max(nyzray, 1)*max(n(1), 1), kind=c_size_t)
3134 ierr = offload_malloc_pinned_mem(cptr_p5buf, length)
3135 cpassert(ierr == 0)
3136 CALL c_f_pointer(cptr_p5buf, fft_scratch_new%fft_scratch%p5buf, (/max(nyzray, 1), max(n(1), 1)/))
3137 length = int(2*dp_size*max(mg, 1)*max(lg, 1), kind=c_size_t)
3138 ierr = offload_malloc_pinned_mem(cptr_p7buf, length)
3139 cpassert(ierr == 0)
3140 CALL c_f_pointer(cptr_p7buf, fft_scratch_new%fft_scratch%p7buf, (/max(mg, 1), max(lg, 1)/))
3141#else
3142 CALL fft_alloc(fft_scratch_new%fft_scratch%p2buf, [n(3), mx1*my1])
3143 CALL fft_alloc(fft_scratch_new%fft_scratch%p3buf, [mx2*mz2, n(2)])
3144 CALL fft_alloc(fft_scratch_new%fft_scratch%p4buf, [n(2), mx2*mz2])
3145 CALL fft_alloc(fft_scratch_new%fft_scratch%p5buf, [nyzray, n(1)])
3146 CALL fft_alloc(fft_scratch_new%fft_scratch%p7buf, [mg, lg])
3147#endif
3148 IF (alltoall_sgl) THEN
3149 ALLOCATE (fft_scratch_new%fft_scratch%yzbuf_sgl(mg*lg))
3150 ALLOCATE (fft_scratch_new%fft_scratch%xzbuf_sgl(n(2)*mx2*mz2))
3151 ELSE
3152 ALLOCATE (fft_scratch_new%fft_scratch%yzbuf(mg*lg))
3153 ALLOCATE (fft_scratch_new%fft_scratch%xzbuf(n(2)*mx2*mz2))
3154 END IF
3155 ALLOCATE (fft_scratch_new%fft_scratch%pgrid(0:m1 - 1, 0:m2 - 1))
3156 ALLOCATE (fft_scratch_new%fft_scratch%xcor(nbx))
3157 ALLOCATE (fft_scratch_new%fft_scratch%zcor(nbz))
3158 ALLOCATE (fft_scratch_new%fft_scratch%pzcoord(0:np - 1))
3159 ALLOCATE (fft_scratch_new%fft_scratch%xzcount(0:np - 1), &
3160 fft_scratch_new%fft_scratch%yzcount(0:np - 1))
3161 ALLOCATE (fft_scratch_new%fft_scratch%xzdispl(0:np - 1), &
3162 fft_scratch_new%fft_scratch%yzdispl(0:np - 1))
3163 fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
3164
3165 dim = fft_sizes%rs_group%num_pe_cart
3166 pos = fft_sizes%rs_group%mepos_cart
3167 fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
3168 fft_scratch_new%fft_scratch%dim = dim
3169 fft_scratch_new%fft_scratch%pos = pos
3170 mcz1 = fft_sizes%mcz1
3171 mcz2 = fft_sizes%mcz2
3172 ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:dim(2) - 1))
3173 ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:dim(2) - 1))
3174
3175 dims = (/.false., .true./)
3176 CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
3177
3178 !initialise pgcube
3179 DO i = 0, dim(2) - 1
3180 coord = (/pos(1), i/)
3181 CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
3182 END DO
3183
3184 !initialise pgrid
3185 DO ix = 0, m1 - 1
3186 DO iz = 0, m2 - 1
3187 coord = (/ix, iz/)
3188 CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgrid(ix, iz))
3189 END DO
3190 END DO
3191
3192 !initialise pzcoord
3193 DO i = 0, np - 1
3194 CALL fft_sizes%rs_group%coords(i, pcoord)
3195 fft_scratch_new%fft_scratch%pzcoord(i) = pcoord(2)
3196 END DO
3197
3198 !set up fft plans
3199 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, fwfft, .true., n(3), mx1*my1, &
3200 fft_scratch_new%fft_scratch%p1buf, fft_scratch_new%fft_scratch%p2buf, fft_plan_style)
3201 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, fwfft, .true., n(2), mx2*mz2, &
3202 fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p4buf, fft_plan_style)
3203 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, fwfft, .true., n(1), nyzray, &
3204 fft_scratch_new%fft_scratch%p5buf, fft_scratch_new%fft_scratch%p6buf, fft_plan_style)
3205 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, bwfft, .true., n(1), nyzray, &
3206 fft_scratch_new%fft_scratch%p6buf, fft_scratch_new%fft_scratch%p7buf, fft_plan_style)
3207 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, bwfft, .true., n(2), mx2*mz2, &
3208 fft_scratch_new%fft_scratch%p4buf, fft_scratch_new%fft_scratch%p3buf, fft_plan_style)
3209 CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, bwfft, .true., n(3), mx1*my1, &
3210 fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p1buf, fft_plan_style)
3211
3212 CASE (400) ! serial FFT
3213 np = 0
3214 CALL fft_alloc(fft_scratch_new%fft_scratch%ziptr, n)
3215 CALL fft_alloc(fft_scratch_new%fft_scratch%zoptr, n)
3216
3217 !in place plans
3218 CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, .true., fwfft, n, &
3219 fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr, fft_plan_style)
3220 CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, .true., bwfft, n, &
3221 fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr, fft_plan_style)
3222 ! out of place plans
3223 CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, .false., fwfft, n, &
3224 fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr, fft_plan_style)
3225 CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, .false., bwfft, n, &
3226 fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr, fft_plan_style)
3227
3228 END SELECT
3229
3230 NULLIFY (fft_scratch_new%fft_scratch_next)
3231 fft_scratch_new%fft_scratch%fft_scratch_id = &
3232 fft_scratch_last%fft_scratch%fft_scratch_id + 1
3233 fft_scratch_new%fft_scratch%in_use = .true.
3234 fft_scratch_new%fft_scratch%nfft = n
3235 fft_scratch_last%fft_scratch_next => fft_scratch_new
3236 fft_scratch_new%fft_scratch%tf_type = tf_type
3237 fft_scratch => fft_scratch_new%fft_scratch
3238 EXIT
3239
3240 END IF
3241 END DO
3242
3243 fft_scratch%last_tick = tick_fft_pool
3244
3245 CALL timestop(handle)
3246
3247 END SUBROUTINE get_fft_scratch
3248
3249! **************************************************************************************************
3250!> \brief ...
3251!> \param fft_scratch ...
3252! **************************************************************************************************
3253 SUBROUTINE release_fft_scratch(fft_scratch)
3254
3255 TYPE(fft_scratch_type), POINTER :: fft_scratch
3256
3257 INTEGER :: scratch_id
3258 TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_current
3259
3260 scratch_id = fft_scratch%fft_scratch_id
3261
3262 fft_scratch_current => fft_scratch_first
3263 DO
3264 IF (ASSOCIATED(fft_scratch_current)) THEN
3265 IF (scratch_id == fft_scratch_current%fft_scratch%fft_scratch_id) THEN
3266 fft_scratch%in_use = .false.
3267 NULLIFY (fft_scratch)
3268 EXIT
3269 END IF
3270 fft_scratch_current => fft_scratch_current%fft_scratch_next
3271 ELSE
3272 ! We cannot find the scratch type in this pool
3273 cpabort("")
3274 EXIT
3275 END IF
3276 END DO
3277
3278 END SUBROUTINE release_fft_scratch
3279
3280! **************************************************************************************************
3281!> \brief ...
3282!> \param rs ...
3283!> \param scount ...
3284!> \param sdispl ...
3285!> \param rq ...
3286!> \param rcount ...
3287!> \param rdispl ...
3288!> \param group ...
3289! **************************************************************************************************
3290 SUBROUTINE sparse_alltoall(rs, scount, sdispl, rq, rcount, rdispl, group)
3291 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: rs
3292 INTEGER, DIMENSION(:), POINTER :: scount, sdispl
3293 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: rq
3294 INTEGER, DIMENSION(:), POINTER :: rcount, rdispl
3295
3296 CLASS(mp_comm_type), INTENT(IN) :: group
3297
3298 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: msgin, msgout
3299 INTEGER :: ip, n, nr, ns, pos
3300 TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: rreq, sreq
3301
3302 CALL group%sync()
3303 n = group%num_pe
3304 pos = group%mepos
3305 ALLOCATE (sreq(0:n - 1))
3306 ALLOCATE (rreq(0:n - 1))
3307 nr = 0
3308 DO ip = 0, n - 1
3309 IF (rcount(ip) == 0) cycle
3310 IF (ip == pos) cycle
3311 msgout => rq(rdispl(ip) + 1:rdispl(ip) + rcount(ip))
3312 CALL group%irecv(msgout, ip, rreq(nr))
3313 nr = nr + 1
3314 END DO
3315 ns = 0
3316 DO ip = 0, n - 1
3317 IF (scount(ip) == 0) cycle
3318 IF (ip == pos) cycle
3319 msgin => rs(sdispl(ip) + 1:sdispl(ip) + scount(ip))
3320 CALL group%isend(msgin, ip, sreq(ns))
3321 ns = ns + 1
3322 END DO
3323 IF (rcount(pos) /= 0) THEN
3324 IF (rcount(pos) /= scount(pos)) cpabort("")
3325 rq(rdispl(pos) + 1:rdispl(pos) + rcount(pos)) = rs(sdispl(pos) + 1:sdispl(pos) + scount(pos))
3326 END IF
3327 CALL mp_waitall(sreq(0:ns - 1))
3328 CALL mp_waitall(rreq(0:nr - 1))
3329 DEALLOCATE (sreq)
3330 DEALLOCATE (rreq)
3331 CALL group%sync()
3332
3333 END SUBROUTINE sparse_alltoall
3334
3335! **************************************************************************************************
3336!> \brief test data structures for equality. It is assumed that if they are
3337!> different for one mpi task they are different for all (??)
3338!> \param fft_size_1 ...
3339!> \param fft_size_2 ...
3340!> \param equal ...
3341! **************************************************************************************************
3342 SUBROUTINE is_equal(fft_size_1, fft_size_2, equal)
3343 TYPE(fft_scratch_sizes) :: fft_size_1, fft_size_2
3344 LOGICAL :: equal
3345
3346 equal = .true.
3347
3348 equal = equal .AND. fft_size_1%nx == fft_size_2%nx
3349 equal = equal .AND. fft_size_1%ny == fft_size_2%ny
3350 equal = equal .AND. fft_size_1%nz == fft_size_2%nz
3351
3352 equal = equal .AND. fft_size_1%lmax == fft_size_2%lmax
3353 equal = equal .AND. fft_size_1%mmax == fft_size_2%mmax
3354 equal = equal .AND. fft_size_1%nmax == fft_size_2%nmax
3355
3356 equal = equal .AND. fft_size_1%mx1 == fft_size_2%mx1
3357 equal = equal .AND. fft_size_1%mx2 == fft_size_2%mx2
3358 equal = equal .AND. fft_size_1%mx3 == fft_size_2%mx3
3359
3360 equal = equal .AND. fft_size_1%my1 == fft_size_2%my1
3361 equal = equal .AND. fft_size_1%my2 == fft_size_2%my2
3362 equal = equal .AND. fft_size_1%my3 == fft_size_2%my3
3363
3364 equal = equal .AND. fft_size_1%mcz1 == fft_size_2%mcz1
3365 equal = equal .AND. fft_size_1%mcx2 == fft_size_2%mcx2
3366 equal = equal .AND. fft_size_1%mcz2 == fft_size_2%mcz2
3367 equal = equal .AND. fft_size_1%mcy3 == fft_size_2%mcy3
3368
3369 equal = equal .AND. fft_size_1%lg == fft_size_2%lg
3370 equal = equal .AND. fft_size_1%mg == fft_size_2%mg
3371
3372 equal = equal .AND. fft_size_1%nbx == fft_size_2%nbx
3373 equal = equal .AND. fft_size_1%nbz == fft_size_2%nbz
3374
3375 equal = equal .AND. fft_size_1%nmray == fft_size_2%nmray
3376 equal = equal .AND. fft_size_1%nyzray == fft_size_2%nyzray
3377
3378 equal = equal .AND. fft_size_1%gs_group == fft_size_2%gs_group
3379 equal = equal .AND. fft_size_1%rs_group == fft_size_2%rs_group
3380
3381 equal = equal .AND. all(fft_size_1%g_pos == fft_size_2%g_pos)
3382 equal = equal .AND. all(fft_size_1%r_pos == fft_size_2%r_pos)
3383 equal = equal .AND. all(fft_size_1%r_dim == fft_size_2%r_dim)
3384
3385 equal = equal .AND. fft_size_1%numtask == fft_size_2%numtask
3386
3387 END SUBROUTINE is_equal
3388
3389END MODULE fft_tools
various routines to log and control the output. The idea is that decisions about where to log should ...
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
subroutine, public fft_do_init(fft_type, wisdom_file)
...
Definition fft_lib.F:63
subroutine, public fft_3d(plan, scale, zin, zout, stat)
...
Definition fft_lib.F:172
subroutine, public fft_create_plan_1dm(plan, fft_type, fsign, trans, n, m, zin, zout, plan_style)
...
Definition fft_lib.F:212
subroutine, public fft_destroy_plan(plan)
...
Definition fft_lib.F:241
integer function, public fft_library(fftlib)
Interface to FFT libraries.
Definition fft_lib.F:42
subroutine, public fft_1dm(plan, zin, zout, scale, stat)
...
Definition fft_lib.F:261
subroutine, public fft_get_lengths(fft_type, data, max_length)
...
Definition fft_lib.F:106
subroutine, public fft_create_plan_3d(plan, fft_type, fft_in_place, fsign, n, zin, zout, plan_style)
...
Definition fft_lib.F:136
subroutine, public fft_do_cleanup(fft_type, wisdom_file, ionode)
...
Definition fft_lib.F:84
Type to store data about a (1D or 3D) FFT, including FFTW plan.
Definition fft_plan.F:18
subroutine, public fft_radix_operations(radix_in, radix_out, operation)
Determine the allowed lengths of FFT's '''.
Definition fft_tools.F:239
integer, save init_fft_pool
Definition fft_tools.F:129
subroutine, public yz_to_x(tb, group, my_pos, p2p, yzp, nray, bo, sb, fft_scratch)
...
Definition fft_tools.F:1497
integer, parameter, public bwfft
Definition fft_tools.F:145
subroutine, public yz_to_xz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
...
Definition fft_tools.F:1613
integer, save, public fft_plan_style
Definition fft_tools.F:156
subroutine, public cube_transpose_1(cin, boin, boout, sout, fft_scratch)
...
Definition fft_tools.F:2017
integer, parameter, public fft_radix_closest
Definition fft_tools.F:146
subroutine, public fft_fw1d(n, m, trans, zin, zout, scale, stat)
Performs m 1-D forward FFT-s of size n.
Definition fft_tools.F:331
subroutine, public release_fft_scratch(fft_scratch)
...
Definition fft_tools.F:3254
subroutine, public x_to_yz(sb, group, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
...
Definition fft_tools.F:1382
integer, save tick_fft_pool
Definition fft_tools.F:131
subroutine, public init_fft(fftlib, alltoall, fftsg_sizes, pool_limit, wisdom_file, plan_style)
...
Definition fft_tools.F:185
subroutine, public get_fft_scratch(fft_scratch, tf_type, n, fft_sizes)
...
Definition fft_tools.F:2876
integer, parameter, public fwfft
Definition fft_tools.F:145
integer, save, public fft_type
Definition fft_tools.F:153
integer, parameter, public fft_radix_next_odd
Definition fft_tools.F:148
subroutine, public xz_to_yz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
...
Definition fft_tools.F:1823
subroutine, public cube_transpose_2(cin, boin, boout, sout, fft_scratch)
...
Definition fft_tools.F:2107
type(fft_scratch_pool_type), pointer, save fft_scratch_first
Definition fft_tools.F:134
integer, save fft_pool_scratch_limit
Definition fft_tools.F:133
integer, parameter, public fft_radix_next
Definition fft_tools.F:146
subroutine, public finalize_fft(para_env, wisdom_file)
does whatever is needed to finalize the current fft setup
Definition fft_tools.F:216
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp_size
Definition kinds.F:36
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public sp
Definition kinds.F:33
Definition of mathematical constants and functions.
complex(kind=dp), parameter, public z_zero
Interface to the message passing library MPI.
type(mp_comm_type), parameter, public mp_comm_null
Fortran API for the offload package, which is written in C.
Definition offload_api.F:12
integer function, public offload_free_pinned_mem(buffer)
free pinned memory
Definition offload_api.F:75
integer function, public offload_malloc_pinned_mem(buffer, length)
allocate pinned memory.
Definition offload_api.F:52