(git:b195825)
fft_tools.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \par History
10 !> JGH (30-Nov-2000): ESSL FFT Library added
11 !> JGH (05-Jan-2001): Added SGI library FFT
12 !> JGH (14-Jan-2001): Added parallel 3d FFT
13 !> JGH (10-Feb-2006): New interface type
14 !> JGH (31-Mar-2008): Remove local allocates and reshapes (performance)
15 !> Possible problems can be related with setting arrays
16 !> not to zero
17 !> Some interfaces could be further simplified by avoiding
18 !> an initial copy. However, this assumes contiguous arrays
19 !> IAB (15-Oct-2008): Moved mp_cart_sub calls out of cube_tranpose_* and into
20 !> fft_scratch type, reducing number of calls dramatically
21 !> IAB (05-Dec-2008): Moved all other non-essential MPI calls into scratch type
22 !> IAB (09-Jan-2009): Added fft_plan_type to store FFT data, including cached FFTW plans
23 !> IAB (13-Feb-2009): Extended plan caching to serial 3D FFT (fft3d_s)
24 !> IAB (09-Oct-2009): Added OpenMP directives to parallel 3D FFT
25 !> (c) The Numerical Algorithms Group (NAG) Ltd, 2008-2009 on behalf of the HECToR project
26 !> \author JGH
27 ! **************************************************************************************************
28 MODULE fft_tools
29  USE iso_c_binding, ONLY: c_f_pointer,&
30  c_loc,&
31  c_ptr,&
32  c_size_t
34  USE fft_lib, ONLY: &
35  fft_1dm, fft_3d, fft_alloc, fft_create_plan_1dm, fft_create_plan_3d, fft_dealloc, &
37  USE fft_plan, ONLY: fft_plan_type
38  USE kinds, ONLY: dp,&
39  dp_size,&
40  sp
41  USE mathconstants, ONLY: z_zero
42  USE message_passing, ONLY: mp_cart_type,&
43  mp_comm_null,&
44  mp_comm_type,&
45  mp_request_type,&
46  mp_waitall
49 
50 !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
51 
52 #include "../base/base_uses.f90"
53 
54  IMPLICIT NONE
55 
56  CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'fft_tools'
57 
58  ! Types for the pool of scratch data needed in FFT routines
59  ! keep the subroutine "is_equal" up-to-date
60  ! needs a default initialization
62  INTEGER :: nx = 0, ny = 0, nz = 0
63  INTEGER :: lmax = 0, mmax = 0, nmax = 0
64  INTEGER :: mx1 = 0, mx2 = 0, mx3 = 0
65  INTEGER :: my1 = 0, my2 = 0, my3 = 0
66  INTEGER :: mz1 = 0, mz2 = 0, mz3 = 0
67  INTEGER :: mcz1 = 0, mcz2 = 0, mcy3 = 0, mcx2 = 0
68  INTEGER :: lg = 0, mg = 0
69  INTEGER :: nbx = 0, nbz = 0
70  INTEGER :: nmray = 0, nyzray = 0
71  TYPE(mp_comm_type) :: gs_group = mp_comm_type()
72  TYPE(mp_cart_type) :: rs_group = mp_cart_type()
73  INTEGER, DIMENSION(2) :: g_pos = 0, r_pos = 0, r_dim = 0
74  INTEGER :: numtask = 0
75  END TYPE fft_scratch_sizes
76 
78  INTEGER :: fft_scratch_id = 0
79  INTEGER :: tf_type = -1
80  LOGICAL :: in_use = .true.
81  TYPE(mp_comm_type) :: group = mp_comm_type()
82  INTEGER, DIMENSION(3) :: nfft = -1
83  ! to be used in cube_transpose_* routines
84  TYPE(mp_cart_type), DIMENSION(2) :: cart_sub_comm = mp_cart_type()
85  INTEGER, DIMENSION(2) :: dim = -1, pos = -1
86  ! to be used in fft3d_s
87  COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER, CONTIGUOUS &
88  :: ziptr => null(), zoptr => null()
89  ! to be used in fft3d_ps : block distribution
90  COMPLEX(KIND=dp), DIMENSION(:, :), CONTIGUOUS, POINTER &
91  :: p1buf => null(), p2buf => null(), p3buf => null(), p4buf => null(), &
92  p5buf => null(), p6buf => null(), p7buf => null()
93  ! to be used in fft3d_ps : plane distribution
94  COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
95  :: r1buf => null(), r2buf => null()
96  COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER, CONTIGUOUS &
97  :: tbuf => null()
98  ! to be used in fft3d_pb
99  COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
100  :: a1buf => null(), a2buf => null(), a3buf => null(), &
101  a4buf => null(), a5buf => null(), a6buf => null()
102  ! to be used in communication routines
103  INTEGER, DIMENSION(:), CONTIGUOUS, POINTER :: scount => null(), rcount => null(), sdispl => null(), rdispl => null()
104  INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER :: pgcube => null()
105  INTEGER, DIMENSION(:), CONTIGUOUS, POINTER :: xzcount => null(), yzcount => null(), xzdispl => null(), yzdispl => null()
106  INTEGER :: in = 0, mip = -1
107  REAL(kind=dp) :: rsratio = 1.0_dp
108  COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS &
109  :: xzbuf => null(), yzbuf => null()
110  COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS &
111  :: xzbuf_sgl => null(), yzbuf_sgl => null()
112  COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
113  :: rbuf1 => null(), rbuf2 => null(), rbuf3 => null(), rbuf4 => null(), &
114  rbuf5 => null(), rbuf6 => null(), rr => null()
115  COMPLEX(KIND=sp), DIMENSION(:, :), POINTER, CONTIGUOUS &
116  :: ss => null(), tt => null()
117  INTEGER, DIMENSION(:, :), POINTER, CONTIGUOUS :: pgrid => null()
118  INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: xcor => null(), zcor => null(), pzcoord => null()
120  TYPE(fft_plan_type), DIMENSION(6) :: fft_plan = fft_plan_type()
121  INTEGER :: last_tick = -1
122  END TYPE fft_scratch_type
123 
125  TYPE(fft_scratch_type), POINTER :: fft_scratch => null()
126  TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_next => null()
127  END TYPE fft_scratch_pool_type
128 
129  INTEGER, SAVE :: init_fft_pool = 0
130  ! the clock for fft pool. Allows to identify the least recently used scratch
131  INTEGER, SAVE :: tick_fft_pool = 0
132  ! limit the number of scratch pools to fft_pool_scratch_limit.
133  INTEGER, SAVE :: fft_pool_scratch_limit = 15
135  ! END of types for the pool of scratch data needed in FFT routines
136 
137  PRIVATE
138  PUBLIC :: init_fft, fft3d, finalize_fft
139  PUBLIC :: fft_alloc, fft_dealloc
140  PUBLIC :: fft_radix_operations, fft_fw1d
141  PUBLIC :: fwfft, bwfft
143  PUBLIC :: fft_radix_next_odd
144 
145  INTEGER, PARAMETER :: fwfft = +1, bwfft = -1
146  INTEGER, PARAMETER :: fft_radix_closest = 493, fft_radix_next = 494
147  INTEGER, PARAMETER :: fft_radix_allowed = 495, fft_radix_disallowed = 496
148  INTEGER, PARAMETER :: fft_radix_next_odd = 497
149 
150  REAL(kind=dp), PARAMETER :: ratio_sparse_alltoall = 0.5_dp
151 
152  ! these saved variables are FFT globals
153  INTEGER, SAVE :: fft_type = 0
154  LOGICAL, SAVE :: alltoall_sgl = .false.
155  LOGICAL, SAVE :: use_fftsg_sizes = .true.
156  INTEGER, SAVE :: fft_plan_style = 1
157 
158  ! these are only needed for pw_gpu (-D__OFFLOAD)
161  PUBLIC :: yz_to_x, x_to_yz, xz_to_yz, yz_to_xz
163  PUBLIC :: fft_type, fft_plan_style
164 
165  INTERFACE fft3d
166  MODULE PROCEDURE fft3d_s, fft3d_ps, fft3d_pb
167  END INTERFACE
168 
169 ! **************************************************************************************************
170 
171 CONTAINS
172 
173 ! **************************************************************************************************
174 !> \brief ...
175 !> \param fftlib ...
176 !> \param alltoall ...
177 !> \param fftsg_sizes ...
178 !> \param pool_limit ...
179 !> \param wisdom_file ...
180 !> \param plan_style ...
181 !> \author JGH
182 ! **************************************************************************************************
183  SUBROUTINE init_fft(fftlib, alltoall, fftsg_sizes, pool_limit, wisdom_file, &
184  plan_style)
185 
186  CHARACTER(LEN=*), INTENT(IN) :: fftlib
187  LOGICAL, INTENT(IN) :: alltoall, fftsg_sizes
188  INTEGER, INTENT(IN) :: pool_limit
189  CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
190  INTEGER, INTENT(IN) :: plan_style
191 
192  use_fftsg_sizes = fftsg_sizes
193  alltoall_sgl = alltoall
194  fft_pool_scratch_limit = pool_limit
195  fft_type = fft_library(fftlib)
196  fft_plan_style = plan_style
197 
198  IF (fft_type <= 0) cpabort("Unknown FFT library: "//trim(fftlib))
199 
200  CALL fft_do_init(fft_type, wisdom_file)
201 
202  ! setup the FFT scratch pool, if one is associated, clear first
203  CALL release_fft_scratch_pool()
204  CALL init_fft_scratch_pool()
205 
206  END SUBROUTINE init_fft
207 
208 ! **************************************************************************************************
209 !> \brief does whatever is needed to finalize the current fft setup
210 !> \param para_env ...
211 !> \param wisdom_file ...
212 !> \par History
213 !> 10.2007 created [Joost VandeVondele]
214 ! **************************************************************************************************
215  SUBROUTINE finalize_fft(para_env, wisdom_file)
216  CLASS(mp_comm_type) :: para_env
217  CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
218 
219 ! release the FFT scratch pool
220 
221  CALL release_fft_scratch_pool()
222 
223  ! finalize fft libs
224 
225  CALL fft_do_cleanup(fft_type, wisdom_file, para_env%is_source())
226 
227  END SUBROUTINE finalize_fft
228 
229 ! **************************************************************************************************
230 !> \brief Determine the allowed lengths of FFT's '''
231 !> \param radix_in ...
232 !> \param radix_out ...
233 !> \param operation ...
234 !> \par History
235 !> new library structure (JGH)
236 !> \author Ari Seitsonen
237 ! **************************************************************************************************
238  SUBROUTINE fft_radix_operations(radix_in, radix_out, operation)
239 
240  INTEGER, INTENT(IN) :: radix_in
241  INTEGER, INTENT(OUT) :: radix_out
242  INTEGER, INTENT(IN) :: operation
243 
244  INTEGER, PARAMETER :: fft_type_sg = 1
245 
246  INTEGER :: i, iloc, ldata
247  INTEGER, ALLOCATABLE, DIMENSION(:) :: data
248 
249  ldata = 1024
250  ALLOCATE (DATA(ldata))
251  DATA = -1
252 
253  ! if the user wants to use fftsg sizes go for it
254  IF (use_fftsg_sizes) THEN
255  CALL fft_get_lengths(fft_type_sg, DATA, ldata)
256  ELSE
257  CALL fft_get_lengths(fft_type, DATA, ldata)
258  END IF
259 
260  iloc = 0
261  DO i = 1, ldata
262  IF (DATA(i) == radix_in) THEN
263  iloc = i
264  EXIT
265  ELSE
266  IF (operation == fft_radix_allowed) THEN
267  cycle
268  ELSE IF (DATA(i) > radix_in) THEN
269  iloc = i
270  EXIT
271  END IF
272  END IF
273  END DO
274 
275  IF (iloc == 0) THEN
276  cpabort("Index to radix array not found.")
277  END IF
278 
279  IF (operation == fft_radix_allowed) THEN
280  IF (DATA(iloc) == radix_in) THEN
281  radix_out = fft_radix_allowed
282  ELSE
283  radix_out = fft_radix_disallowed
284  END IF
285 
286  ELSE IF (operation == fft_radix_closest) THEN
287  IF (DATA(iloc) == radix_in) THEN
288  radix_out = DATA(iloc)
289  ELSE
290  IF (abs(DATA(iloc - 1) - radix_in) <= &
291  abs(DATA(iloc) - radix_in)) THEN
292  radix_out = DATA(iloc - 1)
293  ELSE
294  radix_out = DATA(iloc)
295  END IF
296  END IF
297 
298  ELSE IF (operation == fft_radix_next) THEN
299  radix_out = DATA(iloc)
300 
301  ELSE IF (operation == fft_radix_next_odd) THEN
302  DO i = iloc, ldata
303  IF (mod(DATA(i), 2) == 1) THEN
304  radix_out = DATA(i)
305  EXIT
306  END IF
307  END DO
308  IF (mod(radix_out, 2) == 0) THEN
309  cpabort("No odd radix found.")
310  END IF
311 
312  ELSE
313  cpabort("Disallowed radix operation.")
314  END IF
315 
316  DEALLOCATE (data)
317 
318  END SUBROUTINE fft_radix_operations
319 
320 ! **************************************************************************************************
321 !> \brief Performs m 1-D forward FFT-s of size n.
322 !> \param n size of the FFT
323 !> \param m number of FFT-s
324 !> \param trans shape of input and output arrays: [n x m] if it is FALSE, [m x n] if it is TRUE
325 !> \param zin input array
326 !> \param zout output array
327 !> \param scale scaling factor
328 !> \param stat status of the operation, non-zero code indicates an error
329 ! **************************************************************************************************
330  SUBROUTINE fft_fw1d(n, m, trans, zin, zout, scale, stat)
331  INTEGER, INTENT(in) :: n, m
332  LOGICAL, INTENT(in) :: trans
333  COMPLEX(kind=dp), DIMENSION(*), INTENT(inout) :: zin, zout
334  REAL(kind=dp), INTENT(in) :: scale
335  INTEGER, INTENT(out) :: stat
336 
337  CHARACTER(len=*), PARAMETER :: routinen = 'fft_fw1d'
338 
339  INTEGER :: handle
340  TYPE(fft_plan_type) :: fft_plan
341 
342  CALL timeset(routinen, handle)
343 
344  IF (fft_type == 3) THEN
345  CALL fft_create_plan_1dm(fft_plan, fft_type, fwfft, trans, n, m, zin, zout, fft_plan_style)
346  CALL fft_1dm(fft_plan, zin, zout, scale, stat)
348  ELSE
349  CALL cp_warn(__location__, &
350  "FFT library in use cannot handle transformation of an arbitrary length.")
351  stat = 1
352  END IF
353 
354  CALL timestop(handle)
355  END SUBROUTINE fft_fw1d
356 
357 ! **************************************************************************************************
358 !> \brief Calls the 3D-FFT function from the initialized library
359 !> \param fsign ...
360 !> \param n ...
361 !> \param zin ...
362 !> \param zout ...
363 !> \param scale ...
364 !> \param status ...
365 !> \param debug ...
366 !> \par History
367 !> none
368 !> \author JGH
369 ! **************************************************************************************************
370  SUBROUTINE fft3d_s(fsign, n, zin, zout, scale, status, debug)
371 
372  INTEGER, INTENT(IN) :: fsign
373  INTEGER, DIMENSION(:), INTENT(INOUT) :: n
374  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
375  INTENT(INOUT) :: zin
376  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
377  INTENT(INOUT), OPTIONAL, TARGET :: zout
378  REAL(kind=dp), INTENT(IN), OPTIONAL :: scale
379  INTEGER, INTENT(OUT), OPTIONAL :: status
380  LOGICAL, INTENT(IN), OPTIONAL :: debug
381 
382  CHARACTER(len=*), PARAMETER :: routinen = 'fft3d_s'
383 
384  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
385  POINTER :: zoptr
386  COMPLEX(KIND=dp), DIMENSION(1, 1, 1), TARGET :: zdum
387  INTEGER :: handle, ld(3), lo(3), output_unit, sign, &
388  stat
389  LOGICAL :: fft_in_place, test
390  REAL(kind=dp) :: in_sum, norm, out_sum
391  TYPE(fft_scratch_type), POINTER :: fft_scratch
392 
393  CALL timeset(routinen, handle)
394  output_unit = cp_logger_get_default_io_unit()
395 
396  IF (PRESENT(scale)) THEN
397  norm = scale
398  ELSE
399  norm = 1.0_dp
400  END IF
401 
402  IF (PRESENT(debug)) THEN
403  test = debug
404  ELSE
405  test = .false.
406  END IF
407 
408  IF (PRESENT(zout)) THEN
409  fft_in_place = .false.
410  ELSE
411  fft_in_place = .true.
412  END IF
413 
414  IF (test) THEN
415  in_sum = sum(abs(zin))
416  END IF
417 
418  ld(1) = SIZE(zin, 1)
419  ld(2) = SIZE(zin, 2)
420  ld(3) = SIZE(zin, 3)
421 
422  IF (n(1) /= ld(1) .OR. n(2) /= ld(2) .OR. n(3) /= ld(3)) THEN
423  cpabort("Size and dimension (zin) have to be the same.")
424  END IF
425 
426  sign = fsign
427  CALL get_fft_scratch(fft_scratch, tf_type=400, n=n)
428 
429  IF (fft_in_place) THEN
430  zoptr => zdum
431  IF (fsign == fwfft) THEN
432  CALL fft_3d(fft_scratch%fft_plan(1), norm, zin, zoptr, stat)
433  ELSE
434  CALL fft_3d(fft_scratch%fft_plan(2), norm, zin, zoptr, stat)
435  END IF
436  ELSE
437  IF (fsign == fwfft) THEN
438  CALL fft_3d(fft_scratch%fft_plan(3), norm, zin, zout, stat)
439  ELSE
440  CALL fft_3d(fft_scratch%fft_plan(4), norm, zin, zout, stat)
441  END IF
442  END IF
443 
444  CALL release_fft_scratch(fft_scratch)
445 
446  IF (PRESENT(zout)) THEN
447  lo(1) = SIZE(zout, 1)
448  lo(2) = SIZE(zout, 2)
449  lo(3) = SIZE(zout, 3)
450  IF (n(1) /= lo(1) .OR. n(2) /= lo(2) .OR. n(3) /= lo(3)) THEN
451  cpabort("Size and dimension (zout) have to be the same.")
452  END IF
453  END IF
454 
455  IF (PRESENT(status)) THEN
456  status = stat
457  END IF
458 
459  IF (test .AND. output_unit > 0) THEN
460  IF (PRESENT(zout)) THEN
461  out_sum = sum(abs(zout))
462  WRITE (output_unit, '(A)') " Out of place 3D FFT (local) : fft3d_s"
463  WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
464  WRITE (output_unit, '(A,T60,3I7)') " Input array dimensions ", ld
465  WRITE (output_unit, '(A,T60,3I7)') " Output array dimensions ", lo
466  WRITE (output_unit, '(A,T61,E20.14)') " Sum of input data ", in_sum
467  WRITE (output_unit, '(A,T61,E20.14)') " Sum of output data ", out_sum
468  ELSE
469  out_sum = sum(abs(zin))
470  WRITE (output_unit, '(A)') " In place 3D FFT (local) : fft3d_s"
471  WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
472  WRITE (output_unit, '(A,T60,3I7)') " Input/output array dimensions ", ld
473  WRITE (output_unit, '(A,T61,E20.14)') " Sum of input data ", in_sum
474  WRITE (output_unit, '(A,T61,E20.14)') " Sum of output data ", out_sum
475  END IF
476  END IF
477 
478  CALL timestop(handle)
479 
480  END SUBROUTINE fft3d_s
481 
482 ! **************************************************************************************************
483 !> \brief ...
484 !> \param fsign ...
485 !> \param n ...
486 !> \param cin ...
487 !> \param gin ...
488 !> \param gs_group ...
489 !> \param rs_group ...
490 !> \param yzp ...
491 !> \param nyzray ...
492 !> \param bo ...
493 !> \param scale ...
494 !> \param status ...
495 !> \param debug ...
496 ! **************************************************************************************************
497  SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, &
498  bo, scale, status, debug)
499 
500  INTEGER, INTENT(IN) :: fsign
501  INTEGER, DIMENSION(:), INTENT(IN) :: n
502  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
503  INTENT(INOUT) :: cin
504  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
505  INTENT(INOUT) :: gin
506  TYPE(mp_comm_type), INTENT(IN) :: gs_group
507  TYPE(mp_cart_type), INTENT(IN) :: rs_group
508  INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
509  INTENT(IN) :: yzp
510  INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nyzray
511  INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:, :), &
512  INTENT(IN) :: bo
513  REAL(kind=dp), INTENT(IN), OPTIONAL :: scale
514  INTEGER, INTENT(OUT), OPTIONAL :: status
515  LOGICAL, INTENT(IN), OPTIONAL :: debug
516 
517  CHARACTER(len=*), PARAMETER :: routinen = 'fft3d_ps'
518 
519  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
520  POINTER :: pbuf, qbuf, rbuf, sbuf
521  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
522  POINTER :: tbuf
523  INTEGER :: g_pos, handle, lg, lmax, mcx2, mcz1, mcz2, mg, mmax, mx1, mx2, my1, mz2, n1, n2, &
524  nmax, numtask, numtask_g, numtask_r, nx, ny, nz, output_unit, r_dim(2), r_pos(2), rp, &
525  sign, stat
526  INTEGER, ALLOCATABLE, DIMENSION(:) :: p2p
527  LOGICAL :: test
528  REAL(kind=dp) :: norm, sum_data
529  TYPE(fft_scratch_sizes) :: fft_scratch_size
530  TYPE(fft_scratch_type), POINTER :: fft_scratch
531 
532  CALL timeset(routinen, handle)
533  output_unit = cp_logger_get_default_io_unit()
534 
535  IF (PRESENT(debug)) THEN
536  test = debug
537  ELSE
538  test = .false.
539  END IF
540 
541  numtask_g = gs_group%num_pe
542  g_pos = gs_group%mepos
543  numtask_r = rs_group%num_pe
544  r_dim = rs_group%num_pe_cart
545  r_pos = rs_group%mepos_cart
546  IF (numtask_g /= numtask_r) THEN
547  cpabort("Real space and G space groups are different.")
548  END IF
549  numtask = numtask_r
550 
551  IF (PRESENT(scale)) THEN
552  norm = scale
553  ELSE
554  norm = 1.0_dp
555  END IF
556 
557  sign = fsign
558 
559  lg = SIZE(gin, 1)
560  mg = SIZE(gin, 2)
561 
562  nx = SIZE(cin, 1)
563  ny = SIZE(cin, 2)
564  nz = SIZE(cin, 3)
565 
566  IF (mg == 0) THEN
567  mmax = 1
568  ELSE
569  mmax = mg
570  END IF
571  lmax = max(lg, (nx*ny*nz)/mmax + 1)
572 
573  ALLOCATE (p2p(0:numtask - 1))
574 
575  CALL gs_group%rank_compare(rs_group, p2p)
576 
577  rp = p2p(g_pos)
578  mx1 = bo(2, 1, rp, 1) - bo(1, 1, rp, 1) + 1
579  my1 = bo(2, 2, rp, 1) - bo(1, 2, rp, 1) + 1
580  mx2 = bo(2, 1, rp, 2) - bo(1, 1, rp, 2) + 1
581  mz2 = bo(2, 3, rp, 2) - bo(1, 3, rp, 2) + 1
582 
583  n1 = maxval(bo(2, 1, :, 1) - bo(1, 1, :, 1) + 1)
584  n2 = maxval(bo(2, 2, :, 1) - bo(1, 2, :, 1) + 1)
585  nmax = max((2*n2)/numtask, 2)*mx2*mz2
586  nmax = max(nmax, n1*maxval(nyzray))
587  n1 = maxval(bo(2, 1, :, 2))
588  n2 = maxval(bo(2, 3, :, 2))
589 
590  fft_scratch_size%nx = nx
591  fft_scratch_size%ny = ny
592  fft_scratch_size%nz = nz
593  fft_scratch_size%lmax = lmax
594  fft_scratch_size%mmax = mmax
595  fft_scratch_size%mx1 = mx1
596  fft_scratch_size%mx2 = mx2
597  fft_scratch_size%my1 = my1
598  fft_scratch_size%mz2 = mz2
599  fft_scratch_size%lg = lg
600  fft_scratch_size%mg = mg
601  fft_scratch_size%nbx = n1
602  fft_scratch_size%nbz = n2
603  mcz1 = maxval(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
604  mcx2 = maxval(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
605  mcz2 = maxval(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
606  fft_scratch_size%mcz1 = mcz1
607  fft_scratch_size%mcx2 = mcx2
608  fft_scratch_size%mcz2 = mcz2
609  fft_scratch_size%nmax = nmax
610  fft_scratch_size%nmray = maxval(nyzray)
611  fft_scratch_size%nyzray = nyzray(g_pos)
612  fft_scratch_size%gs_group = gs_group
613  fft_scratch_size%rs_group = rs_group
614  fft_scratch_size%g_pos = g_pos
615  fft_scratch_size%r_pos = r_pos
616  fft_scratch_size%r_dim = r_dim
617  fft_scratch_size%numtask = numtask
618 
619  IF (test) THEN
620  IF (g_pos == 0 .AND. output_unit > 0) THEN
621  WRITE (output_unit, '(A)') " Parallel 3D FFT : fft3d_ps"
622  WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
623  WRITE (output_unit, '(A,T67,2I7)') " Array dimensions (gin) ", lg, mg
624  WRITE (output_unit, '(A,T60,3I7)') " Array dimensions (cin) ", nx, ny, nz
625  END IF
626  END IF
627 
628  IF (r_dim(2) > 1) THEN
629 
630  !
631  ! real space is distributed over x and y coordinate
632  ! we have two stages of communication
633  !
634 
635  IF (r_dim(1) == 1) THEN
636  cpabort("This processor distribution is not supported.")
637  END IF
638  CALL get_fft_scratch(fft_scratch, tf_type=300, n=n, fft_sizes=fft_scratch_size)
639 
640  IF (sign == fwfft) THEN
641  ! cin -> gin
642 
643  IF (test) THEN
644  sum_data = abs(sum(cin))
645  CALL gs_group%sum(sum_data)
646  IF (g_pos == 0 .AND. output_unit > 0) THEN
647  WRITE (output_unit, '(A)') " Two step communication algorithm "
648  WRITE (output_unit, '(A,T60,3I7)') " Transform Z ", n(3), mx1*my1
649  WRITE (output_unit, '(A,T60,3I7)') " Transform Y ", n(2), mx2*mz2
650  WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
651  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
652  END IF
653  END IF
654 
655  pbuf => fft_scratch%p1buf
656  qbuf => fft_scratch%p2buf
657 
658  ! FFT along z
659  CALL fft_1dm(fft_scratch%fft_plan(1), cin, qbuf, norm, stat)
660 
661  rbuf => fft_scratch%p3buf
662 
663  IF (test) THEN
664  sum_data = abs(sum(qbuf))
665  CALL gs_group%sum(sum_data)
666  IF (g_pos == 0 .AND. output_unit > 0) THEN
667  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
668  END IF
669  END IF
670 
671  ! Exchange data ( transpose of matrix )
672  CALL cube_transpose_2(qbuf, bo(:, :, :, 1), bo(:, :, :, 2), rbuf, fft_scratch)
673 
674  IF (test) THEN
675  sum_data = abs(sum(rbuf))
676  CALL gs_group%sum(sum_data)
677  IF (g_pos == 0 .AND. output_unit > 0) THEN
678  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) T", sum_data
679  END IF
680  END IF
681 
682  pbuf => fft_scratch%p4buf
683 
684  ! FFT along y
685  CALL fft_1dm(fft_scratch%fft_plan(2), rbuf, pbuf, 1.0_dp, stat)
686 
687  qbuf => fft_scratch%p5buf
688 
689  IF (test) THEN
690  sum_data = abs(sum(pbuf))
691  CALL gs_group%sum(sum_data)
692  IF (g_pos == 0 .AND. output_unit > 0) THEN
693  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) TS", sum_data
694  END IF
695  END IF
696 
697  ! Exchange data ( transpose of matrix ) and sort
698  CALL xz_to_yz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
699  bo(:, :, :, 2), qbuf, fft_scratch)
700 
701  IF (test) THEN
702  sum_data = abs(sum(qbuf))
703  CALL gs_group%sum(sum_data)
704  IF (g_pos == 0 .AND. output_unit > 0) THEN
705  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) TS", sum_data
706  END IF
707  END IF
708 
709  ! FFT along x
710  CALL fft_1dm(fft_scratch%fft_plan(3), qbuf, gin, 1.0_dp, stat)
711 
712  IF (test) THEN
713  sum_data = abs(sum(gin))
714  CALL gs_group%sum(sum_data)
715  IF (g_pos == 0 .AND. output_unit > 0) THEN
716  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
717  END IF
718  END IF
719 
720  ELSE IF (sign == bwfft) THEN
721  ! gin -> cin
722 
723  IF (test) THEN
724  sum_data = abs(sum(gin))
725  CALL gs_group%sum(sum_data)
726  IF (g_pos == 0 .AND. output_unit > 0) THEN
727  WRITE (output_unit, '(A)') " Two step communication algorithm "
728  WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
729  WRITE (output_unit, '(A,T60,3I7)') " Transform Y ", n(2), mx2*mz2
730  WRITE (output_unit, '(A,T60,3I7)') " Transform Z ", n(3), mx1*my1
731  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
732  END IF
733  END IF
734 
735  pbuf => fft_scratch%p7buf
736 
737  ! FFT along x
738  CALL fft_1dm(fft_scratch%fft_plan(4), gin, pbuf, norm, stat)
739 
740  qbuf => fft_scratch%p4buf
741 
742  IF (test) THEN
743  sum_data = abs(sum(pbuf))
744  CALL gs_group%sum(sum_data)
745  IF (g_pos == 0 .AND. output_unit > 0) THEN
746  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) TS", sum_data
747  END IF
748  END IF
749 
750  ! Exchange data ( transpose of matrix ) and sort
751  CALL yz_to_xz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
752  bo(:, :, :, 2), qbuf, fft_scratch)
753 
754  IF (test) THEN
755  sum_data = abs(sum(qbuf))
756  CALL gs_group%sum(sum_data)
757  IF (g_pos == 0 .AND. output_unit > 0) THEN
758  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) TS", sum_data
759  END IF
760  END IF
761 
762  rbuf => fft_scratch%p3buf
763 
764  ! FFT along y
765  CALL fft_1dm(fft_scratch%fft_plan(5), qbuf, rbuf, 1.0_dp, stat)
766 
767  pbuf => fft_scratch%p2buf
768 
769  IF (test) THEN
770  sum_data = abs(sum(rbuf))
771  CALL gs_group%sum(sum_data)
772  IF (g_pos == 0 .AND. output_unit > 0) THEN
773  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) T", sum_data
774  END IF
775  END IF
776 
777  ! Exchange data ( transpose of matrix )
778  CALL cube_transpose_1(rbuf, bo(:, :, :, 2), bo(:, :, :, 1), pbuf, fft_scratch)
779 
780  IF (test) THEN
781  sum_data = abs(sum(pbuf))
782  CALL gs_group%sum(sum_data)
783  IF (g_pos == 0 .AND. output_unit > 0) THEN
784  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) T", sum_data
785  END IF
786  END IF
787 
788  qbuf => fft_scratch%p1buf
789 
790  ! FFT along z
791  CALL fft_1dm(fft_scratch%fft_plan(6), pbuf, cin, 1.0_dp, stat)
792 
793  IF (test) THEN
794  sum_data = abs(sum(cin))
795  CALL gs_group%sum(sum_data)
796  IF (g_pos == 0 .AND. output_unit > 0) THEN
797  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
798  END IF
799  END IF
800 
801  ELSE
802 
803  cpabort("Illegal fsign parameter.")
804 
805  END IF
806 
807  CALL release_fft_scratch(fft_scratch)
808 
809  ELSE
810 
811  !
812  ! real space is only distributed over x coordinate
813  ! we have one stage of communication, after the transform of
814  ! direction x
815  !
816 
817  CALL get_fft_scratch(fft_scratch, tf_type=200, n=n, fft_sizes=fft_scratch_size)
818 
819  sbuf => fft_scratch%r1buf
820  tbuf => fft_scratch%tbuf
821 
822  sbuf = z_zero
823  tbuf = z_zero
824 
825  IF (sign == fwfft) THEN
826  ! cin -> gin
827 
828  IF (test) THEN
829  sum_data = abs(sum(cin))
830  CALL gs_group%sum(sum_data)
831  IF (g_pos == 0 .AND. output_unit > 0) THEN
832  WRITE (output_unit, '(A)') " One step communication algorithm "
833  WRITE (output_unit, '(A,T60,3I7)') " Transform YZ ", n(2), n(3), nx
834  WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
835  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
836  END IF
837  END IF
838 
839  ! FFT along y and z
840  CALL fft_1dm(fft_scratch%fft_plan(1), cin, sbuf, 1._dp, stat)
841  CALL fft_1dm(fft_scratch%fft_plan(2), sbuf, tbuf, 1._dp, stat)
842 
843  IF (test) THEN
844  sum_data = abs(sum(tbuf))
845  CALL gs_group%sum(sum_data)
846  IF (g_pos == 0 .AND. output_unit > 0) THEN
847  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) TS", sum_data
848  END IF
849  END IF
850 
851  ! Exchange data ( transpose of matrix ) and sort
852  CALL yz_to_x(tbuf, gs_group, g_pos, p2p, yzp, nyzray, &
853  bo(:, :, :, 2), sbuf, fft_scratch)
854 
855  IF (test) THEN
856  sum_data = abs(sum(sbuf))
857  CALL gs_group%sum(sum_data)
858  IF (g_pos == 0 .AND. output_unit > 0) THEN
859  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) TS", sum_data
860  END IF
861  END IF
862  ! FFT along x
863  CALL fft_1dm(fft_scratch%fft_plan(3), sbuf, gin, norm, stat)
864 
865  IF (test) THEN
866  sum_data = abs(sum(gin))
867  CALL gs_group%sum(sum_data)
868  IF (g_pos == 0 .AND. output_unit > 0) THEN
869  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
870  END IF
871  END IF
872 
873  ELSE IF (sign == bwfft) THEN
874  ! gin -> cin
875 
876  IF (test) THEN
877  sum_data = abs(sum(gin))
878  CALL gs_group%sum(sum_data)
879  IF (g_pos == 0 .AND. output_unit > 0) THEN
880  WRITE (output_unit, '(A)') " One step communication algorithm "
881  WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
882  WRITE (output_unit, '(A,T60,3I7)') " Transform YZ ", n(2), n(3), nx
883  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
884  END IF
885  END IF
886 
887  ! FFT along x
888  CALL fft_1dm(fft_scratch%fft_plan(4), gin, sbuf, norm, stat)
889 
890  IF (test) THEN
891  sum_data = abs(sum(sbuf))
892  CALL gs_group%sum(sum_data)
893  IF (g_pos == 0 .AND. output_unit > 0) THEN
894  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) TS", sum_data
895  END IF
896  END IF
897 
898  ! Exchange data ( transpose of matrix ) and sort
899  CALL x_to_yz(sbuf, gs_group, g_pos, p2p, yzp, nyzray, &
900  bo(:, :, :, 2), tbuf, fft_scratch)
901 
902  IF (test) THEN
903  sum_data = abs(sum(tbuf))
904  CALL gs_group%sum(sum_data)
905  IF (g_pos == 0 .AND. output_unit > 0) THEN
906  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) TS", sum_data
907  END IF
908  END IF
909 
910  ! FFT along y and z
911  CALL fft_1dm(fft_scratch%fft_plan(5), tbuf, sbuf, 1._dp, stat)
912  CALL fft_1dm(fft_scratch%fft_plan(6), sbuf, cin, 1._dp, stat)
913 
914  IF (test) THEN
915  sum_data = abs(sum(cin))
916  CALL gs_group%sum(sum_data)
917  IF (g_pos == 0 .AND. output_unit > 0) THEN
918  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
919  END IF
920  END IF
921  ELSE
922  cpabort("Illegal fsign parameter.")
923  END IF
924 
925  CALL release_fft_scratch(fft_scratch)
926 
927  END IF
928 
929  DEALLOCATE (p2p)
930 
931  IF (PRESENT(status)) THEN
932  status = stat
933  END IF
934  CALL timestop(handle)
935 
936  END SUBROUTINE fft3d_ps
937 
938 ! **************************************************************************************************
939 !> \brief ...
940 !> \param fsign ...
941 !> \param n ...
942 !> \param zin ...
943 !> \param gin ...
944 !> \param group ...
945 !> \param bo ...
946 !> \param scale ...
947 !> \param status ...
948 !> \param debug ...
949 ! **************************************************************************************************
950  SUBROUTINE fft3d_pb(fsign, n, zin, gin, group, bo, scale, status, debug)
951 
952  INTEGER, INTENT(IN) :: fsign
953  INTEGER, DIMENSION(3), INTENT(IN) :: n
954  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
955  INTENT(INOUT) :: zin
956  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
957  INTENT(INOUT) :: gin
958  TYPE(mp_cart_type), INTENT(IN) :: group
959  INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:, :), &
960  INTENT(IN) :: bo
961  REAL(kind=dp), INTENT(IN), OPTIONAL :: scale
962  INTEGER, INTENT(OUT), OPTIONAL :: status
963  LOGICAL, INTENT(IN), OPTIONAL :: debug
964 
965  CHARACTER(len=*), PARAMETER :: routinen = 'fft3d_pb'
966 
967  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
968  POINTER :: abuf, bbuf
969  INTEGER :: handle, lg(2), lz(3), mcx2, mcy3, mcz1, &
970  mcz2, mx1, mx2, mx3, my1, my2, my3, &
971  my_pos, mz1, mz2, mz3, output_unit, &
972  sign, stat
973  INTEGER, DIMENSION(2) :: dim
974  LOGICAL :: test
975  REAL(kind=dp) :: norm, sum_data
976  TYPE(fft_scratch_sizes) :: fft_scratch_size
977  TYPE(fft_scratch_type), POINTER :: fft_scratch
978 
979 !------------------------------------------------------------------------------
980 ! "Real Space" 1) xyZ or 1) xYZ
981 ! 2) xYz or not used
982 ! "G Space" 3) Xyz or 3) XYz
983 !
984 ! There is one communicator (2-dimensional) for all distributions
985 ! np = n1 * n2, where np is the total number of processors
986 ! If n2 = 1, we have the second case and only one transpose step is needed
987 !
988 ! Assignment of dimensions to axis for different steps
989 ! First case: 1) n1=x; n2=y
990 ! 2) n1=x; n2=z
991 ! 3) n1=y; n2=z
992 ! Second case 1) n1=x
993 ! 3) n1=z
994 !
995 ! The more general case with two communicators for the initial and final
996 ! distribution is not covered.
997 !------------------------------------------------------------------------------
998 
999  CALL timeset(routinen, handle)
1000  output_unit = cp_logger_get_default_io_unit()
1001 
1002  dim = group%num_pe_cart
1003  my_pos = group%mepos
1004 
1005  IF (PRESENT(debug)) THEN
1006  test = debug
1007  ELSE
1008  test = .false.
1009  END IF
1010 
1011  IF (PRESENT(scale)) THEN
1012  norm = scale
1013  ELSE
1014  norm = 1.0_dp
1015  END IF
1016 
1017  sign = fsign
1018 
1019  IF (test) THEN
1020  lg(1) = SIZE(gin, 1)
1021  lg(2) = SIZE(gin, 2)
1022  lz(1) = SIZE(zin, 1)
1023  lz(2) = SIZE(zin, 2)
1024  lz(3) = SIZE(zin, 3)
1025  IF (my_pos == 0 .AND. output_unit > 0) THEN
1026  WRITE (output_unit, '(A)') " Parallel 3D FFT : fft3d_pb"
1027  WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
1028  WRITE (output_unit, '(A,T67,2I7)') " Array dimensions (gin) ", lg
1029  WRITE (output_unit, '(A,T60,3I7)') " Array dimensions (cin) ", lz
1030  END IF
1031  END IF
1032 
1033  mx1 = bo(2, 1, my_pos, 1) - bo(1, 1, my_pos, 1) + 1
1034  my1 = bo(2, 2, my_pos, 1) - bo(1, 2, my_pos, 1) + 1
1035  mz1 = bo(2, 3, my_pos, 1) - bo(1, 3, my_pos, 1) + 1
1036  mx2 = bo(2, 1, my_pos, 2) - bo(1, 1, my_pos, 2) + 1
1037  my2 = bo(2, 2, my_pos, 2) - bo(1, 2, my_pos, 2) + 1
1038  mz2 = bo(2, 3, my_pos, 2) - bo(1, 3, my_pos, 2) + 1
1039  mx3 = bo(2, 1, my_pos, 3) - bo(1, 1, my_pos, 3) + 1
1040  my3 = bo(2, 2, my_pos, 3) - bo(1, 2, my_pos, 3) + 1
1041  mz3 = bo(2, 3, my_pos, 3) - bo(1, 3, my_pos, 3) + 1
1042  fft_scratch_size%mx1 = mx1
1043  fft_scratch_size%mx2 = mx2
1044  fft_scratch_size%mx3 = mx3
1045  fft_scratch_size%my1 = my1
1046  fft_scratch_size%my2 = my2
1047  fft_scratch_size%my3 = my3
1048  fft_scratch_size%mz1 = mz1
1049  fft_scratch_size%mz2 = mz2
1050  fft_scratch_size%mz3 = mz3
1051  mcz1 = maxval(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
1052  mcx2 = maxval(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
1053  mcz2 = maxval(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
1054  mcy3 = maxval(bo(2, 2, :, 3) - bo(1, 2, :, 3) + 1)
1055  fft_scratch_size%mcz1 = mcz1
1056  fft_scratch_size%mcx2 = mcx2
1057  fft_scratch_size%mcz2 = mcz2
1058  fft_scratch_size%mcy3 = mcy3
1059  fft_scratch_size%gs_group = group
1060  fft_scratch_size%rs_group = group
1061  fft_scratch_size%g_pos = my_pos
1062  fft_scratch_size%numtask = dim(1)*dim(2)
1063 
1064  IF (dim(1) > 1 .AND. dim(2) > 1) THEN
1065 
1066  !
1067  ! First case; two stages of communication
1068  !
1069 
1070  CALL get_fft_scratch(fft_scratch, tf_type=100, n=n, fft_sizes=fft_scratch_size)
1071 
1072  IF (sign == fwfft) THEN
1073  ! Stage 1 -> 3
1074 
1075  bbuf => fft_scratch%a2buf
1076 
1077  IF (test) THEN
1078  sum_data = abs(sum(zin))
1079  CALL group%sum(sum_data)
1080  IF (my_pos == 0 .AND. output_unit > 0) THEN
1081  WRITE (output_unit, '(A)') " Two step communication algorithm "
1082  WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1083  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1084  END IF
1085  END IF
1086 
1087  ! FFT along z
1088  CALL fft_1dm(fft_scratch%fft_plan(1), zin, bbuf, norm, stat)
1089 
1090  abuf => fft_scratch%a3buf
1091 
1092  IF (test) THEN
1093  sum_data = abs(sum(bbuf))
1094  CALL group%sum(sum_data)
1095  IF (my_pos == 0 .AND. output_unit > 0) THEN
1096  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1097  END IF
1098  END IF
1099 
1100  CALL cube_transpose_2(bbuf, bo(:, :, :, 1), bo(:, :, :, 2), abuf, fft_scratch)
1101 
1102  bbuf => fft_scratch%a4buf
1103 
1104  IF (test) THEN
1105  sum_data = abs(sum(abuf))
1106  CALL group%sum(sum_data)
1107  IF (my_pos == 0 .AND. output_unit > 0) THEN
1108  WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx2*mz2
1109  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1110  END IF
1111  END IF
1112 
1113  ! FFT along y
1114  CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
1115 
1116  abuf => fft_scratch%a5buf
1117 
1118  IF (test) THEN
1119  sum_data = abs(sum(bbuf))
1120  CALL group%sum(sum_data)
1121  IF (my_pos == 0 .AND. output_unit > 0) THEN
1122  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) T", sum_data
1123  END IF
1124  END IF
1125 
1126  CALL cube_transpose_4(bbuf, bo(:, :, :, 2), bo(:, :, :, 3), abuf, fft_scratch)
1127 
1128  IF (test) THEN
1129  sum_data = abs(sum(abuf))
1130  CALL group%sum(sum_data)
1131  IF (my_pos == 0 .AND. output_unit > 0) THEN
1132  WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1133  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) ", sum_data
1134  END IF
1135  END IF
1136 
1137  ! FFT along x
1138  CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
1139 
1140  IF (test) THEN
1141  sum_data = abs(sum(gin))
1142  CALL group%sum(sum_data)
1143  IF (my_pos == 0 .AND. output_unit > 0) THEN
1144  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
1145  END IF
1146  END IF
1147 
1148  ELSEIF (sign == bwfft) THEN
1149  ! Stage 3 -> 1
1150 
1151  bbuf => fft_scratch%a5buf
1152 
1153  IF (test) THEN
1154  sum_data = abs(sum(gin))
1155  CALL group%sum(sum_data)
1156  IF (my_pos == 0 .AND. output_unit > 0) THEN
1157  WRITE (output_unit, '(A)') " Two step communication algorithm "
1158  WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1159  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1160  END IF
1161  END IF
1162 
1163  ! FFT along x
1164  CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
1165 
1166  abuf => fft_scratch%a4buf
1167 
1168  IF (test) THEN
1169  sum_data = abs(sum(bbuf))
1170  CALL group%sum(sum_data)
1171  IF (my_pos == 0 .AND. output_unit > 0) THEN
1172  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1173  END IF
1174  END IF
1175 
1176  CALL cube_transpose_3(bbuf, bo(:, :, :, 3), bo(:, :, :, 2), abuf, fft_scratch)
1177 
1178  bbuf => fft_scratch%a3buf
1179 
1180  IF (test) THEN
1181  sum_data = abs(sum(abuf))
1182  CALL group%sum(sum_data)
1183  IF (my_pos == 0 .AND. output_unit > 0) THEN
1184  WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx2*mz2
1185  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1186  END IF
1187  END IF
1188 
1189  ! FFT along y
1190  CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
1191 
1192  abuf => fft_scratch%a2buf
1193 
1194  IF (test) THEN
1195  sum_data = abs(sum(bbuf))
1196  CALL group%sum(sum_data)
1197  IF (my_pos == 0 .AND. output_unit > 0) THEN
1198  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) T", sum_data
1199  END IF
1200  END IF
1201 
1202  CALL cube_transpose_1(bbuf, bo(:, :, :, 2), bo(:, :, :, 1), abuf, fft_scratch)
1203 
1204  IF (test) THEN
1205  sum_data = abs(sum(abuf))
1206  CALL group%sum(sum_data)
1207  IF (my_pos == 0 .AND. output_unit > 0) THEN
1208  WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1209  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) ", sum_data
1210  END IF
1211  END IF
1212 
1213  ! FFT along z
1214  CALL fft_1dm(fft_scratch%fft_plan(6), abuf, zin, norm, stat)
1215 
1216  IF (test) THEN
1217  sum_data = abs(sum(zin))
1218  CALL group%sum(sum_data)
1219  IF (my_pos == 0 .AND. output_unit > 0) THEN
1220  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
1221  END IF
1222  END IF
1223 
1224  ELSE
1225  cpabort("Illegal fsign parameter.")
1226  END IF
1227 
1228  CALL release_fft_scratch(fft_scratch)
1229 
1230  ELSEIF (dim(2) == 1) THEN
1231 
1232  !
1233  ! Second case; one stage of communication
1234  !
1235 
1236  CALL get_fft_scratch(fft_scratch, tf_type=101, n=n, fft_sizes=fft_scratch_size)
1237 
1238  IF (sign == fwfft) THEN
1239  ! Stage 1 -> 3
1240 
1241  IF (test) THEN
1242  sum_data = abs(sum(zin))
1243  CALL group%sum(sum_data)
1244  IF (my_pos == 0 .AND. output_unit > 0) THEN
1245  WRITE (output_unit, '(A)') " one step communication algorithm "
1246  WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1247  WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx1*mz1
1248  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1249  END IF
1250  END IF
1251 
1252  abuf => fft_scratch%a3buf
1253  bbuf => fft_scratch%a4buf
1254  ! FFT along z and y
1255  CALL fft_1dm(fft_scratch%fft_plan(1), zin, abuf, norm, stat)
1256  CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
1257 
1258  abuf => fft_scratch%a5buf
1259 
1260  IF (test) THEN
1261  sum_data = abs(sum(bbuf))
1262  CALL group%sum(sum_data)
1263  IF (my_pos == 0 .AND. output_unit > 0) THEN
1264  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1265  END IF
1266  END IF
1267 
1268  CALL cube_transpose_6(bbuf, group, bo(:, :, :, 1), bo(:, :, :, 3), abuf, fft_scratch)
1269 
1270  IF (test) THEN
1271  sum_data = abs(sum(abuf))
1272  CALL group%sum(sum_data)
1273  IF (my_pos == 0 .AND. output_unit > 0) THEN
1274  WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1275  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1276  END IF
1277  END IF
1278 
1279  ! FFT along x
1280  CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
1281 
1282  IF (test) THEN
1283  sum_data = abs(sum(gin))
1284  CALL group%sum(sum_data)
1285  IF (my_pos == 0 .AND. output_unit > 0) THEN
1286  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
1287  END IF
1288  END IF
1289 
1290  ELSEIF (sign == bwfft) THEN
1291  ! Stage 3 -> 1
1292 
1293  IF (test) THEN
1294  sum_data = abs(sum(gin))
1295  CALL group%sum(sum_data)
1296  IF (my_pos == 0 .AND. output_unit > 0) THEN
1297  WRITE (output_unit, '(A)') " one step communication algorithm "
1298  WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1299  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1300  END IF
1301  END IF
1302 
1303  bbuf => fft_scratch%a5buf
1304 
1305  ! FFT along x
1306  CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
1307 
1308  abuf => fft_scratch%a4buf
1309 
1310  IF (test) THEN
1311  sum_data = abs(sum(bbuf))
1312  CALL group%sum(sum_data)
1313  IF (my_pos == 0 .AND. output_unit > 0) THEN
1314  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1315  END IF
1316  END IF
1317 
1318  CALL cube_transpose_5(bbuf, group, bo(:, :, :, 3), bo(:, :, :, 1), abuf, fft_scratch)
1319 
1320  bbuf => fft_scratch%a3buf
1321 
1322  IF (test) THEN
1323  sum_data = abs(sum(abuf))
1324  CALL group%sum(sum_data)
1325  IF (my_pos == 0 .AND. output_unit > 0) THEN
1326  WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx1*mz1
1327  WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1328  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1329  END IF
1330  END IF
1331 
1332  ! FFT along y
1333  CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
1334 
1335  ! FFT along z
1336  CALL fft_1dm(fft_scratch%fft_plan(6), bbuf, zin, norm, stat)
1337 
1338  IF (test) THEN
1339  sum_data = abs(sum(zin))
1340  CALL group%sum(sum_data)
1341  IF (my_pos == 0 .AND. output_unit > 0) THEN
1342  WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
1343  END IF
1344  END IF
1345 
1346  ELSE
1347  cpabort("Illegal fsign parameter.")
1348  END IF
1349 
1350  CALL release_fft_scratch(fft_scratch)
1351 
1352  ELSE
1353 
1354  cpabort("This partition not implemented.")
1355 
1356  END IF
1357 
1358  IF (PRESENT(status)) THEN
1359  status = stat
1360  END IF
1361 
1362  CALL timestop(handle)
1363 
1364  END SUBROUTINE fft3d_pb
1365 
1366 ! **************************************************************************************************
1367 !> \brief ...
1368 !> \param sb ...
1369 !> \param group ...
1370 !> \param my_pos ...
1371 !> \param p2p ...
1372 !> \param yzp ...
1373 !> \param nray ...
1374 !> \param bo ...
1375 !> \param tb ...
1376 !> \param fft_scratch ...
1377 !> \par History
1378 !> 15. Feb. 2006 : single precision all_to_all
1379 !> \author JGH (14-Jan-2001)
1380 ! **************************************************************************************************
1381  SUBROUTINE x_to_yz(sb, group, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1382 
1383  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1384  INTENT(IN) :: sb
1385  TYPE(mp_comm_type), INTENT(IN) :: group
1386  INTEGER, INTENT(IN) :: my_pos
1387  INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1388  INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
1389  INTENT(IN) :: yzp
1390  INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1391  INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
1392  INTENT(IN) :: bo
1393  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
1394  INTENT(INOUT) :: tb
1395  TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
1396 
1397  CHARACTER(len=*), PARAMETER :: routinen = 'x_to_yz'
1398 
1399  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1400  POINTER :: rr
1401  COMPLEX(KIND=sp), CONTIGUOUS, DIMENSION(:, :), &
1402  POINTER :: ss, tt
1403  INTEGER :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
1404  nm, np, nr, nx
1405  INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
1406 
1407  CALL timeset(routinen, handle)
1408 
1409  np = SIZE(p2p)
1410  scount => fft_scratch%scount
1411  rcount => fft_scratch%rcount
1412  sdispl => fft_scratch%sdispl
1413  rdispl => fft_scratch%rdispl
1414 
1415  IF (alltoall_sgl) THEN
1416  ss => fft_scratch%ss
1417  tt => fft_scratch%tt
1418  ss(:, :) = cmplx(sb(:, :), kind=sp)
1419  tt(:, :) = 0._sp
1420  ELSE
1421  rr => fft_scratch%rr
1422  END IF
1423 
1424  mpr = p2p(my_pos)
1425  nm = maxval(nray(0:np - 1))
1426  nr = nray(my_pos)
1427 !$OMP PARALLEL DO DEFAULT(NONE), &
1428 !$OMP PRIVATE(ix,nx), &
1429 !$OMP SHARED(np,p2p,bo,nr,scount,sdispl)
1430  DO ip = 0, np - 1
1431  ix = p2p(ip)
1432  nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
1433  scount(ip) = nr*nx
1434  sdispl(ip) = nr*(bo(1, 1, ix) - 1)
1435  END DO
1436 !$OMP END PARALLEL DO
1437  nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1438 !$OMP PARALLEL DO DEFAULT(NONE), &
1439 !$OMP PRIVATE(nr), &
1440 !$OMP SHARED(np,nray,nx,rcount,rdispl,nm)
1441  DO ip = 0, np - 1
1442  nr = nray(ip)
1443  rcount(ip) = nr*nx
1444  rdispl(ip) = nm*nx*ip
1445  END DO
1446 !$OMP END PARALLEL DO
1447  IF (alltoall_sgl) THEN
1448  CALL group%alltoall(ss, scount, sdispl, tt, rcount, rdispl)
1449  ELSE
1450  CALL group%alltoall(sb, scount, sdispl, rr, rcount, rdispl)
1451  END IF
1452 
1453  nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1454 !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
1455 !$OMP PRIVATE(ixx,ir,iy,iz,ix) &
1456 !$OMP SHARED(np,nray,nx,alltoall_sgl,yzp,tt,rr,tb)
1457  DO ip = 0, np - 1
1458  DO ix = 1, nx
1459  ixx = nray(ip)*(ix - 1)
1460  IF (alltoall_sgl) THEN
1461  DO ir = 1, nray(ip)
1462  iy = yzp(1, ir, ip)
1463  iz = yzp(2, ir, ip)
1464  tb(iy, iz, ix) = tt(ir + ixx, ip)
1465  END DO
1466  ELSE
1467  DO ir = 1, nray(ip)
1468  iy = yzp(1, ir, ip)
1469  iz = yzp(2, ir, ip)
1470  tb(iy, iz, ix) = rr(ir + ixx, ip)
1471  END DO
1472  END IF
1473  END DO
1474  END DO
1475 !$OMP END PARALLEL DO
1476 
1477  CALL timestop(handle)
1478 
1479  END SUBROUTINE x_to_yz
1480 
1481 ! **************************************************************************************************
1482 !> \brief ...
1483 !> \param tb ...
1484 !> \param group ...
1485 !> \param my_pos ...
1486 !> \param p2p ...
1487 !> \param yzp ...
1488 !> \param nray ...
1489 !> \param bo ...
1490 !> \param sb ...
1491 !> \param fft_scratch ...
1492 !> \par History
1493 !> 15. Feb. 2006 : single precision all_to_all
1494 !> \author JGH (14-Jan-2001)
1495 ! **************************************************************************************************
1496  SUBROUTINE yz_to_x(tb, group, my_pos, p2p, yzp, nray, bo, sb, fft_scratch)
1497 
1498  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
1499  INTENT(IN) :: tb
1500  TYPE(mp_comm_type), INTENT(IN) :: group
1501  INTEGER, INTENT(IN) :: my_pos
1502  INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1503  INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
1504  INTENT(IN) :: yzp
1505  INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1506  INTEGER, DIMENSION(:, :, 0:), INTENT(IN) :: bo
1507  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1508  INTENT(INOUT) :: sb
1509  TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
1510 
1511  CHARACTER(len=*), PARAMETER :: routinen = 'yz_to_x'
1512 
1513  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1514  POINTER :: rr
1515  COMPLEX(KIND=sp), CONTIGUOUS, DIMENSION(:, :), &
1516  POINTER :: ss, tt
1517  INTEGER :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
1518  nm, np, nr, nx
1519  INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
1520 
1521  CALL timeset(routinen, handle)
1522 
1523  np = SIZE(p2p)
1524  mpr = p2p(my_pos)
1525  scount => fft_scratch%scount
1526  rcount => fft_scratch%rcount
1527  sdispl => fft_scratch%sdispl
1528  rdispl => fft_scratch%rdispl
1529 
1530  IF (alltoall_sgl) THEN
1531  ss => fft_scratch%ss
1532  tt => fft_scratch%tt
1533  ss = 0._sp
1534  tt = 0._sp
1535  ELSE
1536  rr => fft_scratch%rr
1537  END IF
1538 
1539  nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1540 !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
1541 !$OMP PRIVATE(ip, ixx, ir, iy, iz, ix) &
1542 !$OMP SHARED(np,nray,nx,alltoall_sgl,yzp,tb,tt,rr)
1543  DO ip = 0, np - 1
1544  DO ix = 1, nx
1545  ixx = nray(ip)*(ix - 1)
1546  IF (alltoall_sgl) THEN
1547  DO ir = 1, nray(ip)
1548  iy = yzp(1, ir, ip)
1549  iz = yzp(2, ir, ip)
1550  tt(ir + ixx, ip) = cmplx(tb(iy, iz, ix), kind=sp)
1551  END DO
1552  ELSE
1553  DO ir = 1, nray(ip)
1554  iy = yzp(1, ir, ip)
1555  iz = yzp(2, ir, ip)
1556  rr(ir + ixx, ip) = tb(iy, iz, ix)
1557  END DO
1558  END IF
1559  END DO
1560  END DO
1561 !$OMP END PARALLEL DO
1562  nm = maxval(nray(0:np - 1))
1563  nr = nray(my_pos)
1564 !$OMP PARALLEL DO DEFAULT(NONE), &
1565 !$OMP PRIVATE(ix,nx), &
1566 !$OMP SHARED(np,p2p,bo,rcount,rdispl,nr)
1567  DO ip = 0, np - 1
1568  ix = p2p(ip)
1569  nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
1570  rcount(ip) = nr*nx
1571  rdispl(ip) = nr*(bo(1, 1, ix) - 1)
1572  END DO
1573 !$OMP END PARALLEL DO
1574  nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1575 !$OMP PARALLEL DO DEFAULT(NONE), &
1576 !$OMP PRIVATE(nr), &
1577 !$OMP SHARED(np,nray,scount,sdispl,nx,nm)
1578  DO ip = 0, np - 1
1579  nr = nray(ip)
1580  scount(ip) = nr*nx
1581  sdispl(ip) = nm*nx*ip
1582  END DO
1583 !$OMP END PARALLEL DO
1584 
1585  IF (alltoall_sgl) THEN
1586  CALL group%alltoall(tt, scount, sdispl, ss, rcount, rdispl)
1587  sb = ss
1588  ELSE
1589  CALL group%alltoall(rr, scount, sdispl, sb, rcount, rdispl)
1590  END IF
1591 
1592  CALL timestop(handle)
1593 
1594  END SUBROUTINE yz_to_x
1595 
1596 ! **************************************************************************************************
1597 !> \brief ...
1598 !> \param sb ...
1599 !> \param group ...
1600 !> \param dims ...
1601 !> \param my_pos ...
1602 !> \param p2p ...
1603 !> \param yzp ...
1604 !> \param nray ...
1605 !> \param bo ...
1606 !> \param tb ...
1607 !> \param fft_scratch ...
1608 !> \par History
1609 !> 15. Feb. 2006 : single precision all_to_all
1610 !> \author JGH (18-Jan-2001)
1611 ! **************************************************************************************************
1612  SUBROUTINE yz_to_xz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1613 
1614  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1615  INTENT(IN) :: sb
1616 
1617  CLASS(mp_comm_type), INTENT(IN) :: group
1618  INTEGER, DIMENSION(2), INTENT(IN) :: dims
1619  INTEGER, INTENT(IN) :: my_pos
1620  INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1621  INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: yzp
1622  INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1623  INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: bo
1624  COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT), CONTIGUOUS :: tb
1625  TYPE(fft_scratch_type), INTENT(INOUT) :: fft_scratch
1626 
1627  CHARACTER(len=*), PARAMETER :: routinen = 'yz_to_xz'
1628 
1629  COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf, yzbuf
1630  COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf_sgl, yzbuf_sgl
1631  INTEGER :: handle, icrs, ip, ipl, ipr, ir, ix, iz, &
1632  jj, jx, jy, jz, myx, myz, np, npx, &
1633  npz, nx, nz, rs_pos
1634  INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: pzcoord, rcount, rdispl, scount, sdispl, &
1635  xcor, zcor
1636  INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER :: pgrid
1637 
1638  CALL timeset(routinen, handle)
1639 
1640  np = SIZE(p2p)
1641 
1642  rs_pos = p2p(my_pos)
1643 
1644  IF (alltoall_sgl) THEN
1645  yzbuf_sgl => fft_scratch%yzbuf_sgl
1646  xzbuf_sgl => fft_scratch%xzbuf_sgl
1647  ELSE
1648  yzbuf => fft_scratch%yzbuf
1649  xzbuf => fft_scratch%xzbuf
1650  END IF
1651  npx = dims(1)
1652  npz = dims(2)
1653  pgrid => fft_scratch%pgrid
1654  xcor => fft_scratch%xcor
1655  zcor => fft_scratch%zcor
1656  pzcoord => fft_scratch%pzcoord
1657  scount => fft_scratch%scount
1658  rcount => fft_scratch%rcount
1659  sdispl => fft_scratch%sdispl
1660  rdispl => fft_scratch%rdispl
1661 
1662  nx = SIZE(sb, 2)
1663 
1664 ! If the send and recv counts are not already cached, then
1665 ! calculate and store them
1666  IF (fft_scratch%in == 0) THEN
1667 
1668  scount = 0
1669 
1670  DO ix = 0, npx - 1
1671  ip = pgrid(ix, 0)
1672  xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
1673  END DO
1674  DO iz = 0, npz - 1
1675  ip = pgrid(0, iz)
1676  zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
1677  END DO
1678  DO jx = 1, nx
1679  IF (alltoall_sgl) THEN
1680  DO ir = 1, nray(my_pos)
1681  jy = yzp(1, ir, my_pos)
1682  jz = yzp(2, ir, my_pos)
1683  ip = pgrid(xcor(jx), zcor(jz))
1684  scount(ip) = scount(ip) + 1
1685  END DO
1686  ELSE
1687  DO ir = 1, nray(my_pos)
1688  jy = yzp(1, ir, my_pos)
1689  jz = yzp(2, ir, my_pos)
1690  ip = pgrid(xcor(jx), zcor(jz))
1691  scount(ip) = scount(ip) + 1
1692  END DO
1693  END IF
1694  END DO
1695 
1696  CALL group%alltoall(scount, rcount, 1)
1697  fft_scratch%yzcount = scount
1698  fft_scratch%xzcount = rcount
1699 
1700  ! Work out the correct displacements in the buffers
1701  sdispl(0) = 0
1702  rdispl(0) = 0
1703  DO ip = 1, np - 1
1704  sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
1705  rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
1706  END DO
1707 
1708  fft_scratch%yzdispl = sdispl
1709  fft_scratch%xzdispl = rdispl
1710 
1711  icrs = 0
1712  DO ip = 0, np - 1
1713  IF (scount(ip) /= 0) icrs = icrs + 1
1714  IF (rcount(ip) /= 0) icrs = icrs + 1
1715  END DO
1716  CALL group%sum(icrs)
1717  fft_scratch%rsratio = real(icrs, kind=dp)/(real(2*np, kind=dp)*real(np, kind=dp))
1718 
1719  fft_scratch%in = 1
1720  ELSE
1721  scount = fft_scratch%yzcount
1722  rcount = fft_scratch%xzcount
1723  sdispl = fft_scratch%yzdispl
1724  rdispl = fft_scratch%xzdispl
1725  END IF
1726 
1727 ! Do the actual packing
1728 !$OMP PARALLEL DO DEFAULT(NONE), &
1729 !$OMP PRIVATE(ipl,jj,nx,ir,jx,jy,jz),&
1730 !$OMP SHARED(np,p2p,pzcoord,bo,nray,yzp,zcor),&
1731 !$OMP SHARED(yzbuf,sb,scount,sdispl,my_pos),&
1732 !$OMP SHARED(yzbuf_sgl,alltoall_sgl)
1733  DO ip = 0, np - 1
1734  IF (scount(ip) == 0) cycle
1735  ipl = p2p(ip)
1736  jj = 0
1737  nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
1738  DO ir = 1, nray(my_pos)
1739  jz = yzp(2, ir, my_pos)
1740  IF (zcor(jz) == pzcoord(ipl)) THEN
1741  jj = jj + 1
1742  jy = yzp(1, ir, my_pos)
1743  IF (alltoall_sgl) THEN
1744  DO jx = 0, nx - 1
1745  yzbuf_sgl(sdispl(ip) + jj + jx*scount(ip)/nx) = cmplx(sb(ir, jx + bo(1, 1, ipl)), kind=sp)
1746  END DO
1747  ELSE
1748  DO jx = 0, nx - 1
1749  yzbuf(sdispl(ip) + jj + jx*scount(ip)/nx) = sb(ir, jx + bo(1, 1, ipl))
1750  END DO
1751  END IF
1752  END IF
1753  END DO
1754  END DO
1755 !$OMP END PARALLEL DO
1756 
1757  IF (alltoall_sgl) THEN
1758  CALL group%alltoall(yzbuf_sgl, scount, sdispl, xzbuf_sgl, rcount, rdispl)
1759  ELSE
1760  IF (fft_scratch%rsratio < ratio_sparse_alltoall) THEN
1761  CALL sparse_alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl, group)
1762  ELSE
1763  CALL group%alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl)
1764  END IF
1765  END IF
1766 
1767  myx = fft_scratch%sizes%r_pos(1)
1768  myz = fft_scratch%sizes%r_pos(2)
1769  nz = bo(2, 3, rs_pos) - bo(1, 3, rs_pos) + 1
1770 
1771 !$OMP PARALLEL DO DEFAULT(NONE), &
1772 !$OMP PRIVATE(ipr,jj,ir,jx,jy,jz),&
1773 !$OMP SHARED(tb,np,p2p,bo,rs_pos,nray),&
1774 !$OMP SHARED(yzp,alltoall_sgl,zcor,myz),&
1775 !$OMP SHARED(xzbuf,xzbuf_sgl,nz,rdispl)
1776  DO ip = 0, np - 1
1777  ipr = p2p(ip)
1778  jj = 0
1779  DO jx = 0, bo(2, 1, rs_pos) - bo(1, 1, rs_pos)
1780  DO ir = 1, nray(ip)
1781  jz = yzp(2, ir, ip)
1782  IF (alltoall_sgl) THEN
1783  IF (zcor(jz) == myz) THEN
1784  jj = jj + 1
1785  jy = yzp(1, ir, ip)
1786  jz = jz - bo(1, 3, rs_pos) + 1
1787  tb(jy, jz + jx*nz) = xzbuf_sgl(jj + rdispl(ipr))
1788  END IF
1789  ELSE
1790  IF (zcor(jz) == myz) THEN
1791  jj = jj + 1
1792  jy = yzp(1, ir, ip)
1793  jz = jz - bo(1, 3, rs_pos) + 1
1794  tb(jy, jz + jx*nz) = xzbuf(jj + rdispl(ipr))
1795  END IF
1796  END IF
1797  END DO
1798  END DO
1799  END DO
1800 !$OMP END PARALLEL DO
1801 
1802  CALL timestop(handle)
1803 
1804  END SUBROUTINE yz_to_xz
1805 
1806 ! **************************************************************************************************
1807 !> \brief ...
1808 !> \param sb ...
1809 !> \param group ...
1810 !> \param dims ...
1811 !> \param my_pos ...
1812 !> \param p2p ...
1813 !> \param yzp ...
1814 !> \param nray ...
1815 !> \param bo ...
1816 !> \param tb ...
1817 !> \param fft_scratch ...
1818 !> \par History
1819 !> 15. Feb. 2006 : single precision all_to_all
1820 !> \author JGH (19-Jan-2001)
1821 ! **************************************************************************************************
1822  SUBROUTINE xz_to_yz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1823 
1824  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1825  INTENT(IN) :: sb
1826 
1827  CLASS(mp_comm_type), INTENT(IN) :: group
1828  INTEGER, DIMENSION(2), INTENT(IN) :: dims
1829  INTEGER, INTENT(IN) :: my_pos
1830  INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1831  INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: yzp
1832  INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1833  INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: bo
1834  COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT), CONTIGUOUS :: tb
1835  TYPE(fft_scratch_type), INTENT(INOUT) :: fft_scratch
1836 
1837  CHARACTER(len=*), PARAMETER :: routinen = 'xz_to_yz'
1838 
1839  COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf, yzbuf
1840  COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf_sgl, yzbuf_sgl
1841  INTEGER :: handle, icrs, ip, ipl, ir, ix, ixx, iz, &
1842  jj, jx, jy, jz, mp, myx, myz, np, npx, &
1843  npz, nx, nz
1844  INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: pzcoord, rcount, rdispl, scount, sdispl, &
1845  xcor, zcor
1846  INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER :: pgrid
1847 
1848  CALL timeset(routinen, handle)
1849 
1850  np = SIZE(p2p)
1851 
1852  IF (alltoall_sgl) THEN
1853  yzbuf_sgl => fft_scratch%yzbuf_sgl
1854  xzbuf_sgl => fft_scratch%xzbuf_sgl
1855  ELSE
1856  yzbuf => fft_scratch%yzbuf
1857  xzbuf => fft_scratch%xzbuf
1858  END IF
1859  npx = dims(1)
1860  npz = dims(2)
1861  pgrid => fft_scratch%pgrid
1862  xcor => fft_scratch%xcor
1863  zcor => fft_scratch%zcor
1864  pzcoord => fft_scratch%pzcoord
1865  scount => fft_scratch%scount
1866  rcount => fft_scratch%rcount
1867  sdispl => fft_scratch%sdispl
1868  rdispl => fft_scratch%rdispl
1869 
1870 ! If the send and recv counts are not already cached, then
1871 ! calculate and store them
1872  IF (fft_scratch%in == 0) THEN
1873 
1874  rcount = 0
1875  nx = maxval(bo(2, 1, :))
1876 
1877  DO ix = 0, npx - 1
1878  ip = pgrid(ix, 0)
1879  xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
1880  END DO
1881  DO iz = 0, npz - 1
1882  ip = pgrid(0, iz)
1883  zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
1884  END DO
1885  DO jx = 1, nx
1886  DO ir = 1, nray(my_pos)
1887  jy = yzp(1, ir, my_pos)
1888  jz = yzp(2, ir, my_pos)
1889  ip = pgrid(xcor(jx), zcor(jz))
1890  rcount(ip) = rcount(ip) + 1
1891  END DO
1892  END DO
1893 
1894  CALL group%alltoall(rcount, scount, 1)
1895  fft_scratch%xzcount = scount
1896  fft_scratch%yzcount = rcount
1897 
1898  ! Work out the correct displacements in the buffers
1899  sdispl(0) = 0
1900  rdispl(0) = 0
1901  DO ip = 1, np - 1
1902  sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
1903  rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
1904  END DO
1905 
1906  fft_scratch%xzdispl = sdispl
1907  fft_scratch%yzdispl = rdispl
1908 
1909  icrs = 0
1910  DO ip = 0, np - 1
1911  IF (scount(ip) /= 0) icrs = icrs + 1
1912  IF (rcount(ip) /= 0) icrs = icrs + 1
1913  END DO
1914  CALL group%sum(icrs)
1915  fft_scratch%rsratio = real(icrs, kind=dp)/(real(2*np, kind=dp)*real(np, kind=dp))
1916 
1917  fft_scratch%in = 1
1918  ELSE
1919  scount = fft_scratch%xzcount
1920  rcount = fft_scratch%yzcount
1921  sdispl = fft_scratch%xzdispl
1922  rdispl = fft_scratch%yzdispl
1923  END IF
1924 
1925 ! Now do the actual packing
1926  myx = fft_scratch%sizes%r_pos(1)
1927  myz = fft_scratch%sizes%r_pos(2)
1928  mp = p2p(my_pos)
1929  nz = bo(2, 3, mp) - bo(1, 3, mp) + 1
1930  nx = bo(2, 1, mp) - bo(1, 1, mp) + 1
1931 
1932 !$OMP PARALLEL DO DEFAULT(NONE), &
1933 !$OMP PRIVATE(jj,ipl,ir,jx,jy,jz,ixx),&
1934 !$OMP SHARED(np,p2p,nray,yzp,zcor,myz,bo,mp),&
1935 !$OMP SHARED(alltoall_sgl,nx,scount,sdispl),&
1936 !$OMP SHARED(xzbuf,xzbuf_sgl,sb,nz)
1937  DO ip = 0, np - 1
1938  jj = 0
1939  ipl = p2p(ip)
1940  DO ir = 1, nray(ip)
1941  jz = yzp(2, ir, ip)
1942  IF (zcor(jz) == myz) THEN
1943  jj = jj + 1
1944  jy = yzp(1, ir, ip)
1945  jz = yzp(2, ir, ip) - bo(1, 3, mp) + 1
1946  IF (alltoall_sgl) THEN
1947  DO jx = 0, nx - 1
1948  ixx = jj + jx*scount(ipl)/nx
1949  xzbuf_sgl(ixx + sdispl(ipl)) = cmplx(sb(jy, jz + jx*nz), kind=sp)
1950  END DO
1951  ELSE
1952  DO jx = 0, nx - 1
1953  ixx = jj + jx*scount(ipl)/nx
1954  xzbuf(ixx + sdispl(ipl)) = sb(jy, jz + jx*nz)
1955  END DO
1956  END IF
1957  END IF
1958  END DO
1959  END DO
1960 !$OMP END PARALLEL DO
1961 
1962  IF (alltoall_sgl) THEN
1963  CALL group%alltoall(xzbuf_sgl, scount, sdispl, yzbuf_sgl, rcount, rdispl)
1964  ELSE
1965  IF (fft_scratch%rsratio < ratio_sparse_alltoall) THEN
1966  CALL sparse_alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl, group)
1967  ELSE
1968  CALL group%alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl)
1969  END IF
1970  END IF
1971 
1972 !$OMP PARALLEL DO DEFAULT(NONE), &
1973 !$OMP PRIVATE(ipl,jj,nx,ir,jx,jy,jz),&
1974 !$OMP SHARED(p2p,pzcoord,bo,nray,my_pos,yzp),&
1975 !$OMP SHARED(rcount,rdispl,tb,yzbuf,zcor),&
1976 !$OMP SHARED(yzbuf_sgl,alltoall_sgl,np)
1977  DO ip = 0, np - 1
1978  IF (rcount(ip) == 0) cycle
1979  ipl = p2p(ip)
1980  jj = 0
1981  nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
1982  DO ir = 1, nray(my_pos)
1983  jz = yzp(2, ir, my_pos)
1984  IF (zcor(jz) == pzcoord(ipl)) THEN
1985  jj = jj + 1
1986  jy = yzp(1, ir, my_pos)
1987  IF (alltoall_sgl) THEN
1988  DO jx = 0, nx - 1
1989  tb(ir, jx + bo(1, 1, ipl)) = yzbuf_sgl(rdispl(ip) + jj + jx*rcount(ip)/nx)
1990  END DO
1991  ELSE
1992  DO jx = 0, nx - 1
1993  tb(ir, jx + bo(1, 1, ipl)) = yzbuf(rdispl(ip) + jj + jx*rcount(ip)/nx)
1994  END DO
1995  END IF
1996  END IF
1997  END DO
1998  END DO
1999 !$OMP END PARALLEL DO
2000 
2001  CALL timestop(handle)
2002 
2003  END SUBROUTINE xz_to_yz
2004 
2005 ! **************************************************************************************************
2006 !> \brief ...
2007 !> \param cin ...
2008 !> \param boin ...
2009 !> \param boout ...
2010 !> \param sout ...
2011 !> \param fft_scratch ...
2012 !> \par History
2013 !> none
2014 !> \author JGH (20-Jan-2001)
2015 ! **************************************************************************************************
2016  SUBROUTINE cube_transpose_1(cin, boin, boout, sout, fft_scratch)
2017 
2018  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2019  INTENT(IN) :: cin
2020  INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2021  INTENT(IN) :: boin, boout
2022  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2023  INTENT(OUT) :: sout
2024  TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2025 
2026  CHARACTER(len=*), PARAMETER :: routinen = 'cube_transpose_1'
2027 
2028  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2029  POINTER :: rbuf
2030  INTEGER :: handle, ip, ipl, ir, is, ixy, iz, mip, &
2031  mz, np, nx, ny, nz
2032  INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2033  INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2034  INTEGER, DIMENSION(2) :: dim, pos
2035 
2036  CALL timeset(routinen, handle)
2037 
2038  mip = fft_scratch%mip
2039  dim = fft_scratch%dim
2040  pos = fft_scratch%pos
2041  scount => fft_scratch%scount
2042  rcount => fft_scratch%rcount
2043  sdispl => fft_scratch%sdispl
2044  rdispl => fft_scratch%rdispl
2045  pgrid => fft_scratch%pgcube
2046  np = dim(2)
2047 
2048  nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2049  nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2050 
2051 !$OMP PARALLEL DO DEFAULT(NONE), &
2052 !$OMP PRIVATE(ipl,ny), &
2053 !$OMP SHARED(np,pgrid,boout,scount,sdispl,nx,nz)
2054  DO ip = 0, np - 1
2055  ipl = pgrid(ip, 2)
2056  ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2057  scount(ip) = nx*nz*ny
2058  sdispl(ip) = nx*nz*(boout(1, 2, ipl) - 1)
2059  END DO
2060 !$OMP END PARALLEL DO
2061  ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2062  mz = maxval(boin(2, 3, :) - boin(1, 3, :) + 1)
2063 !$OMP PARALLEL DO DEFAULT(NONE), &
2064 !$OMP PRIVATE(ipl,nz), &
2065 !$OMP SHARED(np,pgrid,boin,nx,ny,rcount,rdispl,mz)
2066  DO ip = 0, np - 1
2067  ipl = pgrid(ip, 2)
2068  nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
2069  rcount(ip) = nx*nz*ny
2070  rdispl(ip) = nx*ny*mz*ip
2071  END DO
2072 !$OMP END PARALLEL DO
2073 
2074  rbuf => fft_scratch%rbuf1
2075 
2076  CALL fft_scratch%cart_sub_comm(2)%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2077 
2078 !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
2079 !$OMP PRIVATE(ip,ipl,nz,iz,is,ir) &
2080 !$OMP SHARED(nx,ny,np,pgrid,boin,sout,rbuf)
2081  DO ixy = 1, nx*ny
2082  DO ip = 0, np - 1
2083  ipl = pgrid(ip, 2)
2084  nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
2085  DO iz = 1, nz
2086  is = boin(1, 3, ipl) + iz - 1
2087  ir = iz + nz*(ixy - 1)
2088  sout(is, ixy) = rbuf(ir, ip)
2089  END DO
2090  END DO
2091  END DO
2092 !$OMP END PARALLEL DO
2093 
2094  CALL timestop(handle)
2095 
2096  END SUBROUTINE cube_transpose_1
2097 
2098 ! **************************************************************************************************
2099 !> \brief ...
2100 !> \param cin ...
2101 !> \param boin ...
2102 !> \param boout ...
2103 !> \param sout ...
2104 !> \param fft_scratch ...
2105 ! **************************************************************************************************
2106  SUBROUTINE cube_transpose_2(cin, boin, boout, sout, fft_scratch)
2107 
2108  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2109  INTENT(IN) :: cin
2110  INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2111  INTENT(IN) :: boin, boout
2112  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2113  INTENT(OUT) :: sout
2114  TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2115 
2116  CHARACTER(len=*), PARAMETER :: routinen = 'cube_transpose_2'
2117 
2118  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2119  POINTER :: rbuf
2120  INTEGER :: handle, ip, ipl, ir, ixy, iz, mip, mz, &
2121  np, nx, ny, nz
2122  INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2123  INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2124  INTEGER, DIMENSION(2) :: dim, pos
2125  TYPE(mp_comm_type) :: sub_group
2126 
2127  CALL timeset(routinen, handle)
2128 
2129  sub_group = fft_scratch%cart_sub_comm(2)
2130  mip = fft_scratch%mip
2131  dim = fft_scratch%dim
2132  pos = fft_scratch%pos
2133  scount => fft_scratch%scount
2134  rcount => fft_scratch%rcount
2135  sdispl => fft_scratch%sdispl
2136  rdispl => fft_scratch%rdispl
2137  pgrid => fft_scratch%pgcube
2138  np = dim(2)
2139 
2140  nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2141  ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2142  mz = maxval(boout(2, 3, :) - boout(1, 3, :) + 1)
2143 
2144  rbuf => fft_scratch%rbuf2
2145 
2146 !$OMP PARALLEL DEFAULT(NONE), &
2147 !$OMP PRIVATE(ip,ipl,nz,iz,ir), &
2148 !$OMP SHARED(nx,ny,np,pgrid,boout,rbuf,cin,scount,sdispl,mz)
2149 !$OMP DO COLLAPSE(2)
2150  DO ixy = 1, nx*ny
2151  DO ip = 0, np - 1
2152  ipl = pgrid(ip, 2)
2153  nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
2154  DO iz = boout(1, 3, ipl), boout(2, 3, ipl)
2155  ir = iz - boout(1, 3, ipl) + 1 + (ixy - 1)*nz
2156  rbuf(ir, ip) = cin(iz, ixy)
2157  END DO
2158  END DO
2159  END DO
2160 !$OMP END DO
2161 !$OMP DO
2162  DO ip = 0, np - 1
2163  ipl = pgrid(ip, 2)
2164  nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
2165  scount(ip) = nx*ny*nz
2166  sdispl(ip) = nx*ny*mz*ip
2167  END DO
2168 !$OMP END DO
2169 !$OMP END PARALLEL
2170  nz = boout(2, 3, mip) - boout(1, 3, mip) + 1
2171 !$OMP PARALLEL DO DEFAULT(NONE), &
2172 !$OMP PRIVATE(ipl,ny), &
2173 !$OMP SHARED(np,pgrid,boin,nx,nz,rcount,rdispl)
2174  DO ip = 0, np - 1
2175  ipl = pgrid(ip, 2)
2176  ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2177  rcount(ip) = nx*ny*nz
2178  rdispl(ip) = nx*nz*(boin(1, 2, ipl) - 1)
2179  END DO
2180 !$OMP END PARALLEL DO
2181 
2182  CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2183 
2184  CALL timestop(handle)
2185 
2186  END SUBROUTINE cube_transpose_2
2187 
2188 ! **************************************************************************************************
2189 !> \brief ...
2190 !> \param cin ...
2191 !> \param boin ...
2192 !> \param boout ...
2193 !> \param sout ...
2194 !> \param fft_scratch ...
2195 ! **************************************************************************************************
2196  SUBROUTINE cube_transpose_3(cin, boin, boout, sout, fft_scratch)
2197 
2198  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2199  INTENT(IN) :: cin
2200  INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2201  INTENT(IN) :: boin, boout
2202  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2203  INTENT(OUT) :: sout
2204  TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2205 
2206  CHARACTER(len=*), PARAMETER :: routinen = 'cube_transpose_3'
2207 
2208  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2209  POINTER :: rbuf
2210  INTEGER :: handle, ip, ipl, ir, is, ixz, iy, lb, &
2211  mip, my, my_id, np, num_threads, nx, &
2212  ny, nz, ub
2213  INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2214  INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2215  INTEGER, DIMENSION(2) :: dim, pos
2216  TYPE(mp_comm_type) :: sub_group
2217 
2218  CALL timeset(routinen, handle)
2219 
2220  sub_group = fft_scratch%cart_sub_comm(1)
2221  mip = fft_scratch%mip
2222  dim = fft_scratch%dim
2223  pos = fft_scratch%pos
2224  np = dim(1)
2225  scount => fft_scratch%scount
2226  rcount => fft_scratch%rcount
2227  sdispl => fft_scratch%sdispl
2228  rdispl => fft_scratch%rdispl
2229  pgrid => fft_scratch%pgcube
2230 
2231  ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2232  nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2233 !$OMP PARALLEL DO DEFAULT(NONE), &
2234 !$OMP PRIVATE(ipl, nx), &
2235 !$OMP SHARED(np,pgrid,boout,ny,nz,scount,sdispl)
2236  DO ip = 0, np - 1
2237  ipl = pgrid(ip, 1)
2238  nx = boout(2, 1, ipl) - boout(1, 1, ipl) + 1
2239  scount(ip) = nx*nz*ny
2240  sdispl(ip) = ny*nz*(boout(1, 1, ipl) - 1)
2241  END DO
2242 !$OMP END PARALLEL DO
2243  nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
2244  my = maxval(boin(2, 2, :) - boin(1, 2, :) + 1)
2245 !$OMP PARALLEL DO DEFAULT(NONE), &
2246 !$OMP PRIVATE(ipl, ny), &
2247 !$OMP SHARED(np,pgrid,boin,nx,nz,my,rcount,rdispl)
2248  DO ip = 0, np - 1
2249  ipl = pgrid(ip, 1)
2250  ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2251  rcount(ip) = nx*nz*ny
2252  rdispl(ip) = nx*my*nz*ip
2253  END DO
2254 !$OMP END PARALLEL DO
2255 
2256  rbuf => fft_scratch%rbuf3
2257  num_threads = 1
2258  my_id = 0
2259 !$OMP PARALLEL DEFAULT(NONE), &
2260 !$OMP PRIVATE(NUM_THREADS, my_id, lb, ub) &
2261 !$OMP SHARED(rbuf)
2262 !$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2263 !$ my_id = omp_get_thread_num()
2264  IF (my_id < num_threads) THEN
2265  lb = (SIZE(rbuf, 2)*my_id)/num_threads
2266  ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2267  rbuf(:, lb:ub) = 0.0_dp
2268  END IF
2269 !$OMP END PARALLEL
2270 
2271  CALL sub_group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2272 
2273 !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
2274 !$OMP PRIVATE(ip,ipl,ny,iy,is,ir) &
2275 !$OMP SHARED(nx,nz,np,pgrid,boin,rbuf,sout)
2276  DO ixz = 1, nx*nz
2277  DO ip = 0, np - 1
2278  ipl = pgrid(ip, 1)
2279  ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2280  DO iy = 1, ny
2281  is = boin(1, 2, ipl) + iy - 1
2282  ir = iy + ny*(ixz - 1)
2283  sout(is, ixz) = rbuf(ir, ip)
2284  END DO
2285  END DO
2286  END DO
2287 !$OMP END PARALLEL DO
2288 
2289  CALL timestop(handle)
2290 
2291  END SUBROUTINE cube_transpose_3
2292 
2293 ! **************************************************************************************************
2294 !> \brief ...
2295 !> \param cin ...
2296 !> \param boin ...
2297 !> \param boout ...
2298 !> \param sout ...
2299 !> \param fft_scratch ...
2300 ! **************************************************************************************************
2301  SUBROUTINE cube_transpose_4(cin, boin, boout, sout, fft_scratch)
2302 
2303  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2304  INTENT(IN) :: cin
2305  INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2306  INTENT(IN) :: boin, boout
2307  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2308  INTENT(OUT) :: sout
2309  TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2310 
2311  CHARACTER(len=*), PARAMETER :: routinen = 'cube_transpose_4'
2312 
2313  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2314  POINTER :: rbuf
2315  INTEGER :: handle, ip, ipl, ir, iy, izx, lb, mip, &
2316  my, my_id, np, num_threads, nx, ny, &
2317  nz, ub
2318  INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2319  INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2320  INTEGER, DIMENSION(2) :: dim, pos
2321  TYPE(mp_comm_type) :: sub_group
2322 
2323  CALL timeset(routinen, handle)
2324 
2325  sub_group = fft_scratch%cart_sub_comm(1)
2326  mip = fft_scratch%mip
2327  dim = fft_scratch%dim
2328  pos = fft_scratch%pos
2329  np = dim(1)
2330  scount => fft_scratch%scount
2331  rcount => fft_scratch%rcount
2332  sdispl => fft_scratch%sdispl
2333  rdispl => fft_scratch%rdispl
2334  pgrid => fft_scratch%pgcube
2335 
2336  nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2337  nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2338  my = maxval(boout(2, 2, :) - boout(1, 2, :) + 1)
2339 
2340  rbuf => fft_scratch%rbuf4
2341  num_threads = 1
2342  my_id = 0
2343 !$OMP PARALLEL DEFAULT(NONE), &
2344 !$OMP PRIVATE(NUM_THREADS,my_id,lb,ub,ip,ipl,ny,iy,ir), &
2345 !$OMP SHARED(rbuf,nz,nx,np,pgrid,boout,cin,my,scount,sdispl)
2346 !$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2347 !$ my_id = omp_get_thread_num()
2348  IF (my_id < num_threads) THEN
2349  lb = (SIZE(rbuf, 2)*my_id)/num_threads
2350  ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2351  rbuf(:, lb:ub) = 0.0_dp
2352  END IF
2353 !$OMP BARRIER
2354 
2355 !$OMP DO COLLAPSE(2)
2356  DO izx = 1, nz*nx
2357  DO ip = 0, np - 1
2358  ipl = pgrid(ip, 1)
2359  ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2360  DO iy = boout(1, 2, ipl), boout(2, 2, ipl)
2361  ir = iy - boout(1, 2, ipl) + 1 + (izx - 1)*ny
2362  rbuf(ir, ip) = cin(iy, izx)
2363  END DO
2364  END DO
2365  END DO
2366 !$OMP END DO
2367 !$OMP DO
2368  DO ip = 0, np - 1
2369  ipl = pgrid(ip, 1)
2370  ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2371  scount(ip) = nx*ny*nz
2372  sdispl(ip) = nx*nz*my*ip
2373  END DO
2374 !$OMP END DO
2375 !$OMP END PARALLEL
2376  ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2377 !$OMP PARALLEL DO DEFAULT(NONE), &
2378 !$OMP PRIVATE(ipl,nx), &
2379 !$OMP SHARED(np,pgrid,boin,rcount,rdispl,ny,nz)
2380  DO ip = 0, np - 1
2381  ipl = pgrid(ip, 1)
2382  nx = boin(2, 1, ipl) - boin(1, 1, ipl) + 1
2383  rcount(ip) = nx*ny*nz
2384  rdispl(ip) = ny*nz*(boin(1, 1, ipl) - 1)
2385  END DO
2386 !$OMP END PARALLEL DO
2387 
2388  CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2389 
2390  CALL timestop(handle)
2391 
2392  END SUBROUTINE cube_transpose_4
2393 
2394 ! **************************************************************************************************
2395 !> \brief ...
2396 !> \param cin ...
2397 !> \param group ...
2398 !> \param boin ...
2399 !> \param boout ...
2400 !> \param sout ...
2401 !> \param fft_scratch ...
2402 ! **************************************************************************************************
2403  SUBROUTINE cube_transpose_5(cin, group, boin, boout, sout, fft_scratch)
2404 
2405  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2406  INTENT(IN) :: cin
2407 
2408  CLASS(mp_comm_type), INTENT(IN) :: group
2409  INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: boin, boout
2410  COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT), CONTIGUOUS :: sout
2411  TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2412 
2413  CHARACTER(len=*), PARAMETER :: routinen = 'cube_transpose_5'
2414 
2415  COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: rbuf
2416  INTEGER :: handle, ip, ir, is, ixz, iy, lb, mip, &
2417  my, my_id, np, num_threads, nx, ny, &
2418  nz, ub
2419  INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: rcount, rdispl, scount, sdispl
2420 
2421  CALL timeset(routinen, handle)
2422 
2423  np = fft_scratch%sizes%numtask
2424  mip = fft_scratch%mip
2425  scount => fft_scratch%scount
2426  rcount => fft_scratch%rcount
2427  sdispl => fft_scratch%sdispl
2428  rdispl => fft_scratch%rdispl
2429 
2430  ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2431  nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2432 !$OMP PARALLEL DO DEFAULT(NONE), &
2433 !$OMP PRIVATE(nx), &
2434 !$OMP SHARED(np,boout,ny,nz,scount,sdispl)
2435  DO ip = 0, np - 1
2436  nx = boout(2, 1, ip) - boout(1, 1, ip) + 1
2437  scount(ip) = nx*nz*ny
2438  sdispl(ip) = ny*nz*(boout(1, 1, ip) - 1)
2439  END DO
2440 !$OMP END PARALLEL DO
2441  nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
2442  my = maxval(boin(2, 2, :) - boin(1, 2, :) + 1)
2443 !$OMP PARALLEL DO DEFAULT(NONE), &
2444 !$OMP PRIVATE(ny), &
2445 !$OMP SHARED(np,boin,nx,nz,rcount,rdispl,my)
2446  DO ip = 0, np - 1
2447  ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
2448  rcount(ip) = nx*nz*ny
2449  rdispl(ip) = nx*my*nz*ip
2450  END DO
2451 !$OMP END PARALLEL DO
2452 
2453  rbuf => fft_scratch%rbuf5
2454  num_threads = 1
2455  my_id = 0
2456 !$OMP PARALLEL DEFAULT(NONE), &
2457 !$OMP PRIVATE(NUM_THREADS, my_id, lb, ub), &
2458 !$OMP SHARED(rbuf)
2459 !$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2460 !$ my_id = omp_get_thread_num()
2461  IF (my_id < num_threads) THEN
2462  lb = (SIZE(rbuf, 2)*my_id)/num_threads
2463  ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2464  rbuf(:, lb:ub) = 0.0_dp
2465  END IF
2466 !$OMP END PARALLEL
2467 
2468  CALL group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2469 
2470 !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
2471 !$OMP PRIVATE(ip,ny,iy,is,ir) &
2472 !$OMP SHARED(nx,nz,np,boin,sout,rbuf)
2473  DO ixz = 1, nx*nz
2474  DO ip = 0, np - 1
2475  ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
2476  DO iy = 1, ny
2477  is = boin(1, 2, ip) + iy - 1
2478  ir = iy + ny*(ixz - 1)
2479  sout(is, ixz) = rbuf(ir, ip)
2480  END DO
2481  END DO
2482  END DO
2483 !$OMP END PARALLEL DO
2484 
2485  CALL timestop(handle)
2486 
2487  END SUBROUTINE cube_transpose_5
2488 
2489 ! **************************************************************************************************
2490 !> \brief ...
2491 !> \param cin ...
2492 !> \param group ...
2493 !> \param boin ...
2494 !> \param boout ...
2495 !> \param sout ...
2496 !> \param fft_scratch ...
2497 ! **************************************************************************************************
2498  SUBROUTINE cube_transpose_6(cin, group, boin, boout, sout, fft_scratch)
2499 
2500  COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2501  INTENT(IN) :: cin
2502 
2503  CLASS(mp_comm_type), INTENT(IN) :: group
2504  INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: boin, boout
2505  COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT), CONTIGUOUS :: sout
2506  TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2507 
2508  CHARACTER(len=*), PARAMETER :: routinen = 'cube_transpose_6'
2509 
2510  COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: rbuf
2511  INTEGER :: handle, ip, ir, iy, izx, lb, mip, my, &
2512  my_id, np, num_threads, nx, ny, nz, ub
2513  INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: rcount, rdispl, scount, sdispl
2514 
2515  CALL timeset(routinen, handle)
2516 
2517  np = fft_scratch%sizes%numtask
2518  mip = fft_scratch%mip
2519  scount => fft_scratch%scount
2520  rcount => fft_scratch%rcount
2521  sdispl => fft_scratch%sdispl
2522  rdispl => fft_scratch%rdispl
2523 
2524  nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2525  nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2526  my = maxval(boout(2, 2, :) - boout(1, 2, :) + 1)
2527 
2528  rbuf => fft_scratch%rbuf5
2529  num_threads = 1
2530  my_id = 0
2531 !$OMP PARALLEL DEFAULT(NONE), &
2532 !$OMP PRIVATE(NUM_THREADS,my_id,lb,ub,ip,ny,iy,ir), &
2533 !$OMP SHARED(rbuf,nx,nz,np,boout,cin,my,scount,sdispl)
2534 !$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2535 !$ my_id = omp_get_thread_num()
2536  IF (my_id < num_threads) THEN
2537  lb = (SIZE(rbuf, 2)*my_id)/num_threads
2538  ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2539  rbuf(:, lb:ub) = 0.0_dp
2540  END IF
2541 !$OMP BARRIER
2542 
2543 !$OMP DO COLLAPSE(2)
2544  DO izx = 1, nz*nx
2545  DO ip = 0, np - 1
2546  ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
2547  DO iy = boout(1, 2, ip), boout(2, 2, ip)
2548  ir = iy - boout(1, 2, ip) + 1 + (izx - 1)*ny
2549  rbuf(ir, ip) = cin(iy, izx)
2550  END DO
2551  END DO
2552  END DO
2553 !$OMP END DO
2554 !$OMP DO
2555  DO ip = 0, np - 1
2556  ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
2557  scount(ip) = nx*ny*nz
2558  sdispl(ip) = nx*nz*my*ip
2559  END DO
2560 !$OMP END DO
2561 !$OMP END PARALLEL
2562  ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2563 !$OMP PARALLEL DO DEFAULT(NONE), &
2564 !$OMP PRIVATE(nx), &
2565 !$OMP SHARED(np,boin,rcount,rdispl,nz,ny)
2566  DO ip = 0, np - 1
2567  nx = boin(2, 1, ip) - boin(1, 1, ip) + 1
2568  rcount(ip) = nx*ny*nz
2569  rdispl(ip) = ny*nz*(boin(1, 1, ip) - 1)
2570  END DO
2571 !$OMP END PARALLEL DO
2572 
2573  CALL group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2574 
2575  CALL timestop(handle)
2576 
2577  END SUBROUTINE cube_transpose_6
2578 
2579 ! **************************************************************************************************
2580 !> \brief ...
2581 ! **************************************************************************************************
2582  SUBROUTINE init_fft_scratch_pool()
2583 
2584  CALL release_fft_scratch_pool()
2585 
2586  ! Allocate first scratch and mark it as used
2587  ALLOCATE (fft_scratch_first)
2588  ALLOCATE (fft_scratch_first%fft_scratch)
2589  ! this is a very special scratch, it seems, we always keep it 'most - recent' so we will never delete it
2590  fft_scratch_first%fft_scratch%last_tick = huge(fft_scratch_first%fft_scratch%last_tick)
2591 
2593 
2594  END SUBROUTINE init_fft_scratch_pool
2595 
2596 ! **************************************************************************************************
2597 !> \brief ...
2598 !> \param fft_scratch ...
2599 ! **************************************************************************************************
2600  SUBROUTINE deallocate_fft_scratch_type(fft_scratch)
2601  TYPE(fft_scratch_type), INTENT(INOUT) :: fft_scratch
2602 
2603 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2604  INTEGER :: ierr
2605  COMPLEX(KIND=dp), POINTER :: dummy_ptr_z
2606 #endif
2607 
2608  ! deallocate structures
2609  IF (ASSOCIATED(fft_scratch%ziptr)) THEN
2610  CALL fft_dealloc(fft_scratch%ziptr)
2611  END IF
2612  IF (ASSOCIATED(fft_scratch%zoptr)) THEN
2613  CALL fft_dealloc(fft_scratch%zoptr)
2614  END IF
2615  IF (ASSOCIATED(fft_scratch%p1buf)) THEN
2616  CALL fft_dealloc(fft_scratch%p1buf)
2617  END IF
2618  IF (ASSOCIATED(fft_scratch%p2buf)) THEN
2619 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2620  dummy_ptr_z => fft_scratch%p2buf(1, 1)
2621  ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2622 #else
2623  CALL fft_dealloc(fft_scratch%p2buf)
2624 #endif
2625  END IF
2626  IF (ASSOCIATED(fft_scratch%p3buf)) THEN
2627 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2628  dummy_ptr_z => fft_scratch%p3buf(1, 1)
2629  ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2630 #else
2631  CALL fft_dealloc(fft_scratch%p3buf)
2632 #endif
2633  END IF
2634  IF (ASSOCIATED(fft_scratch%p4buf)) THEN
2635 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2636  dummy_ptr_z => fft_scratch%p4buf(1, 1)
2637  ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2638 #else
2639  CALL fft_dealloc(fft_scratch%p4buf)
2640 #endif
2641  END IF
2642  IF (ASSOCIATED(fft_scratch%p5buf)) THEN
2643 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2644  dummy_ptr_z => fft_scratch%p5buf(1, 1)
2645  ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2646 #else
2647  CALL fft_dealloc(fft_scratch%p5buf)
2648 #endif
2649  END IF
2650  IF (ASSOCIATED(fft_scratch%p6buf)) THEN
2651  CALL fft_dealloc(fft_scratch%p6buf)
2652  END IF
2653  IF (ASSOCIATED(fft_scratch%p7buf)) THEN
2654 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2655  dummy_ptr_z => fft_scratch%p7buf(1, 1)
2656  ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2657 #else
2658  CALL fft_dealloc(fft_scratch%p7buf)
2659 #endif
2660  END IF
2661  IF (ASSOCIATED(fft_scratch%r1buf)) THEN
2662 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2663  dummy_ptr_z => fft_scratch%r1buf(1, 1)
2664  ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2665 #else
2666  CALL fft_dealloc(fft_scratch%r1buf)
2667 #endif
2668  END IF
2669  IF (ASSOCIATED(fft_scratch%r2buf)) THEN
2670  CALL fft_dealloc(fft_scratch%r2buf)
2671  END IF
2672  IF (ASSOCIATED(fft_scratch%tbuf)) THEN
2673 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2674  dummy_ptr_z => fft_scratch%tbuf(1, 1, 1)
2675  ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2676 #else
2677  CALL fft_dealloc(fft_scratch%tbuf)
2678 #endif
2679  END IF
2680  IF (ASSOCIATED(fft_scratch%a1buf)) THEN
2681  CALL fft_dealloc(fft_scratch%a1buf)
2682  END IF
2683  IF (ASSOCIATED(fft_scratch%a2buf)) THEN
2684  CALL fft_dealloc(fft_scratch%a2buf)
2685  END IF
2686  IF (ASSOCIATED(fft_scratch%a3buf)) THEN
2687  CALL fft_dealloc(fft_scratch%a3buf)
2688  END IF
2689  IF (ASSOCIATED(fft_scratch%a4buf)) THEN
2690  CALL fft_dealloc(fft_scratch%a4buf)
2691  END IF
2692  IF (ASSOCIATED(fft_scratch%a5buf)) THEN
2693  CALL fft_dealloc(fft_scratch%a5buf)
2694  END IF
2695  IF (ASSOCIATED(fft_scratch%a6buf)) THEN
2696  CALL fft_dealloc(fft_scratch%a6buf)
2697  END IF
2698  IF (ASSOCIATED(fft_scratch%scount)) THEN
2699  DEALLOCATE (fft_scratch%scount, fft_scratch%rcount, &
2700  fft_scratch%sdispl, fft_scratch%rdispl)
2701  END IF
2702  IF (ASSOCIATED(fft_scratch%rr)) THEN
2703  DEALLOCATE (fft_scratch%rr)
2704  END IF
2705  IF (ASSOCIATED(fft_scratch%xzbuf)) THEN
2706  DEALLOCATE (fft_scratch%xzbuf)
2707  END IF
2708  IF (ASSOCIATED(fft_scratch%yzbuf)) THEN
2709  DEALLOCATE (fft_scratch%yzbuf)
2710  END IF
2711  IF (ASSOCIATED(fft_scratch%xzbuf_sgl)) THEN
2712  DEALLOCATE (fft_scratch%xzbuf_sgl)
2713  END IF
2714  IF (ASSOCIATED(fft_scratch%yzbuf_sgl)) THEN
2715  DEALLOCATE (fft_scratch%yzbuf_sgl)
2716  END IF
2717  IF (ASSOCIATED(fft_scratch%ss)) THEN
2718  DEALLOCATE (fft_scratch%ss)
2719  END IF
2720  IF (ASSOCIATED(fft_scratch%tt)) THEN
2721  DEALLOCATE (fft_scratch%tt)
2722  END IF
2723  IF (ASSOCIATED(fft_scratch%pgrid)) THEN
2724  DEALLOCATE (fft_scratch%pgrid)
2725  END IF
2726  IF (ASSOCIATED(fft_scratch%pgcube)) THEN
2727  DEALLOCATE (fft_scratch%pgcube)
2728  END IF
2729  IF (ASSOCIATED(fft_scratch%xcor)) THEN
2730  DEALLOCATE (fft_scratch%xcor, fft_scratch%zcor)
2731  END IF
2732  IF (ASSOCIATED(fft_scratch%pzcoord)) THEN
2733  DEALLOCATE (fft_scratch%pzcoord)
2734  END IF
2735  IF (ASSOCIATED(fft_scratch%xzcount)) THEN
2736  DEALLOCATE (fft_scratch%xzcount, fft_scratch%yzcount)
2737  DEALLOCATE (fft_scratch%xzdispl, fft_scratch%yzdispl)
2738  fft_scratch%in = 0
2739  fft_scratch%rsratio = 1._dp
2740  END IF
2741  IF (ASSOCIATED(fft_scratch%rbuf1)) THEN
2742  DEALLOCATE (fft_scratch%rbuf1)
2743  END IF
2744  IF (ASSOCIATED(fft_scratch%rbuf2)) THEN
2745  DEALLOCATE (fft_scratch%rbuf2)
2746  END IF
2747  IF (ASSOCIATED(fft_scratch%rbuf3)) THEN
2748  DEALLOCATE (fft_scratch%rbuf3)
2749  END IF
2750  IF (ASSOCIATED(fft_scratch%rbuf4)) THEN
2751  DEALLOCATE (fft_scratch%rbuf4)
2752  END IF
2753  IF (ASSOCIATED(fft_scratch%rbuf5)) THEN
2754  DEALLOCATE (fft_scratch%rbuf5)
2755  END IF
2756  IF (ASSOCIATED(fft_scratch%rbuf6)) THEN
2757  DEALLOCATE (fft_scratch%rbuf6)
2758  END IF
2759 
2760  IF (fft_scratch%cart_sub_comm(1) /= mp_comm_null) THEN
2761  CALL fft_scratch%cart_sub_comm(1)%free()
2762  END IF
2763  IF (fft_scratch%cart_sub_comm(2) /= mp_comm_null) THEN
2764  CALL fft_scratch%cart_sub_comm(2)%free()
2765  END IF
2766 
2767  CALL fft_destroy_plan(fft_scratch%fft_plan(1))
2768  CALL fft_destroy_plan(fft_scratch%fft_plan(2))
2769  CALL fft_destroy_plan(fft_scratch%fft_plan(3))
2770  CALL fft_destroy_plan(fft_scratch%fft_plan(4))
2771  CALL fft_destroy_plan(fft_scratch%fft_plan(5))
2772  CALL fft_destroy_plan(fft_scratch%fft_plan(6))
2773 
2774  END SUBROUTINE deallocate_fft_scratch_type
2775 
2776 ! **************************************************************************************************
2777 !> \brief ...
2778 ! **************************************************************************************************
2779  SUBROUTINE release_fft_scratch_pool()
2780 
2781  TYPE(fft_scratch_pool_type), POINTER :: fft_scratch, fft_scratch_current
2782 
2783  IF (init_fft_pool == 0) NULLIFY (fft_scratch_first)
2784 
2785  fft_scratch => fft_scratch_first
2786  DO
2787  IF (ASSOCIATED(fft_scratch)) THEN
2788  fft_scratch_current => fft_scratch
2789  fft_scratch => fft_scratch_current%fft_scratch_next
2790  NULLIFY (fft_scratch_current%fft_scratch_next)
2791 
2792  CALL deallocate_fft_scratch_type(fft_scratch_current%fft_scratch)
2793 
2794  DEALLOCATE (fft_scratch_current%fft_scratch)
2795  DEALLOCATE (fft_scratch_current)
2796  ELSE
2797  EXIT
2798  END IF
2799  END DO
2800 
2801  init_fft_pool = 0
2802 
2803  END SUBROUTINE release_fft_scratch_pool
2804 
2805 ! **************************************************************************************************
2806 !> \brief ...
2807 ! **************************************************************************************************
2808  SUBROUTINE resize_fft_scratch_pool()
2809 
2810  INTEGER :: last_tick, nscratch
2811  TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_current, fft_scratch_old
2812 
2813  nscratch = 0
2814 
2815  last_tick = huge(last_tick)
2816  NULLIFY (fft_scratch_old)
2817 
2818  ! start at the global pool, count, and find a deletion candidate
2819  fft_scratch_current => fft_scratch_first
2820  DO
2821  IF (ASSOCIATED(fft_scratch_current)) THEN
2822  nscratch = nscratch + 1
2823  ! is this a candidate for deletion (i.e. least recently used, and not in use)
2824  IF (.NOT. fft_scratch_current%fft_scratch%in_use) THEN
2825  IF (fft_scratch_current%fft_scratch%last_tick < last_tick) THEN
2826  last_tick = fft_scratch_current%fft_scratch%last_tick
2827  fft_scratch_old => fft_scratch_current
2828  END IF
2829  END IF
2830  fft_scratch_current => fft_scratch_current%fft_scratch_next
2831  ELSE
2832  EXIT
2833  END IF
2834  END DO
2835 
2836  ! we should delete a scratch
2837  IF (nscratch > fft_pool_scratch_limit) THEN
2838  ! note that we never deallocate the first (special) element of the list
2839  IF (ASSOCIATED(fft_scratch_old)) THEN
2840  fft_scratch_current => fft_scratch_first
2841  DO
2842  IF (ASSOCIATED(fft_scratch_current)) THEN
2843  ! should we delete the next in the list?
2844  IF (ASSOCIATED(fft_scratch_current%fft_scratch_next, fft_scratch_old)) THEN
2845  ! fix the linked list
2846  fft_scratch_current%fft_scratch_next => fft_scratch_old%fft_scratch_next
2847 
2848  ! deallocate the element
2849  CALL deallocate_fft_scratch_type(fft_scratch_old%fft_scratch)
2850  DEALLOCATE (fft_scratch_old%fft_scratch)
2851  DEALLOCATE (fft_scratch_old)
2852 
2853  ELSE
2854  fft_scratch_current => fft_scratch_current%fft_scratch_next
2855  END IF
2856  ELSE
2857  EXIT
2858  END IF
2859  END DO
2860 
2861  ELSE
2862  cpwarn("The number of the scratches exceeded the limit, but none could be deallocated")
2863  END IF
2864  END IF
2865 
2866  END SUBROUTINE resize_fft_scratch_pool
2867 
2868 ! **************************************************************************************************
2869 !> \brief ...
2870 !> \param fft_scratch ...
2871 !> \param tf_type ...
2872 !> \param n ...
2873 !> \param fft_sizes ...
2874 ! **************************************************************************************************
2875  SUBROUTINE get_fft_scratch(fft_scratch, tf_type, n, fft_sizes)
2876  TYPE(fft_scratch_type), POINTER :: fft_scratch
2877  INTEGER, INTENT(IN) :: tf_type
2878  INTEGER, DIMENSION(:), INTENT(IN) :: n
2879  TYPE(fft_scratch_sizes), INTENT(IN), &
2880  OPTIONAL :: fft_sizes
2881 
2882  CHARACTER(len=*), PARAMETER :: routinen = 'get_fft_scratch'
2883 
2884  INTEGER :: coord(2), dim(2), handle, i, ix, iz, lg, lmax, m1, m2, &
2885  mcx2, mcy3, mcz1, mcz2, mg, mmax, mx1, mx2, my1, my3, mz1, mz2, mz3, &
2886  nbx, nbz, nm, nmax, nmray, np, nx, ny, nyzray, nz, pos(2)
2887  INTEGER, DIMENSION(3) :: pcoord
2888  LOGICAL :: equal
2889  LOGICAL, DIMENSION(2) :: dims
2890  TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_current, &
2891  fft_scratch_last, &
2892  fft_scratch_new
2893 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2894  INTEGER :: ierr
2895  INTEGER(KIND=C_SIZE_T) :: length
2896  TYPE(c_ptr) :: cptr_r1buf, cptr_tbuf, &
2897  cptr_p2buf, cptr_p3buf, cptr_p4buf, cptr_p5buf, cptr_p7buf
2898 #endif
2899  CALL timeset(routinen, handle)
2900 
2901  ! this is the place to check that the scratch_pool does not grow without limits
2902  ! before we add a new scratch check the size of the pool and release some of the list if needed
2903  CALL resize_fft_scratch_pool()
2904 
2905  ! get the required scratch
2907  fft_scratch_current => fft_scratch_first
2908  DO
2909  IF (ASSOCIATED(fft_scratch_current)) THEN
2910  IF (fft_scratch_current%fft_scratch%in_use) THEN
2911  fft_scratch_last => fft_scratch_current
2912  fft_scratch_current => fft_scratch_current%fft_scratch_next
2913  cycle
2914  END IF
2915  IF (tf_type /= fft_scratch_current%fft_scratch%tf_type) THEN
2916  fft_scratch_last => fft_scratch_current
2917  fft_scratch_current => fft_scratch_current%fft_scratch_next
2918  cycle
2919  END IF
2920  IF (.NOT. all(n == fft_scratch_current%fft_scratch%nfft)) THEN
2921  fft_scratch_last => fft_scratch_current
2922  fft_scratch_current => fft_scratch_current%fft_scratch_next
2923  cycle
2924  END IF
2925  IF (PRESENT(fft_sizes)) THEN
2926  IF (fft_sizes%gs_group /= fft_scratch_current%fft_scratch%group) THEN
2927  fft_scratch_last => fft_scratch_current
2928  fft_scratch_current => fft_scratch_current%fft_scratch_next
2929  cycle
2930  END IF
2931  CALL is_equal(fft_sizes, fft_scratch_current%fft_scratch%sizes, equal)
2932  IF (.NOT. equal) THEN
2933  fft_scratch_last => fft_scratch_current
2934  fft_scratch_current => fft_scratch_current%fft_scratch_next
2935  cycle
2936  END IF
2937  END IF
2938  ! Success
2939  fft_scratch => fft_scratch_current%fft_scratch
2940  fft_scratch_current%fft_scratch%in_use = .true.
2941  EXIT
2942  ELSE
2943  ! We cannot find the scratch type in this pool
2944  ! Generate a new scratch set
2945  ALLOCATE (fft_scratch_new)
2946  ALLOCATE (fft_scratch_new%fft_scratch)
2947 
2948  IF (tf_type .NE. 400) THEN
2949  fft_scratch_new%fft_scratch%sizes = fft_sizes
2950  np = fft_sizes%numtask
2951  ALLOCATE (fft_scratch_new%fft_scratch%scount(0:np - 1), fft_scratch_new%fft_scratch%rcount(0:np - 1), &
2952  fft_scratch_new%fft_scratch%sdispl(0:np - 1), fft_scratch_new%fft_scratch%rdispl(0:np - 1), &
2953  fft_scratch_new%fft_scratch%pgcube(0:np - 1, 2))
2954  END IF
2955 
2956  SELECT CASE (tf_type)
2957  CASE DEFAULT
2958  cpabort("")
2959  CASE (100) ! fft3d_pb: full cube distribution
2960  mx1 = fft_sizes%mx1
2961  my1 = fft_sizes%my1
2962  mx2 = fft_sizes%mx2
2963  mz2 = fft_sizes%mz2
2964  my3 = fft_sizes%my3
2965  mz3 = fft_sizes%mz3
2966  CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
2967  CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
2968  CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx2*mz2, n(2)])
2969  CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx2*mz2])
2970  CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
2971  CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
2972  fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
2973 
2974  dim = fft_sizes%rs_group%num_pe_cart
2975  pos = fft_sizes%rs_group%mepos_cart
2976  fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
2977  fft_scratch_new%fft_scratch%dim = dim
2978  fft_scratch_new%fft_scratch%pos = pos
2979  mcz1 = fft_sizes%mcz1
2980  mcx2 = fft_sizes%mcx2
2981  mcz2 = fft_sizes%mcz2
2982  mcy3 = fft_sizes%mcy3
2983  ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:dim(2) - 1))
2984  ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:dim(2) - 1))
2985  ALLOCATE (fft_scratch_new%fft_scratch%rbuf3(mx2*mz3*mcy3, 0:dim(1) - 1))
2986  ALLOCATE (fft_scratch_new%fft_scratch%rbuf4(mx2*mz2*mcy3, 0:dim(1) - 1))
2987 
2988  dims = (/.true., .false./)
2989  CALL fft_scratch_new%fft_scratch%cart_sub_comm(1)%from_sub(fft_sizes%rs_group, dims)
2990  dims = (/.false., .true./)
2991  CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
2992 
2993  !initialise pgcube
2994  DO i = 0, dim(1) - 1
2995  coord = (/i, pos(2)/)
2996  CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 1))
2997  END DO
2998  DO i = 0, dim(2) - 1
2999  coord = (/pos(1), i/)
3000  CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
3001  END DO
3002 
3003  !set up fft plans
3004  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, fwfft, .true., n(3), mx1*my1, &
3005  fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a2buf, fft_plan_style)
3006  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, fwfft, .true., n(2), mx2*mz2, &
3007  fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf, fft_plan_style)
3008  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, fwfft, .true., n(1), my3*mz3, &
3009  fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf, fft_plan_style)
3010  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, bwfft, .true., n(1), my3*mz3, &
3011  fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf, fft_plan_style)
3012  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, bwfft, .true., n(2), mx2*mz2, &
3013  fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
3014  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, bwfft, .true., n(3), mx1*my1, &
3015  fft_scratch_new%fft_scratch%a2buf, fft_scratch_new%fft_scratch%a1buf, fft_plan_style)
3016 
3017  CASE (101) ! fft3d_pb: full cube distribution (dim 1)
3018  mx1 = fft_sizes%mx1
3019  my1 = fft_sizes%my1
3020  mz1 = fft_sizes%mz1
3021  my3 = fft_sizes%my3
3022  mz3 = fft_sizes%mz3
3023  CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
3024  CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
3025  fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
3026  CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx1*mz1, n(2)])
3027  CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx1*mz1])
3028  CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
3029  CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
3030 
3031  dim = fft_sizes%rs_group%num_pe_cart
3032  pos = fft_sizes%rs_group%mepos_cart
3033  fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
3034  fft_scratch_new%fft_scratch%dim = dim
3035  fft_scratch_new%fft_scratch%pos = pos
3036  mcy3 = fft_sizes%mcy3
3037  ALLOCATE (fft_scratch_new%fft_scratch%rbuf5(mx1*mz3*mcy3, 0:dim(1) - 1))
3038  ALLOCATE (fft_scratch_new%fft_scratch%rbuf6(mx1*mz1*mcy3, 0:dim(1) - 1))
3039 
3040  !set up fft plans
3041  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, fwfft, .true., n(3), mx1*my1, &
3042  fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
3043  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, fwfft, .true., n(2), mx1*mz1, &
3044  fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf, fft_plan_style)
3045  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, fwfft, .true., n(1), my3*mz3, &
3046  fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf, fft_plan_style)
3047  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, bwfft, .true., n(1), my3*mz3, &
3048  fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf, fft_plan_style)
3049  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, bwfft, .true., n(2), mx1*mz1, &
3050  fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
3051  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, bwfft, .true., n(3), mx1*my1, &
3052  fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a1buf, fft_plan_style)
3053 
3054  CASE (200) ! fft3d_ps: plane distribution
3055  nx = fft_sizes%nx
3056  ny = fft_sizes%ny
3057  nz = fft_sizes%nz
3058  mx2 = fft_sizes%mx2
3059  lmax = fft_sizes%lmax
3060  mmax = fft_sizes%mmax
3061  lg = fft_sizes%lg
3062  mg = fft_sizes%mg
3063  np = fft_sizes%numtask
3064  nmray = fft_sizes%nmray
3065  nyzray = fft_sizes%nyzray
3066 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
3067  length = int(2*dp_size*max(mmax, 1)*max(lmax, 1), kind=c_size_t)
3068  ierr = offload_malloc_pinned_mem(cptr_r1buf, length)
3069  cpassert(ierr == 0)
3070  CALL c_f_pointer(cptr_r1buf, fft_scratch_new%fft_scratch%r1buf, (/max(mmax, 1), max(lmax, 1)/))
3071  length = int(2*dp_size*max(ny, 1)*max(nz, 1)*max(nx, 1), kind=c_size_t)
3072  ierr = offload_malloc_pinned_mem(cptr_tbuf, length)
3073  cpassert(ierr == 0)
3074  CALL c_f_pointer(cptr_tbuf, fft_scratch_new%fft_scratch%tbuf, (/max(ny, 1), max(nz, 1), max(nx, 1)/))
3075 #else
3076  CALL fft_alloc(fft_scratch_new%fft_scratch%r1buf, [mmax, lmax])
3077  CALL fft_alloc(fft_scratch_new%fft_scratch%tbuf, [ny, nz, nx])
3078 #endif
3079  fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
3080  CALL fft_alloc(fft_scratch_new%fft_scratch%r2buf, [lg, mg])
3081  nm = nmray*mx2
3082  IF (alltoall_sgl) THEN
3083  ALLOCATE (fft_scratch_new%fft_scratch%ss(mmax, lmax))
3084  ALLOCATE (fft_scratch_new%fft_scratch%tt(nm, 0:np - 1))
3085  ELSE
3086  ALLOCATE (fft_scratch_new%fft_scratch%rr(nm, 0:np - 1))
3087  END IF
3088 
3089  !set up fft plans
3090  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, fwfft, .true., nz, nx*ny, &
3091  fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
3092  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, fwfft, .true., ny, nx*nz, &
3093  fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf, fft_plan_style)
3094  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, fwfft, .true., n(1), nyzray, &
3095  fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%r2buf, fft_plan_style)
3096  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, bwfft, .true., n(1), nyzray, &
3097  fft_scratch_new%fft_scratch%r2buf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
3098  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, bwfft, .true., ny, nx*nz, &
3099  fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
3100  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, bwfft, .true., nz, nx*ny, &
3101  fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf, fft_plan_style)
3102 
3103  CASE (300) ! fft3d_ps: block distribution
3104  mx1 = fft_sizes%mx1
3105  mx2 = fft_sizes%mx2
3106  my1 = fft_sizes%my1
3107  mz2 = fft_sizes%mz2
3108  mcx2 = fft_sizes%mcx2
3109  lg = fft_sizes%lg
3110  mg = fft_sizes%mg
3111  nmax = fft_sizes%nmax
3112  nmray = fft_sizes%nmray
3113  nyzray = fft_sizes%nyzray
3114  m1 = fft_sizes%r_dim(1)
3115  m2 = fft_sizes%r_dim(2)
3116  nbx = fft_sizes%nbx
3117  nbz = fft_sizes%nbz
3118  CALL fft_alloc(fft_scratch_new%fft_scratch%p1buf, [mx1*my1, n(3)])
3119  CALL fft_alloc(fft_scratch_new%fft_scratch%p6buf, [lg, mg])
3120 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
3121  length = int(2*dp_size*max(n(3), 1)*max(mx1*my1, 1), kind=c_size_t)
3122  ierr = offload_malloc_pinned_mem(cptr_p2buf, length)
3123  cpassert(ierr == 0)
3124  CALL c_f_pointer(cptr_p2buf, fft_scratch_new%fft_scratch%p2buf, (/max(n(3), 1), max(mx1*my1, 1)/))
3125  length = int(2*dp_size*max(mx2*mz2, 1)*max(n(2), 1), kind=c_size_t)
3126  ierr = offload_malloc_pinned_mem(cptr_p3buf, length)
3127  cpassert(ierr == 0)
3128  CALL c_f_pointer(cptr_p3buf, fft_scratch_new%fft_scratch%p3buf, (/max(mx2*mz2, 1), max(n(2), 1)/))
3129  length = int(2*dp_size*max(n(2), 1)*max(mx2*mz2, 1), kind=c_size_t)
3130  ierr = offload_malloc_pinned_mem(cptr_p4buf, length)
3131  cpassert(ierr == 0)
3132  CALL c_f_pointer(cptr_p4buf, fft_scratch_new%fft_scratch%p4buf, (/max(n(2), 1), max(mx2*mz2, 1)/))
3133  length = int(2*dp_size*max(nyzray, 1)*max(n(1), 1), kind=c_size_t)
3134  ierr = offload_malloc_pinned_mem(cptr_p5buf, length)
3135  cpassert(ierr == 0)
3136  CALL c_f_pointer(cptr_p5buf, fft_scratch_new%fft_scratch%p5buf, (/max(nyzray, 1), max(n(1), 1)/))
3137  length = int(2*dp_size*max(mg, 1)*max(lg, 1), kind=c_size_t)
3138  ierr = offload_malloc_pinned_mem(cptr_p7buf, length)
3139  cpassert(ierr == 0)
3140  CALL c_f_pointer(cptr_p7buf, fft_scratch_new%fft_scratch%p7buf, (/max(mg, 1), max(lg, 1)/))
3141 #else
3142  CALL fft_alloc(fft_scratch_new%fft_scratch%p2buf, [n(3), mx1*my1])
3143  CALL fft_alloc(fft_scratch_new%fft_scratch%p3buf, [mx2*mz2, n(2)])
3144  CALL fft_alloc(fft_scratch_new%fft_scratch%p4buf, [n(2), mx2*mz2])
3145  CALL fft_alloc(fft_scratch_new%fft_scratch%p5buf, [nyzray, n(1)])
3146  CALL fft_alloc(fft_scratch_new%fft_scratch%p7buf, [mg, lg])
3147 #endif
3148  IF (alltoall_sgl) THEN
3149  ALLOCATE (fft_scratch_new%fft_scratch%yzbuf_sgl(mg*lg))
3150  ALLOCATE (fft_scratch_new%fft_scratch%xzbuf_sgl(n(2)*mx2*mz2))
3151  ELSE
3152  ALLOCATE (fft_scratch_new%fft_scratch%yzbuf(mg*lg))
3153  ALLOCATE (fft_scratch_new%fft_scratch%xzbuf(n(2)*mx2*mz2))
3154  END IF
3155  ALLOCATE (fft_scratch_new%fft_scratch%pgrid(0:m1 - 1, 0:m2 - 1))
3156  ALLOCATE (fft_scratch_new%fft_scratch%xcor(nbx))
3157  ALLOCATE (fft_scratch_new%fft_scratch%zcor(nbz))
3158  ALLOCATE (fft_scratch_new%fft_scratch%pzcoord(0:np - 1))
3159  ALLOCATE (fft_scratch_new%fft_scratch%xzcount(0:np - 1), &
3160  fft_scratch_new%fft_scratch%yzcount(0:np - 1))
3161  ALLOCATE (fft_scratch_new%fft_scratch%xzdispl(0:np - 1), &
3162  fft_scratch_new%fft_scratch%yzdispl(0:np - 1))
3163  fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
3164 
3165  dim = fft_sizes%rs_group%num_pe_cart
3166  pos = fft_sizes%rs_group%mepos_cart
3167  fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
3168  fft_scratch_new%fft_scratch%dim = dim
3169  fft_scratch_new%fft_scratch%pos = pos
3170  mcz1 = fft_sizes%mcz1
3171  mcz2 = fft_sizes%mcz2
3172  ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:dim(2) - 1))
3173  ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:dim(2) - 1))
3174 
3175  dims = (/.false., .true./)
3176  CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
3177 
3178  !initialise pgcube
3179  DO i = 0, dim(2) - 1
3180  coord = (/pos(1), i/)
3181  CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
3182  END DO
3183 
3184  !initialise pgrid
3185  DO ix = 0, m1 - 1
3186  DO iz = 0, m2 - 1
3187  coord = (/ix, iz/)
3188  CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgrid(ix, iz))
3189  END DO
3190  END DO
3191 
3192  !initialise pzcoord
3193  DO i = 0, np - 1
3194  CALL fft_sizes%rs_group%coords(i, pcoord)
3195  fft_scratch_new%fft_scratch%pzcoord(i) = pcoord(2)
3196  END DO
3197 
3198  !set up fft plans
3199  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, fwfft, .true., n(3), mx1*my1, &
3200  fft_scratch_new%fft_scratch%p1buf, fft_scratch_new%fft_scratch%p2buf, fft_plan_style)
3201  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, fwfft, .true., n(2), mx2*mz2, &
3202  fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p4buf, fft_plan_style)
3203  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, fwfft, .true., n(1), nyzray, &
3204  fft_scratch_new%fft_scratch%p5buf, fft_scratch_new%fft_scratch%p6buf, fft_plan_style)
3205  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, bwfft, .true., n(1), nyzray, &
3206  fft_scratch_new%fft_scratch%p6buf, fft_scratch_new%fft_scratch%p7buf, fft_plan_style)
3207  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, bwfft, .true., n(2), mx2*mz2, &
3208  fft_scratch_new%fft_scratch%p4buf, fft_scratch_new%fft_scratch%p3buf, fft_plan_style)
3209  CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, bwfft, .true., n(3), mx1*my1, &
3210  fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p1buf, fft_plan_style)
3211 
3212  CASE (400) ! serial FFT
3213  np = 0
3214  CALL fft_alloc(fft_scratch_new%fft_scratch%ziptr, n)
3215  CALL fft_alloc(fft_scratch_new%fft_scratch%zoptr, n)
3216 
3217  !in place plans
3218  CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, .true., fwfft, n, &
3219  fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr, fft_plan_style)
3220  CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, .true., bwfft, n, &
3221  fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr, fft_plan_style)
3222  ! out of place plans
3223  CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, .false., fwfft, n, &
3224  fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr, fft_plan_style)
3225  CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, .false., bwfft, n, &
3226  fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr, fft_plan_style)
3227 
3228  END SELECT
3229 
3230  NULLIFY (fft_scratch_new%fft_scratch_next)
3231  fft_scratch_new%fft_scratch%fft_scratch_id = &
3232  fft_scratch_last%fft_scratch%fft_scratch_id + 1
3233  fft_scratch_new%fft_scratch%in_use = .true.
3234  fft_scratch_new%fft_scratch%nfft = n
3235  fft_scratch_last%fft_scratch_next => fft_scratch_new
3236  fft_scratch_new%fft_scratch%tf_type = tf_type
3237  fft_scratch => fft_scratch_new%fft_scratch
3238  EXIT
3239 
3240  END IF
3241  END DO
3242 
3243  fft_scratch%last_tick = tick_fft_pool
3244 
3245  CALL timestop(handle)
3246 
3247  END SUBROUTINE get_fft_scratch
3248 
3249 ! **************************************************************************************************
3250 !> \brief ...
3251 !> \param fft_scratch ...
3252 ! **************************************************************************************************
3253  SUBROUTINE release_fft_scratch(fft_scratch)
3254 
3255  TYPE(fft_scratch_type), POINTER :: fft_scratch
3256 
3257  INTEGER :: scratch_id
3258  TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_current
3259 
3260  scratch_id = fft_scratch%fft_scratch_id
3261 
3262  fft_scratch_current => fft_scratch_first
3263  DO
3264  IF (ASSOCIATED(fft_scratch_current)) THEN
3265  IF (scratch_id == fft_scratch_current%fft_scratch%fft_scratch_id) THEN
3266  fft_scratch%in_use = .false.
3267  NULLIFY (fft_scratch)
3268  EXIT
3269  END IF
3270  fft_scratch_current => fft_scratch_current%fft_scratch_next
3271  ELSE
3272  ! We cannot find the scratch type in this pool
3273  cpabort("")
3274  EXIT
3275  END IF
3276  END DO
3277 
3278  END SUBROUTINE release_fft_scratch
3279 
3280 ! **************************************************************************************************
3281 !> \brief ...
3282 !> \param rs ...
3283 !> \param scount ...
3284 !> \param sdispl ...
3285 !> \param rq ...
3286 !> \param rcount ...
3287 !> \param rdispl ...
3288 !> \param group ...
3289 ! **************************************************************************************************
3290  SUBROUTINE sparse_alltoall(rs, scount, sdispl, rq, rcount, rdispl, group)
3291  COMPLEX(KIND=dp), DIMENSION(:), POINTER :: rs
3292  INTEGER, DIMENSION(:), POINTER :: scount, sdispl
3293  COMPLEX(KIND=dp), DIMENSION(:), POINTER :: rq
3294  INTEGER, DIMENSION(:), POINTER :: rcount, rdispl
3295 
3296  CLASS(mp_comm_type), INTENT(IN) :: group
3297 
3298  COMPLEX(KIND=dp), DIMENSION(:), POINTER :: msgin, msgout
3299  INTEGER :: ip, n, nr, ns, pos
3300  TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: rreq, sreq
3301 
3302  CALL group%sync()
3303  n = group%num_pe
3304  pos = group%mepos
3305  ALLOCATE (sreq(0:n - 1))
3306  ALLOCATE (rreq(0:n - 1))
3307  nr = 0
3308  DO ip = 0, n - 1
3309  IF (rcount(ip) == 0) cycle
3310  IF (ip == pos) cycle
3311  msgout => rq(rdispl(ip) + 1:rdispl(ip) + rcount(ip))
3312  CALL group%irecv(msgout, ip, rreq(nr))
3313  nr = nr + 1
3314  END DO
3315  ns = 0
3316  DO ip = 0, n - 1
3317  IF (scount(ip) == 0) cycle
3318  IF (ip == pos) cycle
3319  msgin => rs(sdispl(ip) + 1:sdispl(ip) + scount(ip))
3320  CALL group%isend(msgin, ip, sreq(ns))
3321  ns = ns + 1
3322  END DO
3323  IF (rcount(pos) /= 0) THEN
3324  IF (rcount(pos) /= scount(pos)) cpabort("")
3325  rq(rdispl(pos) + 1:rdispl(pos) + rcount(pos)) = rs(sdispl(pos) + 1:sdispl(pos) + scount(pos))
3326  END IF
3327  CALL mp_waitall(sreq(0:ns - 1))
3328  CALL mp_waitall(rreq(0:nr - 1))
3329  DEALLOCATE (sreq)
3330  DEALLOCATE (rreq)
3331  CALL group%sync()
3332 
3333  END SUBROUTINE sparse_alltoall
3334 
3335 ! **************************************************************************************************
3336 !> \brief test data structures for equality. It is assumed that if they are
3337 !> different for one mpi task they are different for all (??)
3338 !> \param fft_size_1 ...
3339 !> \param fft_size_2 ...
3340 !> \param equal ...
3341 ! **************************************************************************************************
3342  SUBROUTINE is_equal(fft_size_1, fft_size_2, equal)
3343  TYPE(fft_scratch_sizes) :: fft_size_1, fft_size_2
3344  LOGICAL :: equal
3345 
3346  equal = .true.
3347 
3348  equal = equal .AND. fft_size_1%nx == fft_size_2%nx
3349  equal = equal .AND. fft_size_1%ny == fft_size_2%ny
3350  equal = equal .AND. fft_size_1%nz == fft_size_2%nz
3351 
3352  equal = equal .AND. fft_size_1%lmax == fft_size_2%lmax
3353  equal = equal .AND. fft_size_1%mmax == fft_size_2%mmax
3354  equal = equal .AND. fft_size_1%nmax == fft_size_2%nmax
3355 
3356  equal = equal .AND. fft_size_1%mx1 == fft_size_2%mx1
3357  equal = equal .AND. fft_size_1%mx2 == fft_size_2%mx2
3358  equal = equal .AND. fft_size_1%mx3 == fft_size_2%mx3
3359 
3360  equal = equal .AND. fft_size_1%my1 == fft_size_2%my1
3361  equal = equal .AND. fft_size_1%my2 == fft_size_2%my2
3362  equal = equal .AND. fft_size_1%my3 == fft_size_2%my3
3363 
3364  equal = equal .AND. fft_size_1%mcz1 == fft_size_2%mcz1
3365  equal = equal .AND. fft_size_1%mcx2 == fft_size_2%mcx2
3366  equal = equal .AND. fft_size_1%mcz2 == fft_size_2%mcz2
3367  equal = equal .AND. fft_size_1%mcy3 == fft_size_2%mcy3
3368 
3369  equal = equal .AND. fft_size_1%lg == fft_size_2%lg
3370  equal = equal .AND. fft_size_1%mg == fft_size_2%mg
3371 
3372  equal = equal .AND. fft_size_1%nbx == fft_size_2%nbx
3373  equal = equal .AND. fft_size_1%nbz == fft_size_2%nbz
3374 
3375  equal = equal .AND. fft_size_1%nmray == fft_size_2%nmray
3376  equal = equal .AND. fft_size_1%nyzray == fft_size_2%nyzray
3377 
3378  equal = equal .AND. fft_size_1%gs_group == fft_size_2%gs_group
3379  equal = equal .AND. fft_size_1%rs_group == fft_size_2%rs_group
3380 
3381  equal = equal .AND. all(fft_size_1%g_pos == fft_size_2%g_pos)
3382  equal = equal .AND. all(fft_size_1%r_pos == fft_size_2%r_pos)
3383  equal = equal .AND. all(fft_size_1%r_dim == fft_size_2%r_dim)
3384 
3385  equal = equal .AND. fft_size_1%numtask == fft_size_2%numtask
3386 
3387  END SUBROUTINE is_equal
3388 
3389 END MODULE fft_tools
various routines to log and control the output. The idea is that decisions about where to log should ...
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
Definition: fft_lib.F:7
subroutine, public fft_do_init(fft_type, wisdom_file)
...
Definition: fft_lib.F:63
subroutine, public fft_3d(plan, scale, zin, zout, stat)
...
Definition: fft_lib.F:172
subroutine, public fft_create_plan_1dm(plan, fft_type, fsign, trans, n, m, zin, zout, plan_style)
...
Definition: fft_lib.F:212
subroutine, public fft_destroy_plan(plan)
...
Definition: fft_lib.F:241
subroutine, public fft_get_lengths(fft_type, DATA, max_length)
...
Definition: fft_lib.F:106
integer function, public fft_library(fftlib)
Interface to FFT libraries.
Definition: fft_lib.F:42
subroutine, public fft_1dm(plan, zin, zout, scale, stat)
...
Definition: fft_lib.F:261
subroutine, public fft_create_plan_3d(plan, fft_type, fft_in_place, fsign, n, zin, zout, plan_style)
...
Definition: fft_lib.F:136
subroutine, public fft_do_cleanup(fft_type, wisdom_file, ionode)
...
Definition: fft_lib.F:84
Type to store data about a (1D or 3D) FFT, including FFTW plan.
Definition: fft_plan.F:18
subroutine, public fft_radix_operations(radix_in, radix_out, operation)
Determine the allowed lengths of FFT's '''.
Definition: fft_tools.F:239
integer, save init_fft_pool
Definition: fft_tools.F:129
subroutine, public yz_to_x(tb, group, my_pos, p2p, yzp, nray, bo, sb, fft_scratch)
...
Definition: fft_tools.F:1497
integer, parameter, public bwfft
Definition: fft_tools.F:145
subroutine, public yz_to_xz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
...
Definition: fft_tools.F:1613
integer, save, public fft_plan_style
Definition: fft_tools.F:156
subroutine, public cube_transpose_1(cin, boin, boout, sout, fft_scratch)
...
Definition: fft_tools.F:2017
integer, parameter, public fft_radix_closest
Definition: fft_tools.F:146
subroutine, public fft_fw1d(n, m, trans, zin, zout, scale, stat)
Performs m 1-D forward FFT-s of size n.
Definition: fft_tools.F:331
subroutine, public release_fft_scratch(fft_scratch)
...
Definition: fft_tools.F:3254
subroutine, public x_to_yz(sb, group, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
...
Definition: fft_tools.F:1382
integer, save tick_fft_pool
Definition: fft_tools.F:131
subroutine, public init_fft(fftlib, alltoall, fftsg_sizes, pool_limit, wisdom_file, plan_style)
...
Definition: fft_tools.F:185
subroutine, public get_fft_scratch(fft_scratch, tf_type, n, fft_sizes)
...
Definition: fft_tools.F:2876
integer, parameter, public fwfft
Definition: fft_tools.F:145
integer, save, public fft_type
Definition: fft_tools.F:153
integer, parameter, public fft_radix_next_odd
Definition: fft_tools.F:148
subroutine, public xz_to_yz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
...
Definition: fft_tools.F:1823
subroutine, public cube_transpose_2(cin, boin, boout, sout, fft_scratch)
...
Definition: fft_tools.F:2107
type(fft_scratch_pool_type), pointer, save fft_scratch_first
Definition: fft_tools.F:134
integer, save fft_pool_scratch_limit
Definition: fft_tools.F:133
integer, parameter, public fft_radix_next
Definition: fft_tools.F:146
subroutine, public finalize_fft(para_env, wisdom_file)
does whatever is needed to finalize the current fft setup
Definition: fft_tools.F:216
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public dp_size
Definition: kinds.F:36
integer, parameter, public dp
Definition: kinds.F:34
integer, parameter, public sp
Definition: kinds.F:33
Definition of mathematical constants and functions.
Definition: mathconstants.F:16
complex(kind=dp), parameter, public z_zero
Interface to the message passing library MPI.
type(mp_comm_type), parameter, public mp_comm_null
Fortran API for the offload package, which is written in C.
Definition: offload_api.F:12
integer function, public offload_free_pinned_mem(buffer)
free pinned memory
Definition: offload_api.F:75
integer function, public offload_malloc_pinned_mem(buffer, length)
allocate pinned memory.
Definition: offload_api.F:52