(git:ccc2433)
fftw3_lib.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 MODULE fftw3_lib
8 
9  USE iso_c_binding, ONLY: c_associated, &
10  c_char, &
11  c_double, &
12  c_double_complex, &
13  c_int, &
14  c_ptr
15 #if defined(__FFTW3)
16  USE iso_c_binding, ONLY: &
17  c_float, &
18  c_float_complex, &
19  c_funptr, &
20  c_int32_t, &
21  c_intptr_t, &
22  c_loc, &
23  c_null_char, &
24  c_size_t, c_f_pointer
25 #endif
26  USE cp_files, ONLY: get_unit_number
27  USE fft_kinds, ONLY: dp
28  USE fft_plan, ONLY: fft_plan_type
29 
30 !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
31 
32 #include "../../base/base_uses.f90"
33 
34  IMPLICIT NONE
35  PRIVATE
36 
39  PUBLIC :: fftw_alloc, fftw_dealloc
40 
41 #if defined ( __FFTW3 )
42 #if defined ( __FFTW3_MKL )
43 #include "fftw/fftw3.f03"
44 #else
45 #include "fftw3.f03"
46 #endif
47 #endif
48 
49  INTERFACE fftw_alloc
50  MODULE PROCEDURE :: fftw_alloc_complex_1d
51  MODULE PROCEDURE :: fftw_alloc_complex_2d
52  MODULE PROCEDURE :: fftw_alloc_complex_3d
53  END INTERFACE
54 
55  INTERFACE fftw_dealloc
56  MODULE PROCEDURE :: fftw_dealloc_complex_1d
57  MODULE PROCEDURE :: fftw_dealloc_complex_2d
58  MODULE PROCEDURE :: fftw_dealloc_complex_3d
59  END INTERFACE
60 
61 CONTAINS
62 
63 ! Concatenate the components of the dimensions passed to this function to use it if FFTW3 is not used
64  SUBROUTINE fftw_alloc_complex_1d(array, n)
65  COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:), CONTIGUOUS, POINTER, INTENT(OUT) :: array
66  INTEGER, DIMENSION(1), INTENT(IN) :: n
67 
68 #if defined(__FFTW3)
69  TYPE(C_PTR) :: data_ptr
70  data_ptr = fftw_alloc_complex(int(product(n), kind=c_size_t))
71  CALL c_f_pointer(data_ptr, array, n)
72 #else
73 ! Just allocate the array
74  ALLOCATE (array(n(1)))
75 #endif
76 
77  END SUBROUTINE fftw_alloc_complex_1d
78 
79  SUBROUTINE fftw_dealloc_complex_1d(array)
80  COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:), CONTIGUOUS, POINTER, INTENT(INOUT) :: array
81 
82 #if defined(__FFTW3)
83  CALL fftw_free(c_loc(array))
84  NULLIFY (array)
85 #else
86 ! Just deallocate the array
87  DEALLOCATE (array)
88 #endif
89 
90  END SUBROUTINE fftw_dealloc_complex_1d
91 ! Concatenate the components of the dimensions passed to this function to use it if FFTW3 is not used
92  SUBROUTINE fftw_alloc_complex_2d(array, n)
93  COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:, :), CONTIGUOUS, POINTER, INTENT(OUT) :: array
94  INTEGER, DIMENSION(2), INTENT(IN) :: n
95 
96 #if defined(__FFTW3)
97  TYPE(C_PTR) :: data_ptr
98  data_ptr = fftw_alloc_complex(int(product(n), kind=c_size_t))
99  CALL c_f_pointer(data_ptr, array, n)
100 #else
101 ! Just allocate the array
102  ALLOCATE (array(n(1), n(2)))
103 #endif
104 
105  END SUBROUTINE fftw_alloc_complex_2d
106 
107  SUBROUTINE fftw_dealloc_complex_2d(array)
108  COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:, :), CONTIGUOUS, POINTER, INTENT(INOUT) :: array
109 
110 #if defined(__FFTW3)
111  CALL fftw_free(c_loc(array))
112  NULLIFY (array)
113 #else
114 ! Just deallocate the array
115  DEALLOCATE (array)
116 #endif
117 
118  END SUBROUTINE fftw_dealloc_complex_2d
119 ! Concatenate the components of the dimensions passed to this function to use it if FFTW3 is not used
120  SUBROUTINE fftw_alloc_complex_3d(array, n)
121  COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:, :, :), CONTIGUOUS, POINTER, INTENT(OUT) :: array
122  INTEGER, DIMENSION(3), INTENT(IN) :: n
123 
124 #if defined(__FFTW3)
125  TYPE(C_PTR) :: data_ptr
126  data_ptr = fftw_alloc_complex(int(product(n), kind=c_size_t))
127  CALL c_f_pointer(data_ptr, array, n)
128 #else
129 ! Just allocate the array
130  ALLOCATE (array(n(1), n(2), n(3)))
131 #endif
132 
133  END SUBROUTINE fftw_alloc_complex_3d
134 
135  SUBROUTINE fftw_dealloc_complex_3d(array)
136  COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:, :, :), CONTIGUOUS, POINTER, INTENT(INOUT) :: array
137 
138 #if defined(__FFTW3)
139  CALL fftw_free(c_loc(array))
140  NULLIFY (array)
141 #else
142 ! Just deallocate the array
143  DEALLOCATE (array)
144 #endif
145 
146  END SUBROUTINE fftw_dealloc_complex_3d
147 
148 #if defined ( __FFTW3 )
149 ! **************************************************************************************************
150 !> \brief A workaround that allows us to compile with -Werror=unused-parameter
151 ! **************************************************************************************************
152  SUBROUTINE dummy_routine_to_call_mark_used()
153  mark_used(fftw_r2hc)
154  mark_used(fftw_hc2r)
155  mark_used(fftw_dht)
156  mark_used(fftw_redft00)
157  mark_used(fftw_redft01)
158  mark_used(fftw_redft10)
159  mark_used(fftw_redft11)
160  mark_used(fftw_rodft00)
161  mark_used(fftw_rodft01)
162  mark_used(fftw_rodft10)
163  mark_used(fftw_rodft11)
164  mark_used(fftw_forward)
165  mark_used(fftw_backward)
166  mark_used(fftw_measure)
167  mark_used(fftw_destroy_input)
168  mark_used(fftw_unaligned)
169  mark_used(fftw_conserve_memory)
170  mark_used(fftw_exhaustive)
171  mark_used(fftw_preserve_input)
172  mark_used(fftw_patient)
173  mark_used(fftw_estimate)
174  mark_used(fftw_wisdom_only)
175  mark_used(fftw_estimate_patient)
176  mark_used(fftw_believe_pcost)
177  mark_used(fftw_no_dft_r2hc)
178  mark_used(fftw_no_nonthreaded)
179  mark_used(fftw_no_buffering)
180  mark_used(fftw_no_indirect_op)
181  mark_used(fftw_allow_large_generic)
182  mark_used(fftw_no_rank_splits)
183  mark_used(fftw_no_vrank_splits)
184  mark_used(fftw_no_vrecurse)
185  mark_used(fftw_no_simd)
186  mark_used(fftw_no_slow)
187  mark_used(fftw_no_fixed_radix_large_n)
188  mark_used(fftw_allow_pruning)
189  END SUBROUTINE
190 #endif
191 
192 ! **************************************************************************************************
193 !> \brief ...
194 !> \param wisdom_file ...
195 !> \param ionode ...
196 ! **************************************************************************************************
197  SUBROUTINE fftw3_do_cleanup(wisdom_file, ionode)
198 
199  CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
200  LOGICAL :: ionode
201 
202 #if defined ( __FFTW3 )
203  INTEGER :: iunit, istat
204  INTEGER(KIND=C_INT) :: isuccess
205  ! Write out FFTW3 wisdom to file (if we can)
206  ! only the ionode updates the wisdom
207  IF (ionode) THEN
208  iunit = get_unit_number()
209  OPEN (unit=iunit, file=wisdom_file, status="UNKNOWN", form="FORMATTED", action="WRITE", iostat=istat)
210  IF (istat == 0) THEN
211  isuccess = fftw_export_wisdom_to_filename(wisdom_file//c_null_char)
212  IF (isuccess == 0) &
213  cpabort("Error exporting wisdom to file "//wisdom_file)
214  CLOSE (iunit)
215  END IF
216  END IF
217 
218  CALL fftw_cleanup()
219 #else
220  mark_used(wisdom_file)
221  mark_used(ionode)
222 #endif
223 
224  END SUBROUTINE
225 
226 ! **************************************************************************************************
227 !> \brief ...
228 !> \param wisdom_file ...
229 ! **************************************************************************************************
230  SUBROUTINE fftw3_do_init(wisdom_file)
231 
232  CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
233 
234 #if defined ( __FFTW3 )
235  INTEGER :: istat, iunit
236  INTEGER(KIND=C_INT) :: isuccess
237  LOGICAL :: exist
238 
239 #if defined(__MKL)
240 !$ LOGICAL :: mkl_is_safe
241 #endif
242 
243 ! If using the Intel compiler then we need to declare
244 ! a C interface to a global variable in MKL that sets
245 ! the number of threads which can concurrently execute
246 ! an FFT
247 ! We need __INTEL_COMPILER so we can be sure that the compiler
248 ! understands the !DEC$ version definitions
249 #if defined (__INTEL_COMPILER) && defined (__MKL)
250 !$ include "mkl.fi"
251 !DEC$ IF DEFINED (INTEL_MKL_VERSION)
252 !DEC$ IF INTEL_MKL_VERSION .EQ. 110100
253 !DIR$ ATTRIBUTES ALIGN : 8 :: fftw3_mkl
254 !$ COMMON/fftw3_mkl/ignore(4), mkl_dft_number_of_user_threads, ignore2(7)
255 !$ INTEGER*4 :: ignore, mkl_dft_number_of_user_threads, ignore2
256 !$ BIND(c) :: /fftw3_mkl/
257 !DEC$ ENDIF
258 !DEC$ ENDIF
259 #elif defined (__MKL)
260 ! Preprocessing is enabled by default, and below header is not language specific
261 #include <mkl_version.h>
262 #endif
263 
264  ! Read FFTW wisdom (if available)
265  ! all nodes are opening the file here...
266  INQUIRE (file=wisdom_file, exist=exist)
267  IF (exist) THEN
268  iunit = get_unit_number()
269  OPEN (unit=iunit, file=wisdom_file, status="OLD", form="FORMATTED", position="REWIND", &
270  action="READ", iostat=istat)
271  IF (istat == 0) THEN
272  isuccess = fftw_import_wisdom_from_filename(wisdom_file//c_null_char)
273  IF (isuccess == 0) &
274  cpabort("Error importing wisdom from file "//wisdom_file)
275  ! write(*,*) "FFTW3 import wisdom from file ....",MERGE((/"OK "/),(/"NOT OK"/),(/isuccess==1/))
276  CLOSE (iunit)
277  END IF
278  END IF
279 
280 #if defined (__MKL)
281  ! Now check if we have a real FFTW3 library, or are using MKL wrappers
282 
283 !$ IF (fftw3_is_mkl_wrapper() .and. omp_get_max_threads() .gt. 1) THEN
284 ! If we are not using the Intel compiler, there is no way to tell which
285 ! MKL version is in use, so fail safe...
286 !$ mkl_is_safe = .FALSE.
287 #if defined(INTEL_MKL_VERSION) && (110100 < INTEL_MKL_VERSION)
288 !$ mkl_is_safe = .TRUE.
289 #elif defined (__INTEL_COMPILER)
290 ! If we have an Intel compiler (__INTEL_COMPILER is defined) then check the
291 ! MKL version and make the appropriate action
292 !DEC$ IF DEFINED (INTEL_MKL_VERSION)
293 !DEC$ IF INTEL_MKL_VERSION .EQ. 110100
294 !$ mkl_dft_number_of_user_threads = omp_get_max_threads()
295 !DEC$ ENDIF
296 !DEC$ IF INTEL_MKL_VERSION .GE. 110100
297 !$ mkl_is_safe = .TRUE.
298 !DEC$ ENDIF
299 !DEC$ ENDIF
300 #endif
301 !$ IF (.NOT. mkl_is_safe) THEN
302 !$ STOP "Intel's FFTW3 interface to MKL is not "// &
303 !$ "thread-safe prior to MKL 11.1.0! Please "// &
304 !$ "rebuild CP2K, linking against FFTW 3 from "// &
305 !$ "www.fftw.org or a newer version of MKL. "// &
306 !$ "Now exiting..."
307 !$ END IF
308 !$ END IF
309 #endif
310 #else
311  mark_used(wisdom_file)
312 #endif
313 
314  END SUBROUTINE
315 
316 ! **************************************************************************************************
317 !> \brief ...
318 !> \param DATA ...
319 !> \param max_length ...
320 !> \par History
321 !> JGH 23-Jan-2006 : initial version
322 !> Adapted for new interface
323 !> IAB 09-Jan-2009 : Modified to cache plans in fft_plan_type
324 !> (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
325 !> IAB 09-Oct-2009 : Added OpenMP directives to 1D FFT, and planning routines
326 !> (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
327 !> IAB 11-Sep-2012 : OpenMP parallel 3D FFT (Ruyman Reyes, PRACE)
328 !> \author JGH
329 ! **************************************************************************************************
330  SUBROUTINE fftw3_get_lengths(DATA, max_length)
331 
332  INTEGER, DIMENSION(*) :: data
333  INTEGER, INTENT(INOUT) :: max_length
334 
335  INTEGER :: h, i, j, k, m, maxn, maxn_elevens, &
336  maxn_fives, maxn_sevens, &
337  maxn_thirteens, maxn_threes, &
338  maxn_twos, ndata, nmax, number
339  INTEGER, ALLOCATABLE, DIMENSION(:) :: dlocal, idx
340 
341 !------------------------------------------------------------------------------
342 ! compute ndata
343 !! FFTW can do arbitrary(?) lengths, maybe you want to limit them to some
344 !! powers of small prime numbers though...
345 
346  maxn_twos = 15
347  maxn_threes = 3
348  maxn_fives = 2
349  maxn_sevens = 1
350  maxn_elevens = 1
351  maxn_thirteens = 0
352  maxn = 37748736
353 
354  ndata = 0
355  DO h = 0, maxn_twos
356  nmax = huge(0)/2**h
357  DO i = 0, maxn_threes
358  DO j = 0, maxn_fives
359  DO k = 0, maxn_sevens
360  DO m = 0, maxn_elevens
361  number = (3**i)*(5**j)*(7**k)*(11**m)
362 
363  IF (number > nmax) cycle
364 
365  number = number*2**h
366  IF (number >= maxn) cycle
367 
368  ndata = ndata + 1
369  END DO
370  END DO
371  END DO
372  END DO
373  END DO
374 
375  ALLOCATE (dlocal(ndata), idx(ndata))
376 
377  ndata = 0
378  dlocal(:) = 0
379  DO h = 0, maxn_twos
380  nmax = huge(0)/2**h
381  DO i = 0, maxn_threes
382  DO j = 0, maxn_fives
383  DO k = 0, maxn_sevens
384  DO m = 0, maxn_elevens
385  number = (3**i)*(5**j)*(7**k)*(11**m)
386 
387  IF (number > nmax) cycle
388 
389  number = number*2**h
390  IF (number >= maxn) cycle
391 
392  ndata = ndata + 1
393  dlocal(ndata) = number
394  END DO
395  END DO
396  END DO
397  END DO
398  END DO
399 
400  CALL sortint(dlocal, ndata, idx)
401  ndata = min(ndata, max_length)
402  DATA(1:ndata) = dlocal(1:ndata)
403  max_length = ndata
404 
405  DEALLOCATE (dlocal, idx)
406 
407  END SUBROUTINE fftw3_get_lengths
408 
409 ! **************************************************************************************************
410 !> \brief ...
411 !> \param iarr ...
412 !> \param n ...
413 !> \param index ...
414 ! **************************************************************************************************
415  SUBROUTINE sortint(iarr, n, index)
416 
417  INTEGER, INTENT(IN) :: n
418  INTEGER, INTENT(INOUT) :: iarr(1:n)
419  INTEGER, INTENT(OUT) :: index(1:n)
420 
421  INTEGER, PARAMETER :: m = 7, nstack = 50
422 
423  INTEGER :: a, i, ib, ir, istack(1:nstack), itemp, &
424  j, jstack, k, l, temp
425 
426 !------------------------------------------------------------------------------
427 
428  DO i = 1, n
429  index(i) = i
430  END DO
431  jstack = 0
432  l = 1
433  ir = n
434  DO WHILE (.true.)
435  IF (ir - l < m) THEN
436  DO j = l + 1, ir
437  a = iarr(j)
438  ib = index(j)
439  DO i = j - 1, 0, -1
440  IF (i == 0) EXIT
441  IF (iarr(i) <= a) EXIT
442  iarr(i + 1) = iarr(i)
443  index(i + 1) = index(i)
444  END DO
445  iarr(i + 1) = a
446  index(i + 1) = ib
447  END DO
448  IF (jstack == 0) RETURN
449  ir = istack(jstack)
450  l = istack(jstack - 1)
451  jstack = jstack - 2
452  ELSE
453  k = (l + ir)/2
454  temp = iarr(k)
455  iarr(k) = iarr(l + 1)
456  iarr(l + 1) = temp
457  itemp = index(k)
458  index(k) = index(l + 1)
459  index(l + 1) = itemp
460  IF (iarr(l + 1) > iarr(ir)) THEN
461  temp = iarr(l + 1)
462  iarr(l + 1) = iarr(ir)
463  iarr(ir) = temp
464  itemp = index(l + 1)
465  index(l + 1) = index(ir)
466  index(ir) = itemp
467  END IF
468  IF (iarr(l) > iarr(ir)) THEN
469  temp = iarr(l)
470  iarr(l) = iarr(ir)
471  iarr(ir) = temp
472  itemp = index(l)
473  index(l) = index(ir)
474  index(ir) = itemp
475  END IF
476  IF (iarr(l + 1) > iarr(l)) THEN
477  temp = iarr(l + 1)
478  iarr(l + 1) = iarr(l)
479  iarr(l) = temp
480  itemp = index(l + 1)
481  index(l + 1) = index(l)
482  index(l) = itemp
483  END IF
484  i = l + 1
485  j = ir
486  a = iarr(l)
487  ib = index(l)
488  DO WHILE (.true.)
489  i = i + 1
490  DO WHILE (iarr(i) < a)
491  i = i + 1
492  END DO
493  j = j - 1
494  DO WHILE (iarr(j) > a)
495  j = j - 1
496  END DO
497  IF (j < i) EXIT
498  temp = iarr(i)
499  iarr(i) = iarr(j)
500  iarr(j) = temp
501  itemp = index(i)
502  index(i) = index(j)
503  index(j) = itemp
504  END DO
505  iarr(l) = iarr(j)
506  iarr(j) = a
507  index(l) = index(j)
508  index(j) = ib
509  jstack = jstack + 2
510  IF (jstack > nstack) cpabort(" Nstack too small in sortr")
511  IF (ir - i + 1 >= j - l) THEN
512  istack(jstack) = ir
513  istack(jstack - 1) = i
514  ir = j - 1
515  ELSE
516  istack(jstack) = j - 1
517  istack(jstack - 1) = l
518  l = i
519  END IF
520  END IF
521 
522  END DO
523 
524  END SUBROUTINE sortint
525 
526 ! **************************************************************************************************
527 
528 ! **************************************************************************************************
529 !> \brief ...
530 !> \param plan ...
531 !> \param fft_rank ...
532 !> \param dim_n ...
533 !> \param dim_istride ...
534 !> \param dim_ostride ...
535 !> \param hm_rank ...
536 !> \param hm_n ...
537 !> \param hm_istride ...
538 !> \param hm_ostride ...
539 !> \param zin ...
540 !> \param zout ...
541 !> \param fft_direction ...
542 !> \param fftw_plan_type ...
543 !> \param valid ...
544 ! **************************************************************************************************
545  SUBROUTINE fftw3_create_guru_plan(plan, fft_rank, dim_n, &
546  dim_istride, dim_ostride, hm_rank, &
547  hm_n, hm_istride, hm_ostride, &
548  zin, zout, fft_direction, fftw_plan_type, &
549  valid)
550 
551  IMPLICIT NONE
552 
553  TYPE(c_ptr), INTENT(INOUT) :: plan
554  COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zin, zout
555  INTEGER, INTENT(IN) :: dim_n(2), dim_istride(2), dim_ostride(2), &
556  hm_n(2), hm_istride(2), hm_ostride(2), fft_rank, &
557  fft_direction, fftw_plan_type, hm_rank
558  LOGICAL, INTENT(OUT) :: valid
559 
560 #if defined (__FFTW3)
561  TYPE(fftw_iodim) :: dim(2), hm(2)
562  INTEGER :: i
563 
564  DO i = 1, 2
565  dim(i) = fftw_iodim(dim_n(i), dim_istride(i), dim_ostride(i))
566  hm(i) = fftw_iodim(hm_n(i), hm_istride(i), hm_ostride(i))
567  END DO
568 
569  plan = fftw_plan_guru_dft(fft_rank, &
570  dim, hm_rank, hm, &
571  zin, zout, &
572  fft_direction, fftw_plan_type)
573 
574  valid = c_associated(plan)
575 
576 #else
577  mark_used(plan)
578  mark_used(fft_rank)
579  mark_used(dim_n)
580  mark_used(dim_istride)
581  mark_used(dim_ostride)
582  mark_used(hm_rank)
583  mark_used(hm_n)
584  mark_used(hm_istride)
585  mark_used(hm_ostride)
586  mark_used(fft_direction)
587  mark_used(fftw_plan_type)
588  !MARK_USED does not work with assumed size arguments
589  IF (.false.) then; do; IF (abs(zin(1)) > abs(zout(1))) exit; END do; END IF
590  valid = .false.
591 
592 #endif
593 
594  END SUBROUTINE
595 
596 ! **************************************************************************************************
597 
598 ! **************************************************************************************************
599 !> \brief ...
600 !> \return ...
601 ! **************************************************************************************************
602  FUNCTION fftw3_is_mkl_wrapper() RESULT(is_mkl)
603 
604  IMPLICIT NONE
605 
606  LOGICAL :: is_mkl
607 #if defined ( __FFTW3 )
608  LOGICAL :: guru_supported
609  INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
610  howmany_n(2), howmany_istride(2), howmany_ostride(2)
611  TYPE(c_ptr) :: test_plan
612  COMPLEX(KIND=dp), DIMENSION(1, 1, 1) :: zin
613 
614  ! Attempt to create a plan with the guru interface for a 2d sub-space
615  ! If this fails (e.g. for MKL's FFTW3 interface), fall back to the
616  ! FFTW3 threaded 3D transform instead of the hand-optimised version
617  dim_n(1) = 1
618  dim_istride(1) = 1
619  dim_ostride(1) = 1
620  howmany_n(1) = 1
621  howmany_n(2) = 1
622  howmany_istride(1) = 1
623  howmany_istride(2) = 1
624  howmany_ostride(1) = 1
625  howmany_ostride(2) = 1
626  zin = cmplx(0.0_dp, 0.0_dp, kind=dp)
627  CALL fftw3_create_guru_plan(test_plan, 1, &
628  dim_n, dim_istride, dim_ostride, &
629  2, howmany_n, howmany_istride, howmany_ostride, &
630  zin, zin, &
631  fftw_forward, fftw_estimate, guru_supported)
632  IF (guru_supported) THEN
633  CALL fftw_destroy_plan(test_plan)
634  is_mkl = .false.
635  ELSE
636  is_mkl = .true.
637  END IF
638 
639 #else
640  is_mkl = .false.
641 #endif
642 
643  END FUNCTION
644 
645 ! **************************************************************************************************
646 
647 ! **************************************************************************************************
648 !> \brief ...
649 !> \param nrows ...
650 !> \param nt ...
651 !> \param rows_per_thread ...
652 !> \param rows_per_thread_r ...
653 !> \param th_planA ...
654 !> \param th_planB ...
655 ! **************************************************************************************************
656  SUBROUTINE fftw3_compute_rows_per_th(nrows, nt, rows_per_thread, rows_per_thread_r, &
657  th_planA, th_planB)
658 
659  INTEGER, INTENT(IN) :: nrows, nt
660  INTEGER, INTENT(OUT) :: rows_per_thread, rows_per_thread_r, &
661  th_plana, th_planb
662 
663  IF (mod(nrows, nt) .EQ. 0) THEN
664  rows_per_thread = nrows/nt
665  rows_per_thread_r = 0
666  th_plana = nt
667  th_planb = 0
668  ELSE
669  rows_per_thread = nrows/nt + 1
670  rows_per_thread_r = nrows/nt
671  th_plana = mod(nrows, nt)
672  th_planb = nt - th_plana
673  END IF
674 
675  END SUBROUTINE
676 
677 ! **************************************************************************************************
678 
679 ! **************************************************************************************************
680 !> \brief ...
681 !> \param plan ...
682 !> \param plan_r ...
683 !> \param dim_n ...
684 !> \param dim_istride ...
685 !> \param dim_ostride ...
686 !> \param hm_n ...
687 !> \param hm_istride ...
688 !> \param hm_ostride ...
689 !> \param input ...
690 !> \param output ...
691 !> \param fft_direction ...
692 !> \param fftw_plan_type ...
693 !> \param rows_per_th ...
694 !> \param rows_per_th_r ...
695 ! **************************************************************************************************
696  SUBROUTINE fftw3_create_3d_plans(plan, plan_r, dim_n, dim_istride, dim_ostride, &
697  hm_n, hm_istride, hm_ostride, &
698  input, output, &
699  fft_direction, fftw_plan_type, rows_per_th, &
700  rows_per_th_r)
701 
702  TYPE(c_ptr), INTENT(INOUT) :: plan, plan_r
703  INTEGER, INTENT(INOUT) :: dim_n(2), dim_istride(2), &
704  dim_ostride(2), hm_n(2), &
705  hm_istride(2), hm_ostride(2)
706  COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
707  INTEGER, INTENT(INOUT) :: fft_direction, fftw_plan_type
708  INTEGER, INTENT(IN) :: rows_per_th, rows_per_th_r
709 
710  LOGICAL :: valid
711 
712 ! First plans will have an additional row
713 
714  hm_n(2) = rows_per_th
715  CALL fftw3_create_guru_plan(plan, 1, &
716  dim_n, dim_istride, dim_ostride, &
717  2, hm_n, hm_istride, hm_ostride, &
718  input, output, &
719  fft_direction, fftw_plan_type, valid)
720 
721  IF (.NOT. valid) THEN
722  cpabort("fftw3_create_plan")
723  END IF
724 
725  !!!! Remainder
726  hm_n(2) = rows_per_th_r
727  CALL fftw3_create_guru_plan(plan_r, 1, &
728  dim_n, dim_istride, dim_ostride, &
729  2, hm_n, hm_istride, hm_ostride, &
730  input, output, &
731  fft_direction, fftw_plan_type, valid)
732  IF (.NOT. valid) THEN
733  cpabort("fftw3_create_plan (remaining)")
734  END IF
735 
736  END SUBROUTINE
737 
738 ! **************************************************************************************************
739 
740 ! **************************************************************************************************
741 !> \brief ...
742 !> \param plan ...
743 !> \param zin ...
744 !> \param zout ...
745 !> \param plan_style ...
746 ! **************************************************************************************************
747  SUBROUTINE fftw3_create_plan_3d(plan, zin, zout, plan_style)
748 
749  TYPE(fft_plan_type), INTENT(INOUT) :: plan
750  COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zin
751  COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zout
752  INTEGER :: plan_style
753 #if defined ( __FFTW3 )
754  INTEGER :: n1, n2, n3
755  INTEGER :: nt
756  INTEGER :: rows_per_th
757  INTEGER :: rows_per_th_r
758  INTEGER :: fft_direction
759  INTEGER :: th_plana, th_planb
760  COMPLEX(KIND=dp), ALLOCATABLE :: tmp(:)
761 
762  ! GURU Interface
763  INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
764  howmany_n(2), howmany_istride(2), howmany_ostride(2)
765 
766  INTEGER :: fftw_plan_type
767  SELECT CASE (plan_style)
768  CASE (1)
769  fftw_plan_type = fftw_estimate
770  CASE (2)
771  fftw_plan_type = fftw_measure
772  CASE (3)
773  fftw_plan_type = fftw_patient
774  CASE (4)
775  fftw_plan_type = fftw_exhaustive
776  CASE DEFAULT
777  cpabort("fftw3_create_plan_3d")
778  END SELECT
779 
780 #if defined (__FFTW3_UNALIGNED)
781  fftw_plan_type = fftw_plan_type + fftw_unaligned
782 #endif
783 
784  IF (plan%fsign == +1) THEN
785  fft_direction = fftw_forward
786  ELSE
787  fft_direction = fftw_backward
788  END IF
789 
790  n1 = plan%n_3d(1)
791  n2 = plan%n_3d(2)
792  n3 = plan%n_3d(3)
793 
794  nt = 1
795 !$OMP PARALLEL DEFAULT(NONE) SHARED(nt)
796 !$OMP MASTER
797 !$ nt = omp_get_num_threads()
798 !$OMP END MASTER
799 !$OMP END PARALLEL
800 
801  IF ((fftw3_is_mkl_wrapper()) .OR. &
802  (.NOT. plan_style == 1) .OR. &
803  (n1 < 256 .AND. n2 < 256 .AND. n3 < 256 .AND. nt == 1)) THEN
804  ! If the plan type is MEASURE, PATIENT and EXHAUSTIVE or
805  ! the grid size is small (and we are single-threaded) then
806  ! FFTW3 does a better job than handmade optimization
807  ! so plan a single 3D FFT which will execute using all the threads
808 
809  plan%separated_plans = .false.
810 !$ CALL fftw_plan_with_nthreads(nt)
811 
812  IF (plan%fft_in_place) THEN
813  plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zin, fft_direction, fftw_plan_type)
814  ELSE
815  plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zout, fft_direction, fftw_plan_type)
816  END IF
817  ELSE
818  ALLOCATE (tmp(n1*n2*n3))
819  ! ************************* PLANS WITH TRANSPOSITIONS ****************************
820  ! In the cases described above, we manually thread each stage of the 3D FFT.
821  !
822  ! The following plans replace the 3D FFT call by running 1D FFTW across all
823  ! 3 directions of the array.
824  !
825  ! Output of FFTW is transposed to ensure that the next round of FFTW access
826  ! contiguous information.
827  !
828  ! Assuming the input matrix is M(n3,n2,n1), FFTW/Transp are :
829  ! M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
830  ! Notice that last matrix is transposed in the Z axis. A DO-loop in the execute routine
831  ! will perform the final transposition. Performance evaluation showed that using an external
832  ! DO loop to do the final transposition performed better than directly transposing the output.
833  ! However, this might vary depending on the compiler/platform, so a potential tuning spot
834  ! is to perform the final transposition within the fftw library rather than using the external loop
835  ! See comments below in Z-FFT for how to transpose the output to avoid the final DO loop.
836  !
837  ! Doc. for the Guru interface is in http://www.fftw.org/doc/Guru-Interface.html
838  !
839  ! OpenMP : Work is distributed on the Z plane.
840  ! All transpositions are out-of-place to facilitate multi-threading
841  !
842  !!!! Plan for X : M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2)
843  CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
844  th_plana, th_planb)
845 
846  dim_n(1) = n1
847  dim_istride(1) = 1
848  dim_ostride(1) = n2
849  howmany_n(1) = n2
850  howmany_n(2) = rows_per_th
851  howmany_istride(1) = n1
852  howmany_istride(2) = n1*n2
853  howmany_ostride(1) = 1
854  howmany_ostride(2) = n1*n2
855  CALL fftw3_create_3d_plans(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
856  dim_n, dim_istride, dim_ostride, howmany_n, &
857  howmany_istride, howmany_ostride, &
858  zin, tmp, &
859  fft_direction, fftw_plan_type, rows_per_th, &
860  rows_per_th_r)
861 
862  !!!! Plan for Y : M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3)
863  CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
864  th_plana, th_planb)
865  dim_n(1) = n2
866  dim_istride(1) = 1
867  dim_ostride(1) = n3
868  howmany_n(1) = n1
869  howmany_n(2) = rows_per_th
870  howmany_istride(1) = n2
871  howmany_istride(2) = n1*n2
872  !!! transposed Z axis on output
873  howmany_ostride(1) = n2*n3
874  howmany_ostride(2) = 1
875 
876  CALL fftw3_create_3d_plans(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
877  dim_n, dim_istride, dim_ostride, &
878  howmany_n, howmany_istride, howmany_ostride, &
879  tmp, zin, &
880  fft_direction, fftw_plan_type, rows_per_th, &
881  rows_per_th_r)
882 
883  !!!! Plan for Z : M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
884  CALL fftw3_compute_rows_per_th(n1, nt, rows_per_th, rows_per_th_r, &
885  th_plana, th_planb)
886  dim_n(1) = n3
887  dim_istride(1) = 1
888  dim_ostride(1) = 1 ! To transpose: n2*n1
889  howmany_n(1) = n2
890  howmany_n(2) = rows_per_th
891  howmany_istride(1) = n3
892  howmany_istride(2) = n2*n3
893  howmany_ostride(1) = n3 ! To transpose: n1
894  howmany_ostride(2) = n2*n3 ! To transpose: 1
895 
896  CALL fftw3_create_3d_plans(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
897  dim_n, dim_istride, dim_ostride, &
898  howmany_n, howmany_istride, howmany_ostride, &
899  zin, tmp, &
900  fft_direction, fftw_plan_type, rows_per_th, &
901  rows_per_th_r)
902 
903  plan%separated_plans = .true.
904 
905  DEALLOCATE (tmp)
906  END IF
907 
908 #else
909  mark_used(plan)
910  mark_used(plan_style)
911  !MARK_USED does not work with assumed size arguments
912  IF (.false.) then; do; IF (abs(zin(1)) > abs(zout(1))) exit; END do; END IF
913 #endif
914 
915  END SUBROUTINE fftw3_create_plan_3d
916 
917 ! **************************************************************************************************
918 
919 ! **************************************************************************************************
920 !> \brief ...
921 !> \param plan ...
922 !> \param plan_r ...
923 !> \param split_dim ...
924 !> \param nt ...
925 !> \param tid ...
926 !> \param input ...
927 !> \param istride ...
928 !> \param output ...
929 !> \param ostride ...
930 ! **************************************************************************************************
931  SUBROUTINE fftw3_workshare_execute_dft(plan, plan_r, split_dim, nt, tid, &
932  input, istride, output, ostride)
933 
934  INTEGER, INTENT(IN) :: split_dim, nt, tid
935  INTEGER, INTENT(IN) :: istride, ostride
936  COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
937  TYPE(c_ptr) :: plan, plan_r
938 #if defined (__FFTW3)
939  INTEGER :: i_off, o_off
940  INTEGER :: th_plana, th_planb
941  INTEGER :: rows_per_thread, rows_per_thread_r
942 
943  CALL fftw3_compute_rows_per_th(split_dim, nt, rows_per_thread, &
944  rows_per_thread_r, &
945  th_plana, th_planb)
946 
947  IF (th_planb .GT. 0) THEN
948  IF (tid .LT. th_plana) THEN
949  i_off = (tid)*(istride*(rows_per_thread)) + 1
950  o_off = (tid)*(ostride*(rows_per_thread)) + 1
951  IF (rows_per_thread .GT. 0) THEN
952  CALL fftw_execute_dft(plan, input(i_off), &
953  output(o_off))
954  END IF
955  ELSE IF ((tid - th_plana) < th_planb) THEN
956 
957  i_off = (th_plana)*istride*(rows_per_thread) + &
958  (tid - th_plana)*istride*(rows_per_thread_r) + 1
959  o_off = (th_plana)*ostride*(rows_per_thread) + &
960  (tid - th_plana)*ostride*(rows_per_thread_r) + 1
961 
962  CALL fftw_execute_dft(plan_r, input(i_off), &
963  output(o_off))
964  END IF
965 
966  ELSE
967  i_off = (tid)*(istride*(rows_per_thread)) + 1
968  o_off = (tid)*(ostride*(rows_per_thread)) + 1
969 
970  CALL fftw_execute_dft(plan, input(i_off), &
971  output(o_off))
972 
973  END IF
974 #else
975  mark_used(plan)
976  mark_used(plan_r)
977  mark_used(split_dim)
978  mark_used(nt)
979  mark_used(tid)
980  mark_used(istride)
981  mark_used(ostride)
982  !MARK_USED does not work with assumed size arguments
983  IF (.false.) then; do; IF (abs(input(1)) > abs(output(1))) exit; END do; END IF
984 #endif
985 
986  END SUBROUTINE
987 
988 ! **************************************************************************************************
989 
990 ! **************************************************************************************************
991 !> \brief ...
992 !> \param plan ...
993 !> \param scale ...
994 !> \param zin ...
995 !> \param zout ...
996 !> \param stat ...
997 ! **************************************************************************************************
998  SUBROUTINE fftw33d(plan, scale, zin, zout, stat)
999 
1000  TYPE(fft_plan_type), INTENT(IN) :: plan
1001  REAL(kind=dp), INTENT(IN) :: scale
1002  COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zin
1003  COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zout
1004  INTEGER, INTENT(OUT) :: stat
1005 #if defined ( __FFTW3 )
1006  COMPLEX(KIND=dp), POINTER :: xout(:)
1007  COMPLEX(KIND=dp), ALLOCATABLE :: tmp1(:)
1008  INTEGER :: n1, n2, n3
1009  INTEGER :: tid, nt
1010  INTEGER :: i, j, k
1011 
1012  n1 = plan%n_3d(1)
1013  n2 = plan%n_3d(2)
1014  n3 = plan%n_3d(3)
1015 
1016  stat = 1
1017 
1018  ! We use a POINTER to the output array to avoid duplicating code
1019  IF (plan%fft_in_place) THEN
1020  xout => zin(:n1*n2*n3)
1021  ELSE
1022  xout => zout(:n1*n2*n3)
1023  END IF
1024 
1025  ! Either compute the full 3D FFT using a multithreaded plan
1026  IF (.NOT. plan%separated_plans) THEN
1027  CALL fftw_execute_dft(plan%fftw_plan, zin, xout)
1028  ELSE
1029  ! Or use the 3 stage FFT scheme described in fftw3_create_plan_3d
1030  ALLOCATE (tmp1(n1*n2*n3)) ! Temporary vector used for transpositions
1031  !$OMP PARALLEL DEFAULT(NONE) PRIVATE(tid,nt,i,j,k) SHARED(zin,tmp1,n1,n2,n3,plan,xout)
1032  tid = 0
1033  nt = 1
1034 
1035 !$ tid = omp_get_thread_num()
1036 !$ nt = omp_get_num_threads()
1037  CALL fftw3_workshare_execute_dft(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
1038  n3, nt, tid, &
1039  zin, n1*n2, tmp1, n1*n2)
1040 
1041  !$OMP BARRIER
1042  CALL fftw3_workshare_execute_dft(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
1043  n3, nt, tid, &
1044  tmp1, n1*n2, xout, 1)
1045  !$OMP BARRIER
1046  CALL fftw3_workshare_execute_dft(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
1047  n1, nt, tid, &
1048  xout, n2*n3, tmp1, n2*n3)
1049  !$OMP BARRIER
1050 
1051  !$OMP DO COLLAPSE(3)
1052  DO i = 1, n1
1053  DO j = 1, n2
1054  DO k = 1, n3
1055  xout((i - 1) + (j - 1)*n1 + (k - 1)*n1*n2 + 1) = &
1056  tmp1((k - 1) + (j - 1)*n3 + (i - 1)*n3*n2 + 1)
1057  END DO
1058  END DO
1059  END DO
1060  !$OMP END DO
1061 
1062  !$OMP END PARALLEL
1063  END IF
1064 
1065  IF (scale /= 1.0_dp) THEN
1066  CALL zdscal(n1*n2*n3, scale, xout, 1)
1067  END IF
1068 
1069 #else
1070  mark_used(plan)
1071  mark_used(scale)
1072  !MARK_USED does not work with assumed size arguments
1073  IF (.false.) then; do; IF (abs(zin(1)) > abs(zout(1))) exit; END do; END IF
1074  stat = 0
1075 
1076 #endif
1077 
1078  END SUBROUTINE fftw33d
1079 
1080 ! **************************************************************************************************
1081 
1082 ! **************************************************************************************************
1083 !> \brief ...
1084 !> \param plan ...
1085 !> \param zin ...
1086 !> \param zout ...
1087 !> \param plan_style ...
1088 ! **************************************************************************************************
1089  SUBROUTINE fftw3_create_plan_1dm(plan, zin, zout, plan_style)
1090 
1091  IMPLICIT NONE
1092 
1093  TYPE(fft_plan_type), INTENT(INOUT) :: plan
1094  COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN) :: zin
1095  COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN) :: zout
1096  INTEGER, INTENT(IN) :: plan_style
1097 #if defined ( __FFTW3 )
1098  INTEGER :: ii, di, io, do, num_threads, num_rows
1099 
1100  INTEGER :: fftw_plan_type
1101  SELECT CASE (plan_style)
1102  CASE (1)
1103  fftw_plan_type = fftw_estimate
1104  CASE (2)
1105  fftw_plan_type = fftw_measure
1106  CASE (3)
1107  fftw_plan_type = fftw_patient
1108  CASE (4)
1109  fftw_plan_type = fftw_exhaustive
1110  CASE DEFAULT
1111  cpabort("fftw3_create_plan_1dm")
1112  END SELECT
1113 
1114 #if defined (__FFTW3_UNALIGNED)
1115  fftw_plan_type = fftw_plan_type + fftw_unaligned
1116 #endif
1117  num_threads = 1
1118  plan%separated_plans = .false.
1119 !$OMP PARALLEL DEFAULT(NONE), &
1120 !$OMP SHARED(NUM_THREADS)
1121 !$OMP MASTER
1122 !$ num_threads = omp_get_num_threads()
1123 !$OMP END MASTER
1124 !$OMP END PARALLEL
1125 
1126  num_rows = plan%m/num_threads
1127 !$ plan%num_threads_needed = num_threads
1128 
1129 ! Check for number of rows less than num_threads
1130 !$ IF (plan%m < num_threads) THEN
1131 !$ num_rows = 1
1132 !$ plan%num_threads_needed = plan%m
1133 !$ END IF
1134 
1135 ! Check for total number of rows not divisible by num_threads
1136 !$ IF (num_rows*plan%num_threads_needed .NE. plan%m) THEN
1137 !$ plan%need_alt_plan = .TRUE.
1138 !$ END IF
1139 
1140 !$ plan%num_rows = num_rows
1141  ii = 1
1142  di = plan%n
1143  io = 1
1144  DO = plan%n
1145  IF (plan%fsign == +1 .AND. plan%trans) THEN
1146  ii = plan%m
1147  di = 1
1148  ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
1149  io = plan%m
1150  DO = 1
1151  END IF
1152 
1153  IF (plan%fsign == +1) THEN
1154  CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, ii, di, &
1155  zout, 0, io, DO, fftw_forward, fftw_plan_type)
1156  ELSE
1157  CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, ii, di, &
1158  zout, 0, io, DO, fftw_backward, fftw_plan_type)
1159  END IF
1160 
1161 !$ IF (plan%need_alt_plan) THEN
1162 !$ plan%alt_num_rows = plan%m - (plan%num_threads_needed - 1)*num_rows
1163 !$ IF (plan%fsign == +1) THEN
1164 !$ CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, ii, di, &
1165 !$ zout, 0, io, DO, FFTW_FORWARD, fftw_plan_type)
1166 !$ ELSE
1167 !$ CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, ii, di, &
1168 !$ zout, 0, io, DO, FFTW_BACKWARD, fftw_plan_type)
1169 !$ END IF
1170 !$ END IF
1171 
1172 #else
1173  mark_used(plan)
1174  mark_used(plan_style)
1175  !MARK_USED does not work with assumed size arguments
1176  IF (.false.) then; do; IF (abs(zin(1)) > abs(zout(1))) exit; END do; END IF
1177 #endif
1178 
1179  END SUBROUTINE fftw3_create_plan_1dm
1180 
1181 ! **************************************************************************************************
1182 !> \brief ...
1183 !> \param plan ...
1184 ! **************************************************************************************************
1185  SUBROUTINE fftw3_destroy_plan(plan)
1186 
1187  TYPE(fft_plan_type), INTENT(INOUT) :: plan
1188 
1189 #if defined ( __FFTW3 )
1190 !$ IF (plan%need_alt_plan) THEN
1191 !$ CALL fftw_destroy_plan(plan%alt_fftw_plan)
1192 !$ END IF
1193 
1194  IF (.NOT. plan%separated_plans) THEN
1195  CALL fftw_destroy_plan(plan%fftw_plan)
1196  ELSE
1197  ! If it is a separated plan then we have to destroy
1198  ! each dim plan individually
1199  CALL fftw_destroy_plan(plan%fftw_plan_nx)
1200  CALL fftw_destroy_plan(plan%fftw_plan_ny)
1201  CALL fftw_destroy_plan(plan%fftw_plan_nz)
1202  CALL fftw_destroy_plan(plan%fftw_plan_nx_r)
1203  CALL fftw_destroy_plan(plan%fftw_plan_ny_r)
1204  CALL fftw_destroy_plan(plan%fftw_plan_nz_r)
1205  END IF
1206 
1207 #else
1208  mark_used(plan)
1209 #endif
1210 
1211  END SUBROUTINE fftw3_destroy_plan
1212 
1213 ! **************************************************************************************************
1214 !> \brief ...
1215 !> \param plan ...
1216 !> \param zin ...
1217 !> \param zout ...
1218 !> \param scale ...
1219 !> \param stat ...
1220 ! **************************************************************************************************
1221  SUBROUTINE fftw31dm(plan, zin, zout, scale, stat)
1222  TYPE(fft_plan_type), INTENT(IN) :: plan
1223  COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN), TARGET :: zin
1224  COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), &
1225  TARGET :: zout
1226  REAL(kind=dp), INTENT(IN) :: scale
1227  INTEGER, INTENT(OUT) :: stat
1228 
1229  INTEGER :: in_offset, my_id, num_rows, out_offset, &
1230  scal_offset
1231  TYPE(c_ptr) :: fftw_plan
1232 
1233 !------------------------------------------------------------------------------
1234 
1235  my_id = 0
1236  num_rows = plan%m
1237 
1238 !$OMP PARALLEL DEFAULT(NONE), &
1239 !$OMP PRIVATE(my_id,num_rows,in_offset,out_offset,scal_offset,fftw_plan), &
1240 !$OMP SHARED(zin,zout), &
1241 !$OMP SHARED(plan,scale,stat)
1242 !$ my_id = omp_get_thread_num()
1243 
1244 !$ if (my_id < plan%num_threads_needed) then
1245 
1246  fftw_plan = plan%fftw_plan
1247 
1248  in_offset = 1
1249  out_offset = 1
1250  scal_offset = 1
1251 
1252 !$ in_offset = 1 + plan%num_rows*my_id*plan%n
1253 !$ out_offset = 1 + plan%num_rows*my_id*plan%n
1254 !$ IF (plan%fsign == +1 .AND. plan%trans) THEN
1255 !$ in_offset = 1 + plan%num_rows*my_id
1256 !$ ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
1257 !$ out_offset = 1 + plan%num_rows*my_id
1258 !$ END IF
1259 !$ scal_offset = 1 + plan%n*plan%num_rows*my_id
1260 !$ IF (plan%need_alt_plan .AND. my_id .EQ. plan%num_threads_needed - 1) THEN
1261 !$ num_rows = plan%alt_num_rows
1262 !$ fftw_plan = plan%alt_fftw_plan
1263 !$ ELSE
1264 !$ num_rows = plan%num_rows
1265 !$ END IF
1266 
1267 #if defined ( __FFTW3 )
1268 !$OMP MASTER
1269  stat = 1
1270 !$OMP END MASTER
1271  CALL dfftw_execute_dft(fftw_plan, zin(in_offset:in_offset), zout(out_offset:out_offset))
1272 !$ end if
1273 ! all theads need to meet at this barrier
1274 !$OMP BARRIER
1275 !$ if (my_id < plan%num_threads_needed) then
1276  IF (scale /= 1.0_dp) CALL zdscal(plan%n*num_rows, scale, zout(scal_offset:scal_offset), 1)
1277 !$ end if
1278 
1279 #else
1280  mark_used(plan)
1281  mark_used(scale)
1282  !MARK_USED does not work with assumed size arguments
1283  IF (.false.) then; do; IF (abs(zin(1)) > abs(zout(1))) exit; END do; END IF
1284  stat = 0
1285 
1286 !$ else
1287 !$ end if
1288 
1289 #endif
1290 
1291 !$OMP END PARALLEL
1292 
1293  END SUBROUTINE fftw31dm
1294 
1295  END MODULE
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
Definition: grid_common.h:153
Utility routines to open and close files. Tracking of preconnections.
Definition: cp_files.F:16
integer function, public get_unit_number(file_name)
Returns the first logical unit that is not preconnected.
Definition: cp_files.F:237
Defines the basic variable types.
Definition: fft_kinds.F:13
integer, parameter, public dp
Definition: fft_kinds.F:18
Type to store data about a (1D or 3D) FFT, including FFTW plan.
Definition: fft_plan.F:18
subroutine, public fftw3_do_cleanup(wisdom_file, ionode)
...
Definition: fftw3_lib.F:198
subroutine, public fftw3_destroy_plan(plan)
...
Definition: fftw3_lib.F:1186
subroutine, public fftw3_create_plan_3d(plan, zin, zout, plan_style)
...
Definition: fftw3_lib.F:748
subroutine, public fftw3_get_lengths(DATA, max_length)
...
Definition: fftw3_lib.F:331
subroutine, public fftw33d(plan, scale, zin, zout, stat)
...
Definition: fftw3_lib.F:999
subroutine, public fftw31dm(plan, zin, zout, scale, stat)
...
Definition: fftw3_lib.F:1222
subroutine, public fftw3_do_init(wisdom_file)
...
Definition: fftw3_lib.F:231
subroutine, public fftw3_create_plan_1dm(plan, zin, zout, plan_style)
...
Definition: fftw3_lib.F:1090