(git:ed6f26b)
Loading...
Searching...
No Matches
fftw3_lib.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
8 USE iso_c_binding, ONLY: c_associated, &
9 c_char, &
10 c_double, &
11 c_double_complex, &
12 c_int, &
13 c_ptr
14#if defined(__FFTW3)
15 USE iso_c_binding, ONLY: &
16 c_float, &
17 c_float_complex, &
18 c_funptr, &
19 c_int32_t, &
20 c_intptr_t, &
21 c_loc, &
22 c_null_char, &
23 c_size_t, c_f_pointer
24#endif
25 USE cp_files, ONLY: get_unit_number
26 USE fft_kinds, ONLY: dp
27 USE fft_plan, ONLY: fft_plan_type
28
29!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
30
31#include "../../base/base_uses.f90"
32
33 IMPLICIT NONE
34 PRIVATE
35
38 PUBLIC :: fftw_alloc, fftw_dealloc
39
40#if defined ( __FFTW3 )
41#if defined ( __FFTW3_MKL )
42#include "fftw/fftw3.f03"
43#else
44#include "fftw3.f03"
45#endif
46#endif
47
48 INTERFACE fftw_alloc
49 MODULE PROCEDURE :: fftw_alloc_complex_1d
50 MODULE PROCEDURE :: fftw_alloc_complex_2d
51 MODULE PROCEDURE :: fftw_alloc_complex_3d
52 END INTERFACE
53
54 INTERFACE fftw_dealloc
55 MODULE PROCEDURE :: fftw_dealloc_complex_1d
56 MODULE PROCEDURE :: fftw_dealloc_complex_2d
57 MODULE PROCEDURE :: fftw_dealloc_complex_3d
58 END INTERFACE
59
60CONTAINS
61
62! Concatenate the components of the dimensions passed to this function to use it if FFTW3 is not used
63 SUBROUTINE fftw_alloc_complex_1d(array, n)
64 COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:), CONTIGUOUS, POINTER, INTENT(OUT) :: array
65 INTEGER, DIMENSION(1), INTENT(IN) :: n
66
67#if defined(__FFTW3)
68 TYPE(c_ptr) :: data_ptr
69 data_ptr = fftw_alloc_complex(int(product(n), kind=c_size_t))
70 CALL c_f_pointer(data_ptr, array, n)
71#else
72! Just allocate the array
73 ALLOCATE (array(n(1)))
74#endif
75
76 END SUBROUTINE fftw_alloc_complex_1d
77
78 SUBROUTINE fftw_dealloc_complex_1d(array)
79 COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:), CONTIGUOUS, POINTER, INTENT(INOUT) :: array
80
81#if defined(__FFTW3)
82 CALL fftw_free(c_loc(array))
83 NULLIFY (array)
84#else
85! Just deallocate the array
86 DEALLOCATE (array)
87#endif
88
89 END SUBROUTINE fftw_dealloc_complex_1d
90! Concatenate the components of the dimensions passed to this function to use it if FFTW3 is not used
91 SUBROUTINE fftw_alloc_complex_2d(array, n)
92 COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:, :), CONTIGUOUS, POINTER, INTENT(OUT) :: array
93 INTEGER, DIMENSION(2), INTENT(IN) :: n
94
95#if defined(__FFTW3)
96 TYPE(c_ptr) :: data_ptr
97 data_ptr = fftw_alloc_complex(int(product(n), kind=c_size_t))
98 CALL c_f_pointer(data_ptr, array, n)
99#else
100! Just allocate the array
101 ALLOCATE (array(n(1), n(2)))
102#endif
103
104 END SUBROUTINE fftw_alloc_complex_2d
105
106 SUBROUTINE fftw_dealloc_complex_2d(array)
107 COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:, :), CONTIGUOUS, POINTER, INTENT(INOUT) :: array
108
109#if defined(__FFTW3)
110 CALL fftw_free(c_loc(array))
111 NULLIFY (array)
112#else
113! Just deallocate the array
114 DEALLOCATE (array)
115#endif
116
117 END SUBROUTINE fftw_dealloc_complex_2d
118! Concatenate the components of the dimensions passed to this function to use it if FFTW3 is not used
119 SUBROUTINE fftw_alloc_complex_3d(array, n)
120 COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:, :, :), CONTIGUOUS, POINTER, INTENT(OUT) :: array
121 INTEGER, DIMENSION(3), INTENT(IN) :: n
122
123#if defined(__FFTW3)
124 TYPE(c_ptr) :: data_ptr
125 data_ptr = fftw_alloc_complex(int(product(n), kind=c_size_t))
126 CALL c_f_pointer(data_ptr, array, n)
127#else
128! Just allocate the array
129 ALLOCATE (array(n(1), n(2), n(3)))
130#endif
131
132 END SUBROUTINE fftw_alloc_complex_3d
133
134 SUBROUTINE fftw_dealloc_complex_3d(array)
135 COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:, :, :), CONTIGUOUS, POINTER, INTENT(INOUT) :: array
136
137#if defined(__FFTW3)
138 CALL fftw_free(c_loc(array))
139 NULLIFY (array)
140#else
141! Just deallocate the array
142 DEALLOCATE (array)
143#endif
144
145 END SUBROUTINE fftw_dealloc_complex_3d
146
147#if defined ( __FFTW3 )
148! **************************************************************************************************
149!> \brief A workaround that allows us to compile with -Werror=unused-parameter
150! **************************************************************************************************
151 SUBROUTINE dummy_routine_to_call_mark_used()
152 mark_used(fftw_r2hc)
153 mark_used(fftw_hc2r)
154 mark_used(fftw_dht)
155 mark_used(fftw_redft00)
156 mark_used(fftw_redft01)
157 mark_used(fftw_redft10)
158 mark_used(fftw_redft11)
159 mark_used(fftw_rodft00)
160 mark_used(fftw_rodft01)
161 mark_used(fftw_rodft10)
162 mark_used(fftw_rodft11)
163 mark_used(fftw_forward)
164 mark_used(fftw_backward)
165 mark_used(fftw_measure)
166 mark_used(fftw_destroy_input)
167 mark_used(fftw_unaligned)
168 mark_used(fftw_conserve_memory)
169 mark_used(fftw_exhaustive)
170 mark_used(fftw_preserve_input)
171 mark_used(fftw_patient)
172 mark_used(fftw_estimate)
173 mark_used(fftw_wisdom_only)
174 mark_used(fftw_estimate_patient)
175 mark_used(fftw_believe_pcost)
176 mark_used(fftw_no_dft_r2hc)
177 mark_used(fftw_no_nonthreaded)
178 mark_used(fftw_no_buffering)
179 mark_used(fftw_no_indirect_op)
180 mark_used(fftw_allow_large_generic)
181 mark_used(fftw_no_rank_splits)
182 mark_used(fftw_no_vrank_splits)
183 mark_used(fftw_no_vrecurse)
184 mark_used(fftw_no_simd)
185 mark_used(fftw_no_slow)
186 mark_used(fftw_no_fixed_radix_large_n)
187 mark_used(fftw_allow_pruning)
188 END SUBROUTINE
189#endif
190
191! **************************************************************************************************
192!> \brief ...
193!> \param wisdom_file ...
194!> \param ionode ...
195! **************************************************************************************************
196 SUBROUTINE fftw3_do_cleanup(wisdom_file, ionode)
197
198 CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
199 LOGICAL :: ionode
200
201#if defined ( __FFTW3 )
202 CHARACTER(LEN=1, KIND=C_CHAR), DIMENSION(:), ALLOCATABLE :: wisdom_file_name_c
203 INTEGER :: file_name_length, i, iunit, istat
204 INTEGER(KIND=C_INT) :: isuccess
205 ! Write out FFTW3 wisdom to file (if we can)
206 ! only the ionode updates the wisdom
207 IF (ionode) THEN
208 iunit = get_unit_number()
209 ! Check whether the file can be opened in the necessary manner
210 OPEN (unit=iunit, file=wisdom_file, status="UNKNOWN", form="FORMATTED", action="WRITE", iostat=istat)
211 IF (istat == 0) THEN
212 CLOSE (iunit)
213 file_name_length = len_trim(wisdom_file)
214 ALLOCATE (wisdom_file_name_c(file_name_length + 1))
215 DO i = 1, file_name_length
216 wisdom_file_name_c(i) = wisdom_file(i:i)
217 END DO
218 wisdom_file_name_c(file_name_length + 1) = c_null_char
219 isuccess = fftw_export_wisdom_to_filename(wisdom_file_name_c)
220 IF (isuccess == 0) &
221 CALL cp_warn(__location__, "Error exporting wisdom to file "//trim(wisdom_file)//". "// &
222 "Wisdom was not exported.")
223 END IF
224 END IF
225
226 CALL fftw_cleanup()
227#else
228 mark_used(wisdom_file)
229 mark_used(ionode)
230#endif
231
232 END SUBROUTINE
233
234! **************************************************************************************************
235!> \brief ...
236!> \param wisdom_file ...
237! **************************************************************************************************
238 SUBROUTINE fftw3_do_init(wisdom_file)
239
240 CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
241
242#if defined ( __FFTW3 )
243 CHARACTER(LEN=1, KIND=C_CHAR), DIMENSION(:), ALLOCATABLE :: wisdom_file_name_c
244 INTEGER :: file_name_length, i, istat, iunit
245 INTEGER(KIND=C_INT) :: isuccess
246 LOGICAL :: file_exists
247
248#if defined(__MKL)
249!$ LOGICAL :: mkl_is_safe
250#endif
251
252! If using the Intel compiler then we need to declare
253! a C interface to a global variable in MKL that sets
254! the number of threads which can concurrently execute
255! an FFT
256! We need __INTEL_COMPILER so we can be sure that the compiler
257! understands the !DEC$ version definitions
258#if defined (__INTEL_COMPILER) && defined (__MKL)
259!$ include "mkl.fi"
260!DEC$ IF DEFINED (INTEL_MKL_VERSION)
261!DEC$ IF INTEL_MKL_VERSION .EQ. 110100
262!DIR$ ATTRIBUTES ALIGN : 8 :: fftw3_mkl
263!$ COMMON/fftw3_mkl/ignore(4), mkl_dft_number_of_user_threads, ignore2(7)
264!$ INTEGER*4 :: ignore, mkl_dft_number_of_user_threads, ignore2
265!$ BIND(c) :: /fftw3_mkl/
266!DEC$ ENDIF
267!DEC$ ENDIF
268#elif defined (__MKL)
269! Preprocessing is enabled by default, and below header is not language specific
270#include <mkl_version.h>
271#endif
272
273 isuccess = fftw_init_threads()
274 IF (isuccess == 0) &
275 cpabort("Error initializing FFTW with threads")
276
277 ! Read FFTW wisdom (if available)
278 ! all nodes are opening the file here...
279 INQUIRE (file=wisdom_file, exist=file_exists)
280 IF (file_exists) THEN
281 iunit = get_unit_number()
282 file_name_length = len_trim(wisdom_file)
283 ! Check whether the file can be opened in the necessary manner
284 OPEN (unit=iunit, file=wisdom_file, status="OLD", form="FORMATTED", position="REWIND", &
285 action="READ", iostat=istat)
286 IF (istat == 0) THEN
287 CLOSE (iunit)
288 file_name_length = len_trim(wisdom_file)
289 ALLOCATE (wisdom_file_name_c(file_name_length + 1))
290 DO i = 1, file_name_length
291 wisdom_file_name_c(i) = wisdom_file(i:i)
292 END DO
293 wisdom_file_name_c(file_name_length + 1) = c_null_char
294 isuccess = fftw_import_wisdom_from_filename(wisdom_file_name_c)
295 IF (isuccess == 0) &
296 CALL cp_warn(__location__, "Error importing wisdom from file "//trim(wisdom_file)//". "// &
297 "Maybe the file was created with a different configuration than CP2K is run with. "// &
298 "CP2K continues without importing wisdom.")
299 END IF
300 END IF
301
302#if defined (__MKL)
303 ! Now check if we have a real FFTW3 library, or are using MKL wrappers
304
305!$ IF (fftw3_is_mkl_wrapper() .and. omp_get_max_threads() .gt. 1) THEN
306! If we are not using the Intel compiler, there is no way to tell which
307! MKL version is in use, so fail safe...
308!$ mkl_is_safe = .FALSE.
309#if defined(INTEL_MKL_VERSION) && (110100 < INTEL_MKL_VERSION)
310!$ mkl_is_safe = .TRUE.
311#elif defined (__INTEL_COMPILER)
312! If we have an Intel compiler (__INTEL_COMPILER is defined) then check the
313! MKL version and make the appropriate action
314!DEC$ IF DEFINED (INTEL_MKL_VERSION)
315!DEC$ IF INTEL_MKL_VERSION .EQ. 110100
316!$ mkl_dft_number_of_user_threads = omp_get_max_threads()
317!DEC$ ENDIF
318!DEC$ IF INTEL_MKL_VERSION .GE. 110100
319!$ mkl_is_safe = .TRUE.
320!DEC$ ENDIF
321!DEC$ ENDIF
322#endif
323!$ IF (.NOT. mkl_is_safe) THEN
324!$ CALL cp_abort(__LOCATION__, "Intel's FFTW3 interface to MKL is not "// &
325!$ "thread-safe prior to MKL 11.1.0! Please "// &
326!$ "rebuild CP2K, linking against FFTW 3 from "// &
327!$ "www.fftw.org or a newer version of MKL. "// &
328!$ "Now exiting...")
329!$ END IF
330!$ END IF
331#endif
332#else
333 mark_used(wisdom_file)
334#endif
335
336 END SUBROUTINE
337
338! **************************************************************************************************
339!> \brief ...
340!> \param DATA ...
341!> \param max_length ...
342!> \par History
343!> JGH 23-Jan-2006 : initial version
344!> Adapted for new interface
345!> IAB 09-Jan-2009 : Modified to cache plans in fft_plan_type
346!> (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
347!> IAB 09-Oct-2009 : Added OpenMP directives to 1D FFT, and planning routines
348!> (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
349!> IAB 11-Sep-2012 : OpenMP parallel 3D FFT (Ruyman Reyes, PRACE)
350!> \author JGH
351! **************************************************************************************************
352 SUBROUTINE fftw3_get_lengths(DATA, max_length)
353
354 INTEGER, DIMENSION(*) :: data
355 INTEGER, INTENT(INOUT) :: max_length
356
357 INTEGER :: h, i, j, k, m, maxn, maxn_elevens, &
358 maxn_fives, maxn_sevens, &
359 maxn_thirteens, maxn_threes, &
360 maxn_twos, ndata, nmax, number
361 INTEGER, ALLOCATABLE, DIMENSION(:) :: dlocal, idx
362
363!------------------------------------------------------------------------------
364! compute ndata
365!! FFTW can do arbitrary(?) lengths, maybe you want to limit them to some
366!! powers of small prime numbers though...
367
368 maxn_twos = 15
369 maxn_threes = 3
370 maxn_fives = 2
371 maxn_sevens = 1
372 maxn_elevens = 1
373 maxn_thirteens = 0
374 maxn = 37748736
375
376 ndata = 0
377 DO h = 0, maxn_twos
378 nmax = huge(0)/2**h
379 DO i = 0, maxn_threes
380 DO j = 0, maxn_fives
381 DO k = 0, maxn_sevens
382 DO m = 0, maxn_elevens
383 number = (3**i)*(5**j)*(7**k)*(11**m)
384
385 IF (number > nmax) cycle
386
387 number = number*2**h
388 IF (number >= maxn) cycle
389
390 ndata = ndata + 1
391 END DO
392 END DO
393 END DO
394 END DO
395 END DO
396
397 ALLOCATE (dlocal(ndata), idx(ndata))
398
399 ndata = 0
400 dlocal(:) = 0
401 DO h = 0, maxn_twos
402 nmax = huge(0)/2**h
403 DO i = 0, maxn_threes
404 DO j = 0, maxn_fives
405 DO k = 0, maxn_sevens
406 DO m = 0, maxn_elevens
407 number = (3**i)*(5**j)*(7**k)*(11**m)
408
409 IF (number > nmax) cycle
410
411 number = number*2**h
412 IF (number >= maxn) cycle
413
414 ndata = ndata + 1
415 dlocal(ndata) = number
416 END DO
417 END DO
418 END DO
419 END DO
420 END DO
421
422 CALL sortint(dlocal, ndata, idx)
423 ndata = min(ndata, max_length)
424 DATA(1:ndata) = dlocal(1:ndata)
425 max_length = ndata
426
427 DEALLOCATE (dlocal, idx)
428
429 END SUBROUTINE fftw3_get_lengths
430
431! **************************************************************************************************
432!> \brief ...
433!> \param iarr ...
434!> \param n ...
435!> \param index ...
436! **************************************************************************************************
437 SUBROUTINE sortint(iarr, n, index)
438
439 INTEGER, INTENT(IN) :: n
440 INTEGER, INTENT(INOUT) :: iarr(1:n)
441 INTEGER, INTENT(OUT) :: index(1:n)
442
443 INTEGER, PARAMETER :: m = 7, nstack = 50
444
445 INTEGER :: a, i, ib, ir, istack(1:nstack), itemp, &
446 j, jstack, k, l, temp
447
448!------------------------------------------------------------------------------
449
450 DO i = 1, n
451 index(i) = i
452 END DO
453 jstack = 0
454 l = 1
455 ir = n
456 DO WHILE (.true.)
457 IF (ir - l < m) THEN
458 DO j = l + 1, ir
459 a = iarr(j)
460 ib = index(j)
461 DO i = j - 1, 0, -1
462 IF (i == 0) EXIT
463 IF (iarr(i) <= a) EXIT
464 iarr(i + 1) = iarr(i)
465 index(i + 1) = index(i)
466 END DO
467 iarr(i + 1) = a
468 index(i + 1) = ib
469 END DO
470 IF (jstack == 0) RETURN
471 ir = istack(jstack)
472 l = istack(jstack - 1)
473 jstack = jstack - 2
474 ELSE
475 k = (l + ir)/2
476 temp = iarr(k)
477 iarr(k) = iarr(l + 1)
478 iarr(l + 1) = temp
479 itemp = index(k)
480 index(k) = index(l + 1)
481 index(l + 1) = itemp
482 IF (iarr(l + 1) > iarr(ir)) THEN
483 temp = iarr(l + 1)
484 iarr(l + 1) = iarr(ir)
485 iarr(ir) = temp
486 itemp = index(l + 1)
487 index(l + 1) = index(ir)
488 index(ir) = itemp
489 END IF
490 IF (iarr(l) > iarr(ir)) THEN
491 temp = iarr(l)
492 iarr(l) = iarr(ir)
493 iarr(ir) = temp
494 itemp = index(l)
495 index(l) = index(ir)
496 index(ir) = itemp
497 END IF
498 IF (iarr(l + 1) > iarr(l)) THEN
499 temp = iarr(l + 1)
500 iarr(l + 1) = iarr(l)
501 iarr(l) = temp
502 itemp = index(l + 1)
503 index(l + 1) = index(l)
504 index(l) = itemp
505 END IF
506 i = l + 1
507 j = ir
508 a = iarr(l)
509 ib = index(l)
510 DO WHILE (.true.)
511 i = i + 1
512 DO WHILE (iarr(i) < a)
513 i = i + 1
514 END DO
515 j = j - 1
516 DO WHILE (iarr(j) > a)
517 j = j - 1
518 END DO
519 IF (j < i) EXIT
520 temp = iarr(i)
521 iarr(i) = iarr(j)
522 iarr(j) = temp
523 itemp = index(i)
524 index(i) = index(j)
525 index(j) = itemp
526 END DO
527 iarr(l) = iarr(j)
528 iarr(j) = a
529 index(l) = index(j)
530 index(j) = ib
531 jstack = jstack + 2
532 IF (jstack > nstack) cpabort(" Nstack too small in sortr")
533 IF (ir - i + 1 >= j - l) THEN
534 istack(jstack) = ir
535 istack(jstack - 1) = i
536 ir = j - 1
537 ELSE
538 istack(jstack) = j - 1
539 istack(jstack - 1) = l
540 l = i
541 END IF
542 END IF
543
544 END DO
545
546 END SUBROUTINE sortint
547
548! **************************************************************************************************
549
550! **************************************************************************************************
551!> \brief ...
552!> \param plan ...
553!> \param fft_rank ...
554!> \param dim_n ...
555!> \param dim_istride ...
556!> \param dim_ostride ...
557!> \param hm_rank ...
558!> \param hm_n ...
559!> \param hm_istride ...
560!> \param hm_ostride ...
561!> \param zin ...
562!> \param zout ...
563!> \param fft_direction ...
564!> \param fftw_plan_type ...
565!> \param valid ...
566! **************************************************************************************************
567 SUBROUTINE fftw3_create_guru_plan(plan, fft_rank, dim_n, &
568 dim_istride, dim_ostride, hm_rank, &
569 hm_n, hm_istride, hm_ostride, &
570 zin, zout, fft_direction, fftw_plan_type, &
571 valid)
572
573 IMPLICIT NONE
574
575 TYPE(c_ptr), INTENT(INOUT) :: plan
576 COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zin, zout
577 INTEGER, INTENT(IN) :: dim_n(2), dim_istride(2), dim_ostride(2), &
578 hm_n(2), hm_istride(2), hm_ostride(2), fft_rank, &
579 fft_direction, fftw_plan_type, hm_rank
580 LOGICAL, INTENT(OUT) :: valid
581
582#if defined (__FFTW3)
583 TYPE(fftw_iodim) :: dim(2), hm(2)
584 INTEGER :: i
585
586 DO i = 1, 2
587 dim(i) = fftw_iodim(dim_n(i), dim_istride(i), dim_ostride(i))
588 hm(i) = fftw_iodim(hm_n(i), hm_istride(i), hm_ostride(i))
589 END DO
590
591 plan = fftw_plan_guru_dft(fft_rank, &
592 dim, hm_rank, hm, &
593 zin, zout, &
594 fft_direction, fftw_plan_type)
595
596 valid = c_associated(plan)
597
598#else
599 mark_used(plan)
600 mark_used(fft_rank)
601 mark_used(dim_n)
602 mark_used(dim_istride)
603 mark_used(dim_ostride)
604 mark_used(hm_rank)
605 mark_used(hm_n)
606 mark_used(hm_istride)
607 mark_used(hm_ostride)
608 mark_used(fft_direction)
609 mark_used(fftw_plan_type)
610 !MARK_USED does not work with assumed size arguments
611 IF (.false.) then; do; IF (abs(zin(1)) > abs(zout(1))) exit; END do; END IF
612 valid = .false.
613
614#endif
615
616 END SUBROUTINE
617
618! **************************************************************************************************
619
620! **************************************************************************************************
621!> \brief ...
622!> \return ...
623! **************************************************************************************************
624 FUNCTION fftw3_is_mkl_wrapper() RESULT(is_mkl)
625
626 IMPLICIT NONE
627
628 LOGICAL :: is_mkl
629#if defined ( __FFTW3 )
630 LOGICAL :: guru_supported
631 INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
632 howmany_n(2), howmany_istride(2), howmany_ostride(2)
633 TYPE(c_ptr) :: test_plan
634 COMPLEX(KIND=dp), DIMENSION(1, 1, 1) :: zin
635
636 ! Attempt to create a plan with the guru interface for a 2d sub-space
637 ! If this fails (e.g. for MKL's FFTW3 interface), fall back to the
638 ! FFTW3 threaded 3D transform instead of the hand-optimised version
639 dim_n(1) = 1
640 dim_n(2) = 1
641 dim_istride(1) = 1
642 dim_istride(2) = 1
643 dim_ostride(1) = 1
644 dim_ostride(2) = 1
645 howmany_n(1) = 1
646 howmany_n(2) = 1
647 howmany_istride(1) = 1
648 howmany_istride(2) = 1
649 howmany_ostride(1) = 1
650 howmany_ostride(2) = 1
651 zin = cmplx(0.0_dp, 0.0_dp, kind=dp)
652 CALL fftw3_create_guru_plan(test_plan, 1, &
653 dim_n, dim_istride, dim_ostride, &
654 2, howmany_n, howmany_istride, howmany_ostride, &
655 zin, zin, &
656 fftw_forward, fftw_estimate, guru_supported)
657 IF (guru_supported) THEN
658 CALL fftw_destroy_plan(test_plan)
659 is_mkl = .false.
660 ELSE
661 is_mkl = .true.
662 END IF
663
664#else
665 is_mkl = .false.
666#endif
667
668 END FUNCTION
669
670! **************************************************************************************************
671
672! **************************************************************************************************
673!> \brief ...
674!> \param nrows ...
675!> \param nt ...
676!> \param rows_per_thread ...
677!> \param rows_per_thread_r ...
678!> \param th_planA ...
679!> \param th_planB ...
680! **************************************************************************************************
681 SUBROUTINE fftw3_compute_rows_per_th(nrows, nt, rows_per_thread, rows_per_thread_r, &
682 th_planA, th_planB)
683
684 INTEGER, INTENT(IN) :: nrows, nt
685 INTEGER, INTENT(OUT) :: rows_per_thread, rows_per_thread_r, &
686 th_plana, th_planb
687
688 IF (mod(nrows, nt) .EQ. 0) THEN
689 rows_per_thread = nrows/nt
690 rows_per_thread_r = 0
691 th_plana = nt
692 th_planb = 0
693 ELSE
694 rows_per_thread = nrows/nt + 1
695 rows_per_thread_r = nrows/nt
696 th_plana = mod(nrows, nt)
697 th_planb = nt - th_plana
698 END IF
699
700 END SUBROUTINE
701
702! **************************************************************************************************
703
704! **************************************************************************************************
705!> \brief ...
706!> \param plan ...
707!> \param plan_r ...
708!> \param dim_n ...
709!> \param dim_istride ...
710!> \param dim_ostride ...
711!> \param hm_n ...
712!> \param hm_istride ...
713!> \param hm_ostride ...
714!> \param input ...
715!> \param output ...
716!> \param fft_direction ...
717!> \param fftw_plan_type ...
718!> \param rows_per_th ...
719!> \param rows_per_th_r ...
720! **************************************************************************************************
721 SUBROUTINE fftw3_create_3d_plans(plan, plan_r, dim_n, dim_istride, dim_ostride, &
722 hm_n, hm_istride, hm_ostride, &
723 input, output, &
724 fft_direction, fftw_plan_type, rows_per_th, &
725 rows_per_th_r)
726
727 TYPE(c_ptr), INTENT(INOUT) :: plan, plan_r
728 INTEGER, INTENT(INOUT) :: dim_n(2), dim_istride(2), &
729 dim_ostride(2), hm_n(2), &
730 hm_istride(2), hm_ostride(2)
731 COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
732 INTEGER, INTENT(INOUT) :: fft_direction, fftw_plan_type
733 INTEGER, INTENT(IN) :: rows_per_th, rows_per_th_r
734
735 LOGICAL :: valid
736
737! First plans will have an additional row
738
739 hm_n(2) = rows_per_th
740 CALL fftw3_create_guru_plan(plan, 1, &
741 dim_n, dim_istride, dim_ostride, &
742 2, hm_n, hm_istride, hm_ostride, &
743 input, output, &
744 fft_direction, fftw_plan_type, valid)
745
746 IF (.NOT. valid) THEN
747 cpabort("fftw3_create_plan")
748 END IF
749
750 !!!! Remainder
751 hm_n(2) = rows_per_th_r
752 CALL fftw3_create_guru_plan(plan_r, 1, &
753 dim_n, dim_istride, dim_ostride, &
754 2, hm_n, hm_istride, hm_ostride, &
755 input, output, &
756 fft_direction, fftw_plan_type, valid)
757 IF (.NOT. valid) THEN
758 cpabort("fftw3_create_plan (remaining)")
759 END IF
760
761 END SUBROUTINE
762
763! **************************************************************************************************
764
765! **************************************************************************************************
766!> \brief ...
767!> \param plan ...
768!> \param zin ...
769!> \param zout ...
770!> \param plan_style ...
771! **************************************************************************************************
772 SUBROUTINE fftw3_create_plan_3d(plan, zin, zout, plan_style)
773
774 TYPE(fft_plan_type), INTENT(INOUT) :: plan
775 COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zin
776 COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zout
777 INTEGER :: plan_style
778#if defined ( __FFTW3 )
779 INTEGER :: n1, n2, n3
780 INTEGER :: nt
781 INTEGER :: rows_per_th
782 INTEGER :: rows_per_th_r
783 INTEGER :: fft_direction
784 INTEGER :: th_plana, th_planb
785 COMPLEX(KIND=dp), ALLOCATABLE :: tmp(:)
786
787 ! GURU Interface
788 INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
789 howmany_n(2), howmany_istride(2), howmany_ostride(2)
790
791 INTEGER :: fftw_plan_type
792 SELECT CASE (plan_style)
793 CASE (1)
794 fftw_plan_type = fftw_estimate
795 CASE (2)
796 fftw_plan_type = fftw_measure
797 CASE (3)
798 fftw_plan_type = fftw_patient
799 CASE (4)
800 fftw_plan_type = fftw_exhaustive
801 CASE DEFAULT
802 cpabort("fftw3_create_plan_3d")
803 END SELECT
804
805#if defined (__FFTW3_UNALIGNED)
806 fftw_plan_type = fftw_plan_type + fftw_unaligned
807#endif
808
809 IF (plan%fsign == +1) THEN
810 fft_direction = fftw_forward
811 ELSE
812 fft_direction = fftw_backward
813 END IF
814
815 n1 = plan%n_3d(1)
816 n2 = plan%n_3d(2)
817 n3 = plan%n_3d(3)
818
819 nt = 1
820!$OMP PARALLEL DEFAULT(NONE) SHARED(nt)
821!$OMP MASTER
822!$ nt = omp_get_num_threads()
823!$OMP END MASTER
824!$OMP END PARALLEL
825
826 IF ((fftw3_is_mkl_wrapper()) .OR. &
827 (.NOT. plan_style == 1) .OR. &
828 (n1 < 256 .AND. n2 < 256 .AND. n3 < 256 .AND. nt == 1)) THEN
829 ! If the plan type is MEASURE, PATIENT and EXHAUSTIVE or
830 ! the grid size is small (and we are single-threaded) then
831 ! FFTW3 does a better job than handmade optimization
832 ! so plan a single 3D FFT which will execute using all the threads
833
834 plan%separated_plans = .false.
835!$ CALL fftw_plan_with_nthreads(nt)
836
837 IF (plan%fft_in_place) THEN
838 plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zin, fft_direction, fftw_plan_type)
839 ELSE
840 plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zout, fft_direction, fftw_plan_type)
841 END IF
842 ELSE
843 ALLOCATE (tmp(n1*n2*n3))
844 ! ************************* PLANS WITH TRANSPOSITIONS ****************************
845 ! In the cases described above, we manually thread each stage of the 3D FFT.
846 !
847 ! The following plans replace the 3D FFT call by running 1D FFTW across all
848 ! 3 directions of the array.
849 !
850 ! Output of FFTW is transposed to ensure that the next round of FFTW access
851 ! contiguous information.
852 !
853 ! Assuming the input matrix is M(n3,n2,n1), FFTW/Transp are :
854 ! M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
855 ! Notice that last matrix is transposed in the Z axis. A DO-loop in the execute routine
856 ! will perform the final transposition. Performance evaluation showed that using an external
857 ! DO loop to do the final transposition performed better than directly transposing the output.
858 ! However, this might vary depending on the compiler/platform, so a potential tuning spot
859 ! is to perform the final transposition within the fftw library rather than using the external loop
860 ! See comments below in Z-FFT for how to transpose the output to avoid the final DO loop.
861 !
862 ! Doc. for the Guru interface is in http://www.fftw.org/doc/Guru-Interface.html
863 !
864 ! OpenMP : Work is distributed on the Z plane.
865 ! All transpositions are out-of-place to facilitate multi-threading
866 !
867 !!!! Plan for X : M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2)
868 CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
869 th_plana, th_planb)
870
871 dim_n(1) = n1
872 dim_istride(1) = 1
873 dim_ostride(1) = n2
874 howmany_n(1) = n2
875 howmany_n(2) = rows_per_th
876 howmany_istride(1) = n1
877 howmany_istride(2) = n1*n2
878 howmany_ostride(1) = 1
879 howmany_ostride(2) = n1*n2
880 CALL fftw3_create_3d_plans(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
881 dim_n, dim_istride, dim_ostride, howmany_n, &
882 howmany_istride, howmany_ostride, &
883 zin, tmp, &
884 fft_direction, fftw_plan_type, rows_per_th, &
885 rows_per_th_r)
886
887 !!!! Plan for Y : M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3)
888 CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
889 th_plana, th_planb)
890 dim_n(1) = n2
891 dim_istride(1) = 1
892 dim_ostride(1) = n3
893 howmany_n(1) = n1
894 howmany_n(2) = rows_per_th
895 howmany_istride(1) = n2
896 howmany_istride(2) = n1*n2
897 !!! transposed Z axis on output
898 howmany_ostride(1) = n2*n3
899 howmany_ostride(2) = 1
900
901 CALL fftw3_create_3d_plans(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
902 dim_n, dim_istride, dim_ostride, &
903 howmany_n, howmany_istride, howmany_ostride, &
904 tmp, zin, &
905 fft_direction, fftw_plan_type, rows_per_th, &
906 rows_per_th_r)
907
908 !!!! Plan for Z : M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
909 CALL fftw3_compute_rows_per_th(n1, nt, rows_per_th, rows_per_th_r, &
910 th_plana, th_planb)
911 dim_n(1) = n3
912 dim_istride(1) = 1
913 dim_ostride(1) = 1 ! To transpose: n2*n1
914 howmany_n(1) = n2
915 howmany_n(2) = rows_per_th
916 howmany_istride(1) = n3
917 howmany_istride(2) = n2*n3
918 howmany_ostride(1) = n3 ! To transpose: n1
919 howmany_ostride(2) = n2*n3 ! To transpose: 1
920
921 CALL fftw3_create_3d_plans(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
922 dim_n, dim_istride, dim_ostride, &
923 howmany_n, howmany_istride, howmany_ostride, &
924 zin, tmp, &
925 fft_direction, fftw_plan_type, rows_per_th, &
926 rows_per_th_r)
927
928 plan%separated_plans = .true.
929
930 DEALLOCATE (tmp)
931 END IF
932
933#else
934 mark_used(plan)
935 mark_used(plan_style)
936 !MARK_USED does not work with assumed size arguments
937 IF (.false.) then; do; IF (abs(zin(1)) > abs(zout(1))) exit; END do; END IF
938#endif
939
940 END SUBROUTINE fftw3_create_plan_3d
941
942! **************************************************************************************************
943
944! **************************************************************************************************
945!> \brief ...
946!> \param plan ...
947!> \param plan_r ...
948!> \param split_dim ...
949!> \param nt ...
950!> \param tid ...
951!> \param input ...
952!> \param istride ...
953!> \param output ...
954!> \param ostride ...
955! **************************************************************************************************
956 SUBROUTINE fftw3_workshare_execute_dft(plan, plan_r, split_dim, nt, tid, &
957 input, istride, output, ostride)
958
959 INTEGER, INTENT(IN) :: split_dim, nt, tid
960 INTEGER, INTENT(IN) :: istride, ostride
961 COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
962 TYPE(c_ptr) :: plan, plan_r
963#if defined (__FFTW3)
964 INTEGER :: i_off, o_off
965 INTEGER :: th_plana, th_planb
966 INTEGER :: rows_per_thread, rows_per_thread_r
967
968 CALL fftw3_compute_rows_per_th(split_dim, nt, rows_per_thread, &
969 rows_per_thread_r, &
970 th_plana, th_planb)
971
972 IF (th_planb .GT. 0) THEN
973 IF (tid .LT. th_plana) THEN
974 i_off = (tid)*(istride*(rows_per_thread)) + 1
975 o_off = (tid)*(ostride*(rows_per_thread)) + 1
976 IF (rows_per_thread .GT. 0) THEN
977 CALL fftw_execute_dft(plan, input(i_off), &
978 output(o_off))
979 END IF
980 ELSE IF ((tid - th_plana) < th_planb) THEN
981
982 i_off = (th_plana)*istride*(rows_per_thread) + &
983 (tid - th_plana)*istride*(rows_per_thread_r) + 1
984 o_off = (th_plana)*ostride*(rows_per_thread) + &
985 (tid - th_plana)*ostride*(rows_per_thread_r) + 1
986
987 CALL fftw_execute_dft(plan_r, input(i_off), &
988 output(o_off))
989 END IF
990
991 ELSE
992 i_off = (tid)*(istride*(rows_per_thread)) + 1
993 o_off = (tid)*(ostride*(rows_per_thread)) + 1
994
995 CALL fftw_execute_dft(plan, input(i_off), &
996 output(o_off))
997
998 END IF
999#else
1000 mark_used(plan)
1001 mark_used(plan_r)
1002 mark_used(split_dim)
1003 mark_used(nt)
1004 mark_used(tid)
1005 mark_used(istride)
1006 mark_used(ostride)
1007 !MARK_USED does not work with assumed size arguments
1008 IF (.false.) then; do; IF (abs(input(1)) > abs(output(1))) exit; END do; END IF
1009#endif
1010
1011 END SUBROUTINE
1012
1013! **************************************************************************************************
1014
1015! **************************************************************************************************
1016!> \brief ...
1017!> \param plan ...
1018!> \param scale ...
1019!> \param zin ...
1020!> \param zout ...
1021!> \param stat ...
1022! **************************************************************************************************
1023 SUBROUTINE fftw33d(plan, scale, zin, zout, stat)
1024
1025 TYPE(fft_plan_type), INTENT(IN) :: plan
1026 REAL(kind=dp), INTENT(IN) :: scale
1027 COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zin
1028 COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zout
1029 INTEGER, INTENT(OUT) :: stat
1030#if defined ( __FFTW3 )
1031 COMPLEX(KIND=dp), POINTER :: xout(:)
1032 COMPLEX(KIND=dp), ALLOCATABLE :: tmp1(:)
1033 INTEGER :: n1, n2, n3
1034 INTEGER :: tid, nt
1035 INTEGER :: i, j, k
1036
1037 n1 = plan%n_3d(1)
1038 n2 = plan%n_3d(2)
1039 n3 = plan%n_3d(3)
1040
1041 stat = 1
1042
1043 ! We use a POINTER to the output array to avoid duplicating code
1044 IF (plan%fft_in_place) THEN
1045 xout => zin(:n1*n2*n3)
1046 ELSE
1047 xout => zout(:n1*n2*n3)
1048 END IF
1049
1050 ! Either compute the full 3D FFT using a multithreaded plan
1051 IF (.NOT. plan%separated_plans) THEN
1052 CALL fftw_execute_dft(plan%fftw_plan, zin, xout)
1053 ELSE
1054 ! Or use the 3 stage FFT scheme described in fftw3_create_plan_3d
1055 ALLOCATE (tmp1(n1*n2*n3)) ! Temporary vector used for transpositions
1056 !$OMP PARALLEL DEFAULT(NONE) PRIVATE(tid,nt,i,j,k) SHARED(zin,tmp1,n1,n2,n3,plan,xout)
1057 tid = 0
1058 nt = 1
1059
1060!$ tid = omp_get_thread_num()
1061!$ nt = omp_get_num_threads()
1062 CALL fftw3_workshare_execute_dft(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
1063 n3, nt, tid, &
1064 zin, n1*n2, tmp1, n1*n2)
1065
1066 !$OMP BARRIER
1067 CALL fftw3_workshare_execute_dft(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
1068 n3, nt, tid, &
1069 tmp1, n1*n2, xout, 1)
1070 !$OMP BARRIER
1071 CALL fftw3_workshare_execute_dft(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
1072 n1, nt, tid, &
1073 xout, n2*n3, tmp1, n2*n3)
1074 !$OMP BARRIER
1075
1076 !$OMP DO COLLAPSE(3)
1077 DO i = 1, n1
1078 DO j = 1, n2
1079 DO k = 1, n3
1080 xout((i - 1) + (j - 1)*n1 + (k - 1)*n1*n2 + 1) = &
1081 tmp1((k - 1) + (j - 1)*n3 + (i - 1)*n3*n2 + 1)
1082 END DO
1083 END DO
1084 END DO
1085 !$OMP END DO
1086
1087 !$OMP END PARALLEL
1088 END IF
1089
1090 IF (scale /= 1.0_dp) THEN
1091 CALL zdscal(n1*n2*n3, scale, xout, 1)
1092 END IF
1093
1094#else
1095 mark_used(plan)
1096 mark_used(scale)
1097 !MARK_USED does not work with assumed size arguments
1098 IF (.false.) then; do; IF (abs(zin(1)) > abs(zout(1))) exit; END do; END IF
1099 stat = 0
1100
1101#endif
1102
1103 END SUBROUTINE fftw33d
1104
1105! **************************************************************************************************
1106
1107! **************************************************************************************************
1108!> \brief ...
1109!> \param plan ...
1110!> \param zin ...
1111!> \param zout ...
1112!> \param plan_style ...
1113! **************************************************************************************************
1114 SUBROUTINE fftw3_create_plan_1dm(plan, zin, zout, plan_style)
1115
1116 IMPLICIT NONE
1117
1118 TYPE(fft_plan_type), INTENT(INOUT) :: plan
1119 COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN) :: zin
1120 COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN) :: zout
1121 INTEGER, INTENT(IN) :: plan_style
1122#if defined ( __FFTW3 )
1123 INTEGER :: ii, di, io, do, num_threads, num_rows
1124
1125 INTEGER :: fftw_plan_type
1126 SELECT CASE (plan_style)
1127 CASE (1)
1128 fftw_plan_type = fftw_estimate
1129 CASE (2)
1130 fftw_plan_type = fftw_measure
1131 CASE (3)
1132 fftw_plan_type = fftw_patient
1133 CASE (4)
1134 fftw_plan_type = fftw_exhaustive
1135 CASE DEFAULT
1136 cpabort("fftw3_create_plan_1dm")
1137 END SELECT
1138
1139#if defined (__FFTW3_UNALIGNED)
1140 fftw_plan_type = fftw_plan_type + fftw_unaligned
1141#endif
1142 num_threads = 1
1143 plan%separated_plans = .false.
1144!$OMP PARALLEL DEFAULT(NONE), &
1145!$OMP SHARED(NUM_THREADS)
1146!$OMP MASTER
1147!$ num_threads = omp_get_num_threads()
1148!$OMP END MASTER
1149!$OMP END PARALLEL
1150
1151 num_rows = plan%m/num_threads
1152!$ plan%num_threads_needed = num_threads
1153
1154! Check for number of rows less than num_threads
1155!$ IF (plan%m < num_threads) THEN
1156!$ num_rows = 1
1157!$ plan%num_threads_needed = plan%m
1158!$ END IF
1159
1160! Check for total number of rows not divisible by num_threads
1161!$ IF (num_rows*plan%num_threads_needed .NE. plan%m) THEN
1162!$ plan%need_alt_plan = .TRUE.
1163!$ END IF
1164
1165!$ plan%num_rows = num_rows
1166 ii = 1
1167 di = plan%n
1168 io = 1
1169 DO = plan%n
1170 IF (plan%fsign == +1 .AND. plan%trans) THEN
1171 ii = plan%m
1172 di = 1
1173 ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
1174 io = plan%m
1175 DO = 1
1176 END IF
1177
1178 IF (plan%fsign == +1) THEN
1179 CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, ii, di, &
1180 zout, 0, io, DO, fftw_forward, fftw_plan_type)
1181 ELSE
1182 CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, ii, di, &
1183 zout, 0, io, DO, fftw_backward, fftw_plan_type)
1184 END IF
1185
1186!$ IF (plan%need_alt_plan) THEN
1187!$ plan%alt_num_rows = plan%m - (plan%num_threads_needed - 1)*num_rows
1188!$ IF (plan%fsign == +1) THEN
1189!$ CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, ii, di, &
1190!$ zout, 0, io, DO, FFTW_FORWARD, fftw_plan_type)
1191!$ ELSE
1192!$ CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, ii, di, &
1193!$ zout, 0, io, DO, FFTW_BACKWARD, fftw_plan_type)
1194!$ END IF
1195!$ END IF
1196
1197#else
1198 mark_used(plan)
1199 mark_used(plan_style)
1200 !MARK_USED does not work with assumed size arguments
1201 IF (.false.) then; do; IF (abs(zin(1)) > abs(zout(1))) exit; END do; END IF
1202#endif
1203
1204 END SUBROUTINE fftw3_create_plan_1dm
1205
1206! **************************************************************************************************
1207!> \brief ...
1208!> \param plan ...
1209! **************************************************************************************************
1210 SUBROUTINE fftw3_destroy_plan(plan)
1211
1212 TYPE(fft_plan_type), INTENT(INOUT) :: plan
1213
1214#if defined ( __FFTW3 )
1215!$ IF (plan%need_alt_plan) THEN
1216!$ CALL fftw_destroy_plan(plan%alt_fftw_plan)
1217!$ END IF
1218
1219 IF (.NOT. plan%separated_plans) THEN
1220 CALL fftw_destroy_plan(plan%fftw_plan)
1221 ELSE
1222 ! If it is a separated plan then we have to destroy
1223 ! each dim plan individually
1224 CALL fftw_destroy_plan(plan%fftw_plan_nx)
1225 CALL fftw_destroy_plan(plan%fftw_plan_ny)
1226 CALL fftw_destroy_plan(plan%fftw_plan_nz)
1227 CALL fftw_destroy_plan(plan%fftw_plan_nx_r)
1228 CALL fftw_destroy_plan(plan%fftw_plan_ny_r)
1229 CALL fftw_destroy_plan(plan%fftw_plan_nz_r)
1230 END IF
1231
1232#else
1233 mark_used(plan)
1234#endif
1235
1236 END SUBROUTINE fftw3_destroy_plan
1237
1238! **************************************************************************************************
1239!> \brief ...
1240!> \param plan ...
1241!> \param zin ...
1242!> \param zout ...
1243!> \param scale ...
1244!> \param stat ...
1245! **************************************************************************************************
1246 SUBROUTINE fftw31dm(plan, zin, zout, scale, stat)
1247 TYPE(fft_plan_type), INTENT(IN) :: plan
1248 COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN), TARGET :: zin
1249 COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), &
1250 TARGET :: zout
1251 REAL(kind=dp), INTENT(IN) :: scale
1252 INTEGER, INTENT(OUT) :: stat
1253
1254 INTEGER :: in_offset, my_id, num_rows, out_offset, &
1255 scal_offset
1256 TYPE(c_ptr) :: fftw_plan
1257
1258!------------------------------------------------------------------------------
1259
1260 my_id = 0
1261 num_rows = plan%m
1262
1263!$OMP PARALLEL DEFAULT(NONE), &
1264!$OMP PRIVATE(my_id,num_rows,in_offset,out_offset,scal_offset,fftw_plan), &
1265!$OMP SHARED(zin,zout), &
1266!$OMP SHARED(plan,scale,stat)
1267!$ my_id = omp_get_thread_num()
1268
1269!$ if (my_id < plan%num_threads_needed) then
1270
1271 fftw_plan = plan%fftw_plan
1272
1273 in_offset = 1
1274 out_offset = 1
1275 scal_offset = 1
1276
1277!$ in_offset = 1 + plan%num_rows*my_id*plan%n
1278!$ out_offset = 1 + plan%num_rows*my_id*plan%n
1279!$ IF (plan%fsign == +1 .AND. plan%trans) THEN
1280!$ in_offset = 1 + plan%num_rows*my_id
1281!$ ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
1282!$ out_offset = 1 + plan%num_rows*my_id
1283!$ END IF
1284!$ scal_offset = 1 + plan%n*plan%num_rows*my_id
1285!$ IF (plan%need_alt_plan .AND. my_id .EQ. plan%num_threads_needed - 1) THEN
1286!$ num_rows = plan%alt_num_rows
1287!$ fftw_plan = plan%alt_fftw_plan
1288!$ ELSE
1289!$ num_rows = plan%num_rows
1290!$ END IF
1291
1292#if defined ( __FFTW3 )
1293!$OMP MASTER
1294 stat = 1
1295!$OMP END MASTER
1296 CALL dfftw_execute_dft(fftw_plan, zin(in_offset:in_offset), zout(out_offset:out_offset))
1297!$ end if
1298! all theads need to meet at this barrier
1299!$OMP BARRIER
1300!$ if (my_id < plan%num_threads_needed) then
1301 IF (scale /= 1.0_dp) CALL zdscal(plan%n*num_rows, scale, zout(scal_offset:scal_offset), 1)
1302!$ end if
1303
1304#else
1305 mark_used(plan)
1306 mark_used(scale)
1307 !MARK_USED does not work with assumed size arguments
1308 IF (.false.) then; do; IF (abs(zin(1)) > abs(zout(1))) exit; END do; END IF
1309 stat = 0
1310
1311!$ else
1312!$ end if
1313
1314#endif
1315
1316!$OMP END PARALLEL
1317
1318 END SUBROUTINE fftw31dm
1319
1320 END MODULE
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
Utility routines to open and close files. Tracking of preconnections.
Definition cp_files.F:16
integer function, public get_unit_number(file_name)
Returns the first logical unit that is not preconnected.
Definition cp_files.F:237
logical function, public file_exists(file_name)
Checks if file exists, considering also the file discovery mechanism.
Definition cp_files.F:501
Defines the basic variable types.
Definition fft_kinds.F:13
integer, parameter, public dp
Definition fft_kinds.F:18
Type to store data about a (1D or 3D) FFT, including FFTW plan.
Definition fft_plan.F:18
subroutine, public fftw3_do_cleanup(wisdom_file, ionode)
...
Definition fftw3_lib.F:197
subroutine, public fftw3_destroy_plan(plan)
...
Definition fftw3_lib.F:1211
subroutine, public fftw3_create_plan_3d(plan, zin, zout, plan_style)
...
Definition fftw3_lib.F:773
subroutine, public fftw3_get_lengths(data, max_length)
...
Definition fftw3_lib.F:353
subroutine, public fftw33d(plan, scale, zin, zout, stat)
...
Definition fftw3_lib.F:1024
subroutine, public fftw31dm(plan, zin, zout, scale, stat)
...
Definition fftw3_lib.F:1247
subroutine, public fftw3_do_init(wisdom_file)
...
Definition fftw3_lib.F:239
subroutine, public fftw3_create_plan_1dm(plan, zin, zout, plan_style)
...
Definition fftw3_lib.F:1115