29 USE iso_c_binding,
ONLY: c_f_pointer,&
52 #include "../base/base_uses.f90"
56 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'fft_tools'
62 INTEGER :: nx = 0, ny = 0, nz = 0
63 INTEGER :: lmax = 0, mmax = 0, nmax = 0
64 INTEGER :: mx1 = 0, mx2 = 0, mx3 = 0
65 INTEGER :: my1 = 0, my2 = 0, my3 = 0
66 INTEGER :: mz1 = 0, mz2 = 0, mz3 = 0
67 INTEGER :: mcz1 = 0, mcz2 = 0, mcy3 = 0, mcx2 = 0
68 INTEGER :: lg = 0, mg = 0
69 INTEGER :: nbx = 0, nbz = 0
70 INTEGER :: nmray = 0, nyzray = 0
71 TYPE(mp_comm_type) :: gs_group = mp_comm_type()
72 TYPE(mp_cart_type) :: rs_group = mp_cart_type()
73 INTEGER,
DIMENSION(2) :: g_pos = 0, r_pos = 0, r_dim = 0
74 INTEGER :: numtask = 0
78 INTEGER :: fft_scratch_id = 0
79 INTEGER :: tf_type = -1
80 LOGICAL :: in_use = .true.
81 TYPE(mp_comm_type) :: group = mp_comm_type()
82 INTEGER,
DIMENSION(3) :: nfft = -1
84 TYPE(mp_cart_type),
DIMENSION(2) :: cart_sub_comm = mp_cart_type()
85 INTEGER,
DIMENSION(2) :: dim = -1, pos = -1
87 COMPLEX(KIND=dp),
DIMENSION(:, :, :),
POINTER,
CONTIGUOUS &
88 :: ziptr => null(), zoptr => null()
90 COMPLEX(KIND=dp),
DIMENSION(:, :),
CONTIGUOUS,
POINTER &
91 :: p1buf => null(), p2buf => null(), p3buf => null(), p4buf => null(), &
92 p5buf => null(), p6buf => null(), p7buf => null()
94 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER,
CONTIGUOUS &
95 :: r1buf => null(), r2buf => null()
96 COMPLEX(KIND=dp),
DIMENSION(:, :, :),
POINTER,
CONTIGUOUS &
99 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER,
CONTIGUOUS &
100 :: a1buf => null(), a2buf => null(), a3buf => null(), &
101 a4buf => null(), a5buf => null(), a6buf => null()
103 INTEGER,
DIMENSION(:),
CONTIGUOUS,
POINTER :: scount => null(), rcount => null(), sdispl => null(), rdispl => null()
104 INTEGER,
DIMENSION(:, :),
CONTIGUOUS,
POINTER :: pgcube => null()
105 INTEGER,
DIMENSION(:),
CONTIGUOUS,
POINTER :: xzcount => null(), yzcount => null(), xzdispl => null(), yzdispl => null()
106 INTEGER :: in = 0, mip = -1
107 REAL(kind=
dp) :: rsratio = 1.0_dp
108 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER,
CONTIGUOUS &
109 :: xzbuf => null(), yzbuf => null()
110 COMPLEX(KIND=sp),
DIMENSION(:),
POINTER,
CONTIGUOUS &
111 :: xzbuf_sgl => null(), yzbuf_sgl => null()
112 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER,
CONTIGUOUS &
113 :: rbuf1 => null(), rbuf2 => null(), rbuf3 => null(), rbuf4 => null(), &
114 rbuf5 => null(), rbuf6 => null(), rr => null()
115 COMPLEX(KIND=sp),
DIMENSION(:, :),
POINTER,
CONTIGUOUS &
116 :: ss => null(), tt => null()
117 INTEGER,
DIMENSION(:, :),
POINTER,
CONTIGUOUS :: pgrid => null()
118 INTEGER,
DIMENSION(:),
POINTER,
CONTIGUOUS :: xcor => null(), zcor => null(), pzcoord => null()
120 TYPE(fft_plan_type),
DIMENSION(6) ::
fft_plan = fft_plan_type()
121 INTEGER :: last_tick = -1
139 PUBLIC :: fft_alloc, fft_dealloc
147 INTEGER,
PARAMETER :: fft_radix_allowed = 495, fft_radix_disallowed = 496
150 REAL(kind=
dp),
PARAMETER :: ratio_sparse_alltoall = 0.5_dp
154 LOGICAL,
SAVE :: alltoall_sgl = .false.
155 LOGICAL,
SAVE :: use_fftsg_sizes = .true.
166 MODULE PROCEDURE fft3d_s, fft3d_ps, fft3d_pb
183 SUBROUTINE init_fft(fftlib, alltoall, fftsg_sizes, pool_limit, wisdom_file, &
186 CHARACTER(LEN=*),
INTENT(IN) :: fftlib
187 LOGICAL,
INTENT(IN) :: alltoall, fftsg_sizes
188 INTEGER,
INTENT(IN) :: pool_limit
189 CHARACTER(LEN=*),
INTENT(IN) :: wisdom_file
190 INTEGER,
INTENT(IN) :: plan_style
192 use_fftsg_sizes = fftsg_sizes
193 alltoall_sgl = alltoall
198 IF (
fft_type <= 0) cpabort(
"Unknown FFT library: "//trim(fftlib))
203 CALL release_fft_scratch_pool()
204 CALL init_fft_scratch_pool()
216 CLASS(mp_comm_type) :: para_env
217 CHARACTER(LEN=*),
INTENT(IN) :: wisdom_file
221 CALL release_fft_scratch_pool()
240 INTEGER,
INTENT(IN) :: radix_in
241 INTEGER,
INTENT(OUT) :: radix_out
242 INTEGER,
INTENT(IN) :: operation
244 INTEGER,
PARAMETER :: fft_type_sg = 1
246 INTEGER :: i, iloc, ldata
247 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: data
250 ALLOCATE (
DATA(ldata))
254 IF (use_fftsg_sizes)
THEN
262 IF (
DATA(i) == radix_in)
THEN
266 IF (operation == fft_radix_allowed)
THEN
268 ELSE IF (
DATA(i) > radix_in)
THEN
276 cpabort(
"Index to radix array not found.")
279 IF (operation == fft_radix_allowed)
THEN
280 IF (
DATA(iloc) == radix_in)
THEN
281 radix_out = fft_radix_allowed
283 radix_out = fft_radix_disallowed
287 IF (
DATA(iloc) == radix_in)
THEN
288 radix_out =
DATA(iloc)
290 IF (abs(
DATA(iloc - 1) - radix_in) <= &
291 abs(
DATA(iloc) - radix_in))
THEN
292 radix_out =
DATA(iloc - 1)
294 radix_out =
DATA(iloc)
299 radix_out =
DATA(iloc)
303 IF (mod(
DATA(i), 2) == 1)
THEN
308 IF (mod(radix_out, 2) == 0)
THEN
309 cpabort(
"No odd radix found.")
313 cpabort(
"Disallowed radix operation.")
330 SUBROUTINE fft_fw1d(n, m, trans, zin, zout, scale, stat)
331 INTEGER,
INTENT(in) :: n, m
332 LOGICAL,
INTENT(in) :: trans
333 COMPLEX(kind=dp),
DIMENSION(*),
INTENT(inout) :: zin, zout
334 REAL(kind=
dp),
INTENT(in) :: scale
335 INTEGER,
INTENT(out) :: stat
337 CHARACTER(len=*),
PARAMETER :: routinen =
'fft_fw1d'
342 CALL timeset(routinen, handle)
349 CALL cp_warn(__location__, &
350 "FFT library in use cannot handle transformation of an arbitrary length.")
354 CALL timestop(handle)
370 SUBROUTINE fft3d_s(fsign, n, zin, zout, scale, status, debug)
372 INTEGER,
INTENT(IN) :: fsign
373 INTEGER,
DIMENSION(:),
INTENT(INOUT) :: n
374 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
376 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
377 INTENT(INOUT),
OPTIONAL,
TARGET :: zout
378 REAL(kind=
dp),
INTENT(IN),
OPTIONAL :: scale
379 INTEGER,
INTENT(OUT),
OPTIONAL :: status
380 LOGICAL,
INTENT(IN),
OPTIONAL :: debug
382 CHARACTER(len=*),
PARAMETER :: routinen =
'fft3d_s'
384 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
386 COMPLEX(KIND=dp),
DIMENSION(1, 1, 1),
TARGET :: zdum
387 INTEGER :: handle, ld(3), lo(3), output_unit, sign, &
389 LOGICAL :: fft_in_place, test
390 REAL(kind=
dp) :: in_sum, norm, out_sum
393 CALL timeset(routinen, handle)
396 IF (
PRESENT(scale))
THEN
402 IF (
PRESENT(debug))
THEN
408 IF (
PRESENT(zout))
THEN
409 fft_in_place = .false.
411 fft_in_place = .true.
415 in_sum = sum(abs(zin))
422 IF (n(1) /= ld(1) .OR. n(2) /= ld(2) .OR. n(3) /= ld(3))
THEN
423 cpabort(
"Size and dimension (zin) have to be the same.")
429 IF (fft_in_place)
THEN
431 IF (fsign ==
fwfft)
THEN
432 CALL fft_3d(fft_scratch%fft_plan(1), norm, zin, zoptr, stat)
434 CALL fft_3d(fft_scratch%fft_plan(2), norm, zin, zoptr, stat)
437 IF (fsign ==
fwfft)
THEN
438 CALL fft_3d(fft_scratch%fft_plan(3), norm, zin, zout, stat)
440 CALL fft_3d(fft_scratch%fft_plan(4), norm, zin, zout, stat)
446 IF (
PRESENT(zout))
THEN
447 lo(1) =
SIZE(zout, 1)
448 lo(2) =
SIZE(zout, 2)
449 lo(3) =
SIZE(zout, 3)
450 IF (n(1) /= lo(1) .OR. n(2) /= lo(2) .OR. n(3) /= lo(3))
THEN
451 cpabort(
"Size and dimension (zout) have to be the same.")
455 IF (
PRESENT(status))
THEN
459 IF (test .AND. output_unit > 0)
THEN
460 IF (
PRESENT(zout))
THEN
461 out_sum = sum(abs(zout))
462 WRITE (output_unit,
'(A)')
" Out of place 3D FFT (local) : fft3d_s"
463 WRITE (output_unit,
'(A,T60,3I7)')
" Transform lengths ", n
464 WRITE (output_unit,
'(A,T60,3I7)')
" Input array dimensions ", ld
465 WRITE (output_unit,
'(A,T60,3I7)')
" Output array dimensions ", lo
466 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of input data ", in_sum
467 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of output data ", out_sum
469 out_sum = sum(abs(zin))
470 WRITE (output_unit,
'(A)')
" In place 3D FFT (local) : fft3d_s"
471 WRITE (output_unit,
'(A,T60,3I7)')
" Transform lengths ", n
472 WRITE (output_unit,
'(A,T60,3I7)')
" Input/output array dimensions ", ld
473 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of input data ", in_sum
474 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of output data ", out_sum
478 CALL timestop(handle)
480 END SUBROUTINE fft3d_s
497 SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, &
498 bo, scale, status, debug)
500 INTEGER,
INTENT(IN) :: fsign
501 INTEGER,
DIMENSION(:),
INTENT(IN) :: n
502 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
504 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
506 TYPE(mp_comm_type),
INTENT(IN) :: gs_group
507 TYPE(mp_cart_type),
INTENT(IN) :: rs_group
508 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:), &
510 INTEGER,
CONTIGUOUS,
DIMENSION(0:),
INTENT(IN) :: nyzray
511 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:, :), &
513 REAL(kind=
dp),
INTENT(IN),
OPTIONAL :: scale
514 INTEGER,
INTENT(OUT),
OPTIONAL :: status
515 LOGICAL,
INTENT(IN),
OPTIONAL :: debug
517 CHARACTER(len=*),
PARAMETER :: routinen =
'fft3d_ps'
519 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
520 POINTER :: pbuf, qbuf, rbuf, sbuf
521 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
523 INTEGER :: g_pos, handle, lg, lmax, mcx2, mcz1, mcz2, mg, mmax, mx1, mx2, my1, mz2, n1, n2, &
524 nmax, numtask, numtask_g, numtask_r, nx, ny, nz, output_unit, r_dim(2), r_pos(2), rp, &
526 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: p2p
528 REAL(kind=
dp) :: norm, sum_data
532 CALL timeset(routinen, handle)
535 IF (
PRESENT(debug))
THEN
541 numtask_g = gs_group%num_pe
542 g_pos = gs_group%mepos
543 numtask_r = rs_group%num_pe
544 r_dim = rs_group%num_pe_cart
545 r_pos = rs_group%mepos_cart
546 IF (numtask_g /= numtask_r)
THEN
547 cpabort(
"Real space and G space groups are different.")
551 IF (
PRESENT(scale))
THEN
571 lmax = max(lg, (nx*ny*nz)/mmax + 1)
573 ALLOCATE (p2p(0:numtask - 1))
575 CALL gs_group%rank_compare(rs_group, p2p)
578 mx1 = bo(2, 1, rp, 1) - bo(1, 1, rp, 1) + 1
579 my1 = bo(2, 2, rp, 1) - bo(1, 2, rp, 1) + 1
580 mx2 = bo(2, 1, rp, 2) - bo(1, 1, rp, 2) + 1
581 mz2 = bo(2, 3, rp, 2) - bo(1, 3, rp, 2) + 1
583 n1 = maxval(bo(2, 1, :, 1) - bo(1, 1, :, 1) + 1)
584 n2 = maxval(bo(2, 2, :, 1) - bo(1, 2, :, 1) + 1)
585 nmax = max((2*n2)/numtask, 2)*mx2*mz2
586 nmax = max(nmax, n1*maxval(nyzray))
587 n1 = maxval(bo(2, 1, :, 2))
588 n2 = maxval(bo(2, 3, :, 2))
590 fft_scratch_size%nx = nx
591 fft_scratch_size%ny = ny
592 fft_scratch_size%nz = nz
593 fft_scratch_size%lmax = lmax
594 fft_scratch_size%mmax = mmax
595 fft_scratch_size%mx1 = mx1
596 fft_scratch_size%mx2 = mx2
597 fft_scratch_size%my1 = my1
598 fft_scratch_size%mz2 = mz2
599 fft_scratch_size%lg = lg
600 fft_scratch_size%mg = mg
601 fft_scratch_size%nbx = n1
602 fft_scratch_size%nbz = n2
603 mcz1 = maxval(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
604 mcx2 = maxval(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
605 mcz2 = maxval(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
606 fft_scratch_size%mcz1 = mcz1
607 fft_scratch_size%mcx2 = mcx2
608 fft_scratch_size%mcz2 = mcz2
609 fft_scratch_size%nmax = nmax
610 fft_scratch_size%nmray = maxval(nyzray)
611 fft_scratch_size%nyzray = nyzray(g_pos)
612 fft_scratch_size%gs_group = gs_group
613 fft_scratch_size%rs_group = rs_group
614 fft_scratch_size%g_pos = g_pos
615 fft_scratch_size%r_pos = r_pos
616 fft_scratch_size%r_dim = r_dim
617 fft_scratch_size%numtask = numtask
620 IF (g_pos == 0 .AND. output_unit > 0)
THEN
621 WRITE (output_unit,
'(A)')
" Parallel 3D FFT : fft3d_ps"
622 WRITE (output_unit,
'(A,T60,3I7)')
" Transform lengths ", n
623 WRITE (output_unit,
'(A,T67,2I7)')
" Array dimensions (gin) ", lg, mg
624 WRITE (output_unit,
'(A,T60,3I7)')
" Array dimensions (cin) ", nx, ny, nz
628 IF (r_dim(2) > 1)
THEN
635 IF (r_dim(1) == 1)
THEN
636 cpabort(
"This processor distribution is not supported.")
638 CALL get_fft_scratch(fft_scratch, tf_type=300, n=n, fft_sizes=fft_scratch_size)
640 IF (sign ==
fwfft)
THEN
644 sum_data = abs(sum(cin))
645 CALL gs_group%sum(sum_data)
646 IF (g_pos == 0 .AND. output_unit > 0)
THEN
647 WRITE (output_unit,
'(A)')
" Two step communication algorithm "
648 WRITE (output_unit,
'(A,T60,3I7)')
" Transform Z ", n(3), mx1*my1
649 WRITE (output_unit,
'(A,T60,3I7)')
" Transform Y ", n(2), mx2*mz2
650 WRITE (output_unit,
'(A,T67,2I7)')
" Transform X ", n(1), nyzray(g_pos)
651 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(1) ", sum_data
655 pbuf => fft_scratch%p1buf
656 qbuf => fft_scratch%p2buf
659 CALL fft_1dm(fft_scratch%fft_plan(1), cin, qbuf, norm, stat)
661 rbuf => fft_scratch%p3buf
664 sum_data = abs(sum(qbuf))
665 CALL gs_group%sum(sum_data)
666 IF (g_pos == 0 .AND. output_unit > 0)
THEN
667 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(2) T", sum_data
672 CALL cube_transpose_2(qbuf, bo(:, :, :, 1), bo(:, :, :, 2), rbuf, fft_scratch)
675 sum_data = abs(sum(rbuf))
676 CALL gs_group%sum(sum_data)
677 IF (g_pos == 0 .AND. output_unit > 0)
THEN
678 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(3) T", sum_data
682 pbuf => fft_scratch%p4buf
685 CALL fft_1dm(fft_scratch%fft_plan(2), rbuf, pbuf, 1.0_dp, stat)
687 qbuf => fft_scratch%p5buf
690 sum_data = abs(sum(pbuf))
691 CALL gs_group%sum(sum_data)
692 IF (g_pos == 0 .AND. output_unit > 0)
THEN
693 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(4) TS", sum_data
698 CALL xz_to_yz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
699 bo(:, :, :, 2), qbuf, fft_scratch)
702 sum_data = abs(sum(qbuf))
703 CALL gs_group%sum(sum_data)
704 IF (g_pos == 0 .AND. output_unit > 0)
THEN
705 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(5) TS", sum_data
710 CALL fft_1dm(fft_scratch%fft_plan(3), qbuf, gin, 1.0_dp, stat)
713 sum_data = abs(sum(gin))
714 CALL gs_group%sum(sum_data)
715 IF (g_pos == 0 .AND. output_unit > 0)
THEN
716 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(6) ", sum_data
720 ELSE IF (sign ==
bwfft)
THEN
724 sum_data = abs(sum(gin))
725 CALL gs_group%sum(sum_data)
726 IF (g_pos == 0 .AND. output_unit > 0)
THEN
727 WRITE (output_unit,
'(A)')
" Two step communication algorithm "
728 WRITE (output_unit,
'(A,T67,2I7)')
" Transform X ", n(1), nyzray(g_pos)
729 WRITE (output_unit,
'(A,T60,3I7)')
" Transform Y ", n(2), mx2*mz2
730 WRITE (output_unit,
'(A,T60,3I7)')
" Transform Z ", n(3), mx1*my1
731 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(1) ", sum_data
735 pbuf => fft_scratch%p7buf
738 CALL fft_1dm(fft_scratch%fft_plan(4), gin, pbuf, norm, stat)
740 qbuf => fft_scratch%p4buf
743 sum_data = abs(sum(pbuf))
744 CALL gs_group%sum(sum_data)
745 IF (g_pos == 0 .AND. output_unit > 0)
THEN
746 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(2) TS", sum_data
751 CALL yz_to_xz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
752 bo(:, :, :, 2), qbuf, fft_scratch)
755 sum_data = abs(sum(qbuf))
756 CALL gs_group%sum(sum_data)
757 IF (g_pos == 0 .AND. output_unit > 0)
THEN
758 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(3) TS", sum_data
762 rbuf => fft_scratch%p3buf
765 CALL fft_1dm(fft_scratch%fft_plan(5), qbuf, rbuf, 1.0_dp, stat)
767 pbuf => fft_scratch%p2buf
770 sum_data = abs(sum(rbuf))
771 CALL gs_group%sum(sum_data)
772 IF (g_pos == 0 .AND. output_unit > 0)
THEN
773 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(4) T", sum_data
778 CALL cube_transpose_1(rbuf, bo(:, :, :, 2), bo(:, :, :, 1), pbuf, fft_scratch)
781 sum_data = abs(sum(pbuf))
782 CALL gs_group%sum(sum_data)
783 IF (g_pos == 0 .AND. output_unit > 0)
THEN
784 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(5) T", sum_data
788 qbuf => fft_scratch%p1buf
791 CALL fft_1dm(fft_scratch%fft_plan(6), pbuf, cin, 1.0_dp, stat)
794 sum_data = abs(sum(cin))
795 CALL gs_group%sum(sum_data)
796 IF (g_pos == 0 .AND. output_unit > 0)
THEN
797 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(6) ", sum_data
803 cpabort(
"Illegal fsign parameter.")
817 CALL get_fft_scratch(fft_scratch, tf_type=200, n=n, fft_sizes=fft_scratch_size)
819 sbuf => fft_scratch%r1buf
820 tbuf => fft_scratch%tbuf
825 IF (sign ==
fwfft)
THEN
829 sum_data = abs(sum(cin))
830 CALL gs_group%sum(sum_data)
831 IF (g_pos == 0 .AND. output_unit > 0)
THEN
832 WRITE (output_unit,
'(A)')
" One step communication algorithm "
833 WRITE (output_unit,
'(A,T60,3I7)')
" Transform YZ ", n(2), n(3), nx
834 WRITE (output_unit,
'(A,T67,2I7)')
" Transform X ", n(1), nyzray(g_pos)
835 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(1) ", sum_data
840 CALL fft_1dm(fft_scratch%fft_plan(1), cin, sbuf, 1._dp, stat)
841 CALL fft_1dm(fft_scratch%fft_plan(2), sbuf, tbuf, 1._dp, stat)
844 sum_data = abs(sum(tbuf))
845 CALL gs_group%sum(sum_data)
846 IF (g_pos == 0 .AND. output_unit > 0)
THEN
847 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(2) TS", sum_data
852 CALL yz_to_x(tbuf, gs_group, g_pos, p2p, yzp, nyzray, &
853 bo(:, :, :, 2), sbuf, fft_scratch)
856 sum_data = abs(sum(sbuf))
857 CALL gs_group%sum(sum_data)
858 IF (g_pos == 0 .AND. output_unit > 0)
THEN
859 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(3) TS", sum_data
863 CALL fft_1dm(fft_scratch%fft_plan(3), sbuf, gin, norm, stat)
866 sum_data = abs(sum(gin))
867 CALL gs_group%sum(sum_data)
868 IF (g_pos == 0 .AND. output_unit > 0)
THEN
869 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(4) ", sum_data
873 ELSE IF (sign ==
bwfft)
THEN
877 sum_data = abs(sum(gin))
878 CALL gs_group%sum(sum_data)
879 IF (g_pos == 0 .AND. output_unit > 0)
THEN
880 WRITE (output_unit,
'(A)')
" One step communication algorithm "
881 WRITE (output_unit,
'(A,T67,2I7)')
" Transform X ", n(1), nyzray(g_pos)
882 WRITE (output_unit,
'(A,T60,3I7)')
" Transform YZ ", n(2), n(3), nx
883 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(1) ", sum_data
888 CALL fft_1dm(fft_scratch%fft_plan(4), gin, sbuf, norm, stat)
891 sum_data = abs(sum(sbuf))
892 CALL gs_group%sum(sum_data)
893 IF (g_pos == 0 .AND. output_unit > 0)
THEN
894 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(2) TS", sum_data
899 CALL x_to_yz(sbuf, gs_group, g_pos, p2p, yzp, nyzray, &
900 bo(:, :, :, 2), tbuf, fft_scratch)
903 sum_data = abs(sum(tbuf))
904 CALL gs_group%sum(sum_data)
905 IF (g_pos == 0 .AND. output_unit > 0)
THEN
906 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(3) TS", sum_data
911 CALL fft_1dm(fft_scratch%fft_plan(5), tbuf, sbuf, 1._dp, stat)
912 CALL fft_1dm(fft_scratch%fft_plan(6), sbuf, cin, 1._dp, stat)
915 sum_data = abs(sum(cin))
916 CALL gs_group%sum(sum_data)
917 IF (g_pos == 0 .AND. output_unit > 0)
THEN
918 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(4) ", sum_data
922 cpabort(
"Illegal fsign parameter.")
931 IF (
PRESENT(status))
THEN
934 CALL timestop(handle)
936 END SUBROUTINE fft3d_ps
950 SUBROUTINE fft3d_pb(fsign, n, zin, gin, group, bo, scale, status, debug)
952 INTEGER,
INTENT(IN) :: fsign
953 INTEGER,
DIMENSION(3),
INTENT(IN) :: n
954 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
956 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
958 TYPE(mp_cart_type),
INTENT(IN) :: group
959 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:, :), &
961 REAL(kind=
dp),
INTENT(IN),
OPTIONAL :: scale
962 INTEGER,
INTENT(OUT),
OPTIONAL :: status
963 LOGICAL,
INTENT(IN),
OPTIONAL :: debug
965 CHARACTER(len=*),
PARAMETER :: routinen =
'fft3d_pb'
967 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
968 POINTER :: abuf, bbuf
969 INTEGER :: handle, lg(2), lz(3), mcx2, mcy3, mcz1, &
970 mcz2, mx1, mx2, mx3, my1, my2, my3, &
971 my_pos, mz1, mz2, mz3, output_unit, &
973 INTEGER,
DIMENSION(2) :: dim
975 REAL(kind=
dp) :: norm, sum_data
999 CALL timeset(routinen, handle)
1002 dim = group%num_pe_cart
1003 my_pos = group%mepos
1005 IF (
PRESENT(debug))
THEN
1011 IF (
PRESENT(scale))
THEN
1020 lg(1) =
SIZE(gin, 1)
1021 lg(2) =
SIZE(gin, 2)
1022 lz(1) =
SIZE(zin, 1)
1023 lz(2) =
SIZE(zin, 2)
1024 lz(3) =
SIZE(zin, 3)
1025 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1026 WRITE (output_unit,
'(A)')
" Parallel 3D FFT : fft3d_pb"
1027 WRITE (output_unit,
'(A,T60,3I7)')
" Transform lengths ", n
1028 WRITE (output_unit,
'(A,T67,2I7)')
" Array dimensions (gin) ", lg
1029 WRITE (output_unit,
'(A,T60,3I7)')
" Array dimensions (cin) ", lz
1033 mx1 = bo(2, 1, my_pos, 1) - bo(1, 1, my_pos, 1) + 1
1034 my1 = bo(2, 2, my_pos, 1) - bo(1, 2, my_pos, 1) + 1
1035 mz1 = bo(2, 3, my_pos, 1) - bo(1, 3, my_pos, 1) + 1
1036 mx2 = bo(2, 1, my_pos, 2) - bo(1, 1, my_pos, 2) + 1
1037 my2 = bo(2, 2, my_pos, 2) - bo(1, 2, my_pos, 2) + 1
1038 mz2 = bo(2, 3, my_pos, 2) - bo(1, 3, my_pos, 2) + 1
1039 mx3 = bo(2, 1, my_pos, 3) - bo(1, 1, my_pos, 3) + 1
1040 my3 = bo(2, 2, my_pos, 3) - bo(1, 2, my_pos, 3) + 1
1041 mz3 = bo(2, 3, my_pos, 3) - bo(1, 3, my_pos, 3) + 1
1042 fft_scratch_size%mx1 = mx1
1043 fft_scratch_size%mx2 = mx2
1044 fft_scratch_size%mx3 = mx3
1045 fft_scratch_size%my1 = my1
1046 fft_scratch_size%my2 = my2
1047 fft_scratch_size%my3 = my3
1048 fft_scratch_size%mz1 = mz1
1049 fft_scratch_size%mz2 = mz2
1050 fft_scratch_size%mz3 = mz3
1051 mcz1 = maxval(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
1052 mcx2 = maxval(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
1053 mcz2 = maxval(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
1054 mcy3 = maxval(bo(2, 2, :, 3) - bo(1, 2, :, 3) + 1)
1055 fft_scratch_size%mcz1 = mcz1
1056 fft_scratch_size%mcx2 = mcx2
1057 fft_scratch_size%mcz2 = mcz2
1058 fft_scratch_size%mcy3 = mcy3
1059 fft_scratch_size%gs_group = group
1060 fft_scratch_size%rs_group = group
1061 fft_scratch_size%g_pos = my_pos
1062 fft_scratch_size%numtask = dim(1)*dim(2)
1064 IF (dim(1) > 1 .AND. dim(2) > 1)
THEN
1070 CALL get_fft_scratch(fft_scratch, tf_type=100, n=n, fft_sizes=fft_scratch_size)
1072 IF (sign ==
fwfft)
THEN
1075 bbuf => fft_scratch%a2buf
1078 sum_data = abs(sum(zin))
1079 CALL group%sum(sum_data)
1080 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1081 WRITE (output_unit,
'(A)')
" Two step communication algorithm "
1082 WRITE (output_unit,
'(A,T67,2I7)')
" Transform Z ", n(3), mx1*my1
1083 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(1) ", sum_data
1088 CALL fft_1dm(fft_scratch%fft_plan(1), zin, bbuf, norm, stat)
1090 abuf => fft_scratch%a3buf
1093 sum_data = abs(sum(bbuf))
1094 CALL group%sum(sum_data)
1095 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1096 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(2) T", sum_data
1100 CALL cube_transpose_2(bbuf, bo(:, :, :, 1), bo(:, :, :, 2), abuf, fft_scratch)
1102 bbuf => fft_scratch%a4buf
1105 sum_data = abs(sum(abuf))
1106 CALL group%sum(sum_data)
1107 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1108 WRITE (output_unit,
'(A,T67,2I7)')
" Transform Y ", n(2), mx2*mz2
1109 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(3) ", sum_data
1114 CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
1116 abuf => fft_scratch%a5buf
1119 sum_data = abs(sum(bbuf))
1120 CALL group%sum(sum_data)
1121 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1122 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(4) T", sum_data
1126 CALL cube_transpose_4(bbuf, bo(:, :, :, 2), bo(:, :, :, 3), abuf, fft_scratch)
1129 sum_data = abs(sum(abuf))
1130 CALL group%sum(sum_data)
1131 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1132 WRITE (output_unit,
'(A,T67,2I7)')
" Transform X ", n(1), my3*mz3
1133 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(5) ", sum_data
1138 CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
1141 sum_data = abs(sum(gin))
1142 CALL group%sum(sum_data)
1143 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1144 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(6) ", sum_data
1148 ELSEIF (sign ==
bwfft)
THEN
1151 bbuf => fft_scratch%a5buf
1154 sum_data = abs(sum(gin))
1155 CALL group%sum(sum_data)
1156 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1157 WRITE (output_unit,
'(A)')
" Two step communication algorithm "
1158 WRITE (output_unit,
'(A,T67,2I7)')
" Transform X ", n(1), my3*mz3
1159 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(1) ", sum_data
1164 CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
1166 abuf => fft_scratch%a4buf
1169 sum_data = abs(sum(bbuf))
1170 CALL group%sum(sum_data)
1171 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1172 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(2) T", sum_data
1176 CALL cube_transpose_3(bbuf, bo(:, :, :, 3), bo(:, :, :, 2), abuf, fft_scratch)
1178 bbuf => fft_scratch%a3buf
1181 sum_data = abs(sum(abuf))
1182 CALL group%sum(sum_data)
1183 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1184 WRITE (output_unit,
'(A,T67,2I7)')
" Transform Y ", n(2), mx2*mz2
1185 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(3) ", sum_data
1190 CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
1192 abuf => fft_scratch%a2buf
1195 sum_data = abs(sum(bbuf))
1196 CALL group%sum(sum_data)
1197 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1198 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(4) T", sum_data
1202 CALL cube_transpose_1(bbuf, bo(:, :, :, 2), bo(:, :, :, 1), abuf, fft_scratch)
1205 sum_data = abs(sum(abuf))
1206 CALL group%sum(sum_data)
1207 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1208 WRITE (output_unit,
'(A,T67,2I7)')
" Transform Z ", n(3), mx1*my1
1209 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(5) ", sum_data
1214 CALL fft_1dm(fft_scratch%fft_plan(6), abuf, zin, norm, stat)
1217 sum_data = abs(sum(zin))
1218 CALL group%sum(sum_data)
1219 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1220 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(6) ", sum_data
1225 cpabort(
"Illegal fsign parameter.")
1230 ELSEIF (dim(2) == 1)
THEN
1236 CALL get_fft_scratch(fft_scratch, tf_type=101, n=n, fft_sizes=fft_scratch_size)
1238 IF (sign ==
fwfft)
THEN
1242 sum_data = abs(sum(zin))
1243 CALL group%sum(sum_data)
1244 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1245 WRITE (output_unit,
'(A)')
" one step communication algorithm "
1246 WRITE (output_unit,
'(A,T67,2I7)')
" Transform Z ", n(3), mx1*my1
1247 WRITE (output_unit,
'(A,T67,2I7)')
" Transform Y ", n(2), mx1*mz1
1248 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(1) ", sum_data
1252 abuf => fft_scratch%a3buf
1253 bbuf => fft_scratch%a4buf
1255 CALL fft_1dm(fft_scratch%fft_plan(1), zin, abuf, norm, stat)
1256 CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
1258 abuf => fft_scratch%a5buf
1261 sum_data = abs(sum(bbuf))
1262 CALL group%sum(sum_data)
1263 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1264 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(2) T", sum_data
1268 CALL cube_transpose_6(bbuf, group, bo(:, :, :, 1), bo(:, :, :, 3), abuf, fft_scratch)
1271 sum_data = abs(sum(abuf))
1272 CALL group%sum(sum_data)
1273 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1274 WRITE (output_unit,
'(A,T67,2I7)')
" Transform X ", n(1), my3*mz3
1275 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(3) ", sum_data
1280 CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
1283 sum_data = abs(sum(gin))
1284 CALL group%sum(sum_data)
1285 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1286 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(4) ", sum_data
1290 ELSEIF (sign ==
bwfft)
THEN
1294 sum_data = abs(sum(gin))
1295 CALL group%sum(sum_data)
1296 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1297 WRITE (output_unit,
'(A)')
" one step communication algorithm "
1298 WRITE (output_unit,
'(A,T67,2I7)')
" Transform X ", n(1), my3*mz3
1299 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(1) ", sum_data
1303 bbuf => fft_scratch%a5buf
1306 CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
1308 abuf => fft_scratch%a4buf
1311 sum_data = abs(sum(bbuf))
1312 CALL group%sum(sum_data)
1313 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1314 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(2) T", sum_data
1318 CALL cube_transpose_5(bbuf, group, bo(:, :, :, 3), bo(:, :, :, 1), abuf, fft_scratch)
1320 bbuf => fft_scratch%a3buf
1323 sum_data = abs(sum(abuf))
1324 CALL group%sum(sum_data)
1325 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1326 WRITE (output_unit,
'(A,T67,2I7)')
" Transform Y ", n(2), mx1*mz1
1327 WRITE (output_unit,
'(A,T67,2I7)')
" Transform Z ", n(3), mx1*my1
1328 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(3) ", sum_data
1333 CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
1336 CALL fft_1dm(fft_scratch%fft_plan(6), bbuf, zin, norm, stat)
1339 sum_data = abs(sum(zin))
1340 CALL group%sum(sum_data)
1341 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1342 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(4) ", sum_data
1347 cpabort(
"Illegal fsign parameter.")
1354 cpabort(
"This partition not implemented.")
1358 IF (
PRESENT(status))
THEN
1362 CALL timestop(handle)
1364 END SUBROUTINE fft3d_pb
1381 SUBROUTINE x_to_yz(sb, group, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1383 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1385 TYPE(mp_comm_type),
INTENT(IN) :: group
1386 INTEGER,
INTENT(IN) :: my_pos
1387 INTEGER,
CONTIGUOUS,
DIMENSION(0:),
INTENT(IN) :: p2p
1388 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:), &
1390 INTEGER,
CONTIGUOUS,
DIMENSION(0:),
INTENT(IN) :: nray
1391 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:), &
1393 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
1397 CHARACTER(len=*),
PARAMETER :: routinen =
'x_to_yz'
1399 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1401 COMPLEX(KIND=sp),
CONTIGUOUS,
DIMENSION(:, :), &
1403 INTEGER :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
1405 INTEGER,
CONTIGUOUS,
DIMENSION(:),
POINTER :: rcount, rdispl, scount, sdispl
1407 CALL timeset(routinen, handle)
1410 scount => fft_scratch%scount
1411 rcount => fft_scratch%rcount
1412 sdispl => fft_scratch%sdispl
1413 rdispl => fft_scratch%rdispl
1415 IF (alltoall_sgl)
THEN
1416 ss => fft_scratch%ss
1417 tt => fft_scratch%tt
1418 ss(:, :) = cmplx(sb(:, :), kind=
sp)
1421 rr => fft_scratch%rr
1425 nm = maxval(nray(0:np - 1))
1432 nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
1434 sdispl(ip) = nr*(bo(1, 1, ix) - 1)
1437 nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1444 rdispl(ip) = nm*nx*ip
1447 IF (alltoall_sgl)
THEN
1448 CALL group%alltoall(ss, scount, sdispl, tt, rcount, rdispl)
1450 CALL group%alltoall(sb, scount, sdispl, rr, rcount, rdispl)
1453 nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1459 ixx = nray(ip)*(ix - 1)
1460 IF (alltoall_sgl)
THEN
1464 tb(iy, iz, ix) = tt(ir + ixx, ip)
1470 tb(iy, iz, ix) = rr(ir + ixx, ip)
1477 CALL timestop(handle)
1496 SUBROUTINE yz_to_x(tb, group, my_pos, p2p, yzp, nray, bo, sb, fft_scratch)
1498 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
1500 TYPE(mp_comm_type),
INTENT(IN) :: group
1501 INTEGER,
INTENT(IN) :: my_pos
1502 INTEGER,
CONTIGUOUS,
DIMENSION(0:),
INTENT(IN) :: p2p
1503 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:), &
1505 INTEGER,
CONTIGUOUS,
DIMENSION(0:),
INTENT(IN) :: nray
1506 INTEGER,
DIMENSION(:, :, 0:),
INTENT(IN) :: bo
1507 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1511 CHARACTER(len=*),
PARAMETER :: routinen =
'yz_to_x'
1513 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1515 COMPLEX(KIND=sp),
CONTIGUOUS,
DIMENSION(:, :), &
1517 INTEGER :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
1519 INTEGER,
CONTIGUOUS,
DIMENSION(:),
POINTER :: rcount, rdispl, scount, sdispl
1521 CALL timeset(routinen, handle)
1525 scount => fft_scratch%scount
1526 rcount => fft_scratch%rcount
1527 sdispl => fft_scratch%sdispl
1528 rdispl => fft_scratch%rdispl
1530 IF (alltoall_sgl)
THEN
1531 ss => fft_scratch%ss
1532 tt => fft_scratch%tt
1536 rr => fft_scratch%rr
1539 nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1545 ixx = nray(ip)*(ix - 1)
1546 IF (alltoall_sgl)
THEN
1550 tt(ir + ixx, ip) = cmplx(tb(iy, iz, ix), kind=
sp)
1556 rr(ir + ixx, ip) = tb(iy, iz, ix)
1562 nm = maxval(nray(0:np - 1))
1569 nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
1571 rdispl(ip) = nr*(bo(1, 1, ix) - 1)
1574 nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1581 sdispl(ip) = nm*nx*ip
1585 IF (alltoall_sgl)
THEN
1586 CALL group%alltoall(tt, scount, sdispl, ss, rcount, rdispl)
1589 CALL group%alltoall(rr, scount, sdispl, sb, rcount, rdispl)
1592 CALL timestop(handle)
1612 SUBROUTINE yz_to_xz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1614 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1617 CLASS(mp_comm_type),
INTENT(IN) :: group
1618 INTEGER,
DIMENSION(2),
INTENT(IN) :: dims
1619 INTEGER,
INTENT(IN) :: my_pos
1620 INTEGER,
CONTIGUOUS,
DIMENSION(0:),
INTENT(IN) :: p2p
1621 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:),
INTENT(IN) :: yzp
1622 INTEGER,
CONTIGUOUS,
DIMENSION(0:),
INTENT(IN) :: nray
1623 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:),
INTENT(IN) :: bo
1624 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(INOUT),
CONTIGUOUS :: tb
1627 CHARACTER(len=*),
PARAMETER :: routinen =
'yz_to_xz'
1629 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER,
CONTIGUOUS :: xzbuf, yzbuf
1630 COMPLEX(KIND=sp),
DIMENSION(:),
POINTER,
CONTIGUOUS :: xzbuf_sgl, yzbuf_sgl
1631 INTEGER :: handle, icrs, ip, ipl, ipr, ir, ix, iz, &
1632 jj, jx, jy, jz, myx, myz, np, npx, &
1634 INTEGER,
DIMENSION(:),
POINTER,
CONTIGUOUS :: pzcoord, rcount, rdispl, scount, sdispl, &
1636 INTEGER,
DIMENSION(:, :),
CONTIGUOUS,
POINTER :: pgrid
1638 CALL timeset(routinen, handle)
1642 rs_pos = p2p(my_pos)
1644 IF (alltoall_sgl)
THEN
1645 yzbuf_sgl => fft_scratch%yzbuf_sgl
1646 xzbuf_sgl => fft_scratch%xzbuf_sgl
1648 yzbuf => fft_scratch%yzbuf
1649 xzbuf => fft_scratch%xzbuf
1653 pgrid => fft_scratch%pgrid
1654 xcor => fft_scratch%xcor
1655 zcor => fft_scratch%zcor
1656 pzcoord => fft_scratch%pzcoord
1657 scount => fft_scratch%scount
1658 rcount => fft_scratch%rcount
1659 sdispl => fft_scratch%sdispl
1660 rdispl => fft_scratch%rdispl
1666 IF (fft_scratch%in == 0)
THEN
1672 xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
1676 zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
1679 IF (alltoall_sgl)
THEN
1680 DO ir = 1, nray(my_pos)
1681 jy = yzp(1, ir, my_pos)
1682 jz = yzp(2, ir, my_pos)
1683 ip = pgrid(xcor(jx), zcor(jz))
1684 scount(ip) = scount(ip) + 1
1687 DO ir = 1, nray(my_pos)
1688 jy = yzp(1, ir, my_pos)
1689 jz = yzp(2, ir, my_pos)
1690 ip = pgrid(xcor(jx), zcor(jz))
1691 scount(ip) = scount(ip) + 1
1696 CALL group%alltoall(scount, rcount, 1)
1697 fft_scratch%yzcount = scount
1698 fft_scratch%xzcount = rcount
1704 sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
1705 rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
1708 fft_scratch%yzdispl = sdispl
1709 fft_scratch%xzdispl = rdispl
1713 IF (scount(ip) /= 0) icrs = icrs + 1
1714 IF (rcount(ip) /= 0) icrs = icrs + 1
1716 CALL group%sum(icrs)
1717 fft_scratch%rsratio = real(icrs, kind=
dp)/(real(2*np, kind=
dp)*real(np, kind=
dp))
1721 scount = fft_scratch%yzcount
1722 rcount = fft_scratch%xzcount
1723 sdispl = fft_scratch%yzdispl
1724 rdispl = fft_scratch%xzdispl
1734 IF (scount(ip) == 0) cycle
1737 nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
1738 DO ir = 1, nray(my_pos)
1739 jz = yzp(2, ir, my_pos)
1740 IF (zcor(jz) == pzcoord(ipl))
THEN
1742 jy = yzp(1, ir, my_pos)
1743 IF (alltoall_sgl)
THEN
1745 yzbuf_sgl(sdispl(ip) + jj + jx*scount(ip)/nx) = cmplx(sb(ir, jx + bo(1, 1, ipl)), kind=
sp)
1749 yzbuf(sdispl(ip) + jj + jx*scount(ip)/nx) = sb(ir, jx + bo(1, 1, ipl))
1757 IF (alltoall_sgl)
THEN
1758 CALL group%alltoall(yzbuf_sgl, scount, sdispl, xzbuf_sgl, rcount, rdispl)
1760 IF (fft_scratch%rsratio < ratio_sparse_alltoall)
THEN
1761 CALL sparse_alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl, group)
1763 CALL group%alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl)
1767 myx = fft_scratch%sizes%r_pos(1)
1768 myz = fft_scratch%sizes%r_pos(2)
1769 nz = bo(2, 3, rs_pos) - bo(1, 3, rs_pos) + 1
1779 DO jx = 0, bo(2, 1, rs_pos) - bo(1, 1, rs_pos)
1782 IF (alltoall_sgl)
THEN
1783 IF (zcor(jz) == myz)
THEN
1786 jz = jz - bo(1, 3, rs_pos) + 1
1787 tb(jy, jz + jx*nz) = xzbuf_sgl(jj + rdispl(ipr))
1790 IF (zcor(jz) == myz)
THEN
1793 jz = jz - bo(1, 3, rs_pos) + 1
1794 tb(jy, jz + jx*nz) = xzbuf(jj + rdispl(ipr))
1802 CALL timestop(handle)
1822 SUBROUTINE xz_to_yz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1824 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1827 CLASS(mp_comm_type),
INTENT(IN) :: group
1828 INTEGER,
DIMENSION(2),
INTENT(IN) :: dims
1829 INTEGER,
INTENT(IN) :: my_pos
1830 INTEGER,
CONTIGUOUS,
DIMENSION(0:),
INTENT(IN) :: p2p
1831 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:),
INTENT(IN) :: yzp
1832 INTEGER,
CONTIGUOUS,
DIMENSION(0:),
INTENT(IN) :: nray
1833 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:),
INTENT(IN) :: bo
1834 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(INOUT),
CONTIGUOUS :: tb
1837 CHARACTER(len=*),
PARAMETER :: routinen =
'xz_to_yz'
1839 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER,
CONTIGUOUS :: xzbuf, yzbuf
1840 COMPLEX(KIND=sp),
DIMENSION(:),
POINTER,
CONTIGUOUS :: xzbuf_sgl, yzbuf_sgl
1841 INTEGER :: handle, icrs, ip, ipl, ir, ix, ixx, iz, &
1842 jj, jx, jy, jz, mp, myx, myz, np, npx, &
1844 INTEGER,
DIMENSION(:),
POINTER,
CONTIGUOUS :: pzcoord, rcount, rdispl, scount, sdispl, &
1846 INTEGER,
DIMENSION(:, :),
CONTIGUOUS,
POINTER :: pgrid
1848 CALL timeset(routinen, handle)
1852 IF (alltoall_sgl)
THEN
1853 yzbuf_sgl => fft_scratch%yzbuf_sgl
1854 xzbuf_sgl => fft_scratch%xzbuf_sgl
1856 yzbuf => fft_scratch%yzbuf
1857 xzbuf => fft_scratch%xzbuf
1861 pgrid => fft_scratch%pgrid
1862 xcor => fft_scratch%xcor
1863 zcor => fft_scratch%zcor
1864 pzcoord => fft_scratch%pzcoord
1865 scount => fft_scratch%scount
1866 rcount => fft_scratch%rcount
1867 sdispl => fft_scratch%sdispl
1868 rdispl => fft_scratch%rdispl
1872 IF (fft_scratch%in == 0)
THEN
1875 nx = maxval(bo(2, 1, :))
1879 xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
1883 zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
1886 DO ir = 1, nray(my_pos)
1887 jy = yzp(1, ir, my_pos)
1888 jz = yzp(2, ir, my_pos)
1889 ip = pgrid(xcor(jx), zcor(jz))
1890 rcount(ip) = rcount(ip) + 1
1894 CALL group%alltoall(rcount, scount, 1)
1895 fft_scratch%xzcount = scount
1896 fft_scratch%yzcount = rcount
1902 sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
1903 rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
1906 fft_scratch%xzdispl = sdispl
1907 fft_scratch%yzdispl = rdispl
1911 IF (scount(ip) /= 0) icrs = icrs + 1
1912 IF (rcount(ip) /= 0) icrs = icrs + 1
1914 CALL group%sum(icrs)
1915 fft_scratch%rsratio = real(icrs, kind=
dp)/(real(2*np, kind=
dp)*real(np, kind=
dp))
1919 scount = fft_scratch%xzcount
1920 rcount = fft_scratch%yzcount
1921 sdispl = fft_scratch%xzdispl
1922 rdispl = fft_scratch%yzdispl
1926 myx = fft_scratch%sizes%r_pos(1)
1927 myz = fft_scratch%sizes%r_pos(2)
1929 nz = bo(2, 3, mp) - bo(1, 3, mp) + 1
1930 nx = bo(2, 1, mp) - bo(1, 1, mp) + 1
1942 IF (zcor(jz) == myz)
THEN
1945 jz = yzp(2, ir, ip) - bo(1, 3, mp) + 1
1946 IF (alltoall_sgl)
THEN
1948 ixx = jj + jx*scount(ipl)/nx
1949 xzbuf_sgl(ixx + sdispl(ipl)) = cmplx(sb(jy, jz + jx*nz), kind=
sp)
1953 ixx = jj + jx*scount(ipl)/nx
1954 xzbuf(ixx + sdispl(ipl)) = sb(jy, jz + jx*nz)
1962 IF (alltoall_sgl)
THEN
1963 CALL group%alltoall(xzbuf_sgl, scount, sdispl, yzbuf_sgl, rcount, rdispl)
1965 IF (fft_scratch%rsratio < ratio_sparse_alltoall)
THEN
1966 CALL sparse_alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl, group)
1968 CALL group%alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl)
1978 IF (rcount(ip) == 0) cycle
1981 nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
1982 DO ir = 1, nray(my_pos)
1983 jz = yzp(2, ir, my_pos)
1984 IF (zcor(jz) == pzcoord(ipl))
THEN
1986 jy = yzp(1, ir, my_pos)
1987 IF (alltoall_sgl)
THEN
1989 tb(ir, jx + bo(1, 1, ipl)) = yzbuf_sgl(rdispl(ip) + jj + jx*rcount(ip)/nx)
1993 tb(ir, jx + bo(1, 1, ipl)) = yzbuf(rdispl(ip) + jj + jx*rcount(ip)/nx)
2001 CALL timestop(handle)
2018 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2020 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:), &
2021 INTENT(IN) :: boin, boout
2022 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2026 CHARACTER(len=*),
PARAMETER :: routinen =
'cube_transpose_1'
2028 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2030 INTEGER :: handle, ip, ipl, ir, is, ixy, iz, mip, &
2032 INTEGER,
CONTIGUOUS,
DIMENSION(:),
POINTER :: rcount, rdispl, scount, sdispl
2033 INTEGER,
CONTIGUOUS,
DIMENSION(:, :),
POINTER :: pgrid
2034 INTEGER,
DIMENSION(2) :: dim, pos
2036 CALL timeset(routinen, handle)
2038 mip = fft_scratch%mip
2039 dim = fft_scratch%dim
2040 pos = fft_scratch%pos
2041 scount => fft_scratch%scount
2042 rcount => fft_scratch%rcount
2043 sdispl => fft_scratch%sdispl
2044 rdispl => fft_scratch%rdispl
2045 pgrid => fft_scratch%pgcube
2048 nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2049 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2056 ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2057 scount(ip) = nx*nz*ny
2058 sdispl(ip) = nx*nz*(boout(1, 2, ipl) - 1)
2061 ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2062 mz = maxval(boin(2, 3, :) - boin(1, 3, :) + 1)
2068 nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
2069 rcount(ip) = nx*nz*ny
2070 rdispl(ip) = nx*ny*mz*ip
2074 rbuf => fft_scratch%rbuf1
2076 CALL fft_scratch%cart_sub_comm(2)%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2084 nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
2086 is = boin(1, 3, ipl) + iz - 1
2087 ir = iz + nz*(ixy - 1)
2088 sout(is, ixy) = rbuf(ir, ip)
2094 CALL timestop(handle)
2108 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2110 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:), &
2111 INTENT(IN) :: boin, boout
2112 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2116 CHARACTER(len=*),
PARAMETER :: routinen =
'cube_transpose_2'
2118 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2120 INTEGER :: handle, ip, ipl, ir, ixy, iz, mip, mz, &
2122 INTEGER,
CONTIGUOUS,
DIMENSION(:),
POINTER :: rcount, rdispl, scount, sdispl
2123 INTEGER,
CONTIGUOUS,
DIMENSION(:, :),
POINTER :: pgrid
2124 INTEGER,
DIMENSION(2) :: dim, pos
2125 TYPE(mp_comm_type) :: sub_group
2127 CALL timeset(routinen, handle)
2129 sub_group = fft_scratch%cart_sub_comm(2)
2130 mip = fft_scratch%mip
2131 dim = fft_scratch%dim
2132 pos = fft_scratch%pos
2133 scount => fft_scratch%scount
2134 rcount => fft_scratch%rcount
2135 sdispl => fft_scratch%sdispl
2136 rdispl => fft_scratch%rdispl
2137 pgrid => fft_scratch%pgcube
2140 nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2141 ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2142 mz = maxval(boout(2, 3, :) - boout(1, 3, :) + 1)
2144 rbuf => fft_scratch%rbuf2
2153 nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
2154 DO iz = boout(1, 3, ipl), boout(2, 3, ipl)
2155 ir = iz - boout(1, 3, ipl) + 1 + (ixy - 1)*nz
2156 rbuf(ir, ip) = cin(iz, ixy)
2164 nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
2165 scount(ip) = nx*ny*nz
2166 sdispl(ip) = nx*ny*mz*ip
2170 nz = boout(2, 3, mip) - boout(1, 3, mip) + 1
2176 ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2177 rcount(ip) = nx*ny*nz
2178 rdispl(ip) = nx*nz*(boin(1, 2, ipl) - 1)
2182 CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2184 CALL timestop(handle)
2196 SUBROUTINE cube_transpose_3(cin, boin, boout, sout, fft_scratch)
2198 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2200 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:), &
2201 INTENT(IN) :: boin, boout
2202 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2206 CHARACTER(len=*),
PARAMETER :: routinen =
'cube_transpose_3'
2208 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2210 INTEGER :: handle, ip, ipl, ir, is, ixz, iy, lb, &
2211 mip, my, my_id, np, num_threads, nx, &
2213 INTEGER,
CONTIGUOUS,
DIMENSION(:),
POINTER :: rcount, rdispl, scount, sdispl
2214 INTEGER,
CONTIGUOUS,
DIMENSION(:, :),
POINTER :: pgrid
2215 INTEGER,
DIMENSION(2) :: dim, pos
2216 TYPE(mp_comm_type) :: sub_group
2218 CALL timeset(routinen, handle)
2220 sub_group = fft_scratch%cart_sub_comm(1)
2221 mip = fft_scratch%mip
2222 dim = fft_scratch%dim
2223 pos = fft_scratch%pos
2225 scount => fft_scratch%scount
2226 rcount => fft_scratch%rcount
2227 sdispl => fft_scratch%sdispl
2228 rdispl => fft_scratch%rdispl
2229 pgrid => fft_scratch%pgcube
2231 ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2232 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2238 nx = boout(2, 1, ipl) - boout(1, 1, ipl) + 1
2239 scount(ip) = nx*nz*ny
2240 sdispl(ip) = ny*nz*(boout(1, 1, ipl) - 1)
2243 nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
2244 my = maxval(boin(2, 2, :) - boin(1, 2, :) + 1)
2250 ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2251 rcount(ip) = nx*nz*ny
2252 rdispl(ip) = nx*my*nz*ip
2256 rbuf => fft_scratch%rbuf3
2264 IF (my_id < num_threads)
THEN
2265 lb = (
SIZE(rbuf, 2)*my_id)/num_threads
2266 ub = (
SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2267 rbuf(:, lb:ub) = 0.0_dp
2271 CALL sub_group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2279 ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2281 is = boin(1, 2, ipl) + iy - 1
2282 ir = iy + ny*(ixz - 1)
2283 sout(is, ixz) = rbuf(ir, ip)
2289 CALL timestop(handle)
2291 END SUBROUTINE cube_transpose_3
2301 SUBROUTINE cube_transpose_4(cin, boin, boout, sout, fft_scratch)
2303 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2305 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:), &
2306 INTENT(IN) :: boin, boout
2307 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2311 CHARACTER(len=*),
PARAMETER :: routinen =
'cube_transpose_4'
2313 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2315 INTEGER :: handle, ip, ipl, ir, iy, izx, lb, mip, &
2316 my, my_id, np, num_threads, nx, ny, &
2318 INTEGER,
CONTIGUOUS,
DIMENSION(:),
POINTER :: rcount, rdispl, scount, sdispl
2319 INTEGER,
CONTIGUOUS,
DIMENSION(:, :),
POINTER :: pgrid
2320 INTEGER,
DIMENSION(2) :: dim, pos
2321 TYPE(mp_comm_type) :: sub_group
2323 CALL timeset(routinen, handle)
2325 sub_group = fft_scratch%cart_sub_comm(1)
2326 mip = fft_scratch%mip
2327 dim = fft_scratch%dim
2328 pos = fft_scratch%pos
2330 scount => fft_scratch%scount
2331 rcount => fft_scratch%rcount
2332 sdispl => fft_scratch%sdispl
2333 rdispl => fft_scratch%rdispl
2334 pgrid => fft_scratch%pgcube
2336 nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2337 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2338 my = maxval(boout(2, 2, :) - boout(1, 2, :) + 1)
2340 rbuf => fft_scratch%rbuf4
2348 IF (my_id < num_threads)
THEN
2349 lb = (
SIZE(rbuf, 2)*my_id)/num_threads
2350 ub = (
SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2351 rbuf(:, lb:ub) = 0.0_dp
2359 ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2360 DO iy = boout(1, 2, ipl), boout(2, 2, ipl)
2361 ir = iy - boout(1, 2, ipl) + 1 + (izx - 1)*ny
2362 rbuf(ir, ip) = cin(iy, izx)
2370 ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2371 scount(ip) = nx*ny*nz
2372 sdispl(ip) = nx*nz*my*ip
2376 ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2382 nx = boin(2, 1, ipl) - boin(1, 1, ipl) + 1
2383 rcount(ip) = nx*ny*nz
2384 rdispl(ip) = ny*nz*(boin(1, 1, ipl) - 1)
2388 CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2390 CALL timestop(handle)
2392 END SUBROUTINE cube_transpose_4
2403 SUBROUTINE cube_transpose_5(cin, group, boin, boout, sout, fft_scratch)
2405 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2408 CLASS(mp_comm_type),
INTENT(IN) :: group
2409 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:),
INTENT(IN) :: boin, boout
2410 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(OUT),
CONTIGUOUS :: sout
2413 CHARACTER(len=*),
PARAMETER :: routinen =
'cube_transpose_5'
2415 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER,
CONTIGUOUS :: rbuf
2416 INTEGER :: handle, ip, ir, is, ixz, iy, lb, mip, &
2417 my, my_id, np, num_threads, nx, ny, &
2419 INTEGER,
DIMENSION(:),
POINTER,
CONTIGUOUS :: rcount, rdispl, scount, sdispl
2421 CALL timeset(routinen, handle)
2423 np = fft_scratch%sizes%numtask
2424 mip = fft_scratch%mip
2425 scount => fft_scratch%scount
2426 rcount => fft_scratch%rcount
2427 sdispl => fft_scratch%sdispl
2428 rdispl => fft_scratch%rdispl
2430 ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2431 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2436 nx = boout(2, 1, ip) - boout(1, 1, ip) + 1
2437 scount(ip) = nx*nz*ny
2438 sdispl(ip) = ny*nz*(boout(1, 1, ip) - 1)
2441 nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
2442 my = maxval(boin(2, 2, :) - boin(1, 2, :) + 1)
2447 ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
2448 rcount(ip) = nx*nz*ny
2449 rdispl(ip) = nx*my*nz*ip
2453 rbuf => fft_scratch%rbuf5
2461 IF (my_id < num_threads)
THEN
2462 lb = (
SIZE(rbuf, 2)*my_id)/num_threads
2463 ub = (
SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2464 rbuf(:, lb:ub) = 0.0_dp
2468 CALL group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2475 ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
2477 is = boin(1, 2, ip) + iy - 1
2478 ir = iy + ny*(ixz - 1)
2479 sout(is, ixz) = rbuf(ir, ip)
2485 CALL timestop(handle)
2487 END SUBROUTINE cube_transpose_5
2498 SUBROUTINE cube_transpose_6(cin, group, boin, boout, sout, fft_scratch)
2500 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2503 CLASS(mp_comm_type),
INTENT(IN) :: group
2504 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:),
INTENT(IN) :: boin, boout
2505 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(OUT),
CONTIGUOUS :: sout
2508 CHARACTER(len=*),
PARAMETER :: routinen =
'cube_transpose_6'
2510 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER,
CONTIGUOUS :: rbuf
2511 INTEGER :: handle, ip, ir, iy, izx, lb, mip, my, &
2512 my_id, np, num_threads, nx, ny, nz, ub
2513 INTEGER,
DIMENSION(:),
POINTER,
CONTIGUOUS :: rcount, rdispl, scount, sdispl
2515 CALL timeset(routinen, handle)
2517 np = fft_scratch%sizes%numtask
2518 mip = fft_scratch%mip
2519 scount => fft_scratch%scount
2520 rcount => fft_scratch%rcount
2521 sdispl => fft_scratch%sdispl
2522 rdispl => fft_scratch%rdispl
2524 nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2525 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2526 my = maxval(boout(2, 2, :) - boout(1, 2, :) + 1)
2528 rbuf => fft_scratch%rbuf5
2536 IF (my_id < num_threads)
THEN
2537 lb = (
SIZE(rbuf, 2)*my_id)/num_threads
2538 ub = (
SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2539 rbuf(:, lb:ub) = 0.0_dp
2546 ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
2547 DO iy = boout(1, 2, ip), boout(2, 2, ip)
2548 ir = iy - boout(1, 2, ip) + 1 + (izx - 1)*ny
2549 rbuf(ir, ip) = cin(iy, izx)
2556 ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
2557 scount(ip) = nx*ny*nz
2558 sdispl(ip) = nx*nz*my*ip
2562 ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2567 nx = boin(2, 1, ip) - boin(1, 1, ip) + 1
2568 rcount(ip) = nx*ny*nz
2569 rdispl(ip) = ny*nz*(boin(1, 1, ip) - 1)
2573 CALL group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2575 CALL timestop(handle)
2577 END SUBROUTINE cube_transpose_6
2582 SUBROUTINE init_fft_scratch_pool()
2584 CALL release_fft_scratch_pool()
2594 END SUBROUTINE init_fft_scratch_pool
2600 SUBROUTINE deallocate_fft_scratch_type(fft_scratch)
2603 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2605 COMPLEX(KIND=dp),
POINTER :: dummy_ptr_z
2609 IF (
ASSOCIATED(fft_scratch%ziptr))
THEN
2610 CALL fft_dealloc(fft_scratch%ziptr)
2612 IF (
ASSOCIATED(fft_scratch%zoptr))
THEN
2613 CALL fft_dealloc(fft_scratch%zoptr)
2615 IF (
ASSOCIATED(fft_scratch%p1buf))
THEN
2616 CALL fft_dealloc(fft_scratch%p1buf)
2618 IF (
ASSOCIATED(fft_scratch%p2buf))
THEN
2619 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2620 dummy_ptr_z => fft_scratch%p2buf(1, 1)
2623 CALL fft_dealloc(fft_scratch%p2buf)
2626 IF (
ASSOCIATED(fft_scratch%p3buf))
THEN
2627 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2628 dummy_ptr_z => fft_scratch%p3buf(1, 1)
2631 CALL fft_dealloc(fft_scratch%p3buf)
2634 IF (
ASSOCIATED(fft_scratch%p4buf))
THEN
2635 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2636 dummy_ptr_z => fft_scratch%p4buf(1, 1)
2639 CALL fft_dealloc(fft_scratch%p4buf)
2642 IF (
ASSOCIATED(fft_scratch%p5buf))
THEN
2643 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2644 dummy_ptr_z => fft_scratch%p5buf(1, 1)
2647 CALL fft_dealloc(fft_scratch%p5buf)
2650 IF (
ASSOCIATED(fft_scratch%p6buf))
THEN
2651 CALL fft_dealloc(fft_scratch%p6buf)
2653 IF (
ASSOCIATED(fft_scratch%p7buf))
THEN
2654 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2655 dummy_ptr_z => fft_scratch%p7buf(1, 1)
2658 CALL fft_dealloc(fft_scratch%p7buf)
2661 IF (
ASSOCIATED(fft_scratch%r1buf))
THEN
2662 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2663 dummy_ptr_z => fft_scratch%r1buf(1, 1)
2666 CALL fft_dealloc(fft_scratch%r1buf)
2669 IF (
ASSOCIATED(fft_scratch%r2buf))
THEN
2670 CALL fft_dealloc(fft_scratch%r2buf)
2672 IF (
ASSOCIATED(fft_scratch%tbuf))
THEN
2673 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2674 dummy_ptr_z => fft_scratch%tbuf(1, 1, 1)
2677 CALL fft_dealloc(fft_scratch%tbuf)
2680 IF (
ASSOCIATED(fft_scratch%a1buf))
THEN
2681 CALL fft_dealloc(fft_scratch%a1buf)
2683 IF (
ASSOCIATED(fft_scratch%a2buf))
THEN
2684 CALL fft_dealloc(fft_scratch%a2buf)
2686 IF (
ASSOCIATED(fft_scratch%a3buf))
THEN
2687 CALL fft_dealloc(fft_scratch%a3buf)
2689 IF (
ASSOCIATED(fft_scratch%a4buf))
THEN
2690 CALL fft_dealloc(fft_scratch%a4buf)
2692 IF (
ASSOCIATED(fft_scratch%a5buf))
THEN
2693 CALL fft_dealloc(fft_scratch%a5buf)
2695 IF (
ASSOCIATED(fft_scratch%a6buf))
THEN
2696 CALL fft_dealloc(fft_scratch%a6buf)
2698 IF (
ASSOCIATED(fft_scratch%scount))
THEN
2699 DEALLOCATE (fft_scratch%scount, fft_scratch%rcount, &
2700 fft_scratch%sdispl, fft_scratch%rdispl)
2702 IF (
ASSOCIATED(fft_scratch%rr))
THEN
2703 DEALLOCATE (fft_scratch%rr)
2705 IF (
ASSOCIATED(fft_scratch%xzbuf))
THEN
2706 DEALLOCATE (fft_scratch%xzbuf)
2708 IF (
ASSOCIATED(fft_scratch%yzbuf))
THEN
2709 DEALLOCATE (fft_scratch%yzbuf)
2711 IF (
ASSOCIATED(fft_scratch%xzbuf_sgl))
THEN
2712 DEALLOCATE (fft_scratch%xzbuf_sgl)
2714 IF (
ASSOCIATED(fft_scratch%yzbuf_sgl))
THEN
2715 DEALLOCATE (fft_scratch%yzbuf_sgl)
2717 IF (
ASSOCIATED(fft_scratch%ss))
THEN
2718 DEALLOCATE (fft_scratch%ss)
2720 IF (
ASSOCIATED(fft_scratch%tt))
THEN
2721 DEALLOCATE (fft_scratch%tt)
2723 IF (
ASSOCIATED(fft_scratch%pgrid))
THEN
2724 DEALLOCATE (fft_scratch%pgrid)
2726 IF (
ASSOCIATED(fft_scratch%pgcube))
THEN
2727 DEALLOCATE (fft_scratch%pgcube)
2729 IF (
ASSOCIATED(fft_scratch%xcor))
THEN
2730 DEALLOCATE (fft_scratch%xcor, fft_scratch%zcor)
2732 IF (
ASSOCIATED(fft_scratch%pzcoord))
THEN
2733 DEALLOCATE (fft_scratch%pzcoord)
2735 IF (
ASSOCIATED(fft_scratch%xzcount))
THEN
2736 DEALLOCATE (fft_scratch%xzcount, fft_scratch%yzcount)
2737 DEALLOCATE (fft_scratch%xzdispl, fft_scratch%yzdispl)
2739 fft_scratch%rsratio = 1._dp
2741 IF (
ASSOCIATED(fft_scratch%rbuf1))
THEN
2742 DEALLOCATE (fft_scratch%rbuf1)
2744 IF (
ASSOCIATED(fft_scratch%rbuf2))
THEN
2745 DEALLOCATE (fft_scratch%rbuf2)
2747 IF (
ASSOCIATED(fft_scratch%rbuf3))
THEN
2748 DEALLOCATE (fft_scratch%rbuf3)
2750 IF (
ASSOCIATED(fft_scratch%rbuf4))
THEN
2751 DEALLOCATE (fft_scratch%rbuf4)
2753 IF (
ASSOCIATED(fft_scratch%rbuf5))
THEN
2754 DEALLOCATE (fft_scratch%rbuf5)
2756 IF (
ASSOCIATED(fft_scratch%rbuf6))
THEN
2757 DEALLOCATE (fft_scratch%rbuf6)
2761 CALL fft_scratch%cart_sub_comm(1)%free()
2764 CALL fft_scratch%cart_sub_comm(2)%free()
2774 END SUBROUTINE deallocate_fft_scratch_type
2779 SUBROUTINE release_fft_scratch_pool()
2787 IF (
ASSOCIATED(fft_scratch))
THEN
2788 fft_scratch_current => fft_scratch
2789 fft_scratch => fft_scratch_current%fft_scratch_next
2790 NULLIFY (fft_scratch_current%fft_scratch_next)
2792 CALL deallocate_fft_scratch_type(fft_scratch_current%fft_scratch)
2794 DEALLOCATE (fft_scratch_current%fft_scratch)
2795 DEALLOCATE (fft_scratch_current)
2803 END SUBROUTINE release_fft_scratch_pool
2808 SUBROUTINE resize_fft_scratch_pool()
2810 INTEGER :: last_tick, nscratch
2815 last_tick = huge(last_tick)
2816 NULLIFY (fft_scratch_old)
2821 IF (
ASSOCIATED(fft_scratch_current))
THEN
2822 nscratch = nscratch + 1
2824 IF (.NOT. fft_scratch_current%fft_scratch%in_use)
THEN
2825 IF (fft_scratch_current%fft_scratch%last_tick < last_tick)
THEN
2826 last_tick = fft_scratch_current%fft_scratch%last_tick
2827 fft_scratch_old => fft_scratch_current
2830 fft_scratch_current => fft_scratch_current%fft_scratch_next
2839 IF (
ASSOCIATED(fft_scratch_old))
THEN
2842 IF (
ASSOCIATED(fft_scratch_current))
THEN
2844 IF (
ASSOCIATED(fft_scratch_current%fft_scratch_next, fft_scratch_old))
THEN
2846 fft_scratch_current%fft_scratch_next => fft_scratch_old%fft_scratch_next
2849 CALL deallocate_fft_scratch_type(fft_scratch_old%fft_scratch)
2850 DEALLOCATE (fft_scratch_old%fft_scratch)
2851 DEALLOCATE (fft_scratch_old)
2854 fft_scratch_current => fft_scratch_current%fft_scratch_next
2862 cpwarn(
"The number of the scratches exceeded the limit, but none could be deallocated")
2866 END SUBROUTINE resize_fft_scratch_pool
2877 INTEGER,
INTENT(IN) :: tf_type
2878 INTEGER,
DIMENSION(:),
INTENT(IN) :: n
2880 OPTIONAL :: fft_sizes
2882 CHARACTER(len=*),
PARAMETER :: routinen =
'get_fft_scratch'
2884 INTEGER :: coord(2), dim(2), handle, i, ix, iz, lg, lmax, m1, m2, &
2885 mcx2, mcy3, mcz1, mcz2, mg, mmax, mx1, mx2, my1, my3, mz1, mz2, mz3, &
2886 nbx, nbz, nm, nmax, nmray, np, nx, ny, nyzray, nz, pos(2)
2887 INTEGER,
DIMENSION(3) :: pcoord
2889 LOGICAL,
DIMENSION(2) :: dims
2893 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2895 INTEGER(KIND=C_SIZE_T) :: length
2896 TYPE(c_ptr) :: cptr_r1buf, cptr_tbuf, &
2897 cptr_p2buf, cptr_p3buf, cptr_p4buf, cptr_p5buf, cptr_p7buf
2899 CALL timeset(routinen, handle)
2903 CALL resize_fft_scratch_pool()
2909 IF (
ASSOCIATED(fft_scratch_current))
THEN
2910 IF (fft_scratch_current%fft_scratch%in_use)
THEN
2911 fft_scratch_last => fft_scratch_current
2912 fft_scratch_current => fft_scratch_current%fft_scratch_next
2915 IF (tf_type /= fft_scratch_current%fft_scratch%tf_type)
THEN
2916 fft_scratch_last => fft_scratch_current
2917 fft_scratch_current => fft_scratch_current%fft_scratch_next
2920 IF (.NOT. all(n == fft_scratch_current%fft_scratch%nfft))
THEN
2921 fft_scratch_last => fft_scratch_current
2922 fft_scratch_current => fft_scratch_current%fft_scratch_next
2925 IF (
PRESENT(fft_sizes))
THEN
2926 IF (fft_sizes%gs_group /= fft_scratch_current%fft_scratch%group)
THEN
2927 fft_scratch_last => fft_scratch_current
2928 fft_scratch_current => fft_scratch_current%fft_scratch_next
2931 CALL is_equal(fft_sizes, fft_scratch_current%fft_scratch%sizes, equal)
2932 IF (.NOT. equal)
THEN
2933 fft_scratch_last => fft_scratch_current
2934 fft_scratch_current => fft_scratch_current%fft_scratch_next
2939 fft_scratch => fft_scratch_current%fft_scratch
2940 fft_scratch_current%fft_scratch%in_use = .true.
2945 ALLOCATE (fft_scratch_new)
2946 ALLOCATE (fft_scratch_new%fft_scratch)
2948 IF (tf_type .NE. 400)
THEN
2949 fft_scratch_new%fft_scratch%sizes = fft_sizes
2950 np = fft_sizes%numtask
2951 ALLOCATE (fft_scratch_new%fft_scratch%scount(0:np - 1), fft_scratch_new%fft_scratch%rcount(0:np - 1), &
2952 fft_scratch_new%fft_scratch%sdispl(0:np - 1), fft_scratch_new%fft_scratch%rdispl(0:np - 1), &
2953 fft_scratch_new%fft_scratch%pgcube(0:np - 1, 2))
2956 SELECT CASE (tf_type)
2966 CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
2967 CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
2968 CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx2*mz2, n(2)])
2969 CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx2*mz2])
2970 CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
2971 CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
2972 fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
2974 dim = fft_sizes%rs_group%num_pe_cart
2975 pos = fft_sizes%rs_group%mepos_cart
2976 fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
2977 fft_scratch_new%fft_scratch%dim = dim
2978 fft_scratch_new%fft_scratch%pos = pos
2979 mcz1 = fft_sizes%mcz1
2980 mcx2 = fft_sizes%mcx2
2981 mcz2 = fft_sizes%mcz2
2982 mcy3 = fft_sizes%mcy3
2983 ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:dim(2) - 1))
2984 ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:dim(2) - 1))
2985 ALLOCATE (fft_scratch_new%fft_scratch%rbuf3(mx2*mz3*mcy3, 0:dim(1) - 1))
2986 ALLOCATE (fft_scratch_new%fft_scratch%rbuf4(mx2*mz2*mcy3, 0:dim(1) - 1))
2988 dims = (/.true., .false./)
2989 CALL fft_scratch_new%fft_scratch%cart_sub_comm(1)%from_sub(fft_sizes%rs_group, dims)
2990 dims = (/.false., .true./)
2991 CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
2994 DO i = 0, dim(1) - 1
2995 coord = (/i, pos(2)/)
2996 CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 1))
2998 DO i = 0, dim(2) - 1
2999 coord = (/pos(1), i/)
3000 CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
3005 fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a2buf,
fft_plan_style)
3007 fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf,
fft_plan_style)
3009 fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf,
fft_plan_style)
3011 fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf,
fft_plan_style)
3013 fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf,
fft_plan_style)
3015 fft_scratch_new%fft_scratch%a2buf, fft_scratch_new%fft_scratch%a1buf,
fft_plan_style)
3023 CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
3024 CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
3025 fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
3026 CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx1*mz1, n(2)])
3027 CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx1*mz1])
3028 CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
3029 CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
3031 dim = fft_sizes%rs_group%num_pe_cart
3032 pos = fft_sizes%rs_group%mepos_cart
3033 fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
3034 fft_scratch_new%fft_scratch%dim = dim
3035 fft_scratch_new%fft_scratch%pos = pos
3036 mcy3 = fft_sizes%mcy3
3037 ALLOCATE (fft_scratch_new%fft_scratch%rbuf5(mx1*mz3*mcy3, 0:dim(1) - 1))
3038 ALLOCATE (fft_scratch_new%fft_scratch%rbuf6(mx1*mz1*mcy3, 0:dim(1) - 1))
3042 fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a3buf,
fft_plan_style)
3044 fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf,
fft_plan_style)
3046 fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf,
fft_plan_style)
3048 fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf,
fft_plan_style)
3050 fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf,
fft_plan_style)
3052 fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a1buf,
fft_plan_style)
3059 lmax = fft_sizes%lmax
3060 mmax = fft_sizes%mmax
3063 np = fft_sizes%numtask
3064 nmray = fft_sizes%nmray
3065 nyzray = fft_sizes%nyzray
3066 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
3067 length = int(2*
dp_size*max(mmax, 1)*max(lmax, 1), kind=c_size_t)
3070 CALL c_f_pointer(cptr_r1buf, fft_scratch_new%fft_scratch%r1buf, (/max(mmax, 1), max(lmax, 1)/))
3071 length = int(2*
dp_size*max(ny, 1)*max(nz, 1)*max(nx, 1), kind=c_size_t)
3074 CALL c_f_pointer(cptr_tbuf, fft_scratch_new%fft_scratch%tbuf, (/max(ny, 1), max(nz, 1), max(nx, 1)/))
3076 CALL fft_alloc(fft_scratch_new%fft_scratch%r1buf, [mmax, lmax])
3077 CALL fft_alloc(fft_scratch_new%fft_scratch%tbuf, [ny, nz, nx])
3079 fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
3080 CALL fft_alloc(fft_scratch_new%fft_scratch%r2buf, [lg, mg])
3082 IF (alltoall_sgl)
THEN
3083 ALLOCATE (fft_scratch_new%fft_scratch%ss(mmax, lmax))
3084 ALLOCATE (fft_scratch_new%fft_scratch%tt(nm, 0:np - 1))
3086 ALLOCATE (fft_scratch_new%fft_scratch%rr(nm, 0:np - 1))
3091 fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf,
fft_plan_style)
3093 fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf,
fft_plan_style)
3095 fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%r2buf,
fft_plan_style)
3097 fft_scratch_new%fft_scratch%r2buf, fft_scratch_new%fft_scratch%r1buf,
fft_plan_style)
3099 fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf,
fft_plan_style)
3101 fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf,
fft_plan_style)
3108 mcx2 = fft_sizes%mcx2
3111 nmax = fft_sizes%nmax
3112 nmray = fft_sizes%nmray
3113 nyzray = fft_sizes%nyzray
3114 m1 = fft_sizes%r_dim(1)
3115 m2 = fft_sizes%r_dim(2)
3118 CALL fft_alloc(fft_scratch_new%fft_scratch%p1buf, [mx1*my1, n(3)])
3119 CALL fft_alloc(fft_scratch_new%fft_scratch%p6buf, [lg, mg])
3120 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
3121 length = int(2*
dp_size*max(n(3), 1)*max(mx1*my1, 1), kind=c_size_t)
3124 CALL c_f_pointer(cptr_p2buf, fft_scratch_new%fft_scratch%p2buf, (/max(n(3), 1), max(mx1*my1, 1)/))
3125 length = int(2*
dp_size*max(mx2*mz2, 1)*max(n(2), 1), kind=c_size_t)
3128 CALL c_f_pointer(cptr_p3buf, fft_scratch_new%fft_scratch%p3buf, (/max(mx2*mz2, 1), max(n(2), 1)/))
3129 length = int(2*
dp_size*max(n(2), 1)*max(mx2*mz2, 1), kind=c_size_t)
3132 CALL c_f_pointer(cptr_p4buf, fft_scratch_new%fft_scratch%p4buf, (/max(n(2), 1), max(mx2*mz2, 1)/))
3133 length = int(2*
dp_size*max(nyzray, 1)*max(n(1), 1), kind=c_size_t)
3136 CALL c_f_pointer(cptr_p5buf, fft_scratch_new%fft_scratch%p5buf, (/max(nyzray, 1), max(n(1), 1)/))
3137 length = int(2*
dp_size*max(mg, 1)*max(lg, 1), kind=c_size_t)
3140 CALL c_f_pointer(cptr_p7buf, fft_scratch_new%fft_scratch%p7buf, (/max(mg, 1), max(lg, 1)/))
3142 CALL fft_alloc(fft_scratch_new%fft_scratch%p2buf, [n(3), mx1*my1])
3143 CALL fft_alloc(fft_scratch_new%fft_scratch%p3buf, [mx2*mz2, n(2)])
3144 CALL fft_alloc(fft_scratch_new%fft_scratch%p4buf, [n(2), mx2*mz2])
3145 CALL fft_alloc(fft_scratch_new%fft_scratch%p5buf, [nyzray, n(1)])
3146 CALL fft_alloc(fft_scratch_new%fft_scratch%p7buf, [mg, lg])
3148 IF (alltoall_sgl)
THEN
3149 ALLOCATE (fft_scratch_new%fft_scratch%yzbuf_sgl(mg*lg))
3150 ALLOCATE (fft_scratch_new%fft_scratch%xzbuf_sgl(n(2)*mx2*mz2))
3152 ALLOCATE (fft_scratch_new%fft_scratch%yzbuf(mg*lg))
3153 ALLOCATE (fft_scratch_new%fft_scratch%xzbuf(n(2)*mx2*mz2))
3155 ALLOCATE (fft_scratch_new%fft_scratch%pgrid(0:m1 - 1, 0:m2 - 1))
3156 ALLOCATE (fft_scratch_new%fft_scratch%xcor(nbx))
3157 ALLOCATE (fft_scratch_new%fft_scratch%zcor(nbz))
3158 ALLOCATE (fft_scratch_new%fft_scratch%pzcoord(0:np - 1))
3159 ALLOCATE (fft_scratch_new%fft_scratch%xzcount(0:np - 1), &
3160 fft_scratch_new%fft_scratch%yzcount(0:np - 1))
3161 ALLOCATE (fft_scratch_new%fft_scratch%xzdispl(0:np - 1), &
3162 fft_scratch_new%fft_scratch%yzdispl(0:np - 1))
3163 fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
3165 dim = fft_sizes%rs_group%num_pe_cart
3166 pos = fft_sizes%rs_group%mepos_cart
3167 fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
3168 fft_scratch_new%fft_scratch%dim = dim
3169 fft_scratch_new%fft_scratch%pos = pos
3170 mcz1 = fft_sizes%mcz1
3171 mcz2 = fft_sizes%mcz2
3172 ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:dim(2) - 1))
3173 ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:dim(2) - 1))
3175 dims = (/.false., .true./)
3176 CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
3179 DO i = 0, dim(2) - 1
3180 coord = (/pos(1), i/)
3181 CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
3188 CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgrid(ix, iz))
3194 CALL fft_sizes%rs_group%coords(i, pcoord)
3195 fft_scratch_new%fft_scratch%pzcoord(i) = pcoord(2)
3200 fft_scratch_new%fft_scratch%p1buf, fft_scratch_new%fft_scratch%p2buf,
fft_plan_style)
3202 fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p4buf,
fft_plan_style)
3204 fft_scratch_new%fft_scratch%p5buf, fft_scratch_new%fft_scratch%p6buf,
fft_plan_style)
3206 fft_scratch_new%fft_scratch%p6buf, fft_scratch_new%fft_scratch%p7buf,
fft_plan_style)
3208 fft_scratch_new%fft_scratch%p4buf, fft_scratch_new%fft_scratch%p3buf,
fft_plan_style)
3210 fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p1buf,
fft_plan_style)
3214 CALL fft_alloc(fft_scratch_new%fft_scratch%ziptr, n)
3215 CALL fft_alloc(fft_scratch_new%fft_scratch%zoptr, n)
3219 fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr,
fft_plan_style)
3221 fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr,
fft_plan_style)
3224 fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr,
fft_plan_style)
3226 fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr,
fft_plan_style)
3230 NULLIFY (fft_scratch_new%fft_scratch_next)
3231 fft_scratch_new%fft_scratch%fft_scratch_id = &
3232 fft_scratch_last%fft_scratch%fft_scratch_id + 1
3233 fft_scratch_new%fft_scratch%in_use = .true.
3234 fft_scratch_new%fft_scratch%nfft = n
3235 fft_scratch_last%fft_scratch_next => fft_scratch_new
3236 fft_scratch_new%fft_scratch%tf_type = tf_type
3237 fft_scratch => fft_scratch_new%fft_scratch
3245 CALL timestop(handle)
3257 INTEGER :: scratch_id
3260 scratch_id = fft_scratch%fft_scratch_id
3264 IF (
ASSOCIATED(fft_scratch_current))
THEN
3265 IF (scratch_id == fft_scratch_current%fft_scratch%fft_scratch_id)
THEN
3266 fft_scratch%in_use = .false.
3267 NULLIFY (fft_scratch)
3270 fft_scratch_current => fft_scratch_current%fft_scratch_next
3290 SUBROUTINE sparse_alltoall(rs, scount, sdispl, rq, rcount, rdispl, group)
3291 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER :: rs
3292 INTEGER,
DIMENSION(:),
POINTER :: scount, sdispl
3293 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER :: rq
3294 INTEGER,
DIMENSION(:),
POINTER :: rcount, rdispl
3296 CLASS(mp_comm_type),
INTENT(IN) :: group
3298 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER :: msgin, msgout
3299 INTEGER :: ip, n, nr, ns, pos
3300 TYPE(mp_request_type),
ALLOCATABLE,
DIMENSION(:) :: rreq, sreq
3305 ALLOCATE (sreq(0:n - 1))
3306 ALLOCATE (rreq(0:n - 1))
3309 IF (rcount(ip) == 0) cycle
3310 IF (ip == pos) cycle
3311 msgout => rq(rdispl(ip) + 1:rdispl(ip) + rcount(ip))
3312 CALL group%irecv(msgout, ip, rreq(nr))
3317 IF (scount(ip) == 0) cycle
3318 IF (ip == pos) cycle
3319 msgin => rs(sdispl(ip) + 1:sdispl(ip) + scount(ip))
3320 CALL group%isend(msgin, ip, sreq(ns))
3323 IF (rcount(pos) /= 0)
THEN
3324 IF (rcount(pos) /= scount(pos)) cpabort(
"")
3325 rq(rdispl(pos) + 1:rdispl(pos) + rcount(pos)) = rs(sdispl(pos) + 1:sdispl(pos) + scount(pos))
3327 CALL mp_waitall(sreq(0:ns - 1))
3328 CALL mp_waitall(rreq(0:nr - 1))
3333 END SUBROUTINE sparse_alltoall
3342 SUBROUTINE is_equal(fft_size_1, fft_size_2, equal)
3348 equal = equal .AND. fft_size_1%nx == fft_size_2%nx
3349 equal = equal .AND. fft_size_1%ny == fft_size_2%ny
3350 equal = equal .AND. fft_size_1%nz == fft_size_2%nz
3352 equal = equal .AND. fft_size_1%lmax == fft_size_2%lmax
3353 equal = equal .AND. fft_size_1%mmax == fft_size_2%mmax
3354 equal = equal .AND. fft_size_1%nmax == fft_size_2%nmax
3356 equal = equal .AND. fft_size_1%mx1 == fft_size_2%mx1
3357 equal = equal .AND. fft_size_1%mx2 == fft_size_2%mx2
3358 equal = equal .AND. fft_size_1%mx3 == fft_size_2%mx3
3360 equal = equal .AND. fft_size_1%my1 == fft_size_2%my1
3361 equal = equal .AND. fft_size_1%my2 == fft_size_2%my2
3362 equal = equal .AND. fft_size_1%my3 == fft_size_2%my3
3364 equal = equal .AND. fft_size_1%mcz1 == fft_size_2%mcz1
3365 equal = equal .AND. fft_size_1%mcx2 == fft_size_2%mcx2
3366 equal = equal .AND. fft_size_1%mcz2 == fft_size_2%mcz2
3367 equal = equal .AND. fft_size_1%mcy3 == fft_size_2%mcy3
3369 equal = equal .AND. fft_size_1%lg == fft_size_2%lg
3370 equal = equal .AND. fft_size_1%mg == fft_size_2%mg
3372 equal = equal .AND. fft_size_1%nbx == fft_size_2%nbx
3373 equal = equal .AND. fft_size_1%nbz == fft_size_2%nbz
3375 equal = equal .AND. fft_size_1%nmray == fft_size_2%nmray
3376 equal = equal .AND. fft_size_1%nyzray == fft_size_2%nyzray
3378 equal = equal .AND. fft_size_1%gs_group == fft_size_2%gs_group
3379 equal = equal .AND. fft_size_1%rs_group == fft_size_2%rs_group
3381 equal = equal .AND. all(fft_size_1%g_pos == fft_size_2%g_pos)
3382 equal = equal .AND. all(fft_size_1%r_pos == fft_size_2%r_pos)
3383 equal = equal .AND. all(fft_size_1%r_dim == fft_size_2%r_dim)
3385 equal = equal .AND. fft_size_1%numtask == fft_size_2%numtask
3387 END SUBROUTINE is_equal
various routines to log and control the output. The idea is that decisions about where to log should ...
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
subroutine, public fft_do_init(fft_type, wisdom_file)
...
subroutine, public fft_3d(plan, scale, zin, zout, stat)
...
subroutine, public fft_create_plan_1dm(plan, fft_type, fsign, trans, n, m, zin, zout, plan_style)
...
subroutine, public fft_destroy_plan(plan)
...
subroutine, public fft_get_lengths(fft_type, DATA, max_length)
...
integer function, public fft_library(fftlib)
Interface to FFT libraries.
subroutine, public fft_1dm(plan, zin, zout, scale, stat)
...
subroutine, public fft_create_plan_3d(plan, fft_type, fft_in_place, fsign, n, zin, zout, plan_style)
...
subroutine, public fft_do_cleanup(fft_type, wisdom_file, ionode)
...
Type to store data about a (1D or 3D) FFT, including FFTW plan.
Defines the basic variable types.
integer, parameter, public dp_size
integer, parameter, public dp
integer, parameter, public sp
Definition of mathematical constants and functions.
complex(kind=dp), parameter, public z_zero
Interface to the message passing library MPI.
type(mp_comm_type), parameter, public mp_comm_null
Fortran API for the offload package, which is written in C.
integer function, public offload_free_pinned_mem(buffer)
free pinned memory
integer function, public offload_malloc_pinned_mem(buffer, length)
allocate pinned memory.