30 USE iso_c_binding,
ONLY: c_f_pointer,&
53#include "../base/base_uses.f90"
57 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'fft_tools'
63 INTEGER :: nx = 0, ny = 0, nz = 0
64 INTEGER :: lmax = 0, mmax = 0, nmax = 0
65 INTEGER :: mx1 = 0, mx2 = 0, mx3 = 0
66 INTEGER :: my1 = 0, my2 = 0, my3 = 0
67 INTEGER :: mz1 = 0, mz2 = 0, mz3 = 0
68 INTEGER :: mcz1 = 0, mcz2 = 0, mcy3 = 0, mcx2 = 0
69 INTEGER :: lg = 0, mg = 0
70 INTEGER :: nbx = 0, nbz = 0
71 INTEGER :: nmray = 0, nyzray = 0
73 INTEGER,
DIMENSION(2) :: g_pos = 0, r_pos = 0, r_dim = 0
74 INTEGER :: numtask = 0
78 INTEGER :: fft_scratch_id = 0
79 INTEGER :: tf_type = -1
80 LOGICAL :: in_use = .true.
82 INTEGER,
DIMENSION(3) :: nfft = -1
85 INTEGER,
DIMENSION(2) :: dim = -1, pos = -1
87 COMPLEX(KIND=dp),
DIMENSION(:, :, :),
POINTER,
CONTIGUOUS &
88 :: ziptr => null(), zoptr => null()
90 COMPLEX(KIND=dp),
DIMENSION(:, :),
CONTIGUOUS,
POINTER &
91 :: p1buf => null(), p2buf => null(), p3buf => null(), p4buf => null(), &
92 p5buf => null(), p6buf => null(), p7buf => null()
94 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER,
CONTIGUOUS &
95 :: r1buf => null(), r2buf => null()
96 COMPLEX(KIND=dp),
DIMENSION(:, :, :),
POINTER,
CONTIGUOUS &
99 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER,
CONTIGUOUS &
100 :: a1buf => null(), a2buf => null(), a3buf => null(), &
101 a4buf => null(), a5buf => null(), a6buf => null()
103 INTEGER,
DIMENSION(:),
CONTIGUOUS,
POINTER :: scount => null(), rcount => null(), sdispl => null(), rdispl => null()
104 INTEGER,
DIMENSION(:, :),
CONTIGUOUS,
POINTER :: pgcube => null()
105 INTEGER,
DIMENSION(:),
CONTIGUOUS,
POINTER :: xzcount => null(), yzcount => null(), xzdispl => null(), yzdispl => null()
106 INTEGER :: in = 0, mip = -1
107 REAL(kind=
dp) :: rsratio = 1.0_dp
108 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER,
CONTIGUOUS &
109 :: xzbuf => null(), yzbuf => null()
110 COMPLEX(KIND=sp),
DIMENSION(:),
POINTER,
CONTIGUOUS &
111 :: xzbuf_sgl => null(), yzbuf_sgl => null()
112 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER,
CONTIGUOUS &
113 :: rbuf1 => null(), rbuf2 => null(), rbuf3 => null(), rbuf4 => null(), &
114 rbuf5 => null(), rbuf6 => null(), rr => null()
115 COMPLEX(KIND=sp),
DIMENSION(:, :),
POINTER,
CONTIGUOUS &
116 :: ss => null(), tt => null()
117 INTEGER,
DIMENSION(:, :),
POINTER,
CONTIGUOUS :: pgrid => null()
118 INTEGER,
DIMENSION(:),
POINTER,
CONTIGUOUS :: xcor => null(), zcor => null(), pzcoord => null()
121 INTEGER :: last_tick = -1
139 PUBLIC :: fft_alloc, fft_dealloc
148 INTEGER,
PARAMETER :: fft_radix_allowed = 495, fft_radix_disallowed = 496
151 REAL(kind=
dp),
PARAMETER :: ratio_sparse_alltoall = 0.5_dp
155 LOGICAL,
SAVE :: alltoall_sgl = .false.
156 LOGICAL,
SAVE :: use_fftsg_sizes = .true.
167 MODULE PROCEDURE fft3d_s, fft3d_ps, fft3d_pb
184 SUBROUTINE init_fft(fftlib, alltoall, fftsg_sizes, pool_limit, wisdom_file, &
187 CHARACTER(LEN=*),
INTENT(IN) :: fftlib
188 LOGICAL,
INTENT(IN) :: alltoall, fftsg_sizes
189 INTEGER,
INTENT(IN) :: pool_limit
190 CHARACTER(LEN=*),
INTENT(IN) :: wisdom_file
191 INTEGER,
INTENT(IN) :: plan_style
193 use_fftsg_sizes = fftsg_sizes
194 alltoall_sgl = alltoall
199 IF (
fft_type <= 0) cpabort(
"Unknown FFT library: "//trim(fftlib))
217 CHARACTER(LEN=*),
INTENT(IN) :: wisdom_file
221 CALL release_fft_scratch_pool()
240 INTEGER,
INTENT(IN) :: radix_in
241 INTEGER,
INTENT(OUT) :: radix_out
242 INTEGER,
INTENT(IN) :: operation
244 INTEGER,
PARAMETER :: fft_type_sg = 1
246 INTEGER :: i, iloc, ldata
247 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: data
250 ALLOCATE (
DATA(ldata))
254 IF (use_fftsg_sizes)
THEN
262 IF (
DATA(i) == radix_in)
THEN
266 IF (operation == fft_radix_allowed)
THEN
268 ELSE IF (
DATA(i) > radix_in)
THEN
276 cpabort(
"Index to radix array not found.")
279 IF (operation == fft_radix_allowed)
THEN
280 IF (
DATA(iloc) == radix_in)
THEN
281 radix_out = fft_radix_allowed
283 radix_out = fft_radix_disallowed
287 IF (
DATA(iloc) == radix_in)
THEN
288 radix_out =
DATA(iloc)
290 IF (abs(
DATA(iloc - 1) - radix_in) <= &
291 abs(
DATA(iloc) - radix_in))
THEN
292 radix_out =
DATA(iloc - 1)
294 radix_out =
DATA(iloc)
299 radix_out =
DATA(iloc)
303 IF (mod(
DATA(i), 2) == 1)
THEN
308 IF (mod(radix_out, 2) == 0)
THEN
309 cpabort(
"No odd radix found.")
313 cpabort(
"Disallowed radix operation.")
330 SUBROUTINE fft_fw1d(n, m, trans, zin, zout, scale, stat)
331 INTEGER,
INTENT(in) :: n, m
332 LOGICAL,
INTENT(in) :: trans
333 COMPLEX(kind=dp),
DIMENSION(*),
INTENT(inout) :: zin, zout
334 REAL(kind=
dp),
INTENT(in) :: scale
335 INTEGER,
INTENT(out) :: stat
337 CHARACTER(len=*),
PARAMETER :: routinen =
'fft_fw1d'
342 CALL timeset(routinen, handle)
349 CALL cp_warn(__location__, &
350 "FFT library in use cannot handle transformation of an arbitrary length.")
354 CALL timestop(handle)
369 SUBROUTINE fft3d_s(fsign, n, zin, zout, status, debug)
371 INTEGER,
INTENT(IN) :: fsign
372 INTEGER,
DIMENSION(:),
INTENT(INOUT) :: n
373 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
375 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
376 INTENT(INOUT),
OPTIONAL,
TARGET :: zout
377 INTEGER,
INTENT(OUT),
OPTIONAL :: status
378 LOGICAL,
INTENT(IN),
OPTIONAL :: debug
380 CHARACTER(len=*),
PARAMETER :: routineN =
'fft3d_s'
382 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
384 COMPLEX(KIND=dp),
DIMENSION(1, 1, 1),
TARGET :: zdum
385 INTEGER :: handle, ld(3), lo(3), output_unit, sign, &
387 LOGICAL :: fft_in_place, test
388 REAL(KIND=
dp) :: in_sum, norm, out_sum
391 CALL timeset(routinen, handle)
394 IF (fsign ==
fwfft)
THEN
395 norm = 1.0_dp/real(product(n), kind=
dp)
396 ELSE IF (fsign ==
bwfft)
THEN
399 cpabort(
"Unknown FFT direction!")
402 IF (
PRESENT(debug))
THEN
408 IF (
PRESENT(zout))
THEN
409 fft_in_place = .false.
411 fft_in_place = .true.
415 in_sum = sum(abs(zin))
422 IF (n(1) /= ld(1) .OR. n(2) /= ld(2) .OR. n(3) /= ld(3))
THEN
423 cpabort(
"Size and dimension (zin) have to be the same.")
429 IF (fft_in_place)
THEN
431 IF (fsign ==
fwfft)
THEN
432 CALL fft_3d(fft_scratch%fft_plan(1), norm, zin, zoptr, stat)
434 CALL fft_3d(fft_scratch%fft_plan(2), norm, zin, zoptr, stat)
437 IF (fsign ==
fwfft)
THEN
438 CALL fft_3d(fft_scratch%fft_plan(3), norm, zin, zout, stat)
440 CALL fft_3d(fft_scratch%fft_plan(4), norm, zin, zout, stat)
446 IF (
PRESENT(zout))
THEN
447 lo(1) =
SIZE(zout, 1)
448 lo(2) =
SIZE(zout, 2)
449 lo(3) =
SIZE(zout, 3)
450 IF (n(1) /= lo(1) .OR. n(2) /= lo(2) .OR. n(3) /= lo(3))
THEN
451 cpabort(
"Size and dimension (zout) have to be the same.")
455 IF (
PRESENT(status))
THEN
459 IF (test .AND. output_unit > 0)
THEN
460 IF (
PRESENT(zout))
THEN
461 out_sum = sum(abs(zout))
462 WRITE (output_unit,
'(A)')
" Out of place 3D FFT (local) : fft3d_s"
463 WRITE (output_unit,
'(A,T60,3I7)')
" Transform lengths ", n
464 WRITE (output_unit,
'(A,T60,3I7)')
" Input array dimensions ", ld
465 WRITE (output_unit,
'(A,T60,3I7)')
" Output array dimensions ", lo
466 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of input data ", in_sum
467 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of output data ", out_sum
469 out_sum = sum(abs(zin))
470 WRITE (output_unit,
'(A)')
" In place 3D FFT (local) : fft3d_s"
471 WRITE (output_unit,
'(A,T60,3I7)')
" Transform lengths ", n
472 WRITE (output_unit,
'(A,T60,3I7)')
" Input/output array dimensions ", ld
473 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of input data ", in_sum
474 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of output data ", out_sum
478 CALL timestop(handle)
480 END SUBROUTINE fft3d_s
495 SUBROUTINE fft3d_ps(fsign, n, cin, gin, rs_group, yzp, nyzray, &
498 INTEGER,
INTENT(IN) :: fsign
499 INTEGER,
DIMENSION(:),
INTENT(IN) :: n
500 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
502 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
505 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:), &
507 INTEGER,
CONTIGUOUS,
DIMENSION(0:),
INTENT(IN) :: nyzray
508 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:, :), &
510 INTEGER,
INTENT(OUT),
OPTIONAL :: status
511 LOGICAL,
INTENT(IN),
OPTIONAL :: debug
513 CHARACTER(len=*),
PARAMETER :: routineN =
'fft3d_ps'
515 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
516 POINTER :: pbuf, qbuf, rbuf, sbuf
517 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
519 INTEGER :: g_pos, handle, lg, lmax, mcx2, mcz1, mcz2, mg, mmax, mx1, mx2, my1, mz2, n1, n2, &
520 nmax, numtask, nx, ny, nz, output_unit, r_dim(2), r_pos(2), rp, sign, stat
521 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: p2p
523 REAL(KIND=
dp) :: norm, sum_data
527 CALL timeset(routinen, handle)
530 IF (
PRESENT(debug))
THEN
536 g_pos = rs_group%mepos
537 numtask = rs_group%num_pe
538 r_dim = rs_group%num_pe_cart
539 r_pos = rs_group%mepos_cart
541 IF (fsign ==
fwfft)
THEN
542 norm = 1.0_dp/real(product(n), kind=
dp)
543 ELSE IF (fsign ==
bwfft)
THEN
546 cpabort(
"Unknown FFT direction!")
563 lmax = max(lg, (nx*ny*nz)/mmax + 1)
565 ALLOCATE (p2p(0:numtask - 1))
567 CALL rs_group%rank_compare(rs_group, p2p)
570 mx1 = bo(2, 1, rp, 1) - bo(1, 1, rp, 1) + 1
571 my1 = bo(2, 2, rp, 1) - bo(1, 2, rp, 1) + 1
572 mx2 = bo(2, 1, rp, 2) - bo(1, 1, rp, 2) + 1
573 mz2 = bo(2, 3, rp, 2) - bo(1, 3, rp, 2) + 1
575 n1 = maxval(bo(2, 1, :, 1) - bo(1, 1, :, 1) + 1)
576 n2 = maxval(bo(2, 2, :, 1) - bo(1, 2, :, 1) + 1)
577 nmax = max((2*n2)/numtask, 2)*mx2*mz2
578 nmax = max(nmax, n1*maxval(nyzray))
579 n1 = maxval(bo(2, 1, :, 2))
580 n2 = maxval(bo(2, 3, :, 2))
582 fft_scratch_size%nx = nx
583 fft_scratch_size%ny = ny
584 fft_scratch_size%nz = nz
585 fft_scratch_size%lmax = lmax
586 fft_scratch_size%mmax = mmax
587 fft_scratch_size%mx1 = mx1
588 fft_scratch_size%mx2 = mx2
589 fft_scratch_size%my1 = my1
590 fft_scratch_size%mz2 = mz2
591 fft_scratch_size%lg = lg
592 fft_scratch_size%mg = mg
593 fft_scratch_size%nbx = n1
594 fft_scratch_size%nbz = n2
595 mcz1 = maxval(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
596 mcx2 = maxval(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
597 mcz2 = maxval(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
598 fft_scratch_size%mcz1 = mcz1
599 fft_scratch_size%mcx2 = mcx2
600 fft_scratch_size%mcz2 = mcz2
601 fft_scratch_size%nmax = nmax
602 fft_scratch_size%nmray = maxval(nyzray)
603 fft_scratch_size%nyzray = nyzray(g_pos)
604 fft_scratch_size%rs_group = rs_group
605 fft_scratch_size%g_pos = g_pos
606 fft_scratch_size%r_pos = r_pos
607 fft_scratch_size%r_dim = r_dim
608 fft_scratch_size%numtask = numtask
611 IF (g_pos == 0 .AND. output_unit > 0)
THEN
612 WRITE (output_unit,
'(A)')
" Parallel 3D FFT : fft3d_ps"
613 WRITE (output_unit,
'(A,T60,3I7)')
" Transform lengths ", n
614 WRITE (output_unit,
'(A,T67,2I7)')
" Array dimensions (gin) ", lg, mg
615 WRITE (output_unit,
'(A,T60,3I7)')
" Array dimensions (cin) ", nx, ny, nz
619 IF (r_dim(2) > 1)
THEN
626 IF (r_dim(1) == 1)
THEN
627 cpabort(
"This processor distribution is not supported.")
629 CALL get_fft_scratch(fft_scratch, tf_type=300, n=n, fft_sizes=fft_scratch_size)
631 IF (sign ==
fwfft)
THEN
635 sum_data = abs(sum(cin))
636 CALL rs_group%sum(sum_data)
637 IF (g_pos == 0 .AND. output_unit > 0)
THEN
638 WRITE (output_unit,
'(A)')
" Two step communication algorithm "
639 WRITE (output_unit,
'(A,T60,3I7)')
" Transform Z ", n(3), mx1*my1
640 WRITE (output_unit,
'(A,T60,3I7)')
" Transform Y ", n(2), mx2*mz2
641 WRITE (output_unit,
'(A,T67,2I7)')
" Transform X ", n(1), nyzray(g_pos)
642 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(1) ", sum_data
646 pbuf => fft_scratch%p1buf
647 qbuf => fft_scratch%p2buf
650 CALL fft_1dm(fft_scratch%fft_plan(1), cin, qbuf, norm, stat)
652 rbuf => fft_scratch%p3buf
655 sum_data = abs(sum(qbuf))
656 CALL rs_group%sum(sum_data)
657 IF (g_pos == 0 .AND. output_unit > 0)
THEN
658 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(2) T", sum_data
663 CALL cube_transpose_2(qbuf, bo(:, :, :, 1), bo(:, :, :, 2), rbuf, fft_scratch)
666 sum_data = abs(sum(rbuf))
667 CALL rs_group%sum(sum_data)
668 IF (g_pos == 0 .AND. output_unit > 0)
THEN
669 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(3) T", sum_data
673 pbuf => fft_scratch%p4buf
676 CALL fft_1dm(fft_scratch%fft_plan(2), rbuf, pbuf, 1.0_dp, stat)
678 qbuf => fft_scratch%p5buf
681 sum_data = abs(sum(pbuf))
682 CALL rs_group%sum(sum_data)
683 IF (g_pos == 0 .AND. output_unit > 0)
THEN
684 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(4) TS", sum_data
689 CALL xz_to_yz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
690 bo(:, :, :, 2), qbuf, fft_scratch)
693 sum_data = abs(sum(qbuf))
694 CALL rs_group%sum(sum_data)
695 IF (g_pos == 0 .AND. output_unit > 0)
THEN
696 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(5) TS", sum_data
701 CALL fft_1dm(fft_scratch%fft_plan(3), qbuf, gin, 1.0_dp, stat)
704 sum_data = abs(sum(gin))
705 CALL rs_group%sum(sum_data)
706 IF (g_pos == 0 .AND. output_unit > 0)
THEN
707 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(6) ", sum_data
711 ELSE IF (sign ==
bwfft)
THEN
715 sum_data = abs(sum(gin))
716 CALL rs_group%sum(sum_data)
717 IF (g_pos == 0 .AND. output_unit > 0)
THEN
718 WRITE (output_unit,
'(A)')
" Two step communication algorithm "
719 WRITE (output_unit,
'(A,T67,2I7)')
" Transform X ", n(1), nyzray(g_pos)
720 WRITE (output_unit,
'(A,T60,3I7)')
" Transform Y ", n(2), mx2*mz2
721 WRITE (output_unit,
'(A,T60,3I7)')
" Transform Z ", n(3), mx1*my1
722 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(1) ", sum_data
726 pbuf => fft_scratch%p7buf
729 CALL fft_1dm(fft_scratch%fft_plan(4), gin, pbuf, norm, stat)
731 qbuf => fft_scratch%p4buf
734 sum_data = abs(sum(pbuf))
735 CALL rs_group%sum(sum_data)
736 IF (g_pos == 0 .AND. output_unit > 0)
THEN
737 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(2) TS", sum_data
742 CALL yz_to_xz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
743 bo(:, :, :, 2), qbuf, fft_scratch)
746 sum_data = abs(sum(qbuf))
747 CALL rs_group%sum(sum_data)
748 IF (g_pos == 0 .AND. output_unit > 0)
THEN
749 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(3) TS", sum_data
753 rbuf => fft_scratch%p3buf
756 CALL fft_1dm(fft_scratch%fft_plan(5), qbuf, rbuf, 1.0_dp, stat)
758 pbuf => fft_scratch%p2buf
761 sum_data = abs(sum(rbuf))
762 CALL rs_group%sum(sum_data)
763 IF (g_pos == 0 .AND. output_unit > 0)
THEN
764 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(4) T", sum_data
769 CALL cube_transpose_1(rbuf, bo(:, :, :, 2), bo(:, :, :, 1), pbuf, fft_scratch)
772 sum_data = abs(sum(pbuf))
773 CALL rs_group%sum(sum_data)
774 IF (g_pos == 0 .AND. output_unit > 0)
THEN
775 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(5) T", sum_data
779 qbuf => fft_scratch%p1buf
782 CALL fft_1dm(fft_scratch%fft_plan(6), pbuf, cin, 1.0_dp, stat)
785 sum_data = abs(sum(cin))
786 CALL rs_group%sum(sum_data)
787 IF (g_pos == 0 .AND. output_unit > 0)
THEN
788 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(6) ", sum_data
794 cpabort(
"Illegal fsign parameter.")
808 CALL get_fft_scratch(fft_scratch, tf_type=200, n=n, fft_sizes=fft_scratch_size)
810 sbuf => fft_scratch%r1buf
811 tbuf => fft_scratch%tbuf
816 IF (sign ==
fwfft)
THEN
820 sum_data = abs(sum(cin))
821 CALL rs_group%sum(sum_data)
822 IF (g_pos == 0 .AND. output_unit > 0)
THEN
823 WRITE (output_unit,
'(A)')
" One step communication algorithm "
824 WRITE (output_unit,
'(A,T60,3I7)')
" Transform YZ ", n(2), n(3), nx
825 WRITE (output_unit,
'(A,T67,2I7)')
" Transform X ", n(1), nyzray(g_pos)
826 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(1) ", sum_data
831 CALL fft_1dm(fft_scratch%fft_plan(1), cin, sbuf, 1._dp, stat)
832 CALL fft_1dm(fft_scratch%fft_plan(2), sbuf, tbuf, 1._dp, stat)
835 sum_data = abs(sum(tbuf))
836 CALL rs_group%sum(sum_data)
837 IF (g_pos == 0 .AND. output_unit > 0)
THEN
838 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(2) TS", sum_data
843 CALL yz_to_x(tbuf, rs_group, g_pos, p2p, yzp, nyzray, &
844 bo(:, :, :, 2), sbuf, fft_scratch)
847 sum_data = abs(sum(sbuf))
848 CALL rs_group%sum(sum_data)
849 IF (g_pos == 0 .AND. output_unit > 0)
THEN
850 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(3) TS", sum_data
854 CALL fft_1dm(fft_scratch%fft_plan(3), sbuf, gin, norm, stat)
857 sum_data = abs(sum(gin))
858 CALL rs_group%sum(sum_data)
859 IF (g_pos == 0 .AND. output_unit > 0)
THEN
860 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(4) ", sum_data
864 ELSE IF (sign ==
bwfft)
THEN
868 sum_data = abs(sum(gin))
869 CALL rs_group%sum(sum_data)
870 IF (g_pos == 0 .AND. output_unit > 0)
THEN
871 WRITE (output_unit,
'(A)')
" One step communication algorithm "
872 WRITE (output_unit,
'(A,T67,2I7)')
" Transform X ", n(1), nyzray(g_pos)
873 WRITE (output_unit,
'(A,T60,3I7)')
" Transform YZ ", n(2), n(3), nx
874 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(1) ", sum_data
879 CALL fft_1dm(fft_scratch%fft_plan(4), gin, sbuf, norm, stat)
882 sum_data = abs(sum(sbuf))
883 CALL rs_group%sum(sum_data)
884 IF (g_pos == 0 .AND. output_unit > 0)
THEN
885 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(2) TS", sum_data
890 CALL x_to_yz(sbuf, rs_group, g_pos, p2p, yzp, nyzray, &
891 bo(:, :, :, 2), tbuf, fft_scratch)
894 sum_data = abs(sum(tbuf))
895 CALL rs_group%sum(sum_data)
896 IF (g_pos == 0 .AND. output_unit > 0)
THEN
897 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(3) TS", sum_data
902 CALL fft_1dm(fft_scratch%fft_plan(5), tbuf, sbuf, 1._dp, stat)
903 CALL fft_1dm(fft_scratch%fft_plan(6), sbuf, cin, 1._dp, stat)
906 sum_data = abs(sum(cin))
907 CALL rs_group%sum(sum_data)
908 IF (g_pos == 0 .AND. output_unit > 0)
THEN
909 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(4) ", sum_data
913 cpabort(
"Illegal fsign parameter.")
922 IF (
PRESENT(status))
THEN
925 CALL timestop(handle)
927 END SUBROUTINE fft3d_ps
940 SUBROUTINE fft3d_pb(fsign, n, zin, gin, group, bo, status, debug)
942 INTEGER,
INTENT(IN) :: fsign
943 INTEGER,
DIMENSION(3),
INTENT(IN) :: n
944 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
946 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
949 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:, :), &
951 INTEGER,
INTENT(OUT),
OPTIONAL :: status
952 LOGICAL,
INTENT(IN),
OPTIONAL :: debug
954 CHARACTER(len=*),
PARAMETER :: routineN =
'fft3d_pb'
956 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
957 POINTER :: abuf, bbuf
958 INTEGER :: handle, lg(2), lz(3), mcx2, mcy3, mcz1, &
959 mcz2, mx1, mx2, mx3, my1, my2, my3, &
960 my_pos, mz1, mz2, mz3, output_unit, &
962 INTEGER,
DIMENSION(2) :: dim
964 REAL(KIND=
dp) :: norm, sum_data
988 CALL timeset(routinen, handle)
991 dim = group%num_pe_cart
994 IF (
PRESENT(debug))
THEN
1000 IF (fsign ==
fwfft)
THEN
1001 norm = 1.0_dp/real(product(n), kind=
dp)
1002 ELSE IF (fsign ==
bwfft)
THEN
1005 cpabort(
"Unknown FFT direction!")
1011 lg(1) =
SIZE(gin, 1)
1012 lg(2) =
SIZE(gin, 2)
1013 lz(1) =
SIZE(zin, 1)
1014 lz(2) =
SIZE(zin, 2)
1015 lz(3) =
SIZE(zin, 3)
1016 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1017 WRITE (output_unit,
'(A)')
" Parallel 3D FFT : fft3d_pb"
1018 WRITE (output_unit,
'(A,T60,3I7)')
" Transform lengths ", n
1019 WRITE (output_unit,
'(A,T67,2I7)')
" Array dimensions (gin) ", lg
1020 WRITE (output_unit,
'(A,T60,3I7)')
" Array dimensions (cin) ", lz
1024 mx1 = bo(2, 1, my_pos, 1) - bo(1, 1, my_pos, 1) + 1
1025 my1 = bo(2, 2, my_pos, 1) - bo(1, 2, my_pos, 1) + 1
1026 mz1 = bo(2, 3, my_pos, 1) - bo(1, 3, my_pos, 1) + 1
1027 mx2 = bo(2, 1, my_pos, 2) - bo(1, 1, my_pos, 2) + 1
1028 my2 = bo(2, 2, my_pos, 2) - bo(1, 2, my_pos, 2) + 1
1029 mz2 = bo(2, 3, my_pos, 2) - bo(1, 3, my_pos, 2) + 1
1030 mx3 = bo(2, 1, my_pos, 3) - bo(1, 1, my_pos, 3) + 1
1031 my3 = bo(2, 2, my_pos, 3) - bo(1, 2, my_pos, 3) + 1
1032 mz3 = bo(2, 3, my_pos, 3) - bo(1, 3, my_pos, 3) + 1
1033 fft_scratch_size%mx1 = mx1
1034 fft_scratch_size%mx2 = mx2
1035 fft_scratch_size%mx3 = mx3
1036 fft_scratch_size%my1 = my1
1037 fft_scratch_size%my2 = my2
1038 fft_scratch_size%my3 = my3
1039 fft_scratch_size%mz1 = mz1
1040 fft_scratch_size%mz2 = mz2
1041 fft_scratch_size%mz3 = mz3
1042 mcz1 = maxval(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
1043 mcx2 = maxval(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
1044 mcz2 = maxval(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
1045 mcy3 = maxval(bo(2, 2, :, 3) - bo(1, 2, :, 3) + 1)
1046 fft_scratch_size%mcz1 = mcz1
1047 fft_scratch_size%mcx2 = mcx2
1048 fft_scratch_size%mcz2 = mcz2
1049 fft_scratch_size%mcy3 = mcy3
1050 fft_scratch_size%rs_group = group
1051 fft_scratch_size%g_pos = my_pos
1052 fft_scratch_size%numtask = dim(1)*dim(2)
1054 IF (dim(1) > 1 .AND. dim(2) > 1)
THEN
1060 CALL get_fft_scratch(fft_scratch, tf_type=100, n=n, fft_sizes=fft_scratch_size)
1062 IF (sign ==
fwfft)
THEN
1065 bbuf => fft_scratch%a2buf
1068 sum_data = abs(sum(zin))
1069 CALL group%sum(sum_data)
1070 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1071 WRITE (output_unit,
'(A)')
" Two step communication algorithm "
1072 WRITE (output_unit,
'(A,T67,2I7)')
" Transform Z ", n(3), mx1*my1
1073 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(1) ", sum_data
1078 CALL fft_1dm(fft_scratch%fft_plan(1), zin, bbuf, norm, stat)
1080 abuf => fft_scratch%a3buf
1083 sum_data = abs(sum(bbuf))
1084 CALL group%sum(sum_data)
1085 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1086 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(2) T", sum_data
1090 CALL cube_transpose_2(bbuf, bo(:, :, :, 1), bo(:, :, :, 2), abuf, fft_scratch)
1092 bbuf => fft_scratch%a4buf
1095 sum_data = abs(sum(abuf))
1096 CALL group%sum(sum_data)
1097 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1098 WRITE (output_unit,
'(A,T67,2I7)')
" Transform Y ", n(2), mx2*mz2
1099 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(3) ", sum_data
1104 CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
1106 abuf => fft_scratch%a5buf
1109 sum_data = abs(sum(bbuf))
1110 CALL group%sum(sum_data)
1111 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1112 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(4) T", sum_data
1116 CALL cube_transpose_4(bbuf, bo(:, :, :, 2), bo(:, :, :, 3), abuf, fft_scratch)
1119 sum_data = abs(sum(abuf))
1120 CALL group%sum(sum_data)
1121 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1122 WRITE (output_unit,
'(A,T67,2I7)')
" Transform X ", n(1), my3*mz3
1123 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(5) ", sum_data
1128 CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
1131 sum_data = abs(sum(gin))
1132 CALL group%sum(sum_data)
1133 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1134 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(6) ", sum_data
1138 ELSEIF (sign ==
bwfft)
THEN
1141 bbuf => fft_scratch%a5buf
1144 sum_data = abs(sum(gin))
1145 CALL group%sum(sum_data)
1146 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1147 WRITE (output_unit,
'(A)')
" Two step communication algorithm "
1148 WRITE (output_unit,
'(A,T67,2I7)')
" Transform X ", n(1), my3*mz3
1149 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(1) ", sum_data
1154 CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
1156 abuf => fft_scratch%a4buf
1159 sum_data = abs(sum(bbuf))
1160 CALL group%sum(sum_data)
1161 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1162 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(2) T", sum_data
1166 CALL cube_transpose_3(bbuf, bo(:, :, :, 3), bo(:, :, :, 2), abuf, fft_scratch)
1168 bbuf => fft_scratch%a3buf
1171 sum_data = abs(sum(abuf))
1172 CALL group%sum(sum_data)
1173 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1174 WRITE (output_unit,
'(A,T67,2I7)')
" Transform Y ", n(2), mx2*mz2
1175 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(3) ", sum_data
1180 CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
1182 abuf => fft_scratch%a2buf
1185 sum_data = abs(sum(bbuf))
1186 CALL group%sum(sum_data)
1187 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1188 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(4) T", sum_data
1192 CALL cube_transpose_1(bbuf, bo(:, :, :, 2), bo(:, :, :, 1), abuf, fft_scratch)
1195 sum_data = abs(sum(abuf))
1196 CALL group%sum(sum_data)
1197 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1198 WRITE (output_unit,
'(A,T67,2I7)')
" Transform Z ", n(3), mx1*my1
1199 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(5) ", sum_data
1204 CALL fft_1dm(fft_scratch%fft_plan(6), abuf, zin, norm, stat)
1207 sum_data = abs(sum(zin))
1208 CALL group%sum(sum_data)
1209 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1210 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(6) ", sum_data
1215 cpabort(
"Illegal fsign parameter.")
1220 ELSEIF (dim(2) == 1)
THEN
1226 CALL get_fft_scratch(fft_scratch, tf_type=101, n=n, fft_sizes=fft_scratch_size)
1228 IF (sign ==
fwfft)
THEN
1232 sum_data = abs(sum(zin))
1233 CALL group%sum(sum_data)
1234 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1235 WRITE (output_unit,
'(A)')
" one step communication algorithm "
1236 WRITE (output_unit,
'(A,T67,2I7)')
" Transform Z ", n(3), mx1*my1
1237 WRITE (output_unit,
'(A,T67,2I7)')
" Transform Y ", n(2), mx1*mz1
1238 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(1) ", sum_data
1242 abuf => fft_scratch%a3buf
1243 bbuf => fft_scratch%a4buf
1245 CALL fft_1dm(fft_scratch%fft_plan(1), zin, abuf, norm, stat)
1246 CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
1248 abuf => fft_scratch%a5buf
1251 sum_data = abs(sum(bbuf))
1252 CALL group%sum(sum_data)
1253 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1254 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(2) T", sum_data
1258 CALL cube_transpose_6(bbuf, group, bo(:, :, :, 1), bo(:, :, :, 3), abuf, fft_scratch)
1261 sum_data = abs(sum(abuf))
1262 CALL group%sum(sum_data)
1263 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1264 WRITE (output_unit,
'(A,T67,2I7)')
" Transform X ", n(1), my3*mz3
1265 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(3) ", sum_data
1270 CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
1273 sum_data = abs(sum(gin))
1274 CALL group%sum(sum_data)
1275 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1276 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(4) ", sum_data
1280 ELSEIF (sign ==
bwfft)
THEN
1284 sum_data = abs(sum(gin))
1285 CALL group%sum(sum_data)
1286 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1287 WRITE (output_unit,
'(A)')
" one step communication algorithm "
1288 WRITE (output_unit,
'(A,T67,2I7)')
" Transform X ", n(1), my3*mz3
1289 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(1) ", sum_data
1293 bbuf => fft_scratch%a5buf
1296 CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
1298 abuf => fft_scratch%a4buf
1301 sum_data = abs(sum(bbuf))
1302 CALL group%sum(sum_data)
1303 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1304 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(2) T", sum_data
1308 CALL cube_transpose_5(bbuf, group, bo(:, :, :, 3), bo(:, :, :, 1), abuf, fft_scratch)
1310 bbuf => fft_scratch%a3buf
1313 sum_data = abs(sum(abuf))
1314 CALL group%sum(sum_data)
1315 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1316 WRITE (output_unit,
'(A,T67,2I7)')
" Transform Y ", n(2), mx1*mz1
1317 WRITE (output_unit,
'(A,T67,2I7)')
" Transform Z ", n(3), mx1*my1
1318 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(3) ", sum_data
1323 CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
1326 CALL fft_1dm(fft_scratch%fft_plan(6), bbuf, zin, norm, stat)
1329 sum_data = abs(sum(zin))
1330 CALL group%sum(sum_data)
1331 IF (my_pos == 0 .AND. output_unit > 0)
THEN
1332 WRITE (output_unit,
'(A,T61,E20.14)')
" Sum of data(4) ", sum_data
1337 cpabort(
"Illegal fsign parameter.")
1344 cpabort(
"Partition not implemented.")
1348 IF (
PRESENT(status))
THEN
1352 CALL timestop(handle)
1354 END SUBROUTINE fft3d_pb
1371 SUBROUTINE x_to_yz(sb, group, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1373 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1377 INTEGER,
INTENT(IN) :: my_pos
1378 INTEGER,
CONTIGUOUS,
DIMENSION(0:),
INTENT(IN) :: p2p
1379 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:), &
1381 INTEGER,
CONTIGUOUS,
DIMENSION(0:),
INTENT(IN) :: nray
1382 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:), &
1384 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
1388 CHARACTER(len=*),
PARAMETER :: routinen =
'x_to_yz'
1390 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1392 COMPLEX(KIND=sp),
CONTIGUOUS,
DIMENSION(:, :), &
1394 INTEGER :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
1396 INTEGER,
CONTIGUOUS,
DIMENSION(:),
POINTER :: rcount, rdispl, scount, sdispl
1398 CALL timeset(routinen, handle)
1401 scount => fft_scratch%scount
1402 rcount => fft_scratch%rcount
1403 sdispl => fft_scratch%sdispl
1404 rdispl => fft_scratch%rdispl
1406 IF (alltoall_sgl)
THEN
1407 ss => fft_scratch%ss
1408 tt => fft_scratch%tt
1409 ss(:, :) = cmplx(sb(:, :), kind=
sp)
1412 rr => fft_scratch%rr
1416 nm = maxval(nray(0:np - 1))
1423 nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
1425 sdispl(ip) = nr*(bo(1, 1, ix) - 1)
1428 nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1435 rdispl(ip) = nm*nx*ip
1438 IF (alltoall_sgl)
THEN
1439 CALL group%alltoall(ss, scount, sdispl, tt, rcount, rdispl)
1441 CALL group%alltoall(sb, scount, sdispl, rr, rcount, rdispl)
1444 nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1450 ixx = nray(ip)*(ix - 1)
1451 IF (alltoall_sgl)
THEN
1455 tb(iy, iz, ix) = tt(ir + ixx, ip)
1461 tb(iy, iz, ix) = rr(ir + ixx, ip)
1468 CALL timestop(handle)
1487 SUBROUTINE yz_to_x(tb, group, my_pos, p2p, yzp, nray, bo, sb, fft_scratch)
1489 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
1493 INTEGER,
INTENT(IN) :: my_pos
1494 INTEGER,
CONTIGUOUS,
DIMENSION(0:),
INTENT(IN) :: p2p
1495 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:), &
1497 INTEGER,
CONTIGUOUS,
DIMENSION(0:),
INTENT(IN) :: nray
1498 INTEGER,
DIMENSION(:, :, 0:),
INTENT(IN) :: bo
1499 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1503 CHARACTER(len=*),
PARAMETER :: routinen =
'yz_to_x'
1505 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1507 COMPLEX(KIND=sp),
CONTIGUOUS,
DIMENSION(:, :), &
1509 INTEGER :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
1511 INTEGER,
CONTIGUOUS,
DIMENSION(:),
POINTER :: rcount, rdispl, scount, sdispl
1513 CALL timeset(routinen, handle)
1517 scount => fft_scratch%scount
1518 rcount => fft_scratch%rcount
1519 sdispl => fft_scratch%sdispl
1520 rdispl => fft_scratch%rdispl
1522 IF (alltoall_sgl)
THEN
1523 ss => fft_scratch%ss
1524 tt => fft_scratch%tt
1528 rr => fft_scratch%rr
1531 nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1537 ixx = nray(ip)*(ix - 1)
1538 IF (alltoall_sgl)
THEN
1542 tt(ir + ixx, ip) = cmplx(tb(iy, iz, ix), kind=
sp)
1548 rr(ir + ixx, ip) = tb(iy, iz, ix)
1554 nm = maxval(nray(0:np - 1))
1561 nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
1563 rdispl(ip) = nr*(bo(1, 1, ix) - 1)
1566 nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1573 sdispl(ip) = nm*nx*ip
1577 IF (alltoall_sgl)
THEN
1578 CALL group%alltoall(tt, scount, sdispl, ss, rcount, rdispl)
1581 CALL group%alltoall(rr, scount, sdispl, sb, rcount, rdispl)
1584 CALL timestop(handle)
1604 SUBROUTINE yz_to_xz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1606 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1610 INTEGER,
DIMENSION(2),
INTENT(IN) :: dims
1611 INTEGER,
INTENT(IN) :: my_pos
1612 INTEGER,
CONTIGUOUS,
DIMENSION(0:),
INTENT(IN) :: p2p
1613 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:),
INTENT(IN) :: yzp
1614 INTEGER,
CONTIGUOUS,
DIMENSION(0:),
INTENT(IN) :: nray
1615 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:),
INTENT(IN) :: bo
1616 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(INOUT),
CONTIGUOUS :: tb
1619 CHARACTER(len=*),
PARAMETER :: routinen =
'yz_to_xz'
1621 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER,
CONTIGUOUS :: xzbuf, yzbuf
1622 COMPLEX(KIND=sp),
DIMENSION(:),
POINTER,
CONTIGUOUS :: xzbuf_sgl, yzbuf_sgl
1623 INTEGER :: handle, icrs, ip, ipl, ipr, ir, ix, iz, &
1624 jj, jx, jy, jz, myx, myz, np, npx, &
1626 INTEGER,
DIMENSION(:),
POINTER,
CONTIGUOUS :: pzcoord, rcount, rdispl, scount, sdispl, &
1628 INTEGER,
DIMENSION(:, :),
CONTIGUOUS,
POINTER :: pgrid
1630 CALL timeset(routinen, handle)
1634 rs_pos = p2p(my_pos)
1636 IF (alltoall_sgl)
THEN
1637 yzbuf_sgl => fft_scratch%yzbuf_sgl
1638 xzbuf_sgl => fft_scratch%xzbuf_sgl
1640 yzbuf => fft_scratch%yzbuf
1641 xzbuf => fft_scratch%xzbuf
1645 pgrid => fft_scratch%pgrid
1646 xcor => fft_scratch%xcor
1647 zcor => fft_scratch%zcor
1648 pzcoord => fft_scratch%pzcoord
1649 scount => fft_scratch%scount
1650 rcount => fft_scratch%rcount
1651 sdispl => fft_scratch%sdispl
1652 rdispl => fft_scratch%rdispl
1658 IF (fft_scratch%in == 0)
THEN
1664 xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
1668 zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
1671 IF (alltoall_sgl)
THEN
1672 DO ir = 1, nray(my_pos)
1673 jy = yzp(1, ir, my_pos)
1674 jz = yzp(2, ir, my_pos)
1675 ip = pgrid(xcor(jx), zcor(jz))
1676 scount(ip) = scount(ip) + 1
1679 DO ir = 1, nray(my_pos)
1680 jy = yzp(1, ir, my_pos)
1681 jz = yzp(2, ir, my_pos)
1682 ip = pgrid(xcor(jx), zcor(jz))
1683 scount(ip) = scount(ip) + 1
1688 CALL group%alltoall(scount, rcount, 1)
1689 fft_scratch%yzcount = scount
1690 fft_scratch%xzcount = rcount
1696 sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
1697 rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
1700 fft_scratch%yzdispl = sdispl
1701 fft_scratch%xzdispl = rdispl
1705 IF (scount(ip) /= 0) icrs = icrs + 1
1706 IF (rcount(ip) /= 0) icrs = icrs + 1
1708 CALL group%sum(icrs)
1709 fft_scratch%rsratio = real(icrs, kind=
dp)/(real(2*np, kind=
dp)*real(np, kind=
dp))
1713 scount = fft_scratch%yzcount
1714 rcount = fft_scratch%xzcount
1715 sdispl = fft_scratch%yzdispl
1716 rdispl = fft_scratch%xzdispl
1726 IF (scount(ip) == 0) cycle
1729 nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
1730 DO ir = 1, nray(my_pos)
1731 jz = yzp(2, ir, my_pos)
1732 IF (zcor(jz) == pzcoord(ipl))
THEN
1734 jy = yzp(1, ir, my_pos)
1735 IF (alltoall_sgl)
THEN
1737 yzbuf_sgl(sdispl(ip) + jj + jx*scount(ip)/nx) = cmplx(sb(ir, jx + bo(1, 1, ipl)), kind=
sp)
1741 yzbuf(sdispl(ip) + jj + jx*scount(ip)/nx) = sb(ir, jx + bo(1, 1, ipl))
1749 IF (alltoall_sgl)
THEN
1750 CALL group%alltoall(yzbuf_sgl, scount, sdispl, xzbuf_sgl, rcount, rdispl)
1752 IF (fft_scratch%rsratio < ratio_sparse_alltoall)
THEN
1753 CALL sparse_alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl, group)
1755 CALL group%alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl)
1759 myx = fft_scratch%sizes%r_pos(1)
1760 myz = fft_scratch%sizes%r_pos(2)
1761 nz = bo(2, 3, rs_pos) - bo(1, 3, rs_pos) + 1
1771 DO jx = 0, bo(2, 1, rs_pos) - bo(1, 1, rs_pos)
1774 IF (alltoall_sgl)
THEN
1775 IF (zcor(jz) == myz)
THEN
1778 jz = jz - bo(1, 3, rs_pos) + 1
1779 tb(jy, jz + jx*nz) = xzbuf_sgl(jj + rdispl(ipr))
1782 IF (zcor(jz) == myz)
THEN
1785 jz = jz - bo(1, 3, rs_pos) + 1
1786 tb(jy, jz + jx*nz) = xzbuf(jj + rdispl(ipr))
1794 CALL timestop(handle)
1814 SUBROUTINE xz_to_yz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1816 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1820 INTEGER,
DIMENSION(2),
INTENT(IN) :: dims
1821 INTEGER,
INTENT(IN) :: my_pos
1822 INTEGER,
CONTIGUOUS,
DIMENSION(0:),
INTENT(IN) :: p2p
1823 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:),
INTENT(IN) :: yzp
1824 INTEGER,
CONTIGUOUS,
DIMENSION(0:),
INTENT(IN) :: nray
1825 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:),
INTENT(IN) :: bo
1826 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(INOUT),
CONTIGUOUS :: tb
1829 CHARACTER(len=*),
PARAMETER :: routinen =
'xz_to_yz'
1831 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER,
CONTIGUOUS :: xzbuf, yzbuf
1832 COMPLEX(KIND=sp),
DIMENSION(:),
POINTER,
CONTIGUOUS :: xzbuf_sgl, yzbuf_sgl
1833 INTEGER :: handle, icrs, ip, ipl, ir, ix, ixx, iz, &
1834 jj, jx, jy, jz, mp, myx, myz, np, npx, &
1836 INTEGER,
DIMENSION(:),
POINTER,
CONTIGUOUS :: pzcoord, rcount, rdispl, scount, sdispl, &
1838 INTEGER,
DIMENSION(:, :),
CONTIGUOUS,
POINTER :: pgrid
1840 CALL timeset(routinen, handle)
1844 IF (alltoall_sgl)
THEN
1845 yzbuf_sgl => fft_scratch%yzbuf_sgl
1846 xzbuf_sgl => fft_scratch%xzbuf_sgl
1848 yzbuf => fft_scratch%yzbuf
1849 xzbuf => fft_scratch%xzbuf
1853 pgrid => fft_scratch%pgrid
1854 xcor => fft_scratch%xcor
1855 zcor => fft_scratch%zcor
1856 pzcoord => fft_scratch%pzcoord
1857 scount => fft_scratch%scount
1858 rcount => fft_scratch%rcount
1859 sdispl => fft_scratch%sdispl
1860 rdispl => fft_scratch%rdispl
1864 IF (fft_scratch%in == 0)
THEN
1867 nx = maxval(bo(2, 1, :))
1871 xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
1875 zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
1878 DO ir = 1, nray(my_pos)
1879 jy = yzp(1, ir, my_pos)
1880 jz = yzp(2, ir, my_pos)
1881 ip = pgrid(xcor(jx), zcor(jz))
1882 rcount(ip) = rcount(ip) + 1
1886 CALL group%alltoall(rcount, scount, 1)
1887 fft_scratch%xzcount = scount
1888 fft_scratch%yzcount = rcount
1894 sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
1895 rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
1898 fft_scratch%xzdispl = sdispl
1899 fft_scratch%yzdispl = rdispl
1903 IF (scount(ip) /= 0) icrs = icrs + 1
1904 IF (rcount(ip) /= 0) icrs = icrs + 1
1906 CALL group%sum(icrs)
1907 fft_scratch%rsratio = real(icrs, kind=
dp)/(real(2*np, kind=
dp)*real(np, kind=
dp))
1911 scount = fft_scratch%xzcount
1912 rcount = fft_scratch%yzcount
1913 sdispl = fft_scratch%xzdispl
1914 rdispl = fft_scratch%yzdispl
1918 myx = fft_scratch%sizes%r_pos(1)
1919 myz = fft_scratch%sizes%r_pos(2)
1921 nz = bo(2, 3, mp) - bo(1, 3, mp) + 1
1922 nx = bo(2, 1, mp) - bo(1, 1, mp) + 1
1934 IF (zcor(jz) == myz)
THEN
1937 jz = yzp(2, ir, ip) - bo(1, 3, mp) + 1
1938 IF (alltoall_sgl)
THEN
1940 ixx = jj + jx*scount(ipl)/nx
1941 xzbuf_sgl(ixx + sdispl(ipl)) = cmplx(sb(jy, jz + jx*nz), kind=
sp)
1945 ixx = jj + jx*scount(ipl)/nx
1946 xzbuf(ixx + sdispl(ipl)) = sb(jy, jz + jx*nz)
1954 IF (alltoall_sgl)
THEN
1955 CALL group%alltoall(xzbuf_sgl, scount, sdispl, yzbuf_sgl, rcount, rdispl)
1957 IF (fft_scratch%rsratio < ratio_sparse_alltoall)
THEN
1958 CALL sparse_alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl, group)
1960 CALL group%alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl)
1970 IF (rcount(ip) == 0) cycle
1973 nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
1974 DO ir = 1, nray(my_pos)
1975 jz = yzp(2, ir, my_pos)
1976 IF (zcor(jz) == pzcoord(ipl))
THEN
1978 jy = yzp(1, ir, my_pos)
1979 IF (alltoall_sgl)
THEN
1981 tb(ir, jx + bo(1, 1, ipl)) = yzbuf_sgl(rdispl(ip) + jj + jx*rcount(ip)/nx)
1985 tb(ir, jx + bo(1, 1, ipl)) = yzbuf(rdispl(ip) + jj + jx*rcount(ip)/nx)
1993 CALL timestop(handle)
2010 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2012 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:), &
2013 INTENT(IN) :: boin, boout
2014 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2018 CHARACTER(len=*),
PARAMETER :: routinen =
'cube_transpose_1'
2020 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2022 INTEGER :: handle, ip, ipl, ir, is, ixy, iz, mip, &
2024 INTEGER,
CONTIGUOUS,
DIMENSION(:),
POINTER :: rcount, rdispl, scount, sdispl
2025 INTEGER,
CONTIGUOUS,
DIMENSION(:, :),
POINTER :: pgrid
2026 INTEGER,
DIMENSION(2) :: dim, pos
2028 CALL timeset(routinen, handle)
2030 mip = fft_scratch%mip
2031 dim = fft_scratch%dim
2032 pos = fft_scratch%pos
2033 scount => fft_scratch%scount
2034 rcount => fft_scratch%rcount
2035 sdispl => fft_scratch%sdispl
2036 rdispl => fft_scratch%rdispl
2037 pgrid => fft_scratch%pgcube
2040 nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2041 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2048 ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2049 scount(ip) = nx*nz*ny
2050 sdispl(ip) = nx*nz*(boout(1, 2, ipl) - 1)
2053 ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2054 mz = maxval(boin(2, 3, :) - boin(1, 3, :) + 1)
2060 nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
2061 rcount(ip) = nx*nz*ny
2062 rdispl(ip) = nx*ny*mz*ip
2066 rbuf => fft_scratch%rbuf1
2068 CALL fft_scratch%cart_sub_comm(2)%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2076 nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
2078 is = boin(1, 3, ipl) + iz - 1
2079 ir = iz + nz*(ixy - 1)
2080 sout(is, ixy) = rbuf(ir, ip)
2086 CALL timestop(handle)
2100 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2102 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:), &
2103 INTENT(IN) :: boin, boout
2104 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2108 CHARACTER(len=*),
PARAMETER :: routinen =
'cube_transpose_2'
2110 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2112 INTEGER :: handle, ip, ipl, ir, ixy, iz, mip, mz, &
2114 INTEGER,
CONTIGUOUS,
DIMENSION(:),
POINTER :: rcount, rdispl, scount, sdispl
2115 INTEGER,
CONTIGUOUS,
DIMENSION(:, :),
POINTER :: pgrid
2116 INTEGER,
DIMENSION(2) :: dim, pos
2119 CALL timeset(routinen, handle)
2121 sub_group = fft_scratch%cart_sub_comm(2)
2122 mip = fft_scratch%mip
2123 dim = fft_scratch%dim
2124 pos = fft_scratch%pos
2125 scount => fft_scratch%scount
2126 rcount => fft_scratch%rcount
2127 sdispl => fft_scratch%sdispl
2128 rdispl => fft_scratch%rdispl
2129 pgrid => fft_scratch%pgcube
2132 nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2133 ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2134 mz = maxval(boout(2, 3, :) - boout(1, 3, :) + 1)
2136 rbuf => fft_scratch%rbuf2
2145 nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
2146 DO iz = boout(1, 3, ipl), boout(2, 3, ipl)
2147 ir = iz - boout(1, 3, ipl) + 1 + (ixy - 1)*nz
2148 rbuf(ir, ip) = cin(iz, ixy)
2156 nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
2157 scount(ip) = nx*ny*nz
2158 sdispl(ip) = nx*ny*mz*ip
2162 nz = boout(2, 3, mip) - boout(1, 3, mip) + 1
2168 ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2169 rcount(ip) = nx*ny*nz
2170 rdispl(ip) = nx*nz*(boin(1, 2, ipl) - 1)
2174 CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2176 CALL timestop(handle)
2188 SUBROUTINE cube_transpose_3(cin, boin, boout, sout, fft_scratch)
2190 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2192 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:), &
2193 INTENT(IN) :: boin, boout
2194 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2198 CHARACTER(len=*),
PARAMETER :: routinen =
'cube_transpose_3'
2200 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2202 INTEGER :: handle, ip, ipl, ir, is, ixz, iy, lb, &
2203 mip, my, my_id, np, num_threads, nx, &
2205 INTEGER,
CONTIGUOUS,
DIMENSION(:),
POINTER :: rcount, rdispl, scount, sdispl
2206 INTEGER,
CONTIGUOUS,
DIMENSION(:, :),
POINTER :: pgrid
2207 INTEGER,
DIMENSION(2) :: dim, pos
2210 CALL timeset(routinen, handle)
2212 sub_group = fft_scratch%cart_sub_comm(1)
2213 mip = fft_scratch%mip
2214 dim = fft_scratch%dim
2215 pos = fft_scratch%pos
2217 scount => fft_scratch%scount
2218 rcount => fft_scratch%rcount
2219 sdispl => fft_scratch%sdispl
2220 rdispl => fft_scratch%rdispl
2221 pgrid => fft_scratch%pgcube
2223 ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2224 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2230 nx = boout(2, 1, ipl) - boout(1, 1, ipl) + 1
2231 scount(ip) = nx*nz*ny
2232 sdispl(ip) = ny*nz*(boout(1, 1, ipl) - 1)
2235 nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
2236 my = maxval(boin(2, 2, :) - boin(1, 2, :) + 1)
2242 ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2243 rcount(ip) = nx*nz*ny
2244 rdispl(ip) = nx*my*nz*ip
2248 rbuf => fft_scratch%rbuf3
2256 IF (my_id < num_threads)
THEN
2257 lb = (
SIZE(rbuf, 2)*my_id)/num_threads
2258 ub = (
SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2259 rbuf(:, lb:ub) = 0.0_dp
2263 CALL sub_group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2271 ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2273 is = boin(1, 2, ipl) + iy - 1
2274 ir = iy + ny*(ixz - 1)
2275 sout(is, ixz) = rbuf(ir, ip)
2281 CALL timestop(handle)
2283 END SUBROUTINE cube_transpose_3
2293 SUBROUTINE cube_transpose_4(cin, boin, boout, sout, fft_scratch)
2295 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2297 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:), &
2298 INTENT(IN) :: boin, boout
2299 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2303 CHARACTER(len=*),
PARAMETER :: routinen =
'cube_transpose_4'
2305 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2307 INTEGER :: handle, ip, ipl, ir, iy, izx, lb, mip, &
2308 my, my_id, np, num_threads, nx, ny, &
2310 INTEGER,
CONTIGUOUS,
DIMENSION(:),
POINTER :: rcount, rdispl, scount, sdispl
2311 INTEGER,
CONTIGUOUS,
DIMENSION(:, :),
POINTER :: pgrid
2312 INTEGER,
DIMENSION(2) :: dim, pos
2315 CALL timeset(routinen, handle)
2317 sub_group = fft_scratch%cart_sub_comm(1)
2318 mip = fft_scratch%mip
2319 dim = fft_scratch%dim
2320 pos = fft_scratch%pos
2322 scount => fft_scratch%scount
2323 rcount => fft_scratch%rcount
2324 sdispl => fft_scratch%sdispl
2325 rdispl => fft_scratch%rdispl
2326 pgrid => fft_scratch%pgcube
2328 nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2329 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2330 my = maxval(boout(2, 2, :) - boout(1, 2, :) + 1)
2332 rbuf => fft_scratch%rbuf4
2340 IF (my_id < num_threads)
THEN
2341 lb = (
SIZE(rbuf, 2)*my_id)/num_threads
2342 ub = (
SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2343 rbuf(:, lb:ub) = 0.0_dp
2351 ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2352 DO iy = boout(1, 2, ipl), boout(2, 2, ipl)
2353 ir = iy - boout(1, 2, ipl) + 1 + (izx - 1)*ny
2354 rbuf(ir, ip) = cin(iy, izx)
2362 ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2363 scount(ip) = nx*ny*nz
2364 sdispl(ip) = nx*nz*my*ip
2368 ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2374 nx = boin(2, 1, ipl) - boin(1, 1, ipl) + 1
2375 rcount(ip) = nx*ny*nz
2376 rdispl(ip) = ny*nz*(boin(1, 1, ipl) - 1)
2380 CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2382 CALL timestop(handle)
2384 END SUBROUTINE cube_transpose_4
2395 SUBROUTINE cube_transpose_5(cin, group, boin, boout, sout, fft_scratch)
2397 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2401 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:),
INTENT(IN) :: boin, boout
2402 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(OUT),
CONTIGUOUS :: sout
2405 CHARACTER(len=*),
PARAMETER :: routinen =
'cube_transpose_5'
2407 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER,
CONTIGUOUS :: rbuf
2408 INTEGER :: handle, ip, ir, is, ixz, iy, lb, mip, &
2409 my, my_id, np, num_threads, nx, ny, &
2411 INTEGER,
DIMENSION(:),
POINTER,
CONTIGUOUS :: rcount, rdispl, scount, sdispl
2413 CALL timeset(routinen, handle)
2415 np = fft_scratch%sizes%numtask
2416 mip = fft_scratch%mip
2417 scount => fft_scratch%scount
2418 rcount => fft_scratch%rcount
2419 sdispl => fft_scratch%sdispl
2420 rdispl => fft_scratch%rdispl
2422 ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2423 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2428 nx = boout(2, 1, ip) - boout(1, 1, ip) + 1
2429 scount(ip) = nx*nz*ny
2430 sdispl(ip) = ny*nz*(boout(1, 1, ip) - 1)
2433 nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
2434 my = maxval(boin(2, 2, :) - boin(1, 2, :) + 1)
2439 ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
2440 rcount(ip) = nx*nz*ny
2441 rdispl(ip) = nx*my*nz*ip
2445 rbuf => fft_scratch%rbuf5
2453 IF (my_id < num_threads)
THEN
2454 lb = (
SIZE(rbuf, 2)*my_id)/num_threads
2455 ub = (
SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2456 rbuf(:, lb:ub) = 0.0_dp
2460 CALL group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2467 ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
2469 is = boin(1, 2, ip) + iy - 1
2470 ir = iy + ny*(ixz - 1)
2471 sout(is, ixz) = rbuf(ir, ip)
2477 CALL timestop(handle)
2479 END SUBROUTINE cube_transpose_5
2490 SUBROUTINE cube_transpose_6(cin, group, boin, boout, sout, fft_scratch)
2492 COMPLEX(KIND=dp),
CONTIGUOUS,
DIMENSION(:, :), &
2496 INTEGER,
CONTIGUOUS,
DIMENSION(:, :, 0:),
INTENT(IN) :: boin, boout
2497 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(OUT),
CONTIGUOUS :: sout
2500 CHARACTER(len=*),
PARAMETER :: routinen =
'cube_transpose_6'
2502 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER,
CONTIGUOUS :: rbuf
2503 INTEGER :: handle, ip, ir, iy, izx, lb, mip, my, &
2504 my_id, np, num_threads, nx, ny, nz, ub
2505 INTEGER,
DIMENSION(:),
POINTER,
CONTIGUOUS :: rcount, rdispl, scount, sdispl
2507 CALL timeset(routinen, handle)
2509 np = fft_scratch%sizes%numtask
2510 mip = fft_scratch%mip
2511 scount => fft_scratch%scount
2512 rcount => fft_scratch%rcount
2513 sdispl => fft_scratch%sdispl
2514 rdispl => fft_scratch%rdispl
2516 nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2517 nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2518 my = maxval(boout(2, 2, :) - boout(1, 2, :) + 1)
2520 rbuf => fft_scratch%rbuf5
2528 IF (my_id < num_threads)
THEN
2529 lb = (
SIZE(rbuf, 2)*my_id)/num_threads
2530 ub = (
SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2531 rbuf(:, lb:ub) = 0.0_dp
2538 ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
2539 DO iy = boout(1, 2, ip), boout(2, 2, ip)
2540 ir = iy - boout(1, 2, ip) + 1 + (izx - 1)*ny
2541 rbuf(ir, ip) = cin(iy, izx)
2548 ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
2549 scount(ip) = nx*ny*nz
2550 sdispl(ip) = nx*nz*my*ip
2554 ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2559 nx = boin(2, 1, ip) - boin(1, 1, ip) + 1
2560 rcount(ip) = nx*ny*nz
2561 rdispl(ip) = ny*nz*(boin(1, 1, ip) - 1)
2565 CALL group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2567 CALL timestop(handle)
2569 END SUBROUTINE cube_transpose_6
2576 CALL release_fft_scratch_pool()
2592 SUBROUTINE deallocate_fft_scratch_type(fft_scratch)
2595#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2597 COMPLEX(KIND=dp),
POINTER :: dummy_ptr_z
2601 IF (
ASSOCIATED(fft_scratch%ziptr))
THEN
2602 CALL fft_dealloc(fft_scratch%ziptr)
2604 IF (
ASSOCIATED(fft_scratch%zoptr))
THEN
2605 CALL fft_dealloc(fft_scratch%zoptr)
2607 IF (
ASSOCIATED(fft_scratch%p1buf))
THEN
2608 CALL fft_dealloc(fft_scratch%p1buf)
2610 IF (
ASSOCIATED(fft_scratch%p2buf))
THEN
2611#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2612 dummy_ptr_z => fft_scratch%p2buf(1, 1)
2615 CALL fft_dealloc(fft_scratch%p2buf)
2618 IF (
ASSOCIATED(fft_scratch%p3buf))
THEN
2619#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2620 dummy_ptr_z => fft_scratch%p3buf(1, 1)
2623 CALL fft_dealloc(fft_scratch%p3buf)
2626 IF (
ASSOCIATED(fft_scratch%p4buf))
THEN
2627#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2628 dummy_ptr_z => fft_scratch%p4buf(1, 1)
2631 CALL fft_dealloc(fft_scratch%p4buf)
2634 IF (
ASSOCIATED(fft_scratch%p5buf))
THEN
2635#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2636 dummy_ptr_z => fft_scratch%p5buf(1, 1)
2639 CALL fft_dealloc(fft_scratch%p5buf)
2642 IF (
ASSOCIATED(fft_scratch%p6buf))
THEN
2643 CALL fft_dealloc(fft_scratch%p6buf)
2645 IF (
ASSOCIATED(fft_scratch%p7buf))
THEN
2646#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2647 dummy_ptr_z => fft_scratch%p7buf(1, 1)
2650 CALL fft_dealloc(fft_scratch%p7buf)
2653 IF (
ASSOCIATED(fft_scratch%r1buf))
THEN
2654#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2655 dummy_ptr_z => fft_scratch%r1buf(1, 1)
2658 CALL fft_dealloc(fft_scratch%r1buf)
2661 IF (
ASSOCIATED(fft_scratch%r2buf))
THEN
2662 CALL fft_dealloc(fft_scratch%r2buf)
2664 IF (
ASSOCIATED(fft_scratch%tbuf))
THEN
2665#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2666 dummy_ptr_z => fft_scratch%tbuf(1, 1, 1)
2669 CALL fft_dealloc(fft_scratch%tbuf)
2672 IF (
ASSOCIATED(fft_scratch%a1buf))
THEN
2673 CALL fft_dealloc(fft_scratch%a1buf)
2675 IF (
ASSOCIATED(fft_scratch%a2buf))
THEN
2676 CALL fft_dealloc(fft_scratch%a2buf)
2678 IF (
ASSOCIATED(fft_scratch%a3buf))
THEN
2679 CALL fft_dealloc(fft_scratch%a3buf)
2681 IF (
ASSOCIATED(fft_scratch%a4buf))
THEN
2682 CALL fft_dealloc(fft_scratch%a4buf)
2684 IF (
ASSOCIATED(fft_scratch%a5buf))
THEN
2685 CALL fft_dealloc(fft_scratch%a5buf)
2687 IF (
ASSOCIATED(fft_scratch%a6buf))
THEN
2688 CALL fft_dealloc(fft_scratch%a6buf)
2690 IF (
ASSOCIATED(fft_scratch%scount))
THEN
2691 DEALLOCATE (fft_scratch%scount, fft_scratch%rcount, &
2692 fft_scratch%sdispl, fft_scratch%rdispl)
2694 IF (
ASSOCIATED(fft_scratch%rr))
THEN
2695 DEALLOCATE (fft_scratch%rr)
2697 IF (
ASSOCIATED(fft_scratch%xzbuf))
THEN
2698 DEALLOCATE (fft_scratch%xzbuf)
2700 IF (
ASSOCIATED(fft_scratch%yzbuf))
THEN
2701 DEALLOCATE (fft_scratch%yzbuf)
2703 IF (
ASSOCIATED(fft_scratch%xzbuf_sgl))
THEN
2704 DEALLOCATE (fft_scratch%xzbuf_sgl)
2706 IF (
ASSOCIATED(fft_scratch%yzbuf_sgl))
THEN
2707 DEALLOCATE (fft_scratch%yzbuf_sgl)
2709 IF (
ASSOCIATED(fft_scratch%ss))
THEN
2710 DEALLOCATE (fft_scratch%ss)
2712 IF (
ASSOCIATED(fft_scratch%tt))
THEN
2713 DEALLOCATE (fft_scratch%tt)
2715 IF (
ASSOCIATED(fft_scratch%pgrid))
THEN
2716 DEALLOCATE (fft_scratch%pgrid)
2718 IF (
ASSOCIATED(fft_scratch%pgcube))
THEN
2719 DEALLOCATE (fft_scratch%pgcube)
2721 IF (
ASSOCIATED(fft_scratch%xcor))
THEN
2722 DEALLOCATE (fft_scratch%xcor, fft_scratch%zcor)
2724 IF (
ASSOCIATED(fft_scratch%pzcoord))
THEN
2725 DEALLOCATE (fft_scratch%pzcoord)
2727 IF (
ASSOCIATED(fft_scratch%xzcount))
THEN
2728 DEALLOCATE (fft_scratch%xzcount, fft_scratch%yzcount)
2729 DEALLOCATE (fft_scratch%xzdispl, fft_scratch%yzdispl)
2731 fft_scratch%rsratio = 1._dp
2733 IF (
ASSOCIATED(fft_scratch%rbuf1))
THEN
2734 DEALLOCATE (fft_scratch%rbuf1)
2736 IF (
ASSOCIATED(fft_scratch%rbuf2))
THEN
2737 DEALLOCATE (fft_scratch%rbuf2)
2739 IF (
ASSOCIATED(fft_scratch%rbuf3))
THEN
2740 DEALLOCATE (fft_scratch%rbuf3)
2742 IF (
ASSOCIATED(fft_scratch%rbuf4))
THEN
2743 DEALLOCATE (fft_scratch%rbuf4)
2745 IF (
ASSOCIATED(fft_scratch%rbuf5))
THEN
2746 DEALLOCATE (fft_scratch%rbuf5)
2748 IF (
ASSOCIATED(fft_scratch%rbuf6))
THEN
2749 DEALLOCATE (fft_scratch%rbuf6)
2753 CALL fft_scratch%cart_sub_comm(1)%free()
2756 CALL fft_scratch%cart_sub_comm(2)%free()
2766 END SUBROUTINE deallocate_fft_scratch_type
2771 SUBROUTINE release_fft_scratch_pool()
2780 IF (
ASSOCIATED(fft_scratch))
THEN
2781 fft_scratch_current => fft_scratch
2782 fft_scratch => fft_scratch_current%fft_scratch_next
2783 NULLIFY (fft_scratch_current%fft_scratch_next)
2785 CALL deallocate_fft_scratch_type(fft_scratch_current%fft_scratch)
2787 DEALLOCATE (fft_scratch_current%fft_scratch)
2788 DEALLOCATE (fft_scratch_current)
2796 END SUBROUTINE release_fft_scratch_pool
2801 SUBROUTINE resize_fft_scratch_pool()
2803 INTEGER :: last_tick, nscratch
2808 last_tick = huge(last_tick)
2809 NULLIFY (fft_scratch_old)
2814 IF (
ASSOCIATED(fft_scratch_current))
THEN
2815 nscratch = nscratch + 1
2817 IF (.NOT. fft_scratch_current%fft_scratch%in_use)
THEN
2818 IF (fft_scratch_current%fft_scratch%last_tick < last_tick)
THEN
2819 last_tick = fft_scratch_current%fft_scratch%last_tick
2820 fft_scratch_old => fft_scratch_current
2823 fft_scratch_current => fft_scratch_current%fft_scratch_next
2832 IF (
ASSOCIATED(fft_scratch_old))
THEN
2835 IF (
ASSOCIATED(fft_scratch_current))
THEN
2837 IF (
ASSOCIATED(fft_scratch_current%fft_scratch_next, fft_scratch_old))
THEN
2839 fft_scratch_current%fft_scratch_next => fft_scratch_old%fft_scratch_next
2842 CALL deallocate_fft_scratch_type(fft_scratch_old%fft_scratch)
2843 DEALLOCATE (fft_scratch_old%fft_scratch)
2844 DEALLOCATE (fft_scratch_old)
2847 fft_scratch_current => fft_scratch_current%fft_scratch_next
2855 cpwarn(
"The number of the scratches exceeded the limit, but none could be deallocated")
2859 END SUBROUTINE resize_fft_scratch_pool
2870 INTEGER,
INTENT(IN) :: tf_type
2871 INTEGER,
DIMENSION(:),
INTENT(IN) :: n
2873 OPTIONAL :: fft_sizes
2875 CHARACTER(len=*),
PARAMETER :: routinen =
'get_fft_scratch'
2877 INTEGER :: coord(2), dim(2), handle, i, ix, iz, lg, lmax, m1, m2, &
2878 mcx2, mcy3, mcz1, mcz2, mg, mmax, mx1, mx2, my1, my3, mz1, mz2, mz3, &
2879 nbx, nbz, nm, nmax, nmray, np, nx, ny, nyzray, nz, pos(2)
2880 INTEGER,
DIMENSION(3) :: pcoord
2882 LOGICAL,
DIMENSION(2) :: dims
2886#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2888 INTEGER(KIND=C_SIZE_T) :: length
2889 TYPE(c_ptr) :: cptr_r1buf, cptr_tbuf, &
2890 cptr_p2buf, cptr_p3buf, cptr_p4buf, cptr_p5buf, cptr_p7buf
2892 CALL timeset(routinen, handle)
2896 CALL resize_fft_scratch_pool()
2903 IF (
ASSOCIATED(fft_scratch_current))
THEN
2904 IF (fft_scratch_current%fft_scratch%in_use)
THEN
2905 fft_scratch_last => fft_scratch_current
2906 fft_scratch_current => fft_scratch_current%fft_scratch_next
2909 IF (tf_type /= fft_scratch_current%fft_scratch%tf_type)
THEN
2910 fft_scratch_last => fft_scratch_current
2911 fft_scratch_current => fft_scratch_current%fft_scratch_next
2914 IF (.NOT. all(n == fft_scratch_current%fft_scratch%nfft))
THEN
2915 fft_scratch_last => fft_scratch_current
2916 fft_scratch_current => fft_scratch_current%fft_scratch_next
2919 IF (
PRESENT(fft_sizes))
THEN
2920 IF (fft_sizes%rs_group /= fft_scratch_current%fft_scratch%group)
THEN
2921 fft_scratch_last => fft_scratch_current
2922 fft_scratch_current => fft_scratch_current%fft_scratch_next
2925 CALL is_equal(fft_sizes, fft_scratch_current%fft_scratch%sizes, equal)
2926 IF (.NOT. equal)
THEN
2927 fft_scratch_last => fft_scratch_current
2928 fft_scratch_current => fft_scratch_current%fft_scratch_next
2933 fft_scratch => fft_scratch_current%fft_scratch
2934 fft_scratch_current%fft_scratch%in_use = .true.
2939 ALLOCATE (fft_scratch_new)
2940 ALLOCATE (fft_scratch_new%fft_scratch)
2942 IF (tf_type .NE. 400)
THEN
2943 fft_scratch_new%fft_scratch%sizes = fft_sizes
2944 np = fft_sizes%numtask
2945 ALLOCATE (fft_scratch_new%fft_scratch%scount(0:np - 1), fft_scratch_new%fft_scratch%rcount(0:np - 1), &
2946 fft_scratch_new%fft_scratch%sdispl(0:np - 1), fft_scratch_new%fft_scratch%rdispl(0:np - 1), &
2947 fft_scratch_new%fft_scratch%pgcube(0:np - 1, 2))
2950 SELECT CASE (tf_type)
2952 cpabort(
"Invalid scratch type.")
2954 cpassert(
PRESENT(fft_sizes))
2961 CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
2962 CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
2963 CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx2*mz2, n(2)])
2964 CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx2*mz2])
2965 CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
2966 CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
2967 fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
2969 dim = fft_sizes%rs_group%num_pe_cart
2970 pos = fft_sizes%rs_group%mepos_cart
2971 fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
2972 fft_scratch_new%fft_scratch%dim = dim
2973 fft_scratch_new%fft_scratch%pos = pos
2974 mcz1 = fft_sizes%mcz1
2975 mcx2 = fft_sizes%mcx2
2976 mcz2 = fft_sizes%mcz2
2977 mcy3 = fft_sizes%mcy3
2978 ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:dim(2) - 1))
2979 ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:dim(2) - 1))
2980 ALLOCATE (fft_scratch_new%fft_scratch%rbuf3(mx2*mz3*mcy3, 0:dim(1) - 1))
2981 ALLOCATE (fft_scratch_new%fft_scratch%rbuf4(mx2*mz2*mcy3, 0:dim(1) - 1))
2983 dims = (/.true., .false./)
2984 CALL fft_scratch_new%fft_scratch%cart_sub_comm(1)%from_sub(fft_sizes%rs_group, dims)
2985 dims = (/.false., .true./)
2986 CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
2989 DO i = 0, dim(1) - 1
2990 coord = (/i, pos(2)/)
2991 CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 1))
2993 DO i = 0, dim(2) - 1
2994 coord = (/pos(1), i/)
2995 CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
3000 fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a2buf,
fft_plan_style)
3002 fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf,
fft_plan_style)
3004 fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf,
fft_plan_style)
3006 fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf,
fft_plan_style)
3008 fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf,
fft_plan_style)
3010 fft_scratch_new%fft_scratch%a2buf, fft_scratch_new%fft_scratch%a1buf,
fft_plan_style)
3013 cpassert(
PRESENT(fft_sizes))
3019 CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
3020 CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
3021 fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
3022 CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx1*mz1, n(2)])
3023 CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx1*mz1])
3024 CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
3025 CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
3027 dim = fft_sizes%rs_group%num_pe_cart
3028 pos = fft_sizes%rs_group%mepos_cart
3029 fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
3030 fft_scratch_new%fft_scratch%dim = dim
3031 fft_scratch_new%fft_scratch%pos = pos
3032 mcy3 = fft_sizes%mcy3
3033 ALLOCATE (fft_scratch_new%fft_scratch%rbuf5(mx1*mz3*mcy3, 0:dim(1) - 1))
3034 ALLOCATE (fft_scratch_new%fft_scratch%rbuf6(mx1*mz1*mcy3, 0:dim(1) - 1))
3038 fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a3buf,
fft_plan_style)
3040 fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf,
fft_plan_style)
3042 fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf,
fft_plan_style)
3044 fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf,
fft_plan_style)
3046 fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf,
fft_plan_style)
3048 fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a1buf,
fft_plan_style)
3051 cpassert(
PRESENT(fft_sizes))
3056 lmax = fft_sizes%lmax
3057 mmax = fft_sizes%mmax
3060 np = fft_sizes%numtask
3061 nmray = fft_sizes%nmray
3062 nyzray = fft_sizes%nyzray
3063#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
3064 length = int(2*
dp_size*max(mmax, 1)*max(lmax, 1), kind=c_size_t)
3067 CALL c_f_pointer(cptr_r1buf, fft_scratch_new%fft_scratch%r1buf, (/max(mmax, 1), max(lmax, 1)/))
3068 length = int(2*
dp_size*max(ny, 1)*max(nz, 1)*max(nx, 1), kind=c_size_t)
3071 CALL c_f_pointer(cptr_tbuf, fft_scratch_new%fft_scratch%tbuf, (/max(ny, 1), max(nz, 1), max(nx, 1)/))
3073 CALL fft_alloc(fft_scratch_new%fft_scratch%r1buf, [mmax, lmax])
3074 CALL fft_alloc(fft_scratch_new%fft_scratch%tbuf, [ny, nz, nx])
3076 fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
3077 CALL fft_alloc(fft_scratch_new%fft_scratch%r2buf, [lg, mg])
3079 IF (alltoall_sgl)
THEN
3080 ALLOCATE (fft_scratch_new%fft_scratch%ss(mmax, lmax))
3081 ALLOCATE (fft_scratch_new%fft_scratch%tt(nm, 0:np - 1))
3083 ALLOCATE (fft_scratch_new%fft_scratch%rr(nm, 0:np - 1))
3088 fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf,
fft_plan_style)
3090 fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf,
fft_plan_style)
3092 fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%r2buf,
fft_plan_style)
3094 fft_scratch_new%fft_scratch%r2buf, fft_scratch_new%fft_scratch%r1buf,
fft_plan_style)
3096 fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf,
fft_plan_style)
3098 fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf,
fft_plan_style)
3101 cpassert(
PRESENT(fft_sizes))
3106 mcx2 = fft_sizes%mcx2
3109 nmax = fft_sizes%nmax
3110 nmray = fft_sizes%nmray
3111 nyzray = fft_sizes%nyzray
3112 m1 = fft_sizes%r_dim(1)
3113 m2 = fft_sizes%r_dim(2)
3116 CALL fft_alloc(fft_scratch_new%fft_scratch%p1buf, [mx1*my1, n(3)])
3117 CALL fft_alloc(fft_scratch_new%fft_scratch%p6buf, [lg, mg])
3118#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
3119 length = int(2*
dp_size*max(n(3), 1)*max(mx1*my1, 1), kind=c_size_t)
3122 CALL c_f_pointer(cptr_p2buf, fft_scratch_new%fft_scratch%p2buf, (/max(n(3), 1), max(mx1*my1, 1)/))
3123 length = int(2*
dp_size*max(mx2*mz2, 1)*max(n(2), 1), kind=c_size_t)
3126 CALL c_f_pointer(cptr_p3buf, fft_scratch_new%fft_scratch%p3buf, (/max(mx2*mz2, 1), max(n(2), 1)/))
3127 length = int(2*
dp_size*max(n(2), 1)*max(mx2*mz2, 1), kind=c_size_t)
3130 CALL c_f_pointer(cptr_p4buf, fft_scratch_new%fft_scratch%p4buf, (/max(n(2), 1), max(mx2*mz2, 1)/))
3131 length = int(2*
dp_size*max(nyzray, 1)*max(n(1), 1), kind=c_size_t)
3134 CALL c_f_pointer(cptr_p5buf, fft_scratch_new%fft_scratch%p5buf, (/max(nyzray, 1), max(n(1), 1)/))
3135 length = int(2*
dp_size*max(mg, 1)*max(lg, 1), kind=c_size_t)
3138 CALL c_f_pointer(cptr_p7buf, fft_scratch_new%fft_scratch%p7buf, (/max(mg, 1), max(lg, 1)/))
3140 CALL fft_alloc(fft_scratch_new%fft_scratch%p2buf, [n(3), mx1*my1])
3141 CALL fft_alloc(fft_scratch_new%fft_scratch%p3buf, [mx2*mz2, n(2)])
3142 CALL fft_alloc(fft_scratch_new%fft_scratch%p4buf, [n(2), mx2*mz2])
3143 CALL fft_alloc(fft_scratch_new%fft_scratch%p5buf, [nyzray, n(1)])
3144 CALL fft_alloc(fft_scratch_new%fft_scratch%p7buf, [mg, lg])
3146 IF (alltoall_sgl)
THEN
3147 ALLOCATE (fft_scratch_new%fft_scratch%yzbuf_sgl(mg*lg))
3148 ALLOCATE (fft_scratch_new%fft_scratch%xzbuf_sgl(n(2)*mx2*mz2))
3150 ALLOCATE (fft_scratch_new%fft_scratch%yzbuf(mg*lg))
3151 ALLOCATE (fft_scratch_new%fft_scratch%xzbuf(n(2)*mx2*mz2))
3153 ALLOCATE (fft_scratch_new%fft_scratch%pgrid(0:m1 - 1, 0:m2 - 1))
3154 ALLOCATE (fft_scratch_new%fft_scratch%xcor(nbx))
3155 ALLOCATE (fft_scratch_new%fft_scratch%zcor(nbz))
3156 ALLOCATE (fft_scratch_new%fft_scratch%pzcoord(0:np - 1))
3157 ALLOCATE (fft_scratch_new%fft_scratch%xzcount(0:np - 1), &
3158 fft_scratch_new%fft_scratch%yzcount(0:np - 1))
3159 ALLOCATE (fft_scratch_new%fft_scratch%xzdispl(0:np - 1), &
3160 fft_scratch_new%fft_scratch%yzdispl(0:np - 1))
3161 fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
3163 dim = fft_sizes%rs_group%num_pe_cart
3164 pos = fft_sizes%rs_group%mepos_cart
3165 fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
3166 fft_scratch_new%fft_scratch%dim = dim
3167 fft_scratch_new%fft_scratch%pos = pos
3168 mcz1 = fft_sizes%mcz1
3169 mcz2 = fft_sizes%mcz2
3170 ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:dim(2) - 1))
3171 ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:dim(2) - 1))
3173 dims = (/.false., .true./)
3174 CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
3177 DO i = 0, dim(2) - 1
3178 coord = (/pos(1), i/)
3179 CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
3186 CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgrid(ix, iz))
3192 CALL fft_sizes%rs_group%coords(i, pcoord)
3193 fft_scratch_new%fft_scratch%pzcoord(i) = pcoord(2)
3198 fft_scratch_new%fft_scratch%p1buf, fft_scratch_new%fft_scratch%p2buf,
fft_plan_style)
3200 fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p4buf,
fft_plan_style)
3202 fft_scratch_new%fft_scratch%p5buf, fft_scratch_new%fft_scratch%p6buf,
fft_plan_style)
3204 fft_scratch_new%fft_scratch%p6buf, fft_scratch_new%fft_scratch%p7buf,
fft_plan_style)
3206 fft_scratch_new%fft_scratch%p4buf, fft_scratch_new%fft_scratch%p3buf,
fft_plan_style)
3208 fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p1buf,
fft_plan_style)
3212 CALL fft_alloc(fft_scratch_new%fft_scratch%ziptr, n)
3213 CALL fft_alloc(fft_scratch_new%fft_scratch%zoptr, n)
3217 fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr,
fft_plan_style)
3219 fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr,
fft_plan_style)
3222 fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr,
fft_plan_style)
3224 fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr,
fft_plan_style)
3228 NULLIFY (fft_scratch_new%fft_scratch_next)
3229 fft_scratch_new%fft_scratch%fft_scratch_id = &
3230 fft_scratch_last%fft_scratch%fft_scratch_id + 1
3231 fft_scratch_new%fft_scratch%in_use = .true.
3232 fft_scratch_new%fft_scratch%nfft = n
3233 fft_scratch_last%fft_scratch_next => fft_scratch_new
3234 fft_scratch_new%fft_scratch%tf_type = tf_type
3235 fft_scratch => fft_scratch_new%fft_scratch
3244 CALL timestop(handle)
3256 INTEGER :: scratch_id
3259 scratch_id = fft_scratch%fft_scratch_id
3263 IF (
ASSOCIATED(fft_scratch_current))
THEN
3264 IF (scratch_id == fft_scratch_current%fft_scratch%fft_scratch_id)
THEN
3265 fft_scratch%in_use = .false.
3266 NULLIFY (fft_scratch)
3269 fft_scratch_current => fft_scratch_current%fft_scratch_next
3272 cpabort(
"Invalid scratch type.")
3289 SUBROUTINE sparse_alltoall(rs, scount, sdispl, rq, rcount, rdispl, group)
3290 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER :: rs
3291 INTEGER,
DIMENSION(:),
POINTER :: scount, sdispl
3292 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER :: rq
3293 INTEGER,
DIMENSION(:),
POINTER :: rcount, rdispl
3297 COMPLEX(KIND=dp),
DIMENSION(:),
POINTER :: msgin, msgout
3298 INTEGER :: ip, n, nr, ns, pos
3304 ALLOCATE (sreq(0:n - 1))
3305 ALLOCATE (rreq(0:n - 1))
3308 IF (rcount(ip) == 0) cycle
3309 IF (ip == pos) cycle
3310 msgout => rq(rdispl(ip) + 1:rdispl(ip) + rcount(ip))
3311 CALL group%irecv(msgout, ip, rreq(nr))
3316 IF (scount(ip) == 0) cycle
3317 IF (ip == pos) cycle
3318 msgin => rs(sdispl(ip) + 1:sdispl(ip) + scount(ip))
3319 CALL group%isend(msgin, ip, sreq(ns))
3322 IF (rcount(pos) /= 0)
THEN
3323 IF (rcount(pos) /= scount(pos)) cpabort(
"Invalid count.")
3324 rq(rdispl(pos) + 1:rdispl(pos) + rcount(pos)) = rs(sdispl(pos) + 1:sdispl(pos) + scount(pos))
3332 END SUBROUTINE sparse_alltoall
3341 SUBROUTINE is_equal(fft_size_1, fft_size_2, equal)
3347 equal = equal .AND. fft_size_1%nx == fft_size_2%nx
3348 equal = equal .AND. fft_size_1%ny == fft_size_2%ny
3349 equal = equal .AND. fft_size_1%nz == fft_size_2%nz
3351 equal = equal .AND. fft_size_1%lmax == fft_size_2%lmax
3352 equal = equal .AND. fft_size_1%mmax == fft_size_2%mmax
3353 equal = equal .AND. fft_size_1%nmax == fft_size_2%nmax
3355 equal = equal .AND. fft_size_1%mx1 == fft_size_2%mx1
3356 equal = equal .AND. fft_size_1%mx2 == fft_size_2%mx2
3357 equal = equal .AND. fft_size_1%mx3 == fft_size_2%mx3
3359 equal = equal .AND. fft_size_1%my1 == fft_size_2%my1
3360 equal = equal .AND. fft_size_1%my2 == fft_size_2%my2
3361 equal = equal .AND. fft_size_1%my3 == fft_size_2%my3
3363 equal = equal .AND. fft_size_1%mcz1 == fft_size_2%mcz1
3364 equal = equal .AND. fft_size_1%mcx2 == fft_size_2%mcx2
3365 equal = equal .AND. fft_size_1%mcz2 == fft_size_2%mcz2
3366 equal = equal .AND. fft_size_1%mcy3 == fft_size_2%mcy3
3368 equal = equal .AND. fft_size_1%lg == fft_size_2%lg
3369 equal = equal .AND. fft_size_1%mg == fft_size_2%mg
3371 equal = equal .AND. fft_size_1%nbx == fft_size_2%nbx
3372 equal = equal .AND. fft_size_1%nbz == fft_size_2%nbz
3374 equal = equal .AND. fft_size_1%nmray == fft_size_2%nmray
3375 equal = equal .AND. fft_size_1%nyzray == fft_size_2%nyzray
3377 equal = equal .AND. fft_size_1%rs_group == fft_size_2%rs_group
3379 equal = equal .AND. all(fft_size_1%g_pos == fft_size_2%g_pos)
3380 equal = equal .AND. all(fft_size_1%r_pos == fft_size_2%r_pos)
3381 equal = equal .AND. all(fft_size_1%r_dim == fft_size_2%r_dim)
3383 equal = equal .AND. fft_size_1%numtask == fft_size_2%numtask
3385 END SUBROUTINE is_equal
various routines to log and control the output. The idea is that decisions about where to log should ...
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
subroutine, public fft_do_init(fft_type, wisdom_file)
...
subroutine, public fft_3d(plan, scale, zin, zout, stat)
...
subroutine, public fft_create_plan_1dm(plan, fft_type, fsign, trans, n, m, zin, zout, plan_style)
...
subroutine, public fft_destroy_plan(plan)
...
integer function, public fft_library(fftlib)
Interface to FFT libraries.
subroutine, public fft_1dm(plan, zin, zout, scale, stat)
...
subroutine, public fft_get_lengths(fft_type, data, max_length)
...
subroutine, public fft_create_plan_3d(plan, fft_type, fft_in_place, fsign, n, zin, zout, plan_style)
...
subroutine, public fft_do_cleanup(fft_type, wisdom_file, ionode)
...
Type to store data about a (1D or 3D) FFT, including FFTW plan.
Defines the basic variable types.
integer, parameter, public dp_size
integer, parameter, public dp
integer, parameter, public sp
Definition of mathematical constants and functions.
complex(kind=dp), parameter, public z_zero
Interface to the message passing library MPI.
type(mp_comm_type), parameter, public mp_comm_null
Fortran API for the offload package, which is written in C.
integer function, public offload_free_pinned_mem(buffer)
free pinned memory
integer function, public offload_malloc_pinned_mem(buffer, length)
allocate pinned memory.