45#include "../base/base_uses.f90"
76 LOGICAL,
PRIVATE,
PARAMETER :: debug_this_module = .false.
77 CHARACTER(len=*),
PARAMETER,
PRIVATE :: modulen =
'realspace_grid_types'
82 INTEGER :: distribution_layout(3) = -1
83 REAL(kind=
dp) :: memory_factor = 0.0_dp
84 LOGICAL :: lock_distribution = .false.
86 REAL(kind=
dp) :: halo_reduction_factor = 1.0_dp
93 INTEGER :: ref_count = 0
95 INTEGER(int_8) :: ngpts = 0_int_8
96 INTEGER,
DIMENSION(3) :: npts = 0
97 INTEGER,
DIMENSION(3) :: lb = 0
98 INTEGER,
DIMENSION(3) :: ub = 0
100 INTEGER :: border = 0
102 INTEGER,
DIMENSION(3) :: perd = -1
103 REAL(kind=
dp),
DIMENSION(3, 3) :: dh = 0.0_dp
104 REAL(kind=
dp),
DIMENSION(3, 3) :: dh_inv = 0.0_dp
105 LOGICAL :: orthorhombic = .true.
107 LOGICAL :: parallel = .true.
108 LOGICAL :: distributed = .true.
112 INTEGER :: my_pos = -1
113 INTEGER :: group_size = 0
114 INTEGER,
DIMENSION(3) :: group_dim = -1
115 INTEGER,
DIMENSION(3) :: group_coor = -1
116 INTEGER,
DIMENSION(3) :: neighbours = -1
119 INTEGER,
DIMENSION(:, :),
ALLOCATABLE :: lb_global
120 INTEGER,
DIMENSION(:, :),
ALLOCATABLE :: ub_global
122 INTEGER,
DIMENSION(:, :),
ALLOCATABLE :: rank2coord
123 INTEGER,
DIMENSION(:, :, :),
ALLOCATABLE :: coord2rank
125 INTEGER,
DIMENSION(:),
ALLOCATABLE :: x2coord
126 INTEGER,
DIMENSION(:),
ALLOCATABLE :: y2coord
127 INTEGER,
DIMENSION(:),
ALLOCATABLE :: z2coord
129 INTEGER :: my_virtual_pos = -1
130 INTEGER,
DIMENSION(3) :: virtual_group_coor = -1
132 INTEGER,
DIMENSION(:),
ALLOCATABLE :: virtual2real, real2virtual
140 INTEGER :: ngpts_local = -1
141 INTEGER,
DIMENSION(3) :: npts_local = -1
142 INTEGER,
DIMENSION(3) :: lb_local = -1
143 INTEGER,
DIMENSION(3) :: ub_local = -1
144 INTEGER,
DIMENSION(3) :: lb_real = -1
145 INTEGER,
DIMENSION(3) :: ub_real = -1
147 INTEGER,
DIMENSION(:),
ALLOCATABLE :: px, py, pz
149 REAL(kind=
dp),
DIMENSION(:, :, :),
CONTIGUOUS,
POINTER :: r => null()
174 INTEGER,
INTENT(IN) :: rank_in
175 INTEGER,
DIMENSION(3),
INTENT(IN) :: shift
180 coord =
modulo(rs_desc%rank2coord(:, rank_in) + shift, rs_desc%group_dim)
181 rank_out = rs_desc%coord2rank(coord(1), coord(2), coord(3))
205 INTEGER,
INTENT(IN),
OPTIONAL :: border_points
207 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rs_grid_create_descriptor'
209 INTEGER :: border_size, dir, handle, i, j, k, l, &
210 lb(2), min_npts_real, n_slices(3), &
211 n_slices_tmp(3), nmin
213 REAL(kind=
dp) :: ratio, ratio_best, volume, volume_dist
215 CALL timeset(routinen, handle)
217 IF (
PRESENT(border_points))
THEN
218 border_size = border_points
225 CALL pw_grid%para%group%sync()
231 desc%dh_inv = pw_grid%dh_inv
232 desc%orthorhombic = pw_grid%orthorhombic
238 desc%npts = pw_grid%npts
239 desc%ngpts = product(int(desc%npts, kind=
int_8))
240 desc%lb = pw_grid%bounds(1, :)
241 desc%ub = pw_grid%bounds(2, :)
242 desc%border = border_size
243 IF (border_size == 0)
THEN
248 desc%parallel = .false.
249 desc%distributed = .false.
258 desc%group_size = pw_grid%para%group%num_pe
259 desc%npts = pw_grid%npts
260 desc%ngpts = product(int(desc%npts, kind=
int_8))
261 desc%lb = pw_grid%bounds(1, :)
262 desc%ub = pw_grid%bounds(2, :)
265 IF (border_size == 0)
THEN
266 nmin = (input_settings%nsmax + 1)/2
267 nmin = max(0, nint(nmin*input_settings%halo_reduction_factor))
276 IF (border_size > 0)
THEN
277 CALL cp_abort(__location__, &
278 "An explicit border size > 0 is not yet working for "// &
279 "replicated realspace grids. Request DISTRIBUTION_TYPE "// &
280 "distributed for RS_GRID explicitly.")
286 ratio_best = -huge(ratio_best)
289 DO k = 1, min(desc%npts(3), desc%group_size)
290 DO j = 1, min(desc%npts(2), desc%group_size)
291 i = min(desc%npts(1), desc%group_size/(j*k))
292 n_slices_tmp = (/i, j, k/)
295 IF (product(n_slices_tmp) .NE. desc%group_size) cycle
299 IF (.NOT. all(pack(n_slices_tmp == input_settings%distribution_layout, &
300 (/-1, -1, -1/) /= input_settings%distribution_layout) &
307 IF (n_slices_tmp(dir) > 1)
THEN
308 DO l = 0, n_slices_tmp(dir) - 1
309 lb =
get_limit(desc%npts(dir), n_slices_tmp(dir), l)
310 IF (lb(2) - lb(1) + 1 + 2*nmin > desc%npts(dir)) overlap = .true.
320 ratio = product(real(desc%npts, kind=
dp)/n_slices_tmp)/ &
321 product(real(desc%npts, kind=
dp)/n_slices_tmp + &
322 merge((/0.0, 0.0, 0.0/), 2*(/1.06*nmin, 1.05*nmin, 1.03*nmin/), n_slices_tmp == (/1, 1, 1/)))
323 IF (ratio > ratio_best)
THEN
325 n_slices = n_slices_tmp
334 volume = product(real(desc%npts, kind=
dp))
335 volume_dist = product(real(desc%npts, kind=
dp)/n_slices + &
336 merge((/0, 0, 0/), 2*(/nmin, nmin, nmin/), n_slices == (/1, 1, 1/)))
337 IF (volume < volume_dist*input_settings%memory_factor)
THEN
344 desc%group_dim(:) = n_slices(:)
345 CALL desc%group%from_dup(pw_grid%para%group)
346 desc%group_size = desc%group%num_pe
347 desc%my_pos = desc%group%mepos
349 IF (all(n_slices == 1))
THEN
352 desc%border = border_size
353 IF (border_size == 0)
THEN
358 desc%distributed = .false.
359 desc%parallel = .true.
360 desc%group_coor(:) = 0
361 desc%my_virtual_pos = 0
363 ALLOCATE (desc%virtual2real(0:desc%group_size - 1))
364 ALLOCATE (desc%real2virtual(0:desc%group_size - 1))
366 DO i = 0, desc%group_size - 1
367 desc%virtual2real(i) = i
368 desc%real2virtual(i) = i
373 IF (border_size == 0)
THEN
376 IF (n_slices(dir) > 1) desc%perd(dir) = 0
384 desc%parallel = .true.
385 desc%distributed = .true.
388 ALLOCATE (desc%rank2coord(3, 0:desc%group_size - 1))
389 ALLOCATE (desc%coord2rank(0:desc%group_dim(1) - 1, 0:desc%group_dim(2) - 1, 0:desc%group_dim(3) - 1))
390 ALLOCATE (desc%lb_global(3, 0:desc%group_size - 1))
391 ALLOCATE (desc%ub_global(3, 0:desc%group_size - 1))
392 ALLOCATE (desc%x2coord(desc%lb(1):desc%ub(1)))
393 ALLOCATE (desc%y2coord(desc%lb(2):desc%ub(2)))
394 ALLOCATE (desc%z2coord(desc%lb(3):desc%ub(3)))
396 DO i = 0, desc%group_size - 1
398 desc%rank2coord(1, i) = i/(desc%group_dim(2)*desc%group_dim(3))
399 desc%rank2coord(2, i) =
modulo(i, desc%group_dim(2)*desc%group_dim(3)) &
401 desc%rank2coord(3, i) =
modulo(i, desc%group_dim(3))
403 IF (i == desc%my_pos)
THEN
404 desc%group_coor = desc%rank2coord(:, i)
407 desc%coord2rank(desc%rank2coord(1, i), desc%rank2coord(2, i), desc%rank2coord(3, i)) = i
409 desc%lb_global(:, i) = desc%lb
410 desc%ub_global(:, i) = desc%ub
412 IF (desc%group_dim(dir) .GT. 1)
THEN
413 lb =
get_limit(desc%npts(dir), desc%group_dim(dir), desc%rank2coord(dir, i))
414 desc%lb_global(dir, i) = lb(1) + desc%lb(dir) - 1
415 desc%ub_global(dir, i) = lb(2) + desc%lb(dir) - 1
422 DO l = 0, desc%group_dim(dir) - 1
423 IF (desc%group_dim(dir) .GT. 1)
THEN
424 lb =
get_limit(desc%npts(dir), desc%group_dim(dir), l)
425 lb = lb + desc%lb(dir) - 1
432 desc%x2coord(lb(1):lb(2)) = l
434 desc%y2coord(lb(1):lb(2)) = l
436 desc%z2coord(lb(1):lb(2)) = l
443 desc%neighbours(dir) = 0
444 IF ((n_slices(dir) > 1) .OR. (border_size > 0))
THEN
445 min_npts_real = huge(0)
446 DO l = 0, n_slices(dir) - 1
447 lb =
get_limit(desc%npts(dir), n_slices(dir), l)
448 min_npts_real = min(lb(2) - lb(1) + 1, min_npts_real)
450 desc%neighbours(dir) = (desc%border + min_npts_real - 1)/min_npts_real
454 ALLOCATE (desc%virtual2real(0:desc%group_size - 1))
455 ALLOCATE (desc%real2virtual(0:desc%group_size - 1))
457 DO i = 0, desc%group_size - 1
458 desc%virtual2real(i) = i
459 desc%real2virtual(i) = i
462 desc%my_virtual_pos = desc%real2virtual(desc%my_pos)
463 desc%virtual_group_coor(:) = desc%rank2coord(:, desc%my_virtual_pos)
468 CALL timestop(handle)
482 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rs_grid_create'
486 CALL timeset(routinen, handle)
496 rs%lb_local = rs%lb_real - desc%border*(1 - desc%perd)
497 rs%ub_local = rs%ub_real + desc%border*(1 - desc%perd)
498 rs%npts_local = rs%ub_local - rs%lb_local + 1
499 rs%ngpts_local = product(rs%npts_local)
502 IF (all(rs%desc%group_dim == 1))
THEN
507 rs%lb_local = rs%lb_real - desc%border*(1 - desc%perd)
508 rs%ub_local = rs%ub_real + desc%border*(1 - desc%perd)
509 rs%npts_local = rs%ub_local - rs%lb_local + 1
510 rs%ngpts_local = product(rs%npts_local)
514 rs%lb_real = desc%lb_global(:, desc%my_virtual_pos)
515 rs%ub_real = desc%ub_global(:, desc%my_virtual_pos)
516 rs%lb_local = rs%lb_real - desc%border*(1 - desc%perd)
517 rs%ub_local = rs%ub_real + desc%border*(1 - desc%perd)
518 rs%npts_local = rs%ub_local - rs%lb_local + 1
519 rs%ngpts_local = product(rs%npts_local)
523 rs%r(rs%lb_local(1):rs%ub_local(1), &
524 rs%lb_local(2):rs%ub_local(2), &
525 rs%lb_local(3):rs%ub_local(3)) => rs%buffer%host_buffer
527 ALLOCATE (rs%px(desc%npts(1)))
528 ALLOCATE (rs%py(desc%npts(2)))
529 ALLOCATE (rs%pz(desc%npts(3)))
531 CALL timestop(handle)
554 INTEGER,
DIMENSION(:),
INTENT(IN) :: real2virtual
558 desc%real2virtual(:) = real2virtual
560 DO i = 0, desc%group_size - 1
561 desc%virtual2real(desc%real2virtual(i)) = i
564 desc%my_virtual_pos = desc%real2virtual(desc%my_pos)
566 IF (.NOT. all(desc%group_dim == 1))
THEN
567 desc%virtual_group_coor(:) = desc%rank2coord(:, desc%my_virtual_pos)
580 INTEGER,
INTENT(in) :: iounit
582 INTEGER :: dir, i, nn
583 REAL(kind=
dp) :: pp(3)
585 IF (rs%desc%parallel)
THEN
587 WRITE (iounit,
'(/,A,T71,I10)') &
588 " RS_GRID| Information for grid number ", rs%desc%pw%id_nr
590 WRITE (iounit,
'(A,I3,T30,2I8,T62,A,T71,I10)')
" RS_GRID| Bounds ", &
591 i, rs%desc%lb(i), rs%desc%ub(i),
"Points:", rs%desc%npts(i)
593 IF (.NOT. rs%desc%distributed)
THEN
594 WRITE (iounit,
'(A)')
" RS_GRID| Real space fully replicated"
595 WRITE (iounit,
'(A,T71,I10)') &
596 " RS_GRID| Group size ", rs%desc%group_dim(2)
599 IF (rs%desc%perd(dir) /= 1)
THEN
600 WRITE (iounit,
'(A,T71,I3,A)') &
601 " RS_GRID| Real space distribution over ", rs%desc%group_dim(dir),
" groups"
602 WRITE (iounit,
'(A,T71,I10)') &
603 " RS_GRID| Real space distribution along direction ", dir
604 WRITE (iounit,
'(A,T71,I10)') &
605 " RS_GRID| Border size ", rs%desc%border
610 IF (rs%desc%distributed)
THEN
612 IF (rs%desc%perd(dir) /= 1)
THEN
613 nn = rs%npts_local(dir)
614 CALL rs%desc%group%sum(nn)
615 pp(1) = real(nn, kind=
dp)/real(product(rs%desc%group_dim), kind=
dp)
616 nn = rs%npts_local(dir)
617 CALL rs%desc%group%max(nn)
618 pp(2) = real(nn, kind=
dp)
619 nn = rs%npts_local(dir)
620 CALL rs%desc%group%min(nn)
621 pp(3) = real(nn, kind=
dp)
623 WRITE (iounit,
'(A,T48,A)')
" RS_GRID| Distribution", &
625 WRITE (iounit,
'(A,T45,F12.1,2I12)')
" RS_GRID| Planes ", &
626 pp(1), nint(pp(2)), nint(pp(3))
634 WRITE (iounit,
'(/,A,T71,I10)') &
635 " RS_GRID| Information for grid number ", rs%desc%pw%id_nr
637 WRITE (iounit,
'(A,I3,T30,2I8,T62,A,T71,I10)')
" RS_GRID| Bounds ", &
638 i, rs%desc%lb(i), rs%desc%ub(i),
"Points:", rs%desc%npts(i)
655 CHARACTER(len=*),
PARAMETER :: routinen =
'transfer_rs2pw'
657 INTEGER :: handle, handle2, i
659 CALL timeset(routinen, handle2)
660 CALL timeset(routinen//
"_"//trim(adjustl(
cp_to_string(ceiling(pw%pw_grid%cutoff/10)*10))), handle)
662 IF (.NOT.
ASSOCIATED(rs%desc%pw, pw%pw_grid)) &
663 cpabort(
"Different rs and pw indentifiers")
665 IF (rs%desc%distributed)
THEN
666 CALL transfer_rs2pw_distributed(rs, pw)
667 ELSE IF (rs%desc%parallel)
THEN
668 CALL transfer_rs2pw_replicated(rs, pw)
670 IF (rs%desc%border == 0)
THEN
671 CALL dcopy(
SIZE(rs%r), rs%r, 1, pw%array, 1)
673 cpassert(lbound(pw%array, 3) .EQ. rs%lb_real(3))
675 DO i = rs%lb_real(3), rs%ub_real(3)
676 pw%array(:, :, i) = rs%r(rs%lb_real(1):rs%ub_real(1), &
677 rs%lb_real(2):rs%ub_real(2), i)
683 CALL timestop(handle)
684 CALL timestop(handle2)
698 CHARACTER(len=*),
PARAMETER :: routinen =
'transfer_pw2rs'
700 INTEGER :: handle, handle2, i, im, j, jm, k, km
702 CALL timeset(routinen, handle2)
703 CALL timeset(routinen//
"_"//trim(adjustl(
cp_to_string(ceiling(pw%pw_grid%cutoff/10)*10))), handle)
705 IF (.NOT.
ASSOCIATED(rs%desc%pw, pw%pw_grid)) &
706 cpabort(
"Different rs and pw indentifiers")
708 IF (rs%desc%distributed)
THEN
709 CALL transfer_pw2rs_distributed(rs, pw)
710 ELSE IF (rs%desc%parallel)
THEN
711 CALL transfer_pw2rs_replicated(rs, pw)
713 IF (rs%desc%border == 0)
THEN
714 CALL dcopy(
SIZE(rs%r), pw%array, 1, rs%r, 1)
719 DO k = rs%lb_local(3), rs%ub_local(3)
720 IF (k < rs%lb_real(3))
THEN
721 km = k + rs%desc%npts(3)
722 ELSE IF (k > rs%ub_real(3))
THEN
723 km = k - rs%desc%npts(3)
727 DO j = rs%lb_local(2), rs%ub_local(2)
728 IF (j < rs%lb_real(2))
THEN
729 jm = j + rs%desc%npts(2)
730 ELSE IF (j > rs%ub_real(2))
THEN
731 jm = j - rs%desc%npts(2)
735 DO i = rs%lb_local(1), rs%ub_local(1)
736 IF (i < rs%lb_real(1))
THEN
737 im = i + rs%desc%npts(1)
738 ELSE IF (i > rs%ub_real(1))
THEN
739 im = i - rs%desc%npts(1)
743 rs%r(i, j, k) = pw%array(im, jm, km)
751 CALL timestop(handle)
752 CALL timestop(handle2)
761 SUBROUTINE transfer_rs2pw_replicated(rs, pw)
765 INTEGER :: dest, ii, ip, ix, iy, iz, nma, nn, s(3), &
767 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: rcount
768 INTEGER,
DIMENSION(3) :: lb, ub
769 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: recvbuf, sendbuf, swaparray
771 associate(np => pw%pw_grid%para%group%num_pe, bo => pw%pw_grid%para%bo(1:2, 1:3, 0:pw%pw_grid%para%group%num_pe - 1, 1), &
772 pbo => pw%pw_grid%bounds, group => pw%pw_grid%para%group, mepos => pw%pw_grid%para%group%mepos, &
774 ALLOCATE (rcount(0:np - 1))
776 rcount(ip - 1) = product(bo(2, :, ip) - bo(1, :, ip) + 1)
778 nma = maxval(rcount(0:np - 1))
779 ALLOCATE (sendbuf(nma), recvbuf(nma))
780 sendbuf = 1.0e99_dp; recvbuf = 1.0e99_dp
785 dest =
modulo(mepos + 1, np)
786 source =
modulo(mepos - 1, np)
791 lb = pbo(1, :) + bo(1, :,
modulo(mepos - ip, np) + 1) - 1
792 ub = pbo(1, :) + bo(2, :,
modulo(mepos - ip, np) + 1) - 1
801 ii = (iz - lb(3))*s(1)*s(2) + (ix - lb(1)) + 1
803 sendbuf(ii) = sendbuf(ii) + grid(ix, iy, iz)
809 CALL group%sendrecv(sendbuf, dest, recvbuf, source, 13)
810 CALL move_alloc(sendbuf, swaparray)
811 CALL move_alloc(recvbuf, sendbuf)
812 CALL move_alloc(swaparray, recvbuf)
817 CALL dcopy(nn, sendbuf, 1, pw%array, 1)
823 END SUBROUTINE transfer_rs2pw_replicated
830 SUBROUTINE transfer_pw2rs_replicated(rs, pw)
832 TYPE(pw_r3d_rs_type),
INTENT(IN) :: pw
834 INTEGER :: dest, i, ii, im, ip, ix, iy, iz, j, jm, &
835 k, km, nma, nn, source
836 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: rcount
837 INTEGER,
DIMENSION(3) :: lb, ub
838 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:) :: recvbuf, sendbuf, swaparray
839 TYPE(mp_request_type),
DIMENSION(2) :: req
841 associate(np => pw%pw_grid%para%group%num_pe, bo => pw%pw_grid%para%bo(1:2, 1:3, 0:pw%pw_grid%para%group%num_pe - 1, 1), &
842 pbo => pw%pw_grid%bounds, group => pw%pw_grid%para%group, mepos => pw%pw_grid%para%group%mepos, &
844 ALLOCATE (rcount(0:np - 1))
846 rcount(ip - 1) = product(bo(2, :, ip) - bo(1, :, ip) + 1)
848 nma = maxval(rcount(0:np - 1))
849 ALLOCATE (sendbuf(nma), recvbuf(nma))
850 sendbuf = 1.0e99_dp; recvbuf = 1.0e99_dp
856 CALL dcopy(nn, pw%array, 1, sendbuf, 1)
858 dest =
modulo(mepos + 1, np)
859 source =
modulo(mepos - 1, np)
863 IF (ip .NE. np - 1)
THEN
864 CALL group%isendrecv(sendbuf, dest, recvbuf, source, &
867 lb = pbo(1, :) + bo(1, :,
modulo(mepos - ip, np) + 1) - 1
868 ub = pbo(1, :) + bo(2, :,
modulo(mepos - ip, np) + 1) - 1
876 grid(ix, iy, iz) = sendbuf(ii)
880 IF (ip .NE. np - 1)
THEN
883 CALL move_alloc(sendbuf, swaparray)
884 CALL move_alloc(recvbuf, sendbuf)
885 CALL move_alloc(swaparray, recvbuf)
887 IF (rs%desc%border > 0)
THEN
891 DO k = rs%lb_local(3), rs%ub_local(3)
892 IF (k < rs%lb_real(3))
THEN
893 km = k + rs%desc%npts(3)
894 ELSE IF (k > rs%ub_real(3))
THEN
895 km = k - rs%desc%npts(3)
899 DO j = rs%lb_local(2), rs%ub_local(2)
900 IF (j < rs%lb_real(2))
THEN
901 jm = j + rs%desc%npts(2)
902 ELSE IF (j > rs%ub_real(2))
THEN
903 jm = j - rs%desc%npts(2)
907 DO i = rs%lb_local(1), rs%ub_local(1)
908 IF (i < rs%lb_real(1))
THEN
909 im = i + rs%desc%npts(1)
910 ELSE IF (i > rs%ub_real(1))
THEN
911 im = i - rs%desc%npts(1)
915 rs%r(i, j, k) = rs%r(im, jm, km)
927 END SUBROUTINE transfer_pw2rs_replicated
951 SUBROUTINE transfer_rs2pw_distributed(rs, pw)
953 TYPE(pw_r3d_rs_type),
INTENT(IN) :: pw
955 CHARACTER(LEN=200) :: error_string
956 INTEGER :: completed, dest_down, dest_up, i, idir, j, k, lb, my_id, my_pw_rank, my_rs_rank, &
957 n_shifts, nn, num_threads, position, source_down, source_up, ub, x, y, z
958 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: dshifts, recv_disps, recv_sizes, &
959 send_disps, send_sizes, ushifts
960 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: bounds, recv_tasks, send_tasks
961 INTEGER,
DIMENSION(2) :: neighbours, pos
962 INTEGER,
DIMENSION(3) :: coords, lb_recv, lb_recv_down, lb_recv_up, lb_send, lb_send_down, &
963 lb_send_up, ub_recv, ub_recv_down, ub_recv_up, ub_send, ub_send_down, ub_send_up
964 LOGICAL,
DIMENSION(3) :: halo_swapped
965 REAL(kind=dp) :: pw_sum, rs_sum
966 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: recv_buf_3d_down, recv_buf_3d_up, &
967 send_buf_3d_down, send_buf_3d_up
968 TYPE(cp_1d_r_p_type),
ALLOCATABLE,
DIMENSION(:) :: recv_bufs, send_bufs
969 TYPE(mp_request_type),
ALLOCATABLE,
DIMENSION(:) :: recv_reqs, send_reqs
970 TYPE(mp_request_type),
DIMENSION(4) :: req
976 IF (debug_this_module)
THEN
977 rs_sum = accurate_sum(rs%r)*abs(det_3x3(rs%desc%dh))
978 CALL rs%desc%group%sum(rs_sum)
981 halo_swapped = .false.
988 IF (rs%desc%perd(idir) .NE. 1)
THEN
990 ALLOCATE (dshifts(0:rs%desc%neighbours(idir)))
991 ALLOCATE (ushifts(0:rs%desc%neighbours(idir)))
997 DO n_shifts = 1, min(rs%desc%neighbours(idir), rs%desc%group_dim(idir) - 1)
1001 position =
modulo(rs%desc%virtual_group_coor(idir) - n_shifts, rs%desc%group_dim(idir))
1002 neighbours = get_limit(rs%desc%npts(idir), rs%desc%group_dim(idir), position)
1003 dshifts(n_shifts) = dshifts(n_shifts - 1) + (neighbours(2) - neighbours(1) + 1)
1005 position =
modulo(rs%desc%virtual_group_coor(idir) + n_shifts, rs%desc%group_dim(idir))
1006 neighbours = get_limit(rs%desc%npts(idir), rs%desc%group_dim(idir), position)
1007 ushifts(n_shifts) = ushifts(n_shifts - 1) + (neighbours(2) - neighbours(1) + 1)
1013 CALL cart_shift(rs, idir, -1*n_shifts, source_down, dest_down)
1015 lb_send_down(:) = rs%lb_local(:)
1016 lb_recv_down(:) = rs%lb_local(:)
1017 ub_recv_down(:) = rs%ub_local(:)
1018 ub_send_down(:) = rs%ub_local(:)
1020 IF (dshifts(n_shifts - 1) .LE. rs%desc%border)
THEN
1021 ub_send_down(idir) = lb_send_down(idir) + rs%desc%border - 1 - dshifts(n_shifts - 1)
1022 lb_send_down(idir) = max(lb_send_down(idir), &
1023 lb_send_down(idir) + rs%desc%border - dshifts(n_shifts))
1025 ub_recv_down(idir) = ub_recv_down(idir) - rs%desc%border
1026 lb_recv_down(idir) = max(lb_recv_down(idir) + rs%desc%border, &
1027 ub_recv_down(idir) - rs%desc%border + 1 + ushifts(n_shifts - 1))
1029 lb_send_down(idir) = 0
1030 ub_send_down(idir) = -1
1031 lb_recv_down(idir) = 0
1032 ub_recv_down(idir) = -1
1036 IF (halo_swapped(i))
THEN
1037 lb_send_down(i) = rs%lb_real(i)
1038 ub_send_down(i) = rs%ub_real(i)
1039 lb_recv_down(i) = rs%lb_real(i)
1040 ub_recv_down(i) = rs%ub_real(i)
1045 ALLOCATE (recv_buf_3d_down(lb_recv_down(1):ub_recv_down(1), &
1046 lb_recv_down(2):ub_recv_down(2), lb_recv_down(3):ub_recv_down(3)))
1047 CALL rs%desc%group%irecv(recv_buf_3d_down, source_down, req(1))
1050 nn = product(ub_send_down - lb_send_down + 1)
1051 ALLOCATE (send_buf_3d_down(lb_send_down(1):ub_send_down(1), &
1052 lb_send_down(2):ub_send_down(2), lb_send_down(3):ub_send_down(3)))
1059 IF (my_id < num_threads)
THEN
1060 lb = lb_send_down(3) + ((ub_send_down(3) - lb_send_down(3) + 1)*my_id)/num_threads
1061 ub = lb_send_down(3) + ((ub_send_down(3) - lb_send_down(3) + 1)*(my_id + 1))/num_threads - 1
1063 send_buf_3d_down(lb_send_down(1):ub_send_down(1), lb_send_down(2):ub_send_down(2), &
1064 lb:ub) = rs%r(lb_send_down(1):ub_send_down(1), &
1065 lb_send_down(2):ub_send_down(2), lb:ub)
1069 CALL rs%desc%group%isend(send_buf_3d_down, dest_down, req(3))
1072 CALL cart_shift(rs, idir, n_shifts, source_up, dest_up)
1074 lb_send_up(:) = rs%lb_local(:)
1075 lb_recv_up(:) = rs%lb_local(:)
1076 ub_recv_up(:) = rs%ub_local(:)
1077 ub_send_up(:) = rs%ub_local(:)
1079 IF (ushifts(n_shifts - 1) .LE. rs%desc%border)
THEN
1081 lb_send_up(idir) = ub_send_up(idir) - rs%desc%border + 1 + ushifts(n_shifts - 1)
1082 ub_send_up(idir) = min(ub_send_up(idir), &
1083 ub_send_up(idir) - rs%desc%border + ushifts(n_shifts))
1085 lb_recv_up(idir) = lb_recv_up(idir) + rs%desc%border
1086 ub_recv_up(idir) = min(ub_recv_up(idir) - rs%desc%border, &
1087 lb_recv_up(idir) + rs%desc%border - 1 - dshifts(n_shifts - 1))
1089 lb_send_up(idir) = 0
1090 ub_send_up(idir) = -1
1091 lb_recv_up(idir) = 0
1092 ub_recv_up(idir) = -1
1096 IF (halo_swapped(i))
THEN
1097 lb_send_up(i) = rs%lb_real(i)
1098 ub_send_up(i) = rs%ub_real(i)
1099 lb_recv_up(i) = rs%lb_real(i)
1100 ub_recv_up(i) = rs%ub_real(i)
1105 ALLOCATE (recv_buf_3d_up(lb_recv_up(1):ub_recv_up(1), &
1106 lb_recv_up(2):ub_recv_up(2), lb_recv_up(3):ub_recv_up(3)))
1107 CALL rs%desc%group%irecv(recv_buf_3d_up, source_up, req(2))
1110 nn = product(ub_send_up - lb_send_up + 1)
1111 ALLOCATE (send_buf_3d_up(lb_send_up(1):ub_send_up(1), &
1112 lb_send_up(2):ub_send_up(2), lb_send_up(3):ub_send_up(3)))
1119 IF (my_id < num_threads)
THEN
1120 lb = lb_send_up(3) + ((ub_send_up(3) - lb_send_up(3) + 1)*my_id)/num_threads
1121 ub = lb_send_up(3) + ((ub_send_up(3) - lb_send_up(3) + 1)*(my_id + 1))/num_threads - 1
1123 send_buf_3d_up(lb_send_up(1):ub_send_up(1), lb_send_up(2):ub_send_up(2), &
1124 lb:ub) = rs%r(lb_send_up(1):ub_send_up(1), &
1125 lb_send_up(2):ub_send_up(2), lb:ub)
1129 CALL rs%desc%group%isend(send_buf_3d_up, dest_up, req(4))
1135 CALL mp_waitany(req(1:2), completed)
1137 IF (completed .EQ. 1)
THEN
1140 IF (ub_recv_down(idir) .GE. lb_recv_down(idir))
THEN
1147 IF (my_id < num_threads)
THEN
1148 lb = lb_recv_down(3) + ((ub_recv_down(3) - lb_recv_down(3) + 1)*my_id)/num_threads
1149 ub = lb_recv_down(3) + ((ub_recv_down(3) - lb_recv_down(3) + 1)*(my_id + 1))/num_threads - 1
1151 rs%r(lb_recv_down(1):ub_recv_down(1), &
1152 lb_recv_down(2):ub_recv_down(2), lb:ub) = &
1153 rs%r(lb_recv_down(1):ub_recv_down(1), &
1154 lb_recv_down(2):ub_recv_down(2), lb:ub) + &
1155 recv_buf_3d_down(:, :, lb:ub)
1159 DEALLOCATE (recv_buf_3d_down)
1163 IF (ub_recv_up(idir) .GE. lb_recv_up(idir))
THEN
1170 IF (my_id < num_threads)
THEN
1171 lb = lb_recv_up(3) + ((ub_recv_up(3) - lb_recv_up(3) + 1)*my_id)/num_threads
1172 ub = lb_recv_up(3) + ((ub_recv_up(3) - lb_recv_up(3) + 1)*(my_id + 1))/num_threads - 1
1174 rs%r(lb_recv_up(1):ub_recv_up(1), &
1175 lb_recv_up(2):ub_recv_up(2), lb:ub) = &
1176 rs%r(lb_recv_up(1):ub_recv_up(1), &
1177 lb_recv_up(2):ub_recv_up(2), lb:ub) + &
1178 recv_buf_3d_up(:, :, lb:ub)
1182 DEALLOCATE (recv_buf_3d_up)
1189 CALL mp_waitall(req(3:4))
1191 DEALLOCATE (send_buf_3d_down)
1192 DEALLOCATE (send_buf_3d_up)
1195 DEALLOCATE (dshifts)
1196 DEALLOCATE (ushifts)
1200 halo_swapped(idir) = .true.
1205 ALLOCATE (bounds(0:pw%pw_grid%para%group%num_pe - 1, 1:4))
1208 DO i = 0, pw%pw_grid%para%group%num_pe - 1
1209 bounds(i, 1:2) = pw%pw_grid%para%bo(1:2, 1, i, 1)
1210 bounds(i, 3:4) = pw%pw_grid%para%bo(1:2, 2, i, 1)
1211 bounds(i, 1:2) = bounds(i, 1:2) - pw%pw_grid%npts(1)/2 - 1
1212 bounds(i, 3:4) = bounds(i, 3:4) - pw%pw_grid%npts(2)/2 - 1
1215 ALLOCATE (send_tasks(0:pw%pw_grid%para%group%num_pe - 1, 1:6))
1216 ALLOCATE (send_sizes(0:pw%pw_grid%para%group%num_pe - 1))
1217 ALLOCATE (send_disps(0:pw%pw_grid%para%group%num_pe - 1))
1218 ALLOCATE (recv_tasks(0:pw%pw_grid%para%group%num_pe - 1, 1:6))
1219 ALLOCATE (recv_sizes(0:pw%pw_grid%para%group%num_pe - 1))
1220 ALLOCATE (recv_disps(0:pw%pw_grid%para%group%num_pe - 1))
1221 send_tasks(:, 1) = 1
1222 send_tasks(:, 2) = 0
1223 send_tasks(:, 3) = 1
1224 send_tasks(:, 4) = 0
1225 send_tasks(:, 5) = 1
1226 send_tasks(:, 6) = 0
1230 my_rs_rank = rs%desc%my_pos
1231 my_pw_rank = pw%pw_grid%para%group%mepos
1242 DO i = 0, rs%desc%group_size - 1
1244 coords(:) = rs%desc%rank2coord(:, rs%desc%real2virtual(i))
1248 pos(:) = get_limit(rs%desc%npts(idir), rs%desc%group_dim(idir), coords(idir))
1249 pos(:) = pos(:) - rs%desc%npts(idir)/2 - 1
1250 lb_send(idir) = pos(1)
1251 ub_send(idir) = pos(2)
1254 IF (lb_send(1) .GT. bounds(my_rs_rank, 2)) cycle
1255 IF (ub_send(1) .LT. bounds(my_rs_rank, 1)) cycle
1256 IF (lb_send(2) .GT. bounds(my_rs_rank, 4)) cycle
1257 IF (ub_send(2) .LT. bounds(my_rs_rank, 3)) cycle
1259 recv_tasks(i, 1) = max(lb_send(1), bounds(my_rs_rank, 1))
1260 recv_tasks(i, 2) = min(ub_send(1), bounds(my_rs_rank, 2))
1261 recv_tasks(i, 3) = max(lb_send(2), bounds(my_rs_rank, 3))
1262 recv_tasks(i, 4) = min(ub_send(2), bounds(my_rs_rank, 4))
1263 recv_tasks(i, 5) = lb_send(3)
1264 recv_tasks(i, 6) = ub_send(3)
1265 recv_sizes(i) = (recv_tasks(i, 2) - recv_tasks(i, 1) + 1)* &
1266 (recv_tasks(i, 4) - recv_tasks(i, 3) + 1)*(recv_tasks(i, 6) - recv_tasks(i, 5) + 1)
1271 coords(:) = rs%desc%rank2coord(:, rs%desc%real2virtual(my_rs_rank))
1273 pos(:) = get_limit(rs%desc%npts(idir), rs%desc%group_dim(idir), coords(idir))
1274 pos(:) = pos(:) - rs%desc%npts(idir)/2 - 1
1275 lb_send(idir) = pos(1)
1276 ub_send(idir) = pos(2)
1279 lb_recv(:) = lb_send(:)
1280 ub_recv(:) = ub_send(:)
1283 DO j = 0, pw%pw_grid%para%group%num_pe - 1
1285 IF (lb_send(1) .GT. bounds(j, 2)) cycle
1286 IF (ub_send(1) .LT. bounds(j, 1)) cycle
1287 IF (lb_send(2) .GT. bounds(j, 4)) cycle
1288 IF (ub_send(2) .LT. bounds(j, 3)) cycle
1290 send_tasks(j, 1) = max(lb_send(1), bounds(j, 1))
1291 send_tasks(j, 2) = min(ub_send(1), bounds(j, 2))
1292 send_tasks(j, 3) = max(lb_send(2), bounds(j, 3))
1293 send_tasks(j, 4) = min(ub_send(2), bounds(j, 4))
1294 send_tasks(j, 5) = lb_send(3)
1295 send_tasks(j, 6) = ub_send(3)
1296 send_sizes(j) = (send_tasks(j, 2) - send_tasks(j, 1) + 1)* &
1297 (send_tasks(j, 4) - send_tasks(j, 3) + 1)*(send_tasks(j, 6) - send_tasks(j, 5) + 1)
1304 DO i = 1, pw%pw_grid%para%group%num_pe - 1
1305 send_disps(i) = send_disps(i - 1) + send_sizes(i - 1)
1306 recv_disps(i) = recv_disps(i - 1) + recv_sizes(i - 1)
1309 cpassert(sum(send_sizes) == product(ub_recv - lb_recv + 1))
1311 ALLOCATE (send_bufs(0:rs%desc%group_size - 1))
1312 ALLOCATE (recv_bufs(0:rs%desc%group_size - 1))
1314 DO i = 0, rs%desc%group_size - 1
1315 IF (send_sizes(i) .NE. 0)
THEN
1316 ALLOCATE (send_bufs(i)%array(send_sizes(i)))
1318 NULLIFY (send_bufs(i)%array)
1320 IF (recv_sizes(i) .NE. 0)
THEN
1321 ALLOCATE (recv_bufs(i)%array(recv_sizes(i)))
1323 NULLIFY (recv_bufs(i)%array)
1327 ALLOCATE (recv_reqs(0:rs%desc%group_size - 1))
1328 recv_reqs = mp_request_null
1330 DO i = 0, rs%desc%group_size - 1
1331 IF (recv_sizes(i) .NE. 0)
THEN
1332 CALL rs%desc%group%irecv(recv_bufs(i)%array, i, recv_reqs(i))
1340 DO i = 0, rs%desc%group_size - 1
1342 DO z = send_tasks(i, 5), send_tasks(i, 6)
1343 DO y = send_tasks(i, 3), send_tasks(i, 4)
1344 DO x = send_tasks(i, 1), send_tasks(i, 2)
1346 send_bufs(i)%array(k) = rs%r(x, y, z)
1353 ALLOCATE (send_reqs(0:rs%desc%group_size - 1))
1354 send_reqs = mp_request_null
1356 DO i = 0, rs%desc%group_size - 1
1357 IF (send_sizes(i) .NE. 0)
THEN
1358 CALL rs%desc%group%isend(send_bufs(i)%array, i, send_reqs(i))
1364 DO i = 0, rs%desc%group_size - 1
1365 IF (recv_sizes(i) .EQ. 0) cycle
1367 CALL mp_waitany(recv_reqs, completed)
1369 DO z = recv_tasks(completed - 1, 5), recv_tasks(completed - 1, 6)
1370 DO y = recv_tasks(completed - 1, 3), recv_tasks(completed - 1, 4)
1371 DO x = recv_tasks(completed - 1, 1), recv_tasks(completed - 1, 2)
1373 pw%array(x, y, z) = recv_bufs(completed - 1)%array(k)
1379 CALL mp_waitall(send_reqs)
1381 DEALLOCATE (recv_reqs)
1382 DEALLOCATE (send_reqs)
1384 DO i = 0, rs%desc%group_size - 1
1385 IF (
ASSOCIATED(send_bufs(i)%array))
THEN
1386 DEALLOCATE (send_bufs(i)%array)
1388 IF (
ASSOCIATED(recv_bufs(i)%array))
THEN
1389 DEALLOCATE (recv_bufs(i)%array)
1393 DEALLOCATE (send_bufs)
1394 DEALLOCATE (recv_bufs)
1395 DEALLOCATE (send_tasks)
1396 DEALLOCATE (send_sizes)
1397 DEALLOCATE (send_disps)
1398 DEALLOCATE (recv_tasks)
1399 DEALLOCATE (recv_sizes)
1400 DEALLOCATE (recv_disps)
1402 IF (debug_this_module)
THEN
1404 pw_sum = pw_integrate_function(pw)
1405 IF (abs(pw_sum - rs_sum)/max(1.0_dp, abs(pw_sum), abs(rs_sum)) > epsilon(rs_sum)*1000)
THEN
1406 WRITE (error_string,
'(A,6(1X,I4.4),3F25.16)')
"rs_pw_transfer_distributed", &
1407 rs%desc%npts, rs%desc%group_dim, pw_sum, rs_sum, abs(pw_sum - rs_sum)
1408 CALL cp_abort(__location__, &
1409 error_string//
" Please report this bug ... quick workaround: use "// &
1410 "DISTRIBUTION_TYPE REPLICATED")
1414 END SUBROUTINE transfer_rs2pw_distributed
1438 SUBROUTINE transfer_pw2rs_distributed(rs, pw)
1440 TYPE(pw_r3d_rs_type),
INTENT(IN) :: pw
1442 INTEGER :: completed, dest_down, dest_up, i, idir, j, k, lb, my_id, my_pw_rank, my_rs_rank, &
1443 n_shifts, nn, num_threads, position, source_down, source_up, ub, x, y, z
1444 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: dshifts, recv_disps, recv_sizes, &
1445 send_disps, send_sizes, ushifts
1446 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: bounds, recv_tasks, send_tasks
1447 INTEGER,
DIMENSION(2) :: neighbours, pos
1448 INTEGER,
DIMENSION(3) :: coords, lb_recv, lb_recv_down, lb_recv_up, lb_send, lb_send_down, &
1449 lb_send_up, ub_recv, ub_recv_down, ub_recv_up, ub_send, ub_send_down, ub_send_up
1450 LOGICAL,
DIMENSION(3) :: halo_swapped
1451 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: recv_buf_3d_down, recv_buf_3d_up, &
1452 send_buf_3d_down, send_buf_3d_up
1453 TYPE(cp_1d_r_p_type),
ALLOCATABLE,
DIMENSION(:) :: recv_bufs, send_bufs
1454 TYPE(mp_request_type),
ALLOCATABLE,
DIMENSION(:) :: recv_reqs, send_reqs
1455 TYPE(mp_request_type),
DIMENSION(4) :: req
1464 ALLOCATE (bounds(0:pw%pw_grid%para%group%num_pe - 1, 1:4))
1466 DO i = 0, pw%pw_grid%para%group%num_pe - 1
1467 bounds(i, 1:2) = pw%pw_grid%para%bo(1:2, 1, i, 1)
1468 bounds(i, 3:4) = pw%pw_grid%para%bo(1:2, 2, i, 1)
1469 bounds(i, 1:2) = bounds(i, 1:2) - pw%pw_grid%npts(1)/2 - 1
1470 bounds(i, 3:4) = bounds(i, 3:4) - pw%pw_grid%npts(2)/2 - 1
1473 ALLOCATE (send_tasks(0:pw%pw_grid%para%group%num_pe - 1, 1:6))
1474 ALLOCATE (send_sizes(0:pw%pw_grid%para%group%num_pe - 1))
1475 ALLOCATE (send_disps(0:pw%pw_grid%para%group%num_pe - 1))
1476 ALLOCATE (recv_tasks(0:pw%pw_grid%para%group%num_pe - 1, 1:6))
1477 ALLOCATE (recv_sizes(0:pw%pw_grid%para%group%num_pe - 1))
1478 ALLOCATE (recv_disps(0:pw%pw_grid%para%group%num_pe - 1))
1481 send_tasks(:, 1) = 1
1482 send_tasks(:, 2) = 0
1483 send_tasks(:, 3) = 1
1484 send_tasks(:, 4) = 0
1485 send_tasks(:, 5) = 1
1486 send_tasks(:, 6) = 0
1490 recv_tasks(:, 1) = 1
1491 recv_tasks(:, 2) = 0
1492 send_tasks(:, 3) = 1
1493 send_tasks(:, 4) = 0
1494 send_tasks(:, 5) = 1
1495 send_tasks(:, 6) = 0
1498 my_rs_rank = rs%desc%my_pos
1499 my_pw_rank = pw%pw_grid%para%group%mepos
1512 DO i = 0, pw%pw_grid%para%group%num_pe - 1
1514 coords(:) = rs%desc%rank2coord(:, rs%desc%real2virtual(i))
1518 pos(:) = get_limit(rs%desc%npts(idir), rs%desc%group_dim(idir), coords(idir))
1519 pos(:) = pos(:) - rs%desc%npts(idir)/2 - 1
1520 lb_send(idir) = pos(1)
1521 ub_send(idir) = pos(2)
1524 IF (ub_send(1) .LT. bounds(my_rs_rank, 1)) cycle
1525 IF (lb_send(1) .GT. bounds(my_rs_rank, 2)) cycle
1526 IF (ub_send(2) .LT. bounds(my_rs_rank, 3)) cycle
1527 IF (lb_send(2) .GT. bounds(my_rs_rank, 4)) cycle
1529 send_tasks(i, 1) = max(lb_send(1), bounds(my_rs_rank, 1))
1530 send_tasks(i, 2) = min(ub_send(1), bounds(my_rs_rank, 2))
1531 send_tasks(i, 3) = max(lb_send(2), bounds(my_rs_rank, 3))
1532 send_tasks(i, 4) = min(ub_send(2), bounds(my_rs_rank, 4))
1533 send_tasks(i, 5) = lb_send(3)
1534 send_tasks(i, 6) = ub_send(3)
1535 send_sizes(i) = (send_tasks(i, 2) - send_tasks(i, 1) + 1)* &
1536 (send_tasks(i, 4) - send_tasks(i, 3) + 1)*(send_tasks(i, 6) - send_tasks(i, 5) + 1)
1541 coords(:) = rs%desc%rank2coord(:, rs%desc%real2virtual(my_rs_rank))
1543 pos(:) = get_limit(rs%desc%npts(idir), rs%desc%group_dim(idir), coords(idir))
1544 pos(:) = pos(:) - rs%desc%npts(idir)/2 - 1
1545 lb_send(idir) = pos(1)
1546 ub_send(idir) = pos(2)
1549 lb_recv(:) = lb_send(:)
1550 ub_recv(:) = ub_send(:)
1554 DO j = 0, pw%pw_grid%para%group%num_pe - 1
1556 IF (ub_send(1) .LT. bounds(j, 1)) cycle
1557 IF (lb_send(1) .GT. bounds(j, 2)) cycle
1558 IF (ub_send(2) .LT. bounds(j, 3)) cycle
1559 IF (lb_send(2) .GT. bounds(j, 4)) cycle
1561 recv_tasks(j, 1) = max(lb_send(1), bounds(j, 1))
1562 recv_tasks(j, 2) = min(ub_send(1), bounds(j, 2))
1563 recv_tasks(j, 3) = max(lb_send(2), bounds(j, 3))
1564 recv_tasks(j, 4) = min(ub_send(2), bounds(j, 4))
1565 recv_tasks(j, 5) = lb_send(3)
1566 recv_tasks(j, 6) = ub_send(3)
1567 recv_sizes(j) = (recv_tasks(j, 2) - recv_tasks(j, 1) + 1)* &
1568 (recv_tasks(j, 4) - recv_tasks(j, 3) + 1)*(recv_tasks(j, 6) - recv_tasks(j, 5) + 1)
1575 DO i = 1, pw%pw_grid%para%group%num_pe - 1
1576 send_disps(i) = send_disps(i - 1) + send_sizes(i - 1)
1577 recv_disps(i) = recv_disps(i - 1) + recv_sizes(i - 1)
1580 cpassert(sum(recv_sizes) == product(ub_recv - lb_recv + 1))
1582 ALLOCATE (send_bufs(0:rs%desc%group_size - 1))
1583 ALLOCATE (recv_bufs(0:rs%desc%group_size - 1))
1585 DO i = 0, rs%desc%group_size - 1
1586 IF (send_sizes(i) .NE. 0)
THEN
1587 ALLOCATE (send_bufs(i)%array(send_sizes(i)))
1589 NULLIFY (send_bufs(i)%array)
1591 IF (recv_sizes(i) .NE. 0)
THEN
1592 ALLOCATE (recv_bufs(i)%array(recv_sizes(i)))
1594 NULLIFY (recv_bufs(i)%array)
1598 ALLOCATE (recv_reqs(0:rs%desc%group_size - 1))
1599 recv_reqs = mp_request_null
1601 DO i = 0, rs%desc%group_size - 1
1602 IF (recv_sizes(i) .NE. 0)
THEN
1603 CALL rs%desc%group%irecv(recv_bufs(i)%array, i, recv_reqs(i))
1611 DO i = 0, rs%desc%group_size - 1
1613 DO z = send_tasks(i, 5), send_tasks(i, 6)
1614 DO y = send_tasks(i, 3), send_tasks(i, 4)
1615 DO x = send_tasks(i, 1), send_tasks(i, 2)
1617 send_bufs(i)%array(k) = pw%array(x, y, z)
1624 ALLOCATE (send_reqs(0:rs%desc%group_size - 1))
1625 send_reqs = mp_request_null
1627 DO i = 0, rs%desc%group_size - 1
1628 IF (send_sizes(i) .NE. 0)
THEN
1629 CALL rs%desc%group%isend(send_bufs(i)%array, i, send_reqs(i))
1636 DO i = 0, rs%desc%group_size - 1
1637 IF (recv_sizes(i) .EQ. 0) cycle
1639 CALL mp_waitany(recv_reqs, completed)
1641 DO z = recv_tasks(completed - 1, 5), recv_tasks(completed - 1, 6)
1642 DO y = recv_tasks(completed - 1, 3), recv_tasks(completed - 1, 4)
1643 DO x = recv_tasks(completed - 1, 1), recv_tasks(completed - 1, 2)
1645 rs%r(x, y, z) = recv_bufs(completed - 1)%array(k)
1651 CALL mp_waitall(send_reqs)
1653 DEALLOCATE (recv_reqs)
1654 DEALLOCATE (send_reqs)
1656 DO i = 0, rs%desc%group_size - 1
1657 IF (
ASSOCIATED(send_bufs(i)%array))
THEN
1658 DEALLOCATE (send_bufs(i)%array)
1660 IF (
ASSOCIATED(recv_bufs(i)%array))
THEN
1661 DEALLOCATE (recv_bufs(i)%array)
1665 DEALLOCATE (send_bufs)
1666 DEALLOCATE (recv_bufs)
1667 DEALLOCATE (send_tasks)
1668 DEALLOCATE (send_sizes)
1669 DEALLOCATE (send_disps)
1670 DEALLOCATE (recv_tasks)
1671 DEALLOCATE (recv_sizes)
1672 DEALLOCATE (recv_disps)
1675 halo_swapped = .false.
1679 IF (rs%desc%perd(idir) /= 1)
THEN
1681 ALLOCATE (dshifts(0:rs%desc%neighbours(idir)))
1682 ALLOCATE (ushifts(0:rs%desc%neighbours(idir)))
1686 DO n_shifts = 1, rs%desc%neighbours(idir)
1691 position =
modulo(rs%desc%virtual_group_coor(idir) - n_shifts, rs%desc%group_dim(idir))
1692 neighbours = get_limit(rs%desc%npts(idir), rs%desc%group_dim(idir), position)
1693 dshifts(n_shifts) = dshifts(n_shifts - 1) + (neighbours(2) - neighbours(1) + 1)
1695 position =
modulo(rs%desc%virtual_group_coor(idir) + n_shifts, rs%desc%group_dim(idir))
1696 neighbours = get_limit(rs%desc%npts(idir), rs%desc%group_dim(idir), position)
1697 ushifts(n_shifts) = ushifts(n_shifts - 1) + (neighbours(2) - neighbours(1) + 1)
1703 CALL cart_shift(rs, idir, -1*n_shifts, source_down, dest_down)
1705 lb_send_down(:) = rs%lb_local(:)
1706 ub_send_down(:) = rs%ub_local(:)
1707 lb_recv_down(:) = rs%lb_local(:)
1708 ub_recv_down(:) = rs%ub_local(:)
1710 IF (dshifts(n_shifts - 1) .LE. rs%desc%border)
THEN
1711 lb_send_down(idir) = lb_send_down(idir) + rs%desc%border
1712 ub_send_down(idir) = min(ub_send_down(idir) - rs%desc%border, &
1713 lb_send_down(idir) + rs%desc%border - 1 - dshifts(n_shifts - 1))
1715 lb_recv_down(idir) = ub_recv_down(idir) - rs%desc%border + 1 + ushifts(n_shifts - 1)
1716 ub_recv_down(idir) = min(ub_recv_down(idir), &
1717 ub_recv_down(idir) - rs%desc%border + ushifts(n_shifts))
1719 lb_send_down(idir) = 0
1720 ub_send_down(idir) = -1
1721 lb_recv_down(idir) = 0
1722 ub_recv_down(idir) = -1
1726 IF (.NOT. (halo_swapped(i) .OR. i .EQ. idir))
THEN
1727 lb_send_down(i) = rs%lb_real(i)
1728 ub_send_down(i) = rs%ub_real(i)
1729 lb_recv_down(i) = rs%lb_real(i)
1730 ub_recv_down(i) = rs%ub_real(i)
1735 nn = product(ub_recv_down - lb_recv_down + 1)
1736 ALLOCATE (recv_buf_3d_down(lb_recv_down(1):ub_recv_down(1), &
1737 lb_recv_down(2):ub_recv_down(2), lb_recv_down(3):ub_recv_down(3)))
1740 CALL rs%desc%group%irecv(recv_buf_3d_down, source_down, req(1))
1743 nn = product(ub_send_down - lb_send_down + 1)
1744 ALLOCATE (send_buf_3d_down(lb_send_down(1):ub_send_down(1), &
1745 lb_send_down(2):ub_send_down(2), lb_send_down(3):ub_send_down(3)))
1752 IF (my_id < num_threads)
THEN
1753 lb = lb_send_down(3) + ((ub_send_down(3) - lb_send_down(3) + 1)*my_id)/num_threads
1754 ub = lb_send_down(3) + ((ub_send_down(3) - lb_send_down(3) + 1)*(my_id + 1))/num_threads - 1
1756 send_buf_3d_down(lb_send_down(1):ub_send_down(1), lb_send_down(2):ub_send_down(2), &
1757 lb:ub) = rs%r(lb_send_down(1):ub_send_down(1), &
1758 lb_send_down(2):ub_send_down(2), lb:ub)
1762 CALL rs%desc%group%isend(send_buf_3d_down, dest_down, req(3))
1766 CALL cart_shift(rs, idir, n_shifts, source_up, dest_up)
1768 lb_send_up(:) = rs%lb_local(:)
1769 ub_send_up(:) = rs%ub_local(:)
1770 lb_recv_up(:) = rs%lb_local(:)
1771 ub_recv_up(:) = rs%ub_local(:)
1773 IF (ushifts(n_shifts - 1) .LE. rs%desc%border)
THEN
1774 ub_send_up(idir) = ub_send_up(idir) - rs%desc%border
1775 lb_send_up(idir) = max(lb_send_up(idir) + rs%desc%border, &
1776 ub_send_up(idir) - rs%desc%border + 1 + ushifts(n_shifts - 1))
1778 ub_recv_up(idir) = lb_recv_up(idir) + rs%desc%border - 1 - dshifts(n_shifts - 1)
1779 lb_recv_up(idir) = max(lb_recv_up(idir), &
1780 lb_recv_up(idir) + rs%desc%border - dshifts(n_shifts))
1782 lb_send_up(idir) = 0
1783 ub_send_up(idir) = -1
1784 lb_recv_up(idir) = 0
1785 ub_recv_up(idir) = -1
1789 IF (.NOT. (halo_swapped(i) .OR. i .EQ. idir))
THEN
1790 lb_send_up(i) = rs%lb_real(i)
1791 ub_send_up(i) = rs%ub_real(i)
1792 lb_recv_up(i) = rs%lb_real(i)
1793 ub_recv_up(i) = rs%ub_real(i)
1798 nn = product(ub_recv_up - lb_recv_up + 1)
1799 ALLOCATE (recv_buf_3d_up(lb_recv_up(1):ub_recv_up(1), &
1800 lb_recv_up(2):ub_recv_up(2), lb_recv_up(3):ub_recv_up(3)))
1804 CALL rs%desc%group%irecv(recv_buf_3d_up, source_up, req(2))
1807 nn = product(ub_send_up - lb_send_up + 1)
1808 ALLOCATE (send_buf_3d_up(lb_send_up(1):ub_send_up(1), &
1809 lb_send_up(2):ub_send_up(2), lb_send_up(3):ub_send_up(3)))
1816 IF (my_id < num_threads)
THEN
1817 lb = lb_send_up(3) + ((ub_send_up(3) - lb_send_up(3) + 1)*my_id)/num_threads
1818 ub = lb_send_up(3) + ((ub_send_up(3) - lb_send_up(3) + 1)*(my_id + 1))/num_threads - 1
1820 send_buf_3d_up(lb_send_up(1):ub_send_up(1), lb_send_up(2):ub_send_up(2), &
1821 lb:ub) = rs%r(lb_send_up(1):ub_send_up(1), &
1822 lb_send_up(2):ub_send_up(2), lb:ub)
1826 CALL rs%desc%group%isend(send_buf_3d_up, dest_up, req(4))
1832 CALL mp_waitany(req(1:2), completed)
1834 IF (completed .EQ. 1)
THEN
1837 IF (ub_recv_down(idir) .GE. lb_recv_down(idir))
THEN
1845 IF (my_id < num_threads)
THEN
1846 lb = lb_recv_down(3) + ((ub_recv_down(3) - lb_recv_down(3) + 1)*my_id)/num_threads
1847 ub = lb_recv_down(3) + ((ub_recv_down(3) - lb_recv_down(3) + 1)*(my_id + 1))/num_threads - 1
1849 rs%r(lb_recv_down(1):ub_recv_down(1), lb_recv_down(2):ub_recv_down(2), &
1850 lb:ub) = recv_buf_3d_down(:, :, lb:ub)
1855 DEALLOCATE (recv_buf_3d_down)
1859 IF (ub_recv_up(idir) .GE. lb_recv_up(idir))
THEN
1867 IF (my_id < num_threads)
THEN
1868 lb = lb_recv_up(3) + ((ub_recv_up(3) - lb_recv_up(3) + 1)*my_id)/num_threads
1869 ub = lb_recv_up(3) + ((ub_recv_up(3) - lb_recv_up(3) + 1)*(my_id + 1))/num_threads - 1
1871 rs%r(lb_recv_up(1):ub_recv_up(1), lb_recv_up(2):ub_recv_up(2), &
1872 lb:ub) = recv_buf_3d_up(:, :, lb:ub)
1877 DEALLOCATE (recv_buf_3d_up)
1881 CALL mp_waitall(req(3:4))
1883 DEALLOCATE (send_buf_3d_down)
1884 DEALLOCATE (send_buf_3d_up)
1887 DEALLOCATE (ushifts)
1888 DEALLOCATE (dshifts)
1891 halo_swapped(idir) = .true.
1895 END SUBROUTINE transfer_pw2rs_distributed
1908 CHARACTER(len=*),
PARAMETER :: routinen =
'rs_grid_zero'
1910 INTEGER :: handle, i, j, k, l(3), u(3)
1912 CALL timeset(routinen, handle)
1913 l(1) = lbound(rs%r, 1); l(2) = lbound(rs%r, 2); l(3) = lbound(rs%r, 3)
1914 u(1) = ubound(rs%r, 1); u(2) = ubound(rs%r, 2); u(3) = ubound(rs%r, 3)
1921 rs%r(i, j, k) = 0.0_dp
1926 CALL timestop(handle)
1943 REAL(dp),
INTENT(IN) :: scalar
1945 CHARACTER(len=*),
PARAMETER :: routinen =
'rs_grid_mult_and_add'
1947 INTEGER :: handle, i, j, k, l(3), u(3)
1951 CALL timeset(routinen, handle)
1952 IF (scalar .NE. 0.0_dp)
THEN
1953 l(1) = lbound(rs1%r, 1); l(2) = lbound(rs1%r, 2); l(3) = lbound(rs1%r, 3)
1954 u(1) = ubound(rs1%r, 1); u(2) = ubound(rs1%r, 2); u(3) = ubound(rs1%r, 3)
1961 rs1%r(i, j, k) = rs1%r(i, j, k) + scalar*rs2%r(i, j, k)*rs3%r(i, j, k)
1967 CALL timestop(handle)
1981 TYPE(pw_grid_type),
INTENT(IN),
TARGET :: pw_grid
1984 cpassert(
ASSOCIATED(rs%desc%pw, pw_grid))
1985 rs%desc%dh = pw_grid%dh
1986 rs%desc%dh_inv = pw_grid%dh_inv
2000 cpassert(rs_desc%ref_count > 0)
2001 rs_desc%ref_count = rs_desc%ref_count + 1
2019 IF (
ALLOCATED(rs_grid%px))
DEALLOCATE (rs_grid%px)
2020 IF (
ALLOCATED(rs_grid%py))
DEALLOCATE (rs_grid%py)
2021 IF (
ALLOCATED(rs_grid%pz))
DEALLOCATE (rs_grid%pz)
2034 IF (
ASSOCIATED(rs_desc))
THEN
2035 cpassert(rs_desc%ref_count > 0)
2036 rs_desc%ref_count = rs_desc%ref_count - 1
2037 IF (rs_desc%ref_count == 0)
THEN
2039 CALL pw_grid_release(rs_desc%pw)
2041 IF (rs_desc%parallel)
THEN
2043 CALL rs_desc%group%free()
2045 DEALLOCATE (rs_desc%virtual2real)
2046 DEALLOCATE (rs_desc%real2virtual)
2049 IF (rs_desc%distributed)
THEN
2050 DEALLOCATE (rs_desc%rank2coord)
2051 DEALLOCATE (rs_desc%coord2rank)
2052 DEALLOCATE (rs_desc%lb_global)
2053 DEALLOCATE (rs_desc%ub_global)
2054 DEALLOCATE (rs_desc%x2coord)
2055 DEALLOCATE (rs_desc%y2coord)
2056 DEALLOCATE (rs_desc%z2coord)
2059 DEALLOCATE (rs_desc)
2077 PURE SUBROUTINE cart_shift(rs_grid, dir, disp, source, dest)
2080 INTEGER,
INTENT(IN) :: dir, disp
2081 INTEGER,
INTENT(OUT) :: source, dest
2083 INTEGER,
DIMENSION(3) :: shift_coords
2085 shift_coords = rs_grid%desc%virtual_group_coor
2086 shift_coords(dir) =
modulo(shift_coords(dir) + disp, rs_grid%desc%group_dim(dir))
2087 dest = rs_grid%desc%virtual2real(rs_grid%desc%coord2rank(shift_coords(1), shift_coords(2), shift_coords(3)))
2088 shift_coords = rs_grid%desc%virtual_group_coor
2089 shift_coords(dir) =
modulo(shift_coords(dir) - disp, rs_grid%desc%group_dim(dir))
2090 source = rs_grid%desc%virtual2real(rs_grid%desc%coord2rank(shift_coords(1), shift_coords(2), shift_coords(3)))
2104 INTEGER :: max_ngpts
2106 CHARACTER(len=*),
PARAMETER :: routinen =
'rs_grid_max_ngpts'
2108 INTEGER :: handle, i
2109 INTEGER,
DIMENSION(3) :: lb, ub
2111 CALL timeset(routinen, handle)
2114 IF ((desc%pw%para%mode == pw_mode_local) .OR. &
2115 (all(desc%group_dim == 1)))
THEN
2116 cpassert(product(int(desc%npts, kind=int_8)) < huge(1))
2117 max_ngpts = product(desc%npts)
2119 DO i = 0, desc%group_size - 1
2120 lb = desc%lb_global(:, i)
2121 ub = desc%ub_global(:, i)
2122 lb = lb - desc%border*(1 - desc%perd)
2123 ub = ub + desc%border*(1 - desc%perd)
2124 cpassert(product(int(ub - lb + 1, kind=int_8)) < huge(1))
2125 max_ngpts = max(max_ngpts, product(ub - lb + 1))
2129 CALL timestop(handle)
2145 REAL(kind=dp),
DIMENSION(3, 3),
INTENT(IN) :: h_inv
2146 REAL(kind=dp),
DIMENSION(3),
INTENT(IN) :: ra
2147 INTEGER,
INTENT(IN),
OPTIONAL :: offset, group_size, my_pos
2149 INTEGER :: dir, lb(3), location(3), tp(3), ub(3)
2153 IF (.NOT. all(rs_grid%desc%perd == 1))
THEN
2156 tp(dir) = floor(dot_product(h_inv(dir, :), ra)*rs_grid%desc%npts(dir))
2157 tp(dir) =
modulo(tp(dir), rs_grid%desc%npts(dir))
2158 IF (rs_grid%desc%perd(dir) .NE. 1)
THEN
2159 lb(dir) = rs_grid%lb_local(dir) + rs_grid%desc%border
2160 ub(dir) = rs_grid%ub_local(dir) - rs_grid%desc%border
2162 lb(dir) = rs_grid%lb_local(dir)
2163 ub(dir) = rs_grid%ub_local(dir)
2166 location(dir) = tp(dir) + rs_grid%desc%lb(dir)
2168 IF (all(lb(:) <= location(:)) .AND. all(location(:) <= ub(:)))
THEN
2172 IF (
PRESENT(offset) .AND.
PRESENT(group_size) .AND.
PRESENT(my_pos))
THEN
2174 IF (
modulo(offset, group_size) == my_pos) res = .true.
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
various utilities that regard array of different kinds: output, allocation,... maybe it is not a good...
various routines to log and control the output. The idea is that decisions about where to log should ...
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_memory(mem)
Returns the total amount of memory [bytes] in use, if known, zero otherwise.
Collection of simple mathematical functions and subroutines.
Interface to the message passing library MPI.
type(mp_comm_type), parameter, public mp_comm_null
subroutine, public mp_waitany(requests, completed)
waits for completion of any of the given requests
type(mp_request_type), parameter, public mp_request_null
Fortran API for the offload package, which is written in C.
subroutine, public offload_free_buffer(buffer)
Deallocates given buffer.
subroutine, public offload_create_buffer(length, buffer)
Allocates a buffer of given length, ie. number of elements.
integer, parameter, public pw_mode_local
This module defines the grid data type and some basic operations on it.
subroutine, public pw_grid_release(pw_grid)
releases the given pw grid
subroutine, public pw_grid_retain(pw_grid)
retains the given pw grid
subroutine, public rs_grid_print(rs, iounit)
Print information on grids to output.
integer, parameter, public rsgrid_replicated
subroutine, public rs_grid_create(rs, desc)
...
subroutine, public rs_grid_create_descriptor(desc, pw_grid, input_settings, border_points)
Determine the setup of real space grids - this is divided up into the creation of a descriptor and th...
pure integer function, public rs_grid_locate_rank(rs_desc, rank_in, shift)
returns the 1D rank of the task which is a cartesian shift away from 1D rank rank_in only possible if...
subroutine, public transfer_pw2rs(rs, pw)
...
integer, parameter, public rsgrid_automatic
pure subroutine, public rs_grid_reorder_ranks(desc, real2virtual)
Defines a new ordering of ranks on this realspace grid, recalculating the data bounds and reallocatin...
subroutine, public rs_grid_mult_and_add(rs1, rs2, rs3, scalar)
rs1(i) = rs1(i) + rs2(i)*rs3(i)
subroutine, public rs_grid_set_box(pw_grid, rs)
Set box matrix info for real space grid This is needed for variable cell simulations.
subroutine, public rs_grid_retain_descriptor(rs_desc)
retains the given rs grid descriptor (see doc/ReferenceCounting.html)
subroutine, public rs_grid_release_descriptor(rs_desc)
releases the given rs grid descriptor (see doc/ReferenceCounting.html)
integer function, public rs_grid_max_ngpts(desc)
returns the maximum number of points in the local grid of any process to account for the case where t...
integer, parameter, public rsgrid_distributed
pure logical function, public map_gaussian_here(rs_grid, h_inv, ra, offset, group_size, my_pos)
...
subroutine, public transfer_rs2pw(rs, pw)
...
subroutine, public rs_grid_release(rs_grid)
releases the given rs grid (see doc/ReferenceCounting.html)
subroutine, public rs_grid_zero(rs)
Initialize grid to zero.
All kind of helpful little routines.
pure integer function, dimension(2), public get_limit(m, n, me)
divide m entries into n parts, return size of part me
void offload_free_buffer(offload_buffer *buffer)
Deallocate given buffer.
represent a pointer to a 1d array