45 #include "../base/base_uses.f90"
50 PUBLIC :: realspace_grid_type, &
51 realspace_grid_desc_type, &
52 realspace_grid_p_type, &
53 realspace_grid_desc_p_type, &
54 realspace_grid_input_type
76 LOGICAL,
PRIVATE,
PARAMETER :: debug_this_module = .false.
77 CHARACTER(len=*),
PARAMETER,
PRIVATE :: modulen =
'realspace_grid_types'
80 TYPE realspace_grid_input_type
82 INTEGER :: distribution_layout(3) = -1
83 REAL(kind=
dp) :: memory_factor = 0.0_dp
84 LOGICAL :: lock_distribution = .false.
86 REAL(kind=
dp) :: halo_reduction_factor = 1.0_dp
87 END TYPE realspace_grid_input_type
90 TYPE realspace_grid_desc_type
91 TYPE(pw_grid_type),
POINTER :: pw => null()
93 INTEGER :: ref_count = 0
95 INTEGER(int_8) :: ngpts = 0_int_8
96 INTEGER,
DIMENSION(3) :: npts = 0
97 INTEGER,
DIMENSION(3) :: lb = 0
98 INTEGER,
DIMENSION(3) :: ub = 0
100 INTEGER :: border = 0
102 INTEGER,
DIMENSION(3) :: perd = -1
103 REAL(kind=
dp),
DIMENSION(3, 3) :: dh = 0.0_dp
104 REAL(kind=
dp),
DIMENSION(3, 3) :: dh_inv = 0.0_dp
105 LOGICAL :: orthorhombic = .true.
107 LOGICAL :: parallel = .true.
108 LOGICAL :: distributed = .true.
112 INTEGER :: my_pos = -1
113 LOGICAL :: group_head = .false.
114 INTEGER :: group_size = 0
115 INTEGER,
DIMENSION(3) :: group_dim = -1
116 INTEGER,
DIMENSION(3) :: group_coor = -1
117 INTEGER,
DIMENSION(3) :: neighbours = -1
120 INTEGER,
DIMENSION(:, :),
ALLOCATABLE :: lb_global
121 INTEGER,
DIMENSION(:, :),
ALLOCATABLE :: ub_global
123 INTEGER,
DIMENSION(:, :),
ALLOCATABLE :: rank2coord
124 INTEGER,
DIMENSION(:, :, :),
ALLOCATABLE :: coord2rank
126 INTEGER,
DIMENSION(:),
ALLOCATABLE :: x2coord
127 INTEGER,
DIMENSION(:),
ALLOCATABLE :: y2coord
128 INTEGER,
DIMENSION(:),
ALLOCATABLE :: z2coord
130 INTEGER :: my_virtual_pos = -1
131 INTEGER,
DIMENSION(3) :: virtual_group_coor = -1
133 INTEGER,
DIMENSION(:),
ALLOCATABLE :: virtual2real, real2virtual
135 END TYPE realspace_grid_desc_type
137 TYPE realspace_grid_type
139 TYPE(realspace_grid_desc_type),
POINTER :: desc => null()
141 INTEGER :: ngpts_local = -1
142 INTEGER,
DIMENSION(3) :: npts_local = -1
143 INTEGER,
DIMENSION(3) :: lb_local = -1
144 INTEGER,
DIMENSION(3) :: ub_local = -1
145 INTEGER,
DIMENSION(3) :: lb_real = -1
146 INTEGER,
DIMENSION(3) :: ub_real = -1
148 INTEGER,
DIMENSION(:),
ALLOCATABLE :: px, py, pz
149 TYPE(offload_buffer_type) :: buffer = offload_buffer_type()
150 REAL(kind=
dp),
DIMENSION(:, :, :),
CONTIGUOUS,
POINTER :: r => null()
152 END TYPE realspace_grid_type
155 TYPE realspace_grid_p_type
156 TYPE(realspace_grid_type),
POINTER :: rs_grid => null()
157 END TYPE realspace_grid_p_type
159 TYPE realspace_grid_desc_p_type
160 TYPE(realspace_grid_desc_type),
POINTER :: rs_desc => null()
161 END TYPE realspace_grid_desc_p_type
174 TYPE(realspace_grid_desc_type),
INTENT(IN) :: rs_desc
175 INTEGER,
INTENT(IN) :: rank_in
176 INTEGER,
DIMENSION(3),
INTENT(IN) :: shift
181 coord =
modulo(rs_desc%rank2coord(:, rank_in) + shift, rs_desc%group_dim)
182 rank_out = rs_desc%coord2rank(coord(1), coord(2), coord(3))
203 TYPE(realspace_grid_desc_type),
POINTER :: desc
204 TYPE(pw_grid_type),
INTENT(INOUT),
TARGET :: pw_grid
205 TYPE(realspace_grid_input_type),
INTENT(IN) :: input_settings
206 INTEGER,
INTENT(IN),
OPTIONAL :: border_points
208 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rs_grid_create_descriptor'
210 INTEGER :: border_size, dir, handle, i, j, k, l, &
211 lb(2), min_npts_real, n_slices(3), &
212 n_slices_tmp(3), nmin
214 REAL(kind=
dp) :: ratio, ratio_best, volume, volume_dist
216 CALL timeset(routinen, handle)
218 IF (
PRESENT(border_points))
THEN
219 border_size = border_points
226 CALL pw_grid%para%group%sync()
232 desc%dh_inv = pw_grid%dh_inv
233 desc%orthorhombic = pw_grid%orthorhombic
239 desc%npts = pw_grid%npts
240 desc%ngpts = product(int(desc%npts, kind=
int_8))
241 desc%lb = pw_grid%bounds(1, :)
242 desc%ub = pw_grid%bounds(2, :)
243 desc%border = border_size
244 IF (border_size == 0)
THEN
249 desc%parallel = .false.
250 desc%distributed = .false.
253 desc%group_head = .true.
260 desc%group_size = pw_grid%para%group_size
261 desc%npts = pw_grid%npts
262 desc%ngpts = product(int(desc%npts, kind=
int_8))
263 desc%lb = pw_grid%bounds(1, :)
264 desc%ub = pw_grid%bounds(2, :)
267 IF (border_size == 0)
THEN
268 nmin = (input_settings%nsmax + 1)/2
269 nmin = max(0, nint(nmin*input_settings%halo_reduction_factor))
278 IF (border_size > 0)
THEN
279 CALL cp_abort(__location__, &
280 "An explicit border size > 0 is not yet working for "// &
281 "replicated realspace grids. Request DISTRIBUTION_TYPE "// &
282 "distributed for RS_GRID explicitly.")
288 ratio_best = -huge(ratio_best)
291 DO k = 1, min(desc%npts(3), desc%group_size)
292 DO j = 1, min(desc%npts(2), desc%group_size)
293 i = min(desc%npts(1), desc%group_size/(j*k))
294 n_slices_tmp = (/i, j, k/)
297 IF (product(n_slices_tmp) .NE. desc%group_size) cycle
301 IF (.NOT. all(pack(n_slices_tmp == input_settings%distribution_layout, &
302 (/-1, -1, -1/) /= input_settings%distribution_layout) &
309 IF (n_slices_tmp(dir) > 1)
THEN
310 DO l = 0, n_slices_tmp(dir) - 1
311 lb =
get_limit(desc%npts(dir), n_slices_tmp(dir), l)
312 IF (lb(2) - lb(1) + 1 + 2*nmin > desc%npts(dir)) overlap = .true.
322 ratio = product(real(desc%npts, kind=
dp)/n_slices_tmp)/ &
323 product(real(desc%npts, kind=
dp)/n_slices_tmp + &
324 merge((/0.0, 0.0, 0.0/), 2*(/1.06*nmin, 1.05*nmin, 1.03*nmin/), n_slices_tmp == (/1, 1, 1/)))
325 IF (ratio > ratio_best)
THEN
327 n_slices = n_slices_tmp
336 volume = product(real(desc%npts, kind=
dp))
337 volume_dist = product(real(desc%npts, kind=
dp)/n_slices + &
338 merge((/0, 0, 0/), 2*(/nmin, nmin, nmin/), n_slices == (/1, 1, 1/)))
339 IF (volume < volume_dist*input_settings%memory_factor)
THEN
346 desc%group_dim(:) = n_slices(:)
347 CALL desc%group%from_dup(pw_grid%para%group)
348 desc%group_size = desc%group%num_pe
349 desc%my_pos = desc%group%mepos
351 IF (all(n_slices == 1))
THEN
354 desc%border = border_size
355 IF (border_size == 0)
THEN
360 desc%distributed = .false.
361 desc%parallel = .true.
362 desc%group_head = pw_grid%para%group_head
363 desc%group_coor(:) = 0
364 desc%my_virtual_pos = 0
366 ALLOCATE (desc%virtual2real(0:desc%group_size - 1))
367 ALLOCATE (desc%real2virtual(0:desc%group_size - 1))
369 DO i = 0, desc%group_size - 1
370 desc%virtual2real(i) = i
371 desc%real2virtual(i) = i
376 IF (border_size == 0)
THEN
379 IF (n_slices(dir) > 1) desc%perd(dir) = 0
387 desc%parallel = .true.
388 desc%distributed = .true.
389 desc%group_head = (desc%my_pos == 0)
392 ALLOCATE (desc%rank2coord(3, 0:desc%group_size - 1))
393 ALLOCATE (desc%coord2rank(0:desc%group_dim(1) - 1, 0:desc%group_dim(2) - 1, 0:desc%group_dim(3) - 1))
394 ALLOCATE (desc%lb_global(3, 0:desc%group_size - 1))
395 ALLOCATE (desc%ub_global(3, 0:desc%group_size - 1))
396 ALLOCATE (desc%x2coord(desc%lb(1):desc%ub(1)))
397 ALLOCATE (desc%y2coord(desc%lb(2):desc%ub(2)))
398 ALLOCATE (desc%z2coord(desc%lb(3):desc%ub(3)))
400 DO i = 0, desc%group_size - 1
402 desc%rank2coord(1, i) = i/(desc%group_dim(2)*desc%group_dim(3))
403 desc%rank2coord(2, i) =
modulo(i, desc%group_dim(2)*desc%group_dim(3)) &
405 desc%rank2coord(3, i) =
modulo(i, desc%group_dim(3))
407 IF (i == desc%my_pos)
THEN
408 desc%group_coor = desc%rank2coord(:, i)
411 desc%coord2rank(desc%rank2coord(1, i), desc%rank2coord(2, i), desc%rank2coord(3, i)) = i
413 desc%lb_global(:, i) = desc%lb
414 desc%ub_global(:, i) = desc%ub
416 IF (desc%group_dim(dir) .GT. 1)
THEN
417 lb =
get_limit(desc%npts(dir), desc%group_dim(dir), desc%rank2coord(dir, i))
418 desc%lb_global(dir, i) = lb(1) + desc%lb(dir) - 1
419 desc%ub_global(dir, i) = lb(2) + desc%lb(dir) - 1
426 DO l = 0, desc%group_dim(dir) - 1
427 IF (desc%group_dim(dir) .GT. 1)
THEN
428 lb =
get_limit(desc%npts(dir), desc%group_dim(dir), l)
429 lb = lb + desc%lb(dir) - 1
436 desc%x2coord(lb(1):lb(2)) = l
438 desc%y2coord(lb(1):lb(2)) = l
440 desc%z2coord(lb(1):lb(2)) = l
447 desc%neighbours(dir) = 0
448 IF ((n_slices(dir) > 1) .OR. (border_size > 0))
THEN
449 min_npts_real = huge(0)
450 DO l = 0, n_slices(dir) - 1
451 lb =
get_limit(desc%npts(dir), n_slices(dir), l)
452 min_npts_real = min(lb(2) - lb(1) + 1, min_npts_real)
454 desc%neighbours(dir) = (desc%border + min_npts_real - 1)/min_npts_real
458 ALLOCATE (desc%virtual2real(0:desc%group_size - 1))
459 ALLOCATE (desc%real2virtual(0:desc%group_size - 1))
461 DO i = 0, desc%group_size - 1
462 desc%virtual2real(i) = i
463 desc%real2virtual(i) = i
466 desc%my_virtual_pos = desc%real2virtual(desc%my_pos)
467 desc%virtual_group_coor(:) = desc%rank2coord(:, desc%my_virtual_pos)
472 CALL timestop(handle)
482 TYPE(realspace_grid_type),
INTENT(OUT) :: rs
483 TYPE(realspace_grid_desc_type),
INTENT(INOUT), &
486 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rs_grid_create'
490 CALL timeset(routinen, handle)
500 rs%lb_local = rs%lb_real - desc%border*(1 - desc%perd)
501 rs%ub_local = rs%ub_real + desc%border*(1 - desc%perd)
502 rs%npts_local = rs%ub_local - rs%lb_local + 1
503 rs%ngpts_local = product(rs%npts_local)
506 IF (all(rs%desc%group_dim == 1))
THEN
511 rs%lb_local = rs%lb_real - desc%border*(1 - desc%perd)
512 rs%ub_local = rs%ub_real + desc%border*(1 - desc%perd)
513 rs%npts_local = rs%ub_local - rs%lb_local + 1
514 rs%ngpts_local = product(rs%npts_local)
518 rs%lb_real = desc%lb_global(:, desc%my_virtual_pos)
519 rs%ub_real = desc%ub_global(:, desc%my_virtual_pos)
520 rs%lb_local = rs%lb_real - desc%border*(1 - desc%perd)
521 rs%ub_local = rs%ub_real + desc%border*(1 - desc%perd)
522 rs%npts_local = rs%ub_local - rs%lb_local + 1
523 rs%ngpts_local = product(rs%npts_local)
527 rs%r(rs%lb_local(1):rs%ub_local(1), &
528 rs%lb_local(2):rs%ub_local(2), &
529 rs%lb_local(3):rs%ub_local(3)) => rs%buffer%host_buffer
531 ALLOCATE (rs%px(desc%npts(1)))
532 ALLOCATE (rs%py(desc%npts(2)))
533 ALLOCATE (rs%pz(desc%npts(3)))
535 CALL timestop(handle)
557 TYPE(realspace_grid_desc_type),
INTENT(INOUT) :: desc
558 INTEGER,
DIMENSION(:),
INTENT(IN) :: real2virtual
562 desc%real2virtual(:) = real2virtual
564 DO i = 0, desc%group_size - 1
565 desc%virtual2real(desc%real2virtual(i)) = i
568 desc%my_virtual_pos = desc%real2virtual(desc%my_pos)
570 IF (.NOT. all(desc%group_dim == 1))
THEN
571 desc%virtual_group_coor(:) = desc%rank2coord(:, desc%my_virtual_pos)
583 TYPE(realspace_grid_type),
INTENT(IN) :: rs
584 INTEGER,
INTENT(in) :: iounit
586 INTEGER :: dir, i, nn
587 REAL(kind=
dp) :: pp(3)
589 IF (rs%desc%parallel)
THEN
591 WRITE (iounit,
'(/,A,T71,I10)') &
592 " RS_GRID| Information for grid number ", rs%desc%pw%id_nr
594 WRITE (iounit,
'(A,I3,T30,2I8,T62,A,T71,I10)')
" RS_GRID| Bounds ", &
595 i, rs%desc%lb(i), rs%desc%ub(i),
"Points:", rs%desc%npts(i)
597 IF (.NOT. rs%desc%distributed)
THEN
598 WRITE (iounit,
'(A)')
" RS_GRID| Real space fully replicated"
599 WRITE (iounit,
'(A,T71,I10)') &
600 " RS_GRID| Group size ", rs%desc%group_dim(2)
603 IF (rs%desc%perd(dir) /= 1)
THEN
604 WRITE (iounit,
'(A,T71,I3,A)') &
605 " RS_GRID| Real space distribution over ", rs%desc%group_dim(dir),
" groups"
606 WRITE (iounit,
'(A,T71,I10)') &
607 " RS_GRID| Real space distribution along direction ", dir
608 WRITE (iounit,
'(A,T71,I10)') &
609 " RS_GRID| Border size ", rs%desc%border
614 IF (rs%desc%distributed)
THEN
616 IF (rs%desc%perd(dir) /= 1)
THEN
617 nn = rs%npts_local(dir)
618 CALL rs%desc%group%sum(nn)
619 pp(1) = real(nn, kind=
dp)/real(product(rs%desc%group_dim), kind=
dp)
620 nn = rs%npts_local(dir)
621 CALL rs%desc%group%max(nn)
622 pp(2) = real(nn, kind=
dp)
623 nn = rs%npts_local(dir)
624 CALL rs%desc%group%min(nn)
625 pp(3) = real(nn, kind=
dp)
627 WRITE (iounit,
'(A,T48,A)')
" RS_GRID| Distribution", &
629 WRITE (iounit,
'(A,T45,F12.1,2I12)')
" RS_GRID| Planes ", &
630 pp(1), nint(pp(2)), nint(pp(3))
638 WRITE (iounit,
'(/,A,T71,I10)') &
639 " RS_GRID| Information for grid number ", rs%desc%pw%id_nr
641 WRITE (iounit,
'(A,I3,T30,2I8,T62,A,T71,I10)')
" RS_GRID| Bounds ", &
642 i, rs%desc%lb(i), rs%desc%ub(i),
"Points:", rs%desc%npts(i)
656 TYPE(realspace_grid_type),
INTENT(IN) :: rs
657 TYPE(pw_r3d_rs_type),
INTENT(INOUT) :: pw
659 CHARACTER(len=*),
PARAMETER :: routinen =
'transfer_rs2pw'
661 INTEGER :: handle, handle2, i
663 CALL timeset(routinen, handle2)
664 CALL timeset(routinen//
"_"//trim(adjustl(cp_to_string(ceiling(pw%pw_grid%cutoff/10)*10))), handle)
666 IF (.NOT.
ASSOCIATED(rs%desc%pw, pw%pw_grid)) &
667 cpabort(
"Different rs and pw indentifiers")
669 IF (rs%desc%distributed)
THEN
670 CALL transfer_rs2pw_distributed(rs, pw)
671 ELSE IF (rs%desc%parallel)
THEN
672 CALL transfer_rs2pw_replicated(rs, pw)
674 IF (rs%desc%border == 0)
THEN
675 CALL dcopy(
SIZE(rs%r), rs%r, 1, pw%array, 1)
677 cpassert(lbound(pw%array, 3) .EQ. rs%lb_real(3))
679 DO i = rs%lb_real(3), rs%ub_real(3)
680 pw%array(:, :, i) = rs%r(rs%lb_real(1):rs%ub_real(1), &
681 rs%lb_real(2):rs%ub_real(2), i)
687 CALL timestop(handle)
688 CALL timestop(handle2)
699 TYPE(realspace_grid_type),
INTENT(IN) :: rs
700 TYPE(pw_r3d_rs_type),
INTENT(IN) :: pw
702 CHARACTER(len=*),
PARAMETER :: routinen =
'transfer_pw2rs'
704 INTEGER :: handle, handle2, i, im, j, jm, k, km
706 CALL timeset(routinen, handle2)
707 CALL timeset(routinen//
"_"//trim(adjustl(cp_to_string(ceiling(pw%pw_grid%cutoff/10)*10))), handle)
709 IF (.NOT.
ASSOCIATED(rs%desc%pw, pw%pw_grid)) &
710 cpabort(
"Different rs and pw indentifiers")
712 IF (rs%desc%distributed)
THEN
713 CALL transfer_pw2rs_distributed(rs, pw)
714 ELSE IF (rs%desc%parallel)
THEN
715 CALL transfer_pw2rs_replicated(rs, pw)
717 IF (rs%desc%border == 0)
THEN
718 CALL dcopy(
SIZE(rs%r), pw%array, 1, rs%r, 1)
723 DO k = rs%lb_local(3), rs%ub_local(3)
724 IF (k < rs%lb_real(3))
THEN
725 km = k + rs%desc%npts(3)
726 ELSE IF (k > rs%ub_real(3))
THEN
727 km = k - rs%desc%npts(3)
731 DO j = rs%lb_local(2), rs%ub_local(2)
732 IF (j < rs%lb_real(2))
THEN
733 jm = j + rs%desc%npts(2)
734 ELSE IF (j > rs%ub_real(2))
THEN
735 jm = j - rs%desc%npts(2)
739 DO i = rs%lb_local(1), rs%ub_local(1)
740 IF (i < rs%lb_real(1))
THEN
741 im = i + rs%desc%npts(1)
742 ELSE IF (i > rs%ub_real(1))
THEN
743 im = i - rs%desc%npts(1)
747 rs%r(i, j, k) = pw%array(im, jm, km)
755 CALL timestop(handle)
756 CALL timestop(handle2)
765 SUBROUTINE transfer_rs2pw_replicated(rs, pw)
766 TYPE(realspace_grid_type),
INTENT(IN) :: rs
767 TYPE(pw_r3d_rs_type),
INTENT(INOUT) :: pw
769 INTEGER :: dest, ii, ip, ix, iy, iz, nma, nn, s(3), &
771 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: rcount
772 INTEGER,
DIMENSION(3) :: lb, ub
773 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: recvbuf, sendbuf, swaparray
775 associate(np => pw%pw_grid%para%group_size, bo => pw%pw_grid%para%bo(1:2, 1:3, 0:pw%pw_grid%para%group_size - 1, 1), &
776 pbo => pw%pw_grid%bounds, group => pw%pw_grid%para%rs_group, mepos => pw%pw_grid%para%rs_mpo, &
778 ALLOCATE (rcount(0:np - 1))
780 rcount(ip - 1) = product(bo(2, :, ip) - bo(1, :, ip) + 1)
782 nma = maxval(rcount(0:np - 1))
783 ALLOCATE (sendbuf(nma), recvbuf(nma))
784 sendbuf = 1.0e99_dp; recvbuf = 1.0e99_dp
789 dest =
modulo(mepos + 1, np)
790 source =
modulo(mepos - 1, np)
795 lb = pbo(1, :) + bo(1, :,
modulo(mepos - ip, np) + 1) - 1
796 ub = pbo(1, :) + bo(2, :,
modulo(mepos - ip, np) + 1) - 1
805 ii = (iz - lb(3))*s(1)*s(2) + (ix - lb(1)) + 1
807 sendbuf(ii) = sendbuf(ii) + grid(ix, iy, iz)
813 CALL group%sendrecv(sendbuf, dest, recvbuf, source, 13)
814 CALL move_alloc(sendbuf, swaparray)
815 CALL move_alloc(recvbuf, sendbuf)
816 CALL move_alloc(swaparray, recvbuf)
821 CALL dcopy(nn, sendbuf, 1, pw%array, 1)
827 END SUBROUTINE transfer_rs2pw_replicated
834 SUBROUTINE transfer_pw2rs_replicated(rs, pw)
835 TYPE(realspace_grid_type),
INTENT(IN) :: rs
836 TYPE(pw_r3d_rs_type),
INTENT(IN) :: pw
838 INTEGER :: dest, i, ii, im, ip, ix, iy, iz, j, jm, &
839 k, km, nma, nn, source
840 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: rcount
841 INTEGER,
DIMENSION(3) :: lb, ub
842 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:) :: recvbuf, sendbuf, swaparray
843 TYPE(mp_request_type),
DIMENSION(2) :: req
845 associate(np => pw%pw_grid%para%group_size, bo => pw%pw_grid%para%bo(1:2, 1:3, 0:pw%pw_grid%para%group_size - 1, 1), &
846 pbo => pw%pw_grid%bounds, group => pw%pw_grid%para%rs_group, mepos => pw%pw_grid%para%rs_mpo, &
848 ALLOCATE (rcount(0:np - 1))
850 rcount(ip - 1) = product(bo(2, :, ip) - bo(1, :, ip) + 1)
852 nma = maxval(rcount(0:np - 1))
853 ALLOCATE (sendbuf(nma), recvbuf(nma))
854 sendbuf = 1.0e99_dp; recvbuf = 1.0e99_dp
860 CALL dcopy(nn, pw%array, 1, sendbuf, 1)
862 dest =
modulo(mepos + 1, np)
863 source =
modulo(mepos - 1, np)
867 IF (ip .NE. np - 1)
THEN
868 CALL group%isendrecv(sendbuf, dest, recvbuf, source, &
871 lb = pbo(1, :) + bo(1, :,
modulo(mepos - ip, np) + 1) - 1
872 ub = pbo(1, :) + bo(2, :,
modulo(mepos - ip, np) + 1) - 1
880 grid(ix, iy, iz) = sendbuf(ii)
884 IF (ip .NE. np - 1)
THEN
887 CALL move_alloc(sendbuf, swaparray)
888 CALL move_alloc(recvbuf, sendbuf)
889 CALL move_alloc(swaparray, recvbuf)
891 IF (rs%desc%border > 0)
THEN
895 DO k = rs%lb_local(3), rs%ub_local(3)
896 IF (k < rs%lb_real(3))
THEN
897 km = k + rs%desc%npts(3)
898 ELSE IF (k > rs%ub_real(3))
THEN
899 km = k - rs%desc%npts(3)
903 DO j = rs%lb_local(2), rs%ub_local(2)
904 IF (j < rs%lb_real(2))
THEN
905 jm = j + rs%desc%npts(2)
906 ELSE IF (j > rs%ub_real(2))
THEN
907 jm = j - rs%desc%npts(2)
911 DO i = rs%lb_local(1), rs%ub_local(1)
912 IF (i < rs%lb_real(1))
THEN
913 im = i + rs%desc%npts(1)
914 ELSE IF (i > rs%ub_real(1))
THEN
915 im = i - rs%desc%npts(1)
919 rs%r(i, j, k) = rs%r(im, jm, km)
931 END SUBROUTINE transfer_pw2rs_replicated
955 SUBROUTINE transfer_rs2pw_distributed(rs, pw)
956 TYPE(realspace_grid_type),
INTENT(IN) :: rs
957 TYPE(pw_r3d_rs_type),
INTENT(IN) :: pw
959 CHARACTER(LEN=200) :: error_string
960 INTEGER :: completed, dest_down, dest_up, i, idir, j, k, lb, my_id, my_pw_rank, my_rs_rank, &
961 n_shifts, nn, num_threads, position, source_down, source_up, ub, x, y, z
962 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: dshifts, recv_disps, recv_sizes, &
963 send_disps, send_sizes, ushifts
964 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: bounds, recv_tasks, send_tasks
965 INTEGER,
DIMENSION(2) :: neighbours, pos
966 INTEGER,
DIMENSION(3) :: coords, lb_recv, lb_recv_down, lb_recv_up, lb_send, lb_send_down, &
967 lb_send_up, ub_recv, ub_recv_down, ub_recv_up, ub_send, ub_send_down, ub_send_up
968 LOGICAL,
DIMENSION(3) :: halo_swapped
969 REAL(kind=dp) :: pw_sum, rs_sum
970 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: recv_buf_3d_down, recv_buf_3d_up, &
971 send_buf_3d_down, send_buf_3d_up
972 TYPE(cp_1d_r_p_type),
ALLOCATABLE,
DIMENSION(:) :: recv_bufs, send_bufs
973 TYPE(mp_request_type),
ALLOCATABLE,
DIMENSION(:) :: recv_reqs, send_reqs
974 TYPE(mp_request_type),
DIMENSION(4) :: req
980 IF (debug_this_module)
THEN
981 rs_sum = accurate_sum(rs%r)*abs(
det_3x3(rs%desc%dh))
982 CALL rs%desc%group%sum(rs_sum)
985 halo_swapped = .false.
992 IF (rs%desc%perd(idir) .NE. 1)
THEN
994 ALLOCATE (dshifts(0:rs%desc%neighbours(idir)))
995 ALLOCATE (ushifts(0:rs%desc%neighbours(idir)))
1001 DO n_shifts = 1, min(rs%desc%neighbours(idir), rs%desc%group_dim(idir) - 1)
1005 position =
modulo(rs%desc%virtual_group_coor(idir) - n_shifts, rs%desc%group_dim(idir))
1006 neighbours = get_limit(rs%desc%npts(idir), rs%desc%group_dim(idir), position)
1007 dshifts(n_shifts) = dshifts(n_shifts - 1) + (neighbours(2) - neighbours(1) + 1)
1009 position =
modulo(rs%desc%virtual_group_coor(idir) + n_shifts, rs%desc%group_dim(idir))
1010 neighbours = get_limit(rs%desc%npts(idir), rs%desc%group_dim(idir), position)
1011 ushifts(n_shifts) = ushifts(n_shifts - 1) + (neighbours(2) - neighbours(1) + 1)
1017 CALL cart_shift(rs, idir, -1*n_shifts, source_down, dest_down)
1019 lb_send_down(:) = rs%lb_local(:)
1020 lb_recv_down(:) = rs%lb_local(:)
1021 ub_recv_down(:) = rs%ub_local(:)
1022 ub_send_down(:) = rs%ub_local(:)
1024 IF (dshifts(n_shifts - 1) .LE. rs%desc%border)
THEN
1025 ub_send_down(idir) = lb_send_down(idir) + rs%desc%border - 1 - dshifts(n_shifts - 1)
1026 lb_send_down(idir) = max(lb_send_down(idir), &
1027 lb_send_down(idir) + rs%desc%border - dshifts(n_shifts))
1029 ub_recv_down(idir) = ub_recv_down(idir) - rs%desc%border
1030 lb_recv_down(idir) = max(lb_recv_down(idir) + rs%desc%border, &
1031 ub_recv_down(idir) - rs%desc%border + 1 + ushifts(n_shifts - 1))
1033 lb_send_down(idir) = 0
1034 ub_send_down(idir) = -1
1035 lb_recv_down(idir) = 0
1036 ub_recv_down(idir) = -1
1040 IF (halo_swapped(i))
THEN
1041 lb_send_down(i) = rs%lb_real(i)
1042 ub_send_down(i) = rs%ub_real(i)
1043 lb_recv_down(i) = rs%lb_real(i)
1044 ub_recv_down(i) = rs%ub_real(i)
1049 ALLOCATE (recv_buf_3d_down(lb_recv_down(1):ub_recv_down(1), &
1050 lb_recv_down(2):ub_recv_down(2), lb_recv_down(3):ub_recv_down(3)))
1051 CALL rs%desc%group%irecv(recv_buf_3d_down, source_down, req(1))
1054 nn = product(ub_send_down - lb_send_down + 1)
1055 ALLOCATE (send_buf_3d_down(lb_send_down(1):ub_send_down(1), &
1056 lb_send_down(2):ub_send_down(2), lb_send_down(3):ub_send_down(3)))
1063 IF (my_id < num_threads)
THEN
1064 lb = lb_send_down(3) + ((ub_send_down(3) - lb_send_down(3) + 1)*my_id)/num_threads
1065 ub = lb_send_down(3) + ((ub_send_down(3) - lb_send_down(3) + 1)*(my_id + 1))/num_threads - 1
1067 send_buf_3d_down(lb_send_down(1):ub_send_down(1), lb_send_down(2):ub_send_down(2), &
1068 lb:ub) = rs%r(lb_send_down(1):ub_send_down(1), &
1069 lb_send_down(2):ub_send_down(2), lb:ub)
1073 CALL rs%desc%group%isend(send_buf_3d_down, dest_down, req(3))
1076 CALL cart_shift(rs, idir, n_shifts, source_up, dest_up)
1078 lb_send_up(:) = rs%lb_local(:)
1079 lb_recv_up(:) = rs%lb_local(:)
1080 ub_recv_up(:) = rs%ub_local(:)
1081 ub_send_up(:) = rs%ub_local(:)
1083 IF (ushifts(n_shifts - 1) .LE. rs%desc%border)
THEN
1085 lb_send_up(idir) = ub_send_up(idir) - rs%desc%border + 1 + ushifts(n_shifts - 1)
1086 ub_send_up(idir) = min(ub_send_up(idir), &
1087 ub_send_up(idir) - rs%desc%border + ushifts(n_shifts))
1089 lb_recv_up(idir) = lb_recv_up(idir) + rs%desc%border
1090 ub_recv_up(idir) = min(ub_recv_up(idir) - rs%desc%border, &
1091 lb_recv_up(idir) + rs%desc%border - 1 - dshifts(n_shifts - 1))
1093 lb_send_up(idir) = 0
1094 ub_send_up(idir) = -1
1095 lb_recv_up(idir) = 0
1096 ub_recv_up(idir) = -1
1100 IF (halo_swapped(i))
THEN
1101 lb_send_up(i) = rs%lb_real(i)
1102 ub_send_up(i) = rs%ub_real(i)
1103 lb_recv_up(i) = rs%lb_real(i)
1104 ub_recv_up(i) = rs%ub_real(i)
1109 ALLOCATE (recv_buf_3d_up(lb_recv_up(1):ub_recv_up(1), &
1110 lb_recv_up(2):ub_recv_up(2), lb_recv_up(3):ub_recv_up(3)))
1111 CALL rs%desc%group%irecv(recv_buf_3d_up, source_up, req(2))
1114 nn = product(ub_send_up - lb_send_up + 1)
1115 ALLOCATE (send_buf_3d_up(lb_send_up(1):ub_send_up(1), &
1116 lb_send_up(2):ub_send_up(2), lb_send_up(3):ub_send_up(3)))
1123 IF (my_id < num_threads)
THEN
1124 lb = lb_send_up(3) + ((ub_send_up(3) - lb_send_up(3) + 1)*my_id)/num_threads
1125 ub = lb_send_up(3) + ((ub_send_up(3) - lb_send_up(3) + 1)*(my_id + 1))/num_threads - 1
1127 send_buf_3d_up(lb_send_up(1):ub_send_up(1), lb_send_up(2):ub_send_up(2), &
1128 lb:ub) = rs%r(lb_send_up(1):ub_send_up(1), &
1129 lb_send_up(2):ub_send_up(2), lb:ub)
1133 CALL rs%desc%group%isend(send_buf_3d_up, dest_up, req(4))
1139 CALL mp_waitany(req(1:2), completed)
1141 IF (completed .EQ. 1)
THEN
1144 IF (ub_recv_down(idir) .GE. lb_recv_down(idir))
THEN
1151 IF (my_id < num_threads)
THEN
1152 lb = lb_recv_down(3) + ((ub_recv_down(3) - lb_recv_down(3) + 1)*my_id)/num_threads
1153 ub = lb_recv_down(3) + ((ub_recv_down(3) - lb_recv_down(3) + 1)*(my_id + 1))/num_threads - 1
1155 rs%r(lb_recv_down(1):ub_recv_down(1), &
1156 lb_recv_down(2):ub_recv_down(2), lb:ub) = &
1157 rs%r(lb_recv_down(1):ub_recv_down(1), &
1158 lb_recv_down(2):ub_recv_down(2), lb:ub) + &
1159 recv_buf_3d_down(:, :, lb:ub)
1163 DEALLOCATE (recv_buf_3d_down)
1167 IF (ub_recv_up(idir) .GE. lb_recv_up(idir))
THEN
1174 IF (my_id < num_threads)
THEN
1175 lb = lb_recv_up(3) + ((ub_recv_up(3) - lb_recv_up(3) + 1)*my_id)/num_threads
1176 ub = lb_recv_up(3) + ((ub_recv_up(3) - lb_recv_up(3) + 1)*(my_id + 1))/num_threads - 1
1178 rs%r(lb_recv_up(1):ub_recv_up(1), &
1179 lb_recv_up(2):ub_recv_up(2), lb:ub) = &
1180 rs%r(lb_recv_up(1):ub_recv_up(1), &
1181 lb_recv_up(2):ub_recv_up(2), lb:ub) + &
1182 recv_buf_3d_up(:, :, lb:ub)
1186 DEALLOCATE (recv_buf_3d_up)
1193 CALL mp_waitall(req(3:4))
1195 DEALLOCATE (send_buf_3d_down)
1196 DEALLOCATE (send_buf_3d_up)
1199 DEALLOCATE (dshifts)
1200 DEALLOCATE (ushifts)
1204 halo_swapped(idir) = .true.
1209 ALLOCATE (bounds(0:pw%pw_grid%para%group_size - 1, 1:4))
1212 DO i = 0, pw%pw_grid%para%group_size - 1
1213 bounds(i, 1:2) = pw%pw_grid%para%bo(1:2, 1, i, 1)
1214 bounds(i, 3:4) = pw%pw_grid%para%bo(1:2, 2, i, 1)
1215 bounds(i, 1:2) = bounds(i, 1:2) - pw%pw_grid%npts(1)/2 - 1
1216 bounds(i, 3:4) = bounds(i, 3:4) - pw%pw_grid%npts(2)/2 - 1
1219 ALLOCATE (send_tasks(0:pw%pw_grid%para%group_size - 1, 1:6))
1220 ALLOCATE (send_sizes(0:pw%pw_grid%para%group_size - 1))
1221 ALLOCATE (send_disps(0:pw%pw_grid%para%group_size - 1))
1222 ALLOCATE (recv_tasks(0:pw%pw_grid%para%group_size - 1, 1:6))
1223 ALLOCATE (recv_sizes(0:pw%pw_grid%para%group_size - 1))
1224 ALLOCATE (recv_disps(0:pw%pw_grid%para%group_size - 1))
1225 send_tasks(:, 1) = 1
1226 send_tasks(:, 2) = 0
1227 send_tasks(:, 3) = 1
1228 send_tasks(:, 4) = 0
1229 send_tasks(:, 5) = 1
1230 send_tasks(:, 6) = 0
1234 my_rs_rank = rs%desc%my_pos
1235 my_pw_rank = pw%pw_grid%para%rs_mpo
1246 DO i = 0, rs%desc%group_size - 1
1248 coords(:) = rs%desc%rank2coord(:, rs%desc%real2virtual(i))
1252 pos(:) = get_limit(rs%desc%npts(idir), rs%desc%group_dim(idir), coords(idir))
1253 pos(:) = pos(:) - rs%desc%npts(idir)/2 - 1
1254 lb_send(idir) = pos(1)
1255 ub_send(idir) = pos(2)
1258 IF (lb_send(1) .GT. bounds(my_rs_rank, 2)) cycle
1259 IF (ub_send(1) .LT. bounds(my_rs_rank, 1)) cycle
1260 IF (lb_send(2) .GT. bounds(my_rs_rank, 4)) cycle
1261 IF (ub_send(2) .LT. bounds(my_rs_rank, 3)) cycle
1263 recv_tasks(i, 1) = max(lb_send(1), bounds(my_rs_rank, 1))
1264 recv_tasks(i, 2) = min(ub_send(1), bounds(my_rs_rank, 2))
1265 recv_tasks(i, 3) = max(lb_send(2), bounds(my_rs_rank, 3))
1266 recv_tasks(i, 4) = min(ub_send(2), bounds(my_rs_rank, 4))
1267 recv_tasks(i, 5) = lb_send(3)
1268 recv_tasks(i, 6) = ub_send(3)
1269 recv_sizes(i) = (recv_tasks(i, 2) - recv_tasks(i, 1) + 1)* &
1270 (recv_tasks(i, 4) - recv_tasks(i, 3) + 1)*(recv_tasks(i, 6) - recv_tasks(i, 5) + 1)
1275 coords(:) = rs%desc%rank2coord(:, rs%desc%real2virtual(my_rs_rank))
1277 pos(:) = get_limit(rs%desc%npts(idir), rs%desc%group_dim(idir), coords(idir))
1278 pos(:) = pos(:) - rs%desc%npts(idir)/2 - 1
1279 lb_send(idir) = pos(1)
1280 ub_send(idir) = pos(2)
1283 lb_recv(:) = lb_send(:)
1284 ub_recv(:) = ub_send(:)
1287 DO j = 0, pw%pw_grid%para%group_size - 1
1289 IF (lb_send(1) .GT. bounds(j, 2)) cycle
1290 IF (ub_send(1) .LT. bounds(j, 1)) cycle
1291 IF (lb_send(2) .GT. bounds(j, 4)) cycle
1292 IF (ub_send(2) .LT. bounds(j, 3)) cycle
1294 send_tasks(j, 1) = max(lb_send(1), bounds(j, 1))
1295 send_tasks(j, 2) = min(ub_send(1), bounds(j, 2))
1296 send_tasks(j, 3) = max(lb_send(2), bounds(j, 3))
1297 send_tasks(j, 4) = min(ub_send(2), bounds(j, 4))
1298 send_tasks(j, 5) = lb_send(3)
1299 send_tasks(j, 6) = ub_send(3)
1300 send_sizes(j) = (send_tasks(j, 2) - send_tasks(j, 1) + 1)* &
1301 (send_tasks(j, 4) - send_tasks(j, 3) + 1)*(send_tasks(j, 6) - send_tasks(j, 5) + 1)
1308 DO i = 1, pw%pw_grid%para%group_size - 1
1309 send_disps(i) = send_disps(i - 1) + send_sizes(i - 1)
1310 recv_disps(i) = recv_disps(i - 1) + recv_sizes(i - 1)
1313 cpassert(sum(send_sizes) == product(ub_recv - lb_recv + 1))
1315 ALLOCATE (send_bufs(0:rs%desc%group_size - 1))
1316 ALLOCATE (recv_bufs(0:rs%desc%group_size - 1))
1318 DO i = 0, rs%desc%group_size - 1
1319 IF (send_sizes(i) .NE. 0)
THEN
1320 ALLOCATE (send_bufs(i)%array(send_sizes(i)))
1322 NULLIFY (send_bufs(i)%array)
1324 IF (recv_sizes(i) .NE. 0)
THEN
1325 ALLOCATE (recv_bufs(i)%array(recv_sizes(i)))
1327 NULLIFY (recv_bufs(i)%array)
1331 ALLOCATE (recv_reqs(0:rs%desc%group_size - 1))
1332 recv_reqs = mp_request_null
1334 DO i = 0, rs%desc%group_size - 1
1335 IF (recv_sizes(i) .NE. 0)
THEN
1336 CALL rs%desc%group%irecv(recv_bufs(i)%array, i, recv_reqs(i))
1344 DO i = 0, rs%desc%group_size - 1
1346 DO z = send_tasks(i, 5), send_tasks(i, 6)
1347 DO y = send_tasks(i, 3), send_tasks(i, 4)
1348 DO x = send_tasks(i, 1), send_tasks(i, 2)
1350 send_bufs(i)%array(k) = rs%r(x, y, z)
1357 ALLOCATE (send_reqs(0:rs%desc%group_size - 1))
1358 send_reqs = mp_request_null
1360 DO i = 0, rs%desc%group_size - 1
1361 IF (send_sizes(i) .NE. 0)
THEN
1362 CALL rs%desc%group%isend(send_bufs(i)%array, i, send_reqs(i))
1368 DO i = 0, rs%desc%group_size - 1
1369 IF (recv_sizes(i) .EQ. 0) cycle
1371 CALL mp_waitany(recv_reqs, completed)
1373 DO z = recv_tasks(completed - 1, 5), recv_tasks(completed - 1, 6)
1374 DO y = recv_tasks(completed - 1, 3), recv_tasks(completed - 1, 4)
1375 DO x = recv_tasks(completed - 1, 1), recv_tasks(completed - 1, 2)
1377 pw%array(x, y, z) = recv_bufs(completed - 1)%array(k)
1383 CALL mp_waitall(send_reqs)
1385 DEALLOCATE (recv_reqs)
1386 DEALLOCATE (send_reqs)
1388 DO i = 0, rs%desc%group_size - 1
1389 IF (
ASSOCIATED(send_bufs(i)%array))
THEN
1390 DEALLOCATE (send_bufs(i)%array)
1392 IF (
ASSOCIATED(recv_bufs(i)%array))
THEN
1393 DEALLOCATE (recv_bufs(i)%array)
1397 DEALLOCATE (send_bufs)
1398 DEALLOCATE (recv_bufs)
1399 DEALLOCATE (send_tasks)
1400 DEALLOCATE (send_sizes)
1401 DEALLOCATE (send_disps)
1402 DEALLOCATE (recv_tasks)
1403 DEALLOCATE (recv_sizes)
1404 DEALLOCATE (recv_disps)
1406 IF (debug_this_module)
THEN
1408 pw_sum = pw_integrate_function(pw)
1409 IF (abs(pw_sum - rs_sum)/max(1.0_dp, abs(pw_sum), abs(rs_sum)) > epsilon(rs_sum)*1000)
THEN
1410 WRITE (error_string,
'(A,6(1X,I4.4),3F25.16)')
"rs_pw_transfer_distributed", &
1411 rs%desc%npts, rs%desc%group_dim, pw_sum, rs_sum, abs(pw_sum - rs_sum)
1412 CALL cp_abort(__location__, &
1413 error_string//
" Please report this bug ... quick workaround: use "// &
1414 "DISTRIBUTION_TYPE REPLICATED")
1418 END SUBROUTINE transfer_rs2pw_distributed
1442 SUBROUTINE transfer_pw2rs_distributed(rs, pw)
1443 TYPE(realspace_grid_type),
INTENT(IN) :: rs
1444 TYPE(pw_r3d_rs_type),
INTENT(IN) :: pw
1446 INTEGER :: completed, dest_down, dest_up, i, idir, j, k, lb, my_id, my_pw_rank, my_rs_rank, &
1447 n_shifts, nn, num_threads, position, source_down, source_up, ub, x, y, z
1448 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: dshifts, recv_disps, recv_sizes, &
1449 send_disps, send_sizes, ushifts
1450 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: bounds, recv_tasks, send_tasks
1451 INTEGER,
DIMENSION(2) :: neighbours, pos
1452 INTEGER,
DIMENSION(3) :: coords, lb_recv, lb_recv_down, lb_recv_up, lb_send, lb_send_down, &
1453 lb_send_up, ub_recv, ub_recv_down, ub_recv_up, ub_send, ub_send_down, ub_send_up
1454 LOGICAL,
DIMENSION(3) :: halo_swapped
1455 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: recv_buf_3d_down, recv_buf_3d_up, &
1456 send_buf_3d_down, send_buf_3d_up
1457 TYPE(cp_1d_r_p_type),
ALLOCATABLE,
DIMENSION(:) :: recv_bufs, send_bufs
1458 TYPE(mp_request_type),
ALLOCATABLE,
DIMENSION(:) :: recv_reqs, send_reqs
1459 TYPE(mp_request_type),
DIMENSION(4) :: req
1468 ALLOCATE (bounds(0:pw%pw_grid%para%group_size - 1, 1:4))
1470 DO i = 0, pw%pw_grid%para%group_size - 1
1471 bounds(i, 1:2) = pw%pw_grid%para%bo(1:2, 1, i, 1)
1472 bounds(i, 3:4) = pw%pw_grid%para%bo(1:2, 2, i, 1)
1473 bounds(i, 1:2) = bounds(i, 1:2) - pw%pw_grid%npts(1)/2 - 1
1474 bounds(i, 3:4) = bounds(i, 3:4) - pw%pw_grid%npts(2)/2 - 1
1477 ALLOCATE (send_tasks(0:pw%pw_grid%para%group_size - 1, 1:6))
1478 ALLOCATE (send_sizes(0:pw%pw_grid%para%group_size - 1))
1479 ALLOCATE (send_disps(0:pw%pw_grid%para%group_size - 1))
1480 ALLOCATE (recv_tasks(0:pw%pw_grid%para%group_size - 1, 1:6))
1481 ALLOCATE (recv_sizes(0:pw%pw_grid%para%group_size - 1))
1482 ALLOCATE (recv_disps(0:pw%pw_grid%para%group_size - 1))
1485 send_tasks(:, 1) = 1
1486 send_tasks(:, 2) = 0
1487 send_tasks(:, 3) = 1
1488 send_tasks(:, 4) = 0
1489 send_tasks(:, 5) = 1
1490 send_tasks(:, 6) = 0
1494 recv_tasks(:, 1) = 1
1495 recv_tasks(:, 2) = 0
1496 send_tasks(:, 3) = 1
1497 send_tasks(:, 4) = 0
1498 send_tasks(:, 5) = 1
1499 send_tasks(:, 6) = 0
1502 my_rs_rank = rs%desc%my_pos
1503 my_pw_rank = pw%pw_grid%para%rs_mpo
1516 DO i = 0, pw%pw_grid%para%group_size - 1
1518 coords(:) = rs%desc%rank2coord(:, rs%desc%real2virtual(i))
1522 pos(:) = get_limit(rs%desc%npts(idir), rs%desc%group_dim(idir), coords(idir))
1523 pos(:) = pos(:) - rs%desc%npts(idir)/2 - 1
1524 lb_send(idir) = pos(1)
1525 ub_send(idir) = pos(2)
1528 IF (ub_send(1) .LT. bounds(my_rs_rank, 1)) cycle
1529 IF (lb_send(1) .GT. bounds(my_rs_rank, 2)) cycle
1530 IF (ub_send(2) .LT. bounds(my_rs_rank, 3)) cycle
1531 IF (lb_send(2) .GT. bounds(my_rs_rank, 4)) cycle
1533 send_tasks(i, 1) = max(lb_send(1), bounds(my_rs_rank, 1))
1534 send_tasks(i, 2) = min(ub_send(1), bounds(my_rs_rank, 2))
1535 send_tasks(i, 3) = max(lb_send(2), bounds(my_rs_rank, 3))
1536 send_tasks(i, 4) = min(ub_send(2), bounds(my_rs_rank, 4))
1537 send_tasks(i, 5) = lb_send(3)
1538 send_tasks(i, 6) = ub_send(3)
1539 send_sizes(i) = (send_tasks(i, 2) - send_tasks(i, 1) + 1)* &
1540 (send_tasks(i, 4) - send_tasks(i, 3) + 1)*(send_tasks(i, 6) - send_tasks(i, 5) + 1)
1545 coords(:) = rs%desc%rank2coord(:, rs%desc%real2virtual(my_rs_rank))
1547 pos(:) = get_limit(rs%desc%npts(idir), rs%desc%group_dim(idir), coords(idir))
1548 pos(:) = pos(:) - rs%desc%npts(idir)/2 - 1
1549 lb_send(idir) = pos(1)
1550 ub_send(idir) = pos(2)
1553 lb_recv(:) = lb_send(:)
1554 ub_recv(:) = ub_send(:)
1558 DO j = 0, pw%pw_grid%para%group_size - 1
1560 IF (ub_send(1) .LT. bounds(j, 1)) cycle
1561 IF (lb_send(1) .GT. bounds(j, 2)) cycle
1562 IF (ub_send(2) .LT. bounds(j, 3)) cycle
1563 IF (lb_send(2) .GT. bounds(j, 4)) cycle
1565 recv_tasks(j, 1) = max(lb_send(1), bounds(j, 1))
1566 recv_tasks(j, 2) = min(ub_send(1), bounds(j, 2))
1567 recv_tasks(j, 3) = max(lb_send(2), bounds(j, 3))
1568 recv_tasks(j, 4) = min(ub_send(2), bounds(j, 4))
1569 recv_tasks(j, 5) = lb_send(3)
1570 recv_tasks(j, 6) = ub_send(3)
1571 recv_sizes(j) = (recv_tasks(j, 2) - recv_tasks(j, 1) + 1)* &
1572 (recv_tasks(j, 4) - recv_tasks(j, 3) + 1)*(recv_tasks(j, 6) - recv_tasks(j, 5) + 1)
1579 DO i = 1, pw%pw_grid%para%group_size - 1
1580 send_disps(i) = send_disps(i - 1) + send_sizes(i - 1)
1581 recv_disps(i) = recv_disps(i - 1) + recv_sizes(i - 1)
1584 cpassert(sum(recv_sizes) == product(ub_recv - lb_recv + 1))
1586 ALLOCATE (send_bufs(0:rs%desc%group_size - 1))
1587 ALLOCATE (recv_bufs(0:rs%desc%group_size - 1))
1589 DO i = 0, rs%desc%group_size - 1
1590 IF (send_sizes(i) .NE. 0)
THEN
1591 ALLOCATE (send_bufs(i)%array(send_sizes(i)))
1593 NULLIFY (send_bufs(i)%array)
1595 IF (recv_sizes(i) .NE. 0)
THEN
1596 ALLOCATE (recv_bufs(i)%array(recv_sizes(i)))
1598 NULLIFY (recv_bufs(i)%array)
1602 ALLOCATE (recv_reqs(0:rs%desc%group_size - 1))
1603 recv_reqs = mp_request_null
1605 DO i = 0, rs%desc%group_size - 1
1606 IF (recv_sizes(i) .NE. 0)
THEN
1607 CALL rs%desc%group%irecv(recv_bufs(i)%array, i, recv_reqs(i))
1615 DO i = 0, rs%desc%group_size - 1
1617 DO z = send_tasks(i, 5), send_tasks(i, 6)
1618 DO y = send_tasks(i, 3), send_tasks(i, 4)
1619 DO x = send_tasks(i, 1), send_tasks(i, 2)
1621 send_bufs(i)%array(k) = pw%array(x, y, z)
1628 ALLOCATE (send_reqs(0:rs%desc%group_size - 1))
1629 send_reqs = mp_request_null
1631 DO i = 0, rs%desc%group_size - 1
1632 IF (send_sizes(i) .NE. 0)
THEN
1633 CALL rs%desc%group%isend(send_bufs(i)%array, i, send_reqs(i))
1640 DO i = 0, rs%desc%group_size - 1
1641 IF (recv_sizes(i) .EQ. 0) cycle
1643 CALL mp_waitany(recv_reqs, completed)
1645 DO z = recv_tasks(completed - 1, 5), recv_tasks(completed - 1, 6)
1646 DO y = recv_tasks(completed - 1, 3), recv_tasks(completed - 1, 4)
1647 DO x = recv_tasks(completed - 1, 1), recv_tasks(completed - 1, 2)
1649 rs%r(x, y, z) = recv_bufs(completed - 1)%array(k)
1655 CALL mp_waitall(send_reqs)
1657 DEALLOCATE (recv_reqs)
1658 DEALLOCATE (send_reqs)
1660 DO i = 0, rs%desc%group_size - 1
1661 IF (
ASSOCIATED(send_bufs(i)%array))
THEN
1662 DEALLOCATE (send_bufs(i)%array)
1664 IF (
ASSOCIATED(recv_bufs(i)%array))
THEN
1665 DEALLOCATE (recv_bufs(i)%array)
1669 DEALLOCATE (send_bufs)
1670 DEALLOCATE (recv_bufs)
1671 DEALLOCATE (send_tasks)
1672 DEALLOCATE (send_sizes)
1673 DEALLOCATE (send_disps)
1674 DEALLOCATE (recv_tasks)
1675 DEALLOCATE (recv_sizes)
1676 DEALLOCATE (recv_disps)
1679 halo_swapped = .false.
1683 IF (rs%desc%perd(idir) /= 1)
THEN
1685 ALLOCATE (dshifts(0:rs%desc%neighbours(idir)))
1686 ALLOCATE (ushifts(0:rs%desc%neighbours(idir)))
1690 DO n_shifts = 1, rs%desc%neighbours(idir)
1695 position =
modulo(rs%desc%virtual_group_coor(idir) - n_shifts, rs%desc%group_dim(idir))
1696 neighbours = get_limit(rs%desc%npts(idir), rs%desc%group_dim(idir), position)
1697 dshifts(n_shifts) = dshifts(n_shifts - 1) + (neighbours(2) - neighbours(1) + 1)
1699 position =
modulo(rs%desc%virtual_group_coor(idir) + n_shifts, rs%desc%group_dim(idir))
1700 neighbours = get_limit(rs%desc%npts(idir), rs%desc%group_dim(idir), position)
1701 ushifts(n_shifts) = ushifts(n_shifts - 1) + (neighbours(2) - neighbours(1) + 1)
1707 CALL cart_shift(rs, idir, -1*n_shifts, source_down, dest_down)
1709 lb_send_down(:) = rs%lb_local(:)
1710 ub_send_down(:) = rs%ub_local(:)
1711 lb_recv_down(:) = rs%lb_local(:)
1712 ub_recv_down(:) = rs%ub_local(:)
1714 IF (dshifts(n_shifts - 1) .LE. rs%desc%border)
THEN
1715 lb_send_down(idir) = lb_send_down(idir) + rs%desc%border
1716 ub_send_down(idir) = min(ub_send_down(idir) - rs%desc%border, &
1717 lb_send_down(idir) + rs%desc%border - 1 - dshifts(n_shifts - 1))
1719 lb_recv_down(idir) = ub_recv_down(idir) - rs%desc%border + 1 + ushifts(n_shifts - 1)
1720 ub_recv_down(idir) = min(ub_recv_down(idir), &
1721 ub_recv_down(idir) - rs%desc%border + ushifts(n_shifts))
1723 lb_send_down(idir) = 0
1724 ub_send_down(idir) = -1
1725 lb_recv_down(idir) = 0
1726 ub_recv_down(idir) = -1
1730 IF (.NOT. (halo_swapped(i) .OR. i .EQ. idir))
THEN
1731 lb_send_down(i) = rs%lb_real(i)
1732 ub_send_down(i) = rs%ub_real(i)
1733 lb_recv_down(i) = rs%lb_real(i)
1734 ub_recv_down(i) = rs%ub_real(i)
1739 nn = product(ub_recv_down - lb_recv_down + 1)
1740 ALLOCATE (recv_buf_3d_down(lb_recv_down(1):ub_recv_down(1), &
1741 lb_recv_down(2):ub_recv_down(2), lb_recv_down(3):ub_recv_down(3)))
1744 CALL rs%desc%group%irecv(recv_buf_3d_down, source_down, req(1))
1747 nn = product(ub_send_down - lb_send_down + 1)
1748 ALLOCATE (send_buf_3d_down(lb_send_down(1):ub_send_down(1), &
1749 lb_send_down(2):ub_send_down(2), lb_send_down(3):ub_send_down(3)))
1756 IF (my_id < num_threads)
THEN
1757 lb = lb_send_down(3) + ((ub_send_down(3) - lb_send_down(3) + 1)*my_id)/num_threads
1758 ub = lb_send_down(3) + ((ub_send_down(3) - lb_send_down(3) + 1)*(my_id + 1))/num_threads - 1
1760 send_buf_3d_down(lb_send_down(1):ub_send_down(1), lb_send_down(2):ub_send_down(2), &
1761 lb:ub) = rs%r(lb_send_down(1):ub_send_down(1), &
1762 lb_send_down(2):ub_send_down(2), lb:ub)
1766 CALL rs%desc%group%isend(send_buf_3d_down, dest_down, req(3))
1770 CALL cart_shift(rs, idir, n_shifts, source_up, dest_up)
1772 lb_send_up(:) = rs%lb_local(:)
1773 ub_send_up(:) = rs%ub_local(:)
1774 lb_recv_up(:) = rs%lb_local(:)
1775 ub_recv_up(:) = rs%ub_local(:)
1777 IF (ushifts(n_shifts - 1) .LE. rs%desc%border)
THEN
1778 ub_send_up(idir) = ub_send_up(idir) - rs%desc%border
1779 lb_send_up(idir) = max(lb_send_up(idir) + rs%desc%border, &
1780 ub_send_up(idir) - rs%desc%border + 1 + ushifts(n_shifts - 1))
1782 ub_recv_up(idir) = lb_recv_up(idir) + rs%desc%border - 1 - dshifts(n_shifts - 1)
1783 lb_recv_up(idir) = max(lb_recv_up(idir), &
1784 lb_recv_up(idir) + rs%desc%border - dshifts(n_shifts))
1786 lb_send_up(idir) = 0
1787 ub_send_up(idir) = -1
1788 lb_recv_up(idir) = 0
1789 ub_recv_up(idir) = -1
1793 IF (.NOT. (halo_swapped(i) .OR. i .EQ. idir))
THEN
1794 lb_send_up(i) = rs%lb_real(i)
1795 ub_send_up(i) = rs%ub_real(i)
1796 lb_recv_up(i) = rs%lb_real(i)
1797 ub_recv_up(i) = rs%ub_real(i)
1802 nn = product(ub_recv_up - lb_recv_up + 1)
1803 ALLOCATE (recv_buf_3d_up(lb_recv_up(1):ub_recv_up(1), &
1804 lb_recv_up(2):ub_recv_up(2), lb_recv_up(3):ub_recv_up(3)))
1808 CALL rs%desc%group%irecv(recv_buf_3d_up, source_up, req(2))
1811 nn = product(ub_send_up - lb_send_up + 1)
1812 ALLOCATE (send_buf_3d_up(lb_send_up(1):ub_send_up(1), &
1813 lb_send_up(2):ub_send_up(2), lb_send_up(3):ub_send_up(3)))
1820 IF (my_id < num_threads)
THEN
1821 lb = lb_send_up(3) + ((ub_send_up(3) - lb_send_up(3) + 1)*my_id)/num_threads
1822 ub = lb_send_up(3) + ((ub_send_up(3) - lb_send_up(3) + 1)*(my_id + 1))/num_threads - 1
1824 send_buf_3d_up(lb_send_up(1):ub_send_up(1), lb_send_up(2):ub_send_up(2), &
1825 lb:ub) = rs%r(lb_send_up(1):ub_send_up(1), &
1826 lb_send_up(2):ub_send_up(2), lb:ub)
1830 CALL rs%desc%group%isend(send_buf_3d_up, dest_up, req(4))
1836 CALL mp_waitany(req(1:2), completed)
1838 IF (completed .EQ. 1)
THEN
1841 IF (ub_recv_down(idir) .GE. lb_recv_down(idir))
THEN
1849 IF (my_id < num_threads)
THEN
1850 lb = lb_recv_down(3) + ((ub_recv_down(3) - lb_recv_down(3) + 1)*my_id)/num_threads
1851 ub = lb_recv_down(3) + ((ub_recv_down(3) - lb_recv_down(3) + 1)*(my_id + 1))/num_threads - 1
1853 rs%r(lb_recv_down(1):ub_recv_down(1), lb_recv_down(2):ub_recv_down(2), &
1854 lb:ub) = recv_buf_3d_down(:, :, lb:ub)
1859 DEALLOCATE (recv_buf_3d_down)
1863 IF (ub_recv_up(idir) .GE. lb_recv_up(idir))
THEN
1871 IF (my_id < num_threads)
THEN
1872 lb = lb_recv_up(3) + ((ub_recv_up(3) - lb_recv_up(3) + 1)*my_id)/num_threads
1873 ub = lb_recv_up(3) + ((ub_recv_up(3) - lb_recv_up(3) + 1)*(my_id + 1))/num_threads - 1
1875 rs%r(lb_recv_up(1):ub_recv_up(1), lb_recv_up(2):ub_recv_up(2), &
1876 lb:ub) = recv_buf_3d_up(:, :, lb:ub)
1881 DEALLOCATE (recv_buf_3d_up)
1885 CALL mp_waitall(req(3:4))
1887 DEALLOCATE (send_buf_3d_down)
1888 DEALLOCATE (send_buf_3d_up)
1891 DEALLOCATE (ushifts)
1892 DEALLOCATE (dshifts)
1895 halo_swapped(idir) = .true.
1899 END SUBROUTINE transfer_pw2rs_distributed
1910 TYPE(realspace_grid_type),
INTENT(IN) :: rs
1912 CHARACTER(len=*),
PARAMETER :: routinen =
'rs_grid_zero'
1914 INTEGER :: handle, i, j, k, l(3), u(3)
1916 CALL timeset(routinen, handle)
1917 l(1) = lbound(rs%r, 1); l(2) = lbound(rs%r, 2); l(3) = lbound(rs%r, 3)
1918 u(1) = ubound(rs%r, 1); u(2) = ubound(rs%r, 2); u(3) = ubound(rs%r, 3)
1925 rs%r(i, j, k) = 0.0_dp
1930 CALL timestop(handle)
1946 TYPE(realspace_grid_type),
INTENT(IN) :: rs1, rs2, rs3
1947 REAL(dp),
INTENT(IN) :: scalar
1949 CHARACTER(len=*),
PARAMETER :: routinen =
'rs_grid_mult_and_add'
1951 INTEGER :: handle, i, j, k, l(3), u(3)
1955 CALL timeset(routinen, handle)
1956 IF (scalar .NE. 0.0_dp)
THEN
1957 l(1) = lbound(rs1%r, 1); l(2) = lbound(rs1%r, 2); l(3) = lbound(rs1%r, 3)
1958 u(1) = ubound(rs1%r, 1); u(2) = ubound(rs1%r, 2); u(3) = ubound(rs1%r, 3)
1965 rs1%r(i, j, k) = rs1%r(i, j, k) + scalar*rs2%r(i, j, k)*rs3%r(i, j, k)
1971 CALL timestop(handle)
1985 TYPE(pw_grid_type),
INTENT(IN),
TARGET :: pw_grid
1986 TYPE(realspace_grid_type),
INTENT(IN) :: rs
1988 cpassert(
ASSOCIATED(rs%desc%pw, pw_grid))
1989 rs%desc%dh = pw_grid%dh
1990 rs%desc%dh_inv = pw_grid%dh_inv
2002 TYPE(realspace_grid_desc_type),
INTENT(INOUT) :: rs_desc
2004 cpassert(rs_desc%ref_count > 0)
2005 rs_desc%ref_count = rs_desc%ref_count + 1
2016 TYPE(realspace_grid_type),
INTENT(INOUT) :: rs_grid
2023 IF (
ALLOCATED(rs_grid%px))
DEALLOCATE (rs_grid%px)
2024 IF (
ALLOCATED(rs_grid%py))
DEALLOCATE (rs_grid%py)
2025 IF (
ALLOCATED(rs_grid%pz))
DEALLOCATE (rs_grid%pz)
2036 TYPE(realspace_grid_desc_type),
POINTER :: rs_desc
2038 IF (
ASSOCIATED(rs_desc))
THEN
2039 cpassert(rs_desc%ref_count > 0)
2040 rs_desc%ref_count = rs_desc%ref_count - 1
2041 IF (rs_desc%ref_count == 0)
THEN
2043 CALL pw_grid_release(rs_desc%pw)
2045 IF (rs_desc%parallel)
THEN
2047 CALL rs_desc%group%free()
2049 DEALLOCATE (rs_desc%virtual2real)
2050 DEALLOCATE (rs_desc%real2virtual)
2053 IF (rs_desc%distributed)
THEN
2054 DEALLOCATE (rs_desc%rank2coord)
2055 DEALLOCATE (rs_desc%coord2rank)
2056 DEALLOCATE (rs_desc%lb_global)
2057 DEALLOCATE (rs_desc%ub_global)
2058 DEALLOCATE (rs_desc%x2coord)
2059 DEALLOCATE (rs_desc%y2coord)
2060 DEALLOCATE (rs_desc%z2coord)
2063 DEALLOCATE (rs_desc)
2081 PURE SUBROUTINE cart_shift(rs_grid, dir, disp, source, dest)
2083 TYPE(realspace_grid_type),
INTENT(IN) :: rs_grid
2084 INTEGER,
INTENT(IN) :: dir, disp
2085 INTEGER,
INTENT(OUT) :: source, dest
2087 INTEGER,
DIMENSION(3) :: shift_coords
2089 shift_coords = rs_grid%desc%virtual_group_coor
2090 shift_coords(dir) =
modulo(shift_coords(dir) + disp, rs_grid%desc%group_dim(dir))
2091 dest = rs_grid%desc%virtual2real(rs_grid%desc%coord2rank(shift_coords(1), shift_coords(2), shift_coords(3)))
2092 shift_coords = rs_grid%desc%virtual_group_coor
2093 shift_coords(dir) =
modulo(shift_coords(dir) - disp, rs_grid%desc%group_dim(dir))
2094 source = rs_grid%desc%virtual2real(rs_grid%desc%coord2rank(shift_coords(1), shift_coords(2), shift_coords(3)))
2107 TYPE(realspace_grid_desc_type),
INTENT(IN) :: desc
2108 INTEGER :: max_ngpts
2110 CHARACTER(len=*),
PARAMETER :: routinen =
'rs_grid_max_ngpts'
2112 INTEGER :: handle, i
2113 INTEGER,
DIMENSION(3) :: lb, ub
2115 CALL timeset(routinen, handle)
2118 IF ((desc%pw%para%mode == pw_mode_local) .OR. &
2119 (all(desc%group_dim == 1)))
THEN
2120 cpassert(product(int(desc%npts, kind=int_8)) < huge(1))
2121 max_ngpts = product(desc%npts)
2123 DO i = 0, desc%group_size - 1
2124 lb = desc%lb_global(:, i)
2125 ub = desc%ub_global(:, i)
2126 lb = lb - desc%border*(1 - desc%perd)
2127 ub = ub + desc%border*(1 - desc%perd)
2128 cpassert(product(int(ub - lb + 1, kind=int_8)) < huge(1))
2129 max_ngpts = max(max_ngpts, product(ub - lb + 1))
2133 CALL timestop(handle)
2148 TYPE(realspace_grid_type),
INTENT(IN) :: rs_grid
2149 REAL(kind=dp),
DIMENSION(3, 3),
INTENT(IN) :: h_inv
2150 REAL(kind=dp),
DIMENSION(3),
INTENT(IN) :: ra
2151 INTEGER,
INTENT(IN),
OPTIONAL :: offset, group_size, my_pos
2153 INTEGER :: dir, lb(3), location(3), tp(3), ub(3)
2157 IF (.NOT. all(rs_grid%desc%perd == 1))
THEN
2160 tp(dir) = floor(dot_product(h_inv(dir, :), ra)*rs_grid%desc%npts(dir))
2161 tp(dir) =
modulo(tp(dir), rs_grid%desc%npts(dir))
2162 IF (rs_grid%desc%perd(dir) .NE. 1)
THEN
2163 lb(dir) = rs_grid%lb_local(dir) + rs_grid%desc%border
2164 ub(dir) = rs_grid%ub_local(dir) - rs_grid%desc%border
2166 lb(dir) = rs_grid%lb_local(dir)
2167 ub(dir) = rs_grid%ub_local(dir)
2170 location(dir) = tp(dir) + rs_grid%desc%lb(dir)
2172 IF (all(lb(:) <= location(:)) .AND. all(location(:) <= ub(:)))
THEN
2176 IF (
PRESENT(offset) .AND.
PRESENT(group_size) .AND.
PRESENT(my_pos))
THEN
2178 IF (
modulo(offset, group_size) == my_pos) res = .true.
real(kind=dp) function det_3x3(a)
...
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
various utilities that regard array of different kinds: output, allocation,... maybe it is not a good...
various routines to log and control the output. The idea is that decisions about where to log should ...
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_memory(mem)
Returns the total amount of memory [bytes] in use, if known, zero otherwise.
Collection of simple mathematical functions and subroutines.
Interface to the message passing library MPI.
type(mp_comm_type), parameter, public mp_comm_null
subroutine, public mp_waitany(requests, completed)
waits for completion of any of the given requests
type(mp_request_type), parameter, public mp_request_null
Fortran API for the offload package, which is written in C.
subroutine, public offload_free_buffer(buffer)
Deallocates given buffer.
subroutine, public offload_create_buffer(length, buffer)
Allocates a buffer of given length, ie. number of elements.
integer, parameter, public pw_mode_local
This module defines the grid data type and some basic operations on it.
subroutine, public pw_grid_release(pw_grid)
releases the given pw grid
subroutine, public pw_grid_retain(pw_grid)
retains the given pw grid
subroutine, public rs_grid_print(rs, iounit)
Print information on grids to output.
integer, parameter, public rsgrid_replicated
subroutine, public rs_grid_create(rs, desc)
...
subroutine, public rs_grid_create_descriptor(desc, pw_grid, input_settings, border_points)
Determine the setup of real space grids - this is divided up into the creation of a descriptor and th...
pure integer function, public rs_grid_locate_rank(rs_desc, rank_in, shift)
returns the 1D rank of the task which is a cartesian shift away from 1D rank rank_in only possible if...
subroutine, public transfer_pw2rs(rs, pw)
...
integer, parameter, public rsgrid_automatic
pure subroutine, public rs_grid_reorder_ranks(desc, real2virtual)
Defines a new ordering of ranks on this realspace grid, recalculating the data bounds and reallocatin...
subroutine, public rs_grid_mult_and_add(rs1, rs2, rs3, scalar)
rs1(i) = rs1(i) + rs2(i)*rs3(i)
subroutine, public rs_grid_set_box(pw_grid, rs)
Set box matrix info for real space grid This is needed for variable cell simulations.
subroutine, public rs_grid_retain_descriptor(rs_desc)
retains the given rs grid descriptor (see doc/ReferenceCounting.html)
subroutine, public rs_grid_release_descriptor(rs_desc)
releases the given rs grid descriptor (see doc/ReferenceCounting.html)
integer function, public rs_grid_max_ngpts(desc)
returns the maximum number of points in the local grid of any process to account for the case where t...
integer, parameter, public rsgrid_distributed
pure logical function, public map_gaussian_here(rs_grid, h_inv, ra, offset, group_size, my_pos)
...
subroutine, public transfer_rs2pw(rs, pw)
...
subroutine, public rs_grid_release(rs_grid)
releases the given rs grid (see doc/ReferenceCounting.html)
subroutine, public rs_grid_zero(rs)
Initialize grid to zero.
All kind of helpful little routines.
pure integer function, dimension(2), public get_limit(m, n, me)
divide m entries into n parts, return size of part me
void offload_free_buffer(offload_buffer *buffer)
Deallocate given buffer.