28 USE cp_dbcsr_api,
ONLY: dbcsr_convert_sizes_to_offsets, &
72#include "./base/base_uses.f90"
77 LOGICAL,
PRIVATE,
PARAMETER :: debug_this_module = .false.
81 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'task_list_methods'
! NOTE(review): visible portion of the task-list generation routine (the
! SUBROUTINE header and END lie outside this view, and many original lines
! are missing from this extraction).  From the visible code it: gathers
! kinds/particles/dft_control, builds a per-kind basis-set list, walks the
! sab_orb neighbor list to create grid tasks, distributes them, and finally
! records per-grid-level pair boundaries (npairs/taskstart/taskstop) plus
! optional debug consistency checks.
117 reorder_rs_grid_ranks, skip_load_balance_distributed, &
118 pw_env_external, sab_orb_external, ext_kpoints)
! Dummy arguments (optional pw_env/sab_orb/kpoints allow external overrides).
122 CHARACTER(LEN=*),
INTENT(IN) :: basis_type
123 LOGICAL,
INTENT(IN) :: reorder_rs_grid_ranks, &
124 skip_load_balance_distributed
125 TYPE(
pw_env_type),
OPTIONAL,
POINTER :: pw_env_external
127 OPTIONAL,
POINTER :: sab_orb_external
128 TYPE(
kpoint_type),
OPTIONAL,
POINTER :: ext_kpoints
! Local parameters and work variables.
130 CHARACTER(LEN=*),
PARAMETER :: routinen =
'generate_qs_task_list'
! max_tasks is the initial task-buffer size; the buffer is grown on demand.
131 INTEGER,
PARAMETER :: max_tasks = 2000
133 INTEGER :: cindex, curr_tasks, handle, i, iatom, iatom_old, igrid_level, igrid_level_old, &
134 ikind, ilevel, img, img_old, ipair, ipgf, iset, itask, jatom, jatom_old, jkind, jpgf, &
135 jset, maxpgf, maxset, natoms, nimages, nkind, nseta, nsetb, slot
136 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :) :: blocks
137 INTEGER,
DIMENSION(3) :: cellind
138 INTEGER,
DIMENSION(:),
POINTER :: la_max, la_min, lb_max, lb_min, npgfa, &
140 INTEGER,
DIMENSION(:, :, :),
POINTER :: cell_to_index
142 REAL(kind=
dp) :: kind_radius_a, kind_radius_b
143 REAL(kind=
dp),
DIMENSION(3) :: ra, rab
144 REAL(kind=
dp),
DIMENSION(:),
POINTER :: set_radius_a, set_radius_b
145 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: rpgfa, rpgfb, zeta, zetb
157 TYPE(
qs_kind_type),
DIMENSION(:),
POINTER :: qs_kind_set
162 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
! Start timing for this routine.
164 CALL timeset(routinen, handle)
! Fetch environment objects (call statement partially outside this view).
167 qs_kind_set=qs_kind_set, &
169 particle_set=particle_set, &
170 dft_control=dft_control)
! Optional external overrides take precedence over the environment objects.
173 IF (
PRESENT(sab_orb_external)) sab_orb => sab_orb_external
176 IF (
PRESENT(pw_env_external)) pw_env => pw_env_external
177 CALL pw_env_get(pw_env, rs_descs=rs_descs, rs_grids=rs_grids)
180 gridlevel_info => pw_env%gridlevel_info
181 cube_info => pw_env%cube_info
! Problem sizes: number of kinds and atoms.
184 nkind =
SIZE(qs_kind_set)
185 natoms =
SIZE(particle_set)
! Determine maximum set/pgf counts over all kinds (loop header not visible).
189 qs_kind => qs_kind_set(ikind)
191 basis_set=orb_basis_set, basis_type=basis_type)
193 IF (.NOT.
ASSOCIATED(orb_basis_set)) cycle
196 maxset = max(nseta, maxset)
197 maxpgf = max(maxval(npgfa), maxpgf)
! k-point image handling: pick up image count / cell_to_index mapping,
! either from the external kpoint set or from the ks environment.
201 nimages = dft_control%nimages
203 NULLIFY (cell_to_index)
205 IF (
PRESENT(ext_kpoints))
THEN
206 IF (
ASSOCIATED(ext_kpoints))
THEN
209 nimages =
SIZE(ext_kpoints%index_to_cell, 2)
212 IF (.NOT. dokp .AND. nimages > 1)
THEN
214 CALL get_ks_env(ks_env=ks_env, kpoints=kpoints)
! Release stale send/recv atom-pair lists before regenerating them.
219 IF (
ASSOCIATED(task_list%atom_pair_send))
DEALLOCATE (task_list%atom_pair_send)
220 IF (
ASSOCIATED(task_list%atom_pair_recv))
DEALLOCATE (task_list%atom_pair_recv)
223 IF (.NOT.
ASSOCIATED(task_list%tasks))
THEN
227 curr_tasks =
SIZE(task_list%tasks)
! Build a per-kind lookup of GTO basis sets.
229 ALLOCATE (basis_set_list(nkind))
231 qs_kind => qs_kind_set(ikind)
232 CALL get_qs_kind(qs_kind=qs_kind, basis_set=basis_set_a, &
233 basis_type=basis_type)
234 IF (
ASSOCIATED(basis_set_a))
THEN
235 basis_set_list(ikind)%gto_basis_set => basis_set_a
237 NULLIFY (basis_set_list(ikind)%gto_basis_set)
! Main loop over neighbor-list pairs: each (iatom,jatom,cell) entry yields
! candidate grid tasks.
249 DO slot = 1, sab_orb(1)%nl_size
250 ikind = sab_orb(1)%nlist_task(slot)%ikind
251 jkind = sab_orb(1)%nlist_task(slot)%jkind
252 iatom = sab_orb(1)%nlist_task(slot)%iatom
253 jatom = sab_orb(1)%nlist_task(slot)%jatom
254 rab(1:3) = sab_orb(1)%nlist_task(slot)%r(1:3)
255 cellind(1:3) = sab_orb(1)%nlist_task(slot)%cell(1:3)
257 basis_set_a => basis_set_list(ikind)%gto_basis_set
258 IF (.NOT.
ASSOCIATED(basis_set_a)) cycle
259 basis_set_b => basis_set_list(jkind)%gto_basis_set
260 IF (.NOT.
ASSOCIATED(basis_set_b)) cycle
261 ra(:) =
pbc(particle_set(iatom)%r, cell)
! Alias basis-set data for atoms a and b.
263 la_max => basis_set_a%lmax
264 la_min => basis_set_a%lmin
265 npgfa => basis_set_a%npgf
266 nseta = basis_set_a%nset
267 rpgfa => basis_set_a%pgf_radius
268 set_radius_a => basis_set_a%set_radius
269 kind_radius_a = basis_set_a%kind_radius
270 zeta => basis_set_a%zet
272 lb_max => basis_set_b%lmax
273 lb_min => basis_set_b%lmin
274 npgfb => basis_set_b%npgf
275 nsetb = basis_set_b%nset
276 rpgfb => basis_set_b%pgf_radius
277 set_radius_b => basis_set_b%set_radius
278 kind_radius_b = basis_set_b%kind_radius
279 zetb => basis_set_b%zet
! Map the cell indices of this pair onto a linear image index.
282 cindex = cell_to_index(cellind(1), cellind(2), cellind(3))
! Generate the tasks for this pair (call statement partially outside view).
288 rs_descs, dft_control, cube_info, gridlevel_info, cindex, &
289 iatom, jatom, rpgfa, rpgfb, zeta, zetb, kind_radius_b, &
290 set_radius_a, set_radius_b, ra, rab, &
291 la_max, la_min, lb_max, lb_min, npgfa, npgfb, nseta, nsetb)
! Distribute the generated tasks over ranks (symmetric distribution),
! optionally reordering realspace-grid ranks and skipping load balance.
298 rs_descs=rs_descs, ntasks=task_list%ntasks, natoms=natoms, &
299 tasks=task_list%tasks, atom_pair_send=task_list%atom_pair_send, &
300 atom_pair_recv=task_list%atom_pair_recv, symmetric=.true., &
301 reorder_rs_grid_ranks=reorder_rs_grid_ranks, &
302 skip_load_balance_distributed=skip_load_balance_distributed)
! Compute pack/unpack offsets for the halo exchange buffers.
305 ALLOCATE (nsgf(natoms))
306 CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_list, nsgf=nsgf)
307 IF (
ASSOCIATED(task_list%atom_pair_send))
THEN
309 CALL rs_calc_offsets(pairs=task_list%atom_pair_send, &
311 group_size=rs_descs(1)%rs_desc%group_size, &
312 pair_offsets=task_list%pair_offsets_send, &
313 rank_offsets=task_list%rank_offsets_send, &
314 rank_sizes=task_list%rank_sizes_send, &
315 buffer_size=task_list%buffer_size_send)
317 CALL rs_calc_offsets(pairs=task_list%atom_pair_recv, &
319 group_size=rs_descs(1)%rs_desc%group_size, &
320 pair_offsets=task_list%pair_offsets_recv, &
321 rank_offsets=task_list%rank_offsets_recv, &
322 rank_sizes=task_list%rank_sizes_recv, &
323 buffer_size=task_list%buffer_size_recv)
324 DEALLOCATE (basis_set_list, nsgf)
! If requested, reassign ranks on distributed realspace grids.
327 IF (reorder_rs_grid_ranks)
THEN
328 DO i = 1, gridlevel_info%ngrid_levels
329 IF (rs_descs(i)%rs_desc%distributed)
THEN
! Hand the finished task list over to the grid backend.
336 CALL create_grid_task_list(task_list=task_list, &
337 qs_kind_set=qs_kind_set, &
338 particle_set=particle_set, &
340 basis_type=basis_type, &
! Reset bookkeeping arrays for the pair-boundary passes below.
346 IF (
ASSOCIATED(task_list%taskstart))
THEN
347 DEALLOCATE (task_list%taskstart)
349 IF (
ASSOCIATED(task_list%taskstop))
THEN
350 DEALLOCATE (task_list%taskstop)
352 IF (
ASSOCIATED(task_list%npairs))
THEN
353 DEALLOCATE (task_list%npairs)
358 ALLOCATE (task_list%npairs(
SIZE(rs_descs)))
! First pass: count pairs per grid level.  Tasks are assumed sorted so that
! a change in (grid_level, iatom, jatom, image) marks a new pair.
360 iatom_old = -1; jatom_old = -1; igrid_level_old = -1; img_old = -1
364 DO i = 1, task_list%ntasks
365 igrid_level = task_list%tasks(i)%grid_level
366 img = task_list%tasks(i)%image
367 iatom = task_list%tasks(i)%iatom
368 jatom = task_list%tasks(i)%jatom
369 iset = task_list%tasks(i)%iset
370 jset = task_list%tasks(i)%jset
371 ipgf = task_list%tasks(i)%ipgf
372 jpgf = task_list%tasks(i)%jpgf
373 IF (igrid_level /= igrid_level_old)
THEN
374 IF (igrid_level_old /= -1)
THEN
375 task_list%npairs(igrid_level_old) = ipair
378 igrid_level_old = igrid_level
382 ELSE IF (iatom /= iatom_old .OR. jatom /= jatom_old .OR. img /= img_old)
THEN
390 IF (task_list%ntasks /= 0)
THEN
391 task_list%npairs(igrid_level) = ipair
! Second pass: record first/last task index of every pair per grid level.
398 ALLOCATE (task_list%taskstart(maxval(task_list%npairs),
SIZE(rs_descs)))
399 ALLOCATE (task_list%taskstop(maxval(task_list%npairs),
SIZE(rs_descs)))
401 iatom_old = -1; jatom_old = -1; igrid_level_old = -1; img_old = -1
403 task_list%taskstart = 0
404 task_list%taskstop = 0
406 DO i = 1, task_list%ntasks
407 igrid_level = task_list%tasks(i)%grid_level
408 img = task_list%tasks(i)%image
409 iatom = task_list%tasks(i)%iatom
410 jatom = task_list%tasks(i)%jatom
411 iset = task_list%tasks(i)%iset
412 jset = task_list%tasks(i)%jset
413 ipgf = task_list%tasks(i)%ipgf
414 jpgf = task_list%tasks(i)%jpgf
415 IF (igrid_level /= igrid_level_old)
THEN
416 IF (igrid_level_old /= -1)
THEN
417 task_list%taskstop(ipair, igrid_level_old) = i - 1
420 task_list%taskstart(ipair, igrid_level) = i
421 igrid_level_old = igrid_level
425 ELSE IF (iatom /= iatom_old .OR. jatom /= jatom_old .OR. img /= img_old)
THEN
427 task_list%taskstart(ipair, igrid_level) = i
428 task_list%taskstop(ipair - 1, igrid_level) = i - 1
435 IF (task_list%ntasks /= 0)
THEN
436 task_list%taskstop(ipair, igrid_level) = task_list%ntasks
! Debug-only consistency check: every (iatom,jatom,image) block must be
! claimed by exactly one pair, and all tasks of a pair must agree on it.
440 IF (debug_this_module)
THEN
441 tasks => task_list%tasks
443 WRITE (6, *)
"Total number of tasks ", task_list%ntasks
444 DO igrid_level = 1, gridlevel_info%ngrid_levels
445 WRITE (6, *)
"Total number of pairs(grid_level) ", &
446 igrid_level, task_list%npairs(igrid_level)
450 DO igrid_level = 1, gridlevel_info%ngrid_levels
452 ALLOCATE (blocks(natoms, natoms, nimages))
454 DO ipair = 1, task_list%npairs(igrid_level)
455 itask = task_list%taskstart(ipair, igrid_level)
456 ilevel = task_list%tasks(itask)%grid_level
457 img = task_list%tasks(itask)%image
458 iatom = task_list%tasks(itask)%iatom
459 jatom = task_list%tasks(itask)%jatom
460 iset = task_list%tasks(itask)%iset
461 jset = task_list%tasks(itask)%jset
462 ipgf = task_list%tasks(itask)%ipgf
463 jpgf = task_list%tasks(itask)%jpgf
464 IF (blocks(iatom, jatom, img) == -1 .AND. blocks(jatom, iatom, img) == -1)
THEN
465 blocks(iatom, jatom, img) = 1
466 blocks(jatom, iatom, img) = 1
468 WRITE (6, *)
"TASK LIST CONFLICT IN PAIR ", ipair
469 WRITE (6, *)
"Reuse of iatom, jatom, image ", iatom, jatom, img
475 DO itask = task_list%taskstart(ipair, igrid_level), task_list%taskstop(ipair, igrid_level)
476 ilevel = task_list%tasks(itask)%grid_level
477 img = task_list%tasks(itask)%image
478 iatom = task_list%tasks(itask)%iatom
479 jatom = task_list%tasks(itask)%jatom
480 iset = task_list%tasks(itask)%iset
481 jset = task_list%tasks(itask)%jset
482 ipgf = task_list%tasks(itask)%ipgf
483 jpgf = task_list%tasks(itask)%jpgf
484 IF (iatom /= iatom_old .OR. jatom /= jatom_old .OR. img /= img_old)
THEN
485 WRITE (6, *)
"TASK LIST CONFLICT IN TASK ", itask
486 WRITE (6, *)
"Inconsistent iatom, jatom, image ", iatom, jatom, img
487 WRITE (6, *)
"Should be iatom, jatom, image ", iatom_old, jatom_old, img_old
! Stop timing for this routine.
498 CALL timestop(handle)
! Flatten the task list into plain arrays (atom kinds/positions, per-task
! index/radius/rab lists) and hand them to the low-level grid backend,
! producing task_list%grid_task_list.  NOTE(review): several original lines
! are missing from this extraction (loop headers, some call statements), so
! comments describe only what is visible.
506 SUBROUTINE create_grid_task_list(task_list, qs_kind_set, particle_set, cell, basis_type, rs_grids)
508 TYPE(
qs_kind_type),
DIMENSION(:),
POINTER :: qs_kind_set
511 CHARACTER(LEN=*) :: basis_type
! Locals: scalar counters and the flattened per-task arrays.
515 INTEGER :: nset, natoms, nkinds, ntasks, &
516 ikind, iatom, itask, nsgf
517 INTEGER,
DIMENSION(:),
ALLOCATABLE :: atom_kinds, level_list, iatom_list, jatom_list, &
518 iset_list, jset_list, ipgf_list, jpgf_list, &
519 border_mask_list, block_num_list
520 REAL(kind=
dp),
DIMENSION(:),
ALLOCATABLE :: radius_list
521 REAL(kind=
dp),
DIMENSION(:, :),
ALLOCATABLE :: rab_list, atom_positions
522 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
523 INTEGER,
DIMENSION(:, :),
POINTER :: first_sgf
524 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: sphi, zet
525 INTEGER,
DIMENSION(:),
POINTER :: lmax, lmin, npgf, nsgf_set
527 nkinds =
SIZE(qs_kind_set)
528 natoms =
SIZE(particle_set)
529 ntasks = task_list%ntasks
530 tasks => task_list%tasks
! Lazily create the backend basis-set handles, one per kind.
532 IF (.NOT.
ASSOCIATED(task_list%grid_basis_sets))
THEN
534 ALLOCATE (task_list%grid_basis_sets(nkinds))
536 CALL get_qs_kind(qs_kind_set(ikind), basis_type=basis_type, basis_set=orb_basis_set)
542 first_sgf=first_sgf, &
549 maxco=
SIZE(sphi, 1), &
550 maxpgf=
SIZE(zet, 1), &
555 first_sgf=first_sgf, &
558 basis_set=task_list%grid_basis_sets(ikind))
! Collect atom kinds and PBC-wrapped positions.
563 ALLOCATE (atom_kinds(natoms), atom_positions(3, natoms))
565 atom_kinds(iatom) = particle_set(iatom)%atomic_kind%kind_number
566 atom_positions(:, iatom) =
pbc(particle_set(iatom)%r, cell)
! Flatten per-task metadata into plain arrays for the backend call.
569 ALLOCATE (level_list(ntasks), iatom_list(ntasks), jatom_list(ntasks))
570 ALLOCATE (iset_list(ntasks), jset_list(ntasks), ipgf_list(ntasks), jpgf_list(ntasks))
571 ALLOCATE (border_mask_list(ntasks), block_num_list(ntasks))
572 ALLOCATE (radius_list(ntasks), rab_list(3, ntasks))
575 level_list(itask) = tasks(itask)%grid_level
576 iatom_list(itask) = tasks(itask)%iatom
577 jatom_list(itask) = tasks(itask)%jatom
578 iset_list(itask) = tasks(itask)%iset
579 jset_list(itask) = tasks(itask)%jset
580 ipgf_list(itask) = tasks(itask)%ipgf
581 jpgf_list(itask) = tasks(itask)%jpgf
! dist_type 2 carries a 6-bit subpatch pattern; invert it into a border mask
! (bits 0..5), otherwise no border handling is needed.
582 IF (tasks(itask)%dist_type == 2)
THEN
583 border_mask_list(itask) = iand(63, not(tasks(itask)%subpatch_pattern))
585 border_mask_list(itask) = 0
587 block_num_list(itask) = tasks(itask)%pair_index
588 radius_list(itask) = tasks(itask)%radius
589 rab_list(:, itask) = tasks(itask)%rab(:)
! Backend call building the grid task list (call head not visible here).
595 nblocks=
SIZE(task_list%pair_offsets_recv), &
596 block_offsets=task_list%pair_offsets_recv, &
597 atom_positions=atom_positions, &
598 atom_kinds=atom_kinds, &
599 basis_sets=task_list%grid_basis_sets, &
600 level_list=level_list, &
601 iatom_list=iatom_list, &
602 jatom_list=jatom_list, &
603 iset_list=iset_list, &
604 jset_list=jset_list, &
605 ipgf_list=ipgf_list, &
606 jpgf_list=jpgf_list, &
607 border_mask_list=border_mask_list, &
608 block_num_list=block_num_list, &
609 radius_list=radius_list, &
612 task_list=task_list%grid_task_list)
617 END SUBROUTINE create_grid_task_list
! NOTE(review): fragment of the per-pair task generation routine (its
! SUBROUTINE header and END are outside this view).  For one atom pair it
! screens set/pgf combinations by radius sums against the pair distance and,
! for surviving primitives, computes cube extents and emits tasks.
652 cube_info, gridlevel_info, cindex, &
653 iatom, jatom, rpgfa, rpgfb, zeta, zetb, kind_radius_b, set_radius_a, set_radius_b, ra, rab, &
654 la_max, la_min, lb_max, lb_min, npgfa, npgfb, nseta, nsetb)
656 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
657 INTEGER :: ntasks, curr_tasks
663 INTEGER :: cindex, iatom, jatom
664 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: rpgfa, rpgfb, zeta, zetb
665 REAL(kind=
dp) :: kind_radius_b
666 REAL(kind=
dp),
DIMENSION(:),
POINTER :: set_radius_a, set_radius_b
667 REAL(kind=
dp),
DIMENSION(3) :: ra, rab
668 INTEGER,
DIMENSION(:),
POINTER :: la_max, la_min, lb_max, lb_min, npgfa, &
670 INTEGER :: nseta, nsetb
672 INTEGER :: cube_center(3), igrid_level, ipgf, iset, &
673 jpgf, jset, lb_cube(3), ub_cube(3)
674 REAL(kind=
dp) :: dab, rab2, radius, zetp
! Squared interatomic distance; dab itself is set outside this view.
676 rab2 = rab(1)*rab(1) + rab(2)*rab(2) + rab(3)*rab(3)
! Hierarchical distance screening: kind level, set level, then pgf level.
679 loop_iset:
DO iset = 1, nseta
681 IF (set_radius_a(iset) + kind_radius_b < dab) cycle loop_iset
683 loop_jset:
DO jset = 1, nsetb
685 IF (set_radius_a(iset) + set_radius_b(jset) < dab) cycle loop_jset
687 loop_ipgf:
DO ipgf = 1, npgfa(iset)
689 IF (rpgfa(ipgf, iset) + set_radius_b(jset) < dab) cycle loop_ipgf
691 loop_jpgf:
DO jpgf = 1, npgfb(jset)
693 IF (rpgfa(ipgf, iset) + rpgfb(jpgf, jset) < dab) cycle loop_jpgf
! Combined exponent of the product Gaussian.
695 zetp = zeta(ipgf, iset) + zetb(jpgf, jset)
! Determine cube center/extent and radius for this primitive product.
698 CALL compute_pgf_properties(cube_center, lb_cube, ub_cube, radius, &
699 rs_descs(igrid_level)%rs_desc, cube_info(igrid_level), &
700 la_max(iset), zeta(ipgf, iset), la_min(iset), &
701 lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
702 ra, rab, rab2, dft_control%qs_control%eps_rho_rspace)
! Append the resulting task(s) to the growing task buffer.
704 CALL pgf_to_tasks(tasks, ntasks, curr_tasks, &
705 rab, cindex, iatom, jatom, iset, jset, ipgf, jpgf, &
706 la_max(iset), lb_max(jset), rs_descs(igrid_level)%rs_desc, &
707 igrid_level, gridlevel_info%ngrid_levels, cube_center, &
708 lb_cube, ub_cube, radius)
! Compute, for one primitive Gaussian product, the cube center (grid index,
! wrapped into the local grid and shifted by the grid lower bound) and the
! lower/upper cube bounds plus the interaction radius.  NOTE(review): some
! intermediate lines (e.g. f, cutoff, the cube-center call) are missing from
! this extraction.
754 SUBROUTINE compute_pgf_properties(cube_center, lb_cube, ub_cube, radius, &
755 rs_desc, cube_info, la_max, zeta, la_min, lb_max, zetb, lb_min, ra, rab, rab2, eps)
757 INTEGER,
DIMENSION(3),
INTENT(OUT) :: cube_center, lb_cube, ub_cube
758 REAL(kind=
dp),
INTENT(OUT) :: radius
761 INTEGER,
INTENT(IN) :: la_max
762 REAL(kind=
dp),
INTENT(IN) :: zeta
763 INTEGER,
INTENT(IN) :: la_min, lb_max
764 REAL(kind=
dp),
INTENT(IN) :: zetb
765 INTEGER,
INTENT(IN) :: lb_min
766 REAL(kind=
dp),
INTENT(IN) :: ra(3), rab(3), rab2, eps
769 INTEGER,
DIMENSION(:),
POINTER :: sphere_bounds
770 REAL(kind=
dp) :: cutoff, f, prefactor, rb(3), zetp
771 REAL(kind=
dp),
DIMENSION(3) :: rp
! Gaussian product center rp and position of atom b.
776 rp(:) = ra(:) + zetb/zetp*rab(:)
777 rb(:) = ra(:) + rab(:)
! Gaussian product prefactor (f set outside this view).
780 prefactor = exp(-zeta*f*rab2)
782 zetp=zetp, eps=eps, prefactor=prefactor, cutoff=cutoff)
! Wrap the center into the local grid index range, then shift by the
! grid's lower bound.
786 cube_center(:) =
modulo(cube_center(:), rs_desc%npts(:))
787 cube_center(:) = cube_center(:) + rs_desc%lb(:)
! Orthorhombic grids use precomputed sphere bounds; otherwise derive a
! symmetric cube from the extent.
789 IF (rs_desc%orthorhombic)
THEN
790 CALL return_cube(cube_info, radius, lb_cube, ub_cube, sphere_bounds)
794 extent(:) = ub_cube(:) - lb_cube(:)
795 lb_cube(:) = -extent(:)/2 - 1
796 ub_cube(:) = extent(:)/2
799 END SUBROUTINE compute_pgf_properties
! Heuristic cost estimate (in arbitrary units, scaled by 1/1000) for one
! collocation task, parameterized by cube size, angular momentum and the
! fraction of the task assigned to this rank.  NOTE(review): the assignments
! of the fit coefficients v1..v5 are missing from this extraction; they are
! presumably set per is_ortho before the final formula.
815 INTEGER FUNCTION cost_model(lb_cube, ub_cube, fraction, lmax, is_ortho)
816 INTEGER,
DIMENSION(3),
INTENT(IN) :: lb_cube, ub_cube
817 REAL(kind=
dp),
INTENT(IN) :: fraction
822 REAL(kind=
dp) :: v1, v2, v3, v4, v5
! Half the largest cube edge, the dominant size parameter of the model.
824 cmax = maxval(((ub_cube - lb_cube) + 1)/2)
839 cost_model = ceiling(((lmax + v1)*(cmax + v2)**3*v3*fraction + v4 + v5*lmax**7)/1000.0_dp)
841 END FUNCTION cost_model
! Convert one screened primitive-Gaussian product into one or more entries
! of the task buffer: grows the buffer when needed, determines destination
! rank(s) (splitting across ranks on distributed grids via rs_find_node),
! estimates the cost per generated task, and fills in all task metadata.
875 SUBROUTINE pgf_to_tasks(tasks, ntasks, curr_tasks, &
876 rab, cindex, iatom, jatom, iset, jset, ipgf, jpgf, &
877 la_max, lb_max, rs_desc, igrid_level, n_levels, &
878 cube_center, lb_cube, ub_cube, radius)
880 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
881 INTEGER,
INTENT(INOUT) :: ntasks, curr_tasks
882 REAL(kind=
dp),
DIMENSION(3),
INTENT(IN) :: rab
883 INTEGER,
INTENT(IN) :: cindex, iatom, jatom, iset, jset, ipgf, &
886 INTEGER,
INTENT(IN) :: igrid_level, n_levels
887 INTEGER,
DIMENSION(3),
INTENT(IN) :: cube_center, lb_cube, ub_cube
888 REAL(kind=
dp),
INTENT(IN) :: radius
! Buffer growth policy: add a fixed headroom, then multiply.
890 INTEGER,
PARAMETER :: add_tasks = 1000
891 REAL(kind=
dp),
PARAMETER :: mult_tasks = 2.0_dp
893 INTEGER :: added_tasks, cost, j, lmax
895 REAL(kind=
dp) :: tfraction
! Grow the task buffer when it would overflow (reallocation not visible).
899 IF (ntasks > curr_tasks)
THEN
900 curr_tasks = int((curr_tasks + add_tasks)*mult_tasks)
! Distributed grid: the cube may span several ranks, so rs_find_node may
! emit several tasks (added_tasks).  Replicated grid: single local task.
905 IF (rs_desc%distributed)
THEN
909 CALL rs_find_node(rs_desc, igrid_level, n_levels, cube_center, &
910 ntasks=ntasks, tasks=tasks, lb_cube=lb_cube, ub_cube=ub_cube, added_tasks=added_tasks)
913 tasks(ntasks)%destination = encode_rank(rs_desc%my_pos, igrid_level, n_levels)
914 tasks(ntasks)%dist_type = 0
915 tasks(ntasks)%subpatch_pattern = 0
! Cost estimate: shared equally among the added tasks of this pgf pair.
919 lmax = la_max + lb_max
920 is_ortho = (tasks(ntasks)%dist_type == 0 .OR. tasks(ntasks)%dist_type == 1) .AND. rs_desc%orthorhombic
923 tfraction = 1.0_dp/added_tasks
925 cost = cost_model(lb_cube, ub_cube, tfraction, lmax, is_ortho)
! Fill in the common metadata for every task just appended.
927 DO j = 1, added_tasks
928 tasks(ntasks - added_tasks + j)%source = encode_rank(rs_desc%my_pos, igrid_level, n_levels)
929 tasks(ntasks - added_tasks + j)%cost = cost
930 tasks(ntasks - added_tasks + j)%grid_level = igrid_level
931 tasks(ntasks - added_tasks + j)%image = cindex
932 tasks(ntasks - added_tasks + j)%iatom = iatom
933 tasks(ntasks - added_tasks + j)%jatom = jatom
934 tasks(ntasks - added_tasks + j)%iset = iset
935 tasks(ntasks - added_tasks + j)%jset = jset
936 tasks(ntasks - added_tasks + j)%ipgf = ipgf
937 tasks(ntasks - added_tasks + j)%jpgf = jpgf
938 tasks(ntasks - added_tasks + j)%rab = rab
939 tasks(ntasks - added_tasks + j)%radius = radius
942 END SUBROUTINE pgf_to_tasks
! Load balancing for a distributed realspace grid level: build the
! destination list, compute the current load list, optimize it globally,
! then reassign task destinations according to the optimized list.
954 SUBROUTINE load_balance_distributed(tasks, ntasks, rs_descs, grid_level, natoms)
956 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
960 INTEGER :: grid_level, natoms
962 CHARACTER(LEN=*),
PARAMETER :: routinen =
'load_balance_distributed'
965 INTEGER,
DIMENSION(:, :, :),
POINTER ::
list
967 CALL timeset(routinen, handle)
! Step 1: candidate destination ranks per CPU.
972 CALL create_destination_list(
list, rs_descs, grid_level)
! Step 2: fill the list with the current per-destination load.
975 CALL compute_load_list(
list, rs_descs, grid_level, tasks, ntasks, natoms, create_list=.true.)
! Step 3: globally optimize the load distribution.
978 CALL optimize_load_list(
list, rs_descs(1)%rs_desc%group, rs_descs(1)%rs_desc%my_pos)
! Step 4: rewrite task destinations to match the optimized list.
981 CALL compute_load_list(
list, rs_descs, grid_level, tasks, ntasks, natoms, create_list=.false.)
985 CALL timestop(handle)
987 END SUBROUTINE load_balance_distributed
! Iteratively balance the global load list: model allowed load transfers
! between CPU pairs as bounded "fluxes", relax them toward equal per-CPU
! load (up to max_iter sweeps or until the largest shift falls below a
! tolerance), then write the optimized transfers back into list_global.
996 SUBROUTINE balance_global_list(list_global)
997 INTEGER,
DIMENSION(:, :, 0:) :: list_global
999 CHARACTER(LEN=*),
PARAMETER :: routinen =
'balance_global_list'
1000 INTEGER,
PARAMETER :: max_iter = 100
1001 REAL(kind=
dp),
PARAMETER :: tolerance_factor = 0.005_dp
1003 INTEGER :: dest, handle, icpu, idest, iflux, &
1004 ilocal, k, maxdest, ncpu, nflux
1005 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: flux_connections
1006 LOGICAL :: solution_optimal
1007 REAL(kind=
dp) :: average, load_shift, max_load_shift, &
1009 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: load, optimized_flux, optimized_load
1010 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: flux_limits
1012 CALL timeset(routinen, handle)
1014 ncpu =
SIZE(list_global, 3)
1015 maxdest =
SIZE(list_global, 2)
1016 ALLOCATE (load(0:ncpu - 1))
1018 ALLOCATE (optimized_load(0:ncpu - 1))
! First pass: count fluxes (one per ordered pair icpu < dest).
1023 DO icpu = 0, ncpu - 1
1024 DO idest = 1, maxdest
1025 dest = list_global(1, idest, icpu)
1026 IF (dest < ncpu .AND. dest > icpu) nflux = nflux + 1
1029 ALLOCATE (optimized_flux(nflux))
1030 ALLOCATE (flux_limits(2, nflux))
1031 ALLOCATE (flux_connections(2, nflux))
! Second pass: per-CPU load and flux bounds.  The upper limit of a flux is
! the load the source could give; the lower limit is minus the load of the
! reverse direction.
1036 DO icpu = 0, ncpu - 1
1037 load(icpu) = sum(list_global(2, :, icpu))
1038 DO idest = 1, maxdest
1039 dest = list_global(1, idest, icpu)
1040 IF (dest < ncpu)
THEN
1041 IF (dest /= icpu)
THEN
1042 IF (dest > icpu)
THEN
1044 flux_limits(2, nflux) = list_global(2, idest, icpu)
1045 flux_connections(1, nflux) = icpu
1046 flux_connections(2, nflux) = dest
1049 IF (flux_connections(1, iflux) == dest .AND. flux_connections(2, iflux) == icpu)
THEN
1050 flux_limits(1, iflux) = -list_global(2, idest, icpu)
! Relaxation sweeps: repeatedly shift load along each flux toward the
! average, clamped to the flux limits.
1060 solution_optimal = .false.
1061 optimized_flux = 0.0_dp
1068 average = sum(load)/
SIZE(load)
1069 tolerance = tolerance_factor*average
1071 optimized_load(:) = load
1073 max_load_shift = 0.0_dp
1075 load_shift = (optimized_load(flux_connections(1, iflux)) - optimized_load(flux_connections(2, iflux)))/2
1076 load_shift = max(flux_limits(1, iflux) - optimized_flux(iflux), load_shift)
1077 load_shift = min(flux_limits(2, iflux) - optimized_flux(iflux), load_shift)
1078 max_load_shift = max(abs(load_shift), max_load_shift)
1079 optimized_load(flux_connections(1, iflux)) = optimized_load(flux_connections(1, iflux)) - load_shift
1080 optimized_load(flux_connections(2, iflux)) = optimized_load(flux_connections(2, iflux)) + load_shift
1081 optimized_flux(iflux) = optimized_flux(iflux) + load_shift
1083 IF (max_load_shift < tolerance)
THEN
1084 solution_optimal = .true.
! Write back: convert each optimized flux into updated transfer amounts,
! moving any load not transferred into the CPU's own ("local") slot.
1092 DO icpu = 0, ncpu - 1
1093 DO idest = 1, maxdest
1094 IF (list_global(1, idest, icpu) == icpu) ilocal = idest
1096 DO idest = 1, maxdest
1097 dest = list_global(1, idest, icpu)
1098 IF (dest < ncpu)
THEN
1099 IF (dest /= icpu)
THEN
1100 IF (dest > icpu)
THEN
1102 IF (optimized_flux(nflux) > 0)
THEN
1103 list_global(2, ilocal, icpu) = list_global(2, ilocal, icpu) + &
1104 list_global(2, idest, icpu) - nint(optimized_flux(nflux))
1105 list_global(2, idest, icpu) = nint(optimized_flux(nflux))
1107 list_global(2, ilocal, icpu) = list_global(2, ilocal, icpu) + &
1108 list_global(2, idest, icpu)
1109 list_global(2, idest, icpu) = 0
1113 IF (flux_connections(1, iflux) == dest .AND. flux_connections(2, iflux) == icpu)
THEN
1114 IF (optimized_flux(iflux) > 0)
THEN
1115 list_global(2, ilocal, icpu) = list_global(2, ilocal, icpu) + &
1116 list_global(2, idest, icpu)
1117 list_global(2, idest, icpu) = 0
1119 list_global(2, ilocal, icpu) = list_global(2, ilocal, icpu) + &
1120 list_global(2, idest, icpu) + nint(optimized_flux(iflux))
1121 list_global(2, idest, icpu) = -nint(optimized_flux(iflux))
1132 CALL timestop(handle)
1134 END SUBROUTINE balance_global_list
! Optimize the per-rank load list: sum the local lists onto a root rank,
! balance them globally there (balance_global_list), broadcast the result,
! and apportion the globally agreed loads back to the local list using a
! partial-sum scan so each rank keeps a consistent share.
1147 SUBROUTINE optimize_load_list(list, group, my_pos)
1148 INTEGER,
DIMENSION(:, :, 0:) ::
list
1150 INTEGER,
INTENT(IN) :: my_pos
1152 CHARACTER(LEN=*),
PARAMETER :: routinen =
'optimize_load_list'
! All ranks reduce onto this fixed root.
1153 INTEGER,
PARAMETER :: rank_of_root = 0
1155 INTEGER :: handle, icpu, idest, maxdest, ncpu
1156 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: load_all
1157 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: load_partial
1158 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :) :: list_global
1160 CALL timeset(routinen, handle)
1162 ncpu =
SIZE(
list, 3)
1163 maxdest =
SIZE(
list, 2)
! Reduce all local load columns to the root.
1166 ALLOCATE (load_all(maxdest*ncpu))
1167 load_all(:) = reshape(
list(2, :, :), [maxdest*ncpu])
1168 CALL group%sum(load_all(:), rank_of_root)
! Root balances the summed list, then the result is broadcast.
1171 ALLOCATE (list_global(2, maxdest, ncpu))
1172 IF (rank_of_root == my_pos)
THEN
1173 list_global(1, :, :) =
list(1, :, :)
1174 list_global(2, :, :) = reshape(load_all, [maxdest, ncpu])
1175 CALL balance_global_list(list_global)
1177 CALL group%bcast(list_global, rank_of_root)
! Prefix sums over ranks decide how much of the global target each rank's
! local list may claim.
1180 ALLOCATE (load_partial(maxdest, ncpu))
1182 CALL group%sum_partial(reshape(load_all, [maxdest, ncpu]), load_partial(:, :))
1185 DO idest = 1, maxdest
1188 IF (load_partial(idest, icpu) > list_global(2, idest, icpu))
THEN
1189 IF (load_partial(idest, icpu) -
list(2, idest, icpu - 1) < list_global(2, idest, icpu))
THEN
1190 list(2, idest, icpu - 1) = list_global(2, idest, icpu) &
1191 - (load_partial(idest, icpu) -
list(2, idest, icpu - 1))
1193 list(2, idest, icpu - 1) = 0
1201 DEALLOCATE (load_all)
1202 DEALLOCATE (list_global)
1203 DEALLOCATE (load_partial)
1205 CALL timestop(handle)
1206 END SUBROUTINE optimize_load_list
! Two-mode routine over the task list for one grid level.  With
! create_list=.true. it accumulates per-destination costs into `list`; with
! create_list=.false. it consumes `list` to rewrite task destinations.
! Tasks are processed in runs ("short lists") sharing the same
! (iatom,jatom,image) pair so alternative destinations can be considered.
! NOTE(review): several loop headers and CASE lines are missing from this
! extraction; comments describe only visible code.
1225 SUBROUTINE compute_load_list(list, rs_descs, grid_level, tasks, ntasks, natoms, create_list)
1226 INTEGER,
DIMENSION(:, :, 0:) ::
list
1229 INTEGER :: grid_level
1230 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1231 INTEGER :: ntasks, natoms
1232 LOGICAL :: create_list
1234 CHARACTER(LEN=*),
PARAMETER :: routinen =
'compute_load_list'
1236 INTEGER :: cost, dest, handle, i, iatom, ilevel, img, img_old, iopt, ipgf, iset, itask, &
1237 itask_start, itask_stop, jatom, jpgf, jset, li, maxdest, ncpu, ndest_pair, nopt, nshort, &
1239 INTEGER(KIND=int_8) :: bit_pattern, ipair, ipair_old, natom8
1240 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: loads
1241 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: all_dests, index
1242 INTEGER,
DIMENSION(6) :: options
1244 CALL timeset(routinen, handle)
! Current per-rank loads for this grid level (unreordered ranks).
1246 ALLOCATE (loads(0:rs_descs(grid_level)%rs_desc%group_size - 1))
1247 CALL get_current_loads(loads, rs_descs, grid_level, ntasks, tasks, use_reordered_ranks=.false.)
1249 maxdest =
SIZE(
list, 2)
1250 ncpu =
SIZE(
list, 3)
1255 ipair_old = huge(ipair_old)
1257 ALLOCATE (all_dests(0))
! Outer scan: extend [itask_start, itask_stop] while consecutive tasks
! share the same (iatom,jatom) pair (encoded into ipair) and image.
1263 itask_start = itask_stop + 1
1264 itask_stop = itask_start
1265 IF (itask_stop > ntasks)
EXIT
1266 ilevel = tasks(itask_stop)%grid_level
1267 img_old = tasks(itask_stop)%image
1268 iatom = tasks(itask_stop)%iatom
1269 jatom = tasks(itask_stop)%jatom
1270 iset = tasks(itask_stop)%iset
1271 jset = tasks(itask_stop)%jset
1272 ipgf = tasks(itask_stop)%ipgf
1273 jpgf = tasks(itask_stop)%jpgf
1275 ipair_old = (iatom - 1)*natom8 + (jatom - 1)
1277 IF (itask_stop + 1 > ntasks)
EXIT
1278 ilevel = tasks(itask_stop + 1)%grid_level
1279 img = tasks(itask_stop + 1)%image
1280 iatom = tasks(itask_stop + 1)%iatom
1281 jatom = tasks(itask_stop + 1)%jatom
1282 iset = tasks(itask_stop + 1)%iset
1283 jset = tasks(itask_stop + 1)%jset
1284 ipgf = tasks(itask_stop + 1)%ipgf
1285 jpgf = tasks(itask_stop + 1)%jpgf
1287 ipair = (iatom - 1)*natom8 + (jatom - 1)
1288 IF (ipair == ipair_old .AND. img == img_old)
THEN
1289 itask_stop = itask_stop + 1
1295 nshort = itask_stop - itask_start + 1
! Collect the distinct destinations already used within this run (only for
! tasks on the requested grid level; others are flagged with huge()).
1298 DEALLOCATE (all_dests)
1299 ALLOCATE (all_dests(nshort))
1301 ALLOCATE (index(nshort))
1303 ilevel = tasks(itask_start + i - 1)%grid_level
1304 img = tasks(itask_start + i - 1)%image
1305 iatom = tasks(itask_start + i - 1)%iatom
1306 jatom = tasks(itask_start + i - 1)%jatom
1307 iset = tasks(itask_start + i - 1)%iset
1308 jset = tasks(itask_start + i - 1)%jset
1309 ipgf = tasks(itask_start + i - 1)%ipgf
1310 jpgf = tasks(itask_start + i - 1)%jpgf
1312 IF (ilevel == grid_level)
THEN
1313 all_dests(i) = decode_rank(tasks(itask_start + i - 1)%destination,
SIZE(rs_descs))
1315 all_dests(i) = huge(all_dests(i))
! Sort and uniquify the destinations of this pair.
1318 CALL sort(all_dests, nshort, index)
1321 IF ((all_dests(ndest_pair) /= all_dests(i)) .AND. (all_dests(i) /= huge(all_dests(i))))
THEN
1322 ndest_pair = ndest_pair + 1
1323 all_dests(ndest_pair) = all_dests(i)
! Inner loop: handle each task of the run individually.
1327 DO itask = itask_start, itask_stop
1329 dest = decode_rank(tasks(itask)%destination,
SIZE(rs_descs))
1330 ilevel = tasks(itask)%grid_level
1331 img = tasks(itask)%image
1332 iatom = tasks(itask)%iatom
1333 jatom = tasks(itask)%jatom
1334 iset = tasks(itask)%iset
1335 jset = tasks(itask)%jset
1336 ipgf = tasks(itask)%ipgf
1337 jpgf = tasks(itask)%jpgf
1340 IF (ilevel /= grid_level) cycle
1341 ipair = (iatom - 1)*natom8 + (jatom - 1)
1342 cost = int(tasks(itask)%cost)
! Branch on the task's distribution type (CASE labels not visible here).
1344 SELECT CASE (tasks(itask)%dist_type)
! The 6-bit subpatch pattern encodes which neighbor ranks are viable
! alternative destinations; collect those that already appear for the pair.
1346 bit_pattern = tasks(itask)%subpatch_pattern
1348 IF (btest(bit_pattern, 0))
THEN
1350 IF (any(all_dests(1:ndest_pair) == rank))
THEN
1352 options(nopt) = rank
1355 IF (btest(bit_pattern, 1))
THEN
1357 IF (any(all_dests(1:ndest_pair) == rank))
THEN
1359 options(nopt) = rank
1362 IF (btest(bit_pattern, 2))
THEN
1364 IF (any(all_dests(1:ndest_pair) == rank))
THEN
1366 options(nopt) = rank
1369 IF (btest(bit_pattern, 3))
THEN
1371 IF (any(all_dests(1:ndest_pair) == rank))
THEN
1373 options(nopt) = rank
1376 IF (btest(bit_pattern, 4))
THEN
1378 IF (any(all_dests(1:ndest_pair) == rank))
THEN
1380 options(nopt) = rank
1383 IF (btest(bit_pattern, 5))
THEN
1385 IF (any(all_dests(1:ndest_pair) == rank))
THEN
1387 options(nopt) = rank
! Among the candidate options, prefer the least-loaded rank.
1394 IF (loads(rank) > loads(options(iopt))) rank = options(iopt)
! Accumulate (create_list) or consume (.not. create_list) the load entry,
! encoding the (possibly reassigned) destination back into the task.
1399 li = list_index(
list, rank, dest)
1400 IF (create_list)
THEN
1401 list(2, li, dest) =
list(2, li, dest) + cost
1403 IF (
list(1, li, dest) == dest)
THEN
1404 tasks(itask)%destination = encode_rank(dest, ilevel,
SIZE(rs_descs))
1406 IF (
list(2, li, dest) >= cost)
THEN
1407 list(2, li, dest) =
list(2, li, dest) - cost
1408 tasks(itask)%destination = encode_rank(
list(1, li, dest), ilevel,
SIZE(rs_descs))
1410 tasks(itask)%destination = encode_rank(dest, ilevel,
SIZE(rs_descs))
! Same bookkeeping for tasks whose only candidate is their own rank.
1415 li = list_index(
list, dest, dest)
1416 IF (create_list)
THEN
1417 list(2, li, dest) =
list(2, li, dest) + cost
1419 IF (
list(1, li, dest) == dest)
THEN
1420 tasks(itask)%destination = encode_rank(dest, ilevel,
SIZE(rs_descs))
1422 IF (
list(2, li, dest) >= cost)
THEN
1423 list(2, li, dest) =
list(2, li, dest) - cost
1424 tasks(itask)%destination = encode_rank(
list(1, li, dest), ilevel,
SIZE(rs_descs))
1426 tasks(itask)%destination = encode_rank(dest, ilevel,
SIZE(rs_descs))
1438 CALL timestop(handle)
1440 END SUBROUTINE compute_load_list
! Linear search: return the position within list(1,:,dest) whose stored
! rank equals `rank`.  NOTE(review): the loop header and the function's
! initialization are missing from this extraction; visibly the index is
! incremented until the entry matches.
1451 INTEGER FUNCTION list_index(list, rank, dest)
1452 INTEGER,
DIMENSION(:, :, 0:),
INTENT(IN) ::
list
1453 INTEGER,
INTENT(IN) :: rank, dest
1457 IF (
list(1, list_index, dest) == rank)
EXIT
1458 list_index = list_index + 1
1460 END FUNCTION list_index
! Allocate and initialize the destination list for one grid level: for each
! CPU, build a sorted, uniquified sublist of candidate destination ranks
! (padded with huge()) and zero the associated load column.  The list must
! enter unassociated (asserted below).
1471 SUBROUTINE create_destination_list(list, rs_descs, grid_level)
1472 INTEGER,
DIMENSION(:, :, :),
POINTER ::
list
1475 INTEGER,
INTENT(IN) :: grid_level
1477 CHARACTER(LEN=*),
PARAMETER :: routinen =
'create_destination_list'
1479 INTEGER :: handle, i, icpu, j, maxcount, ncpu, &
1481 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: index, sublist
1483 CALL timeset(routinen, handle)
! Guard against double initialization.
1485 cpassert(.NOT.
ASSOCIATED(
list))
1486 ncpu = rs_descs(grid_level)%rs_desc%group_size
1489 ALLOCATE (
list(2, ultimate_max, 0:ncpu - 1))
1491 ALLOCATE (index(ultimate_max))
1492 ALLOCATE (sublist(ultimate_max))
! huge() marks unused slots of the fixed-size sublist.
1493 sublist = huge(sublist)
1496 DO icpu = 0, ncpu - 1
! Sort, then compact duplicates in place (collection of candidate ranks
! into sublist happens on lines not visible here).
1505 CALL sort(sublist, ultimate_max, index)
1508 IF (sublist(i) /= sublist(j))
THEN
1510 sublist(j) = sublist(i)
1513 maxcount = max(maxcount, j)
1514 sublist(j + 1:ultimate_max) = huge(sublist)
1515 list(1, :, icpu) = sublist
1516 list(2, :, icpu) = 0
1521 CALL timestop(handle)
1523 END SUBROUTINE create_destination_list
! Compute the per-rank load for one grid level: each rank accumulates the
! cost of its tasks into a send buffer keyed by destination rank (optionally
! mapped through virtual2real when use_reordered_ranks), exchanges via
! alltoall, and allgathers the local totals into `loads`.
1539 SUBROUTINE get_current_loads(loads, rs_descs, grid_level, ntasks, tasks, use_reordered_ranks)
1540 INTEGER(KIND=int_8),
DIMENSION(:) :: loads
1543 INTEGER :: grid_level, ntasks
1544 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1545 LOGICAL,
INTENT(IN) :: use_reordered_ranks
1547 CHARACTER(LEN=*),
PARAMETER :: routinen =
'get_current_loads'
1549 INTEGER :: handle, i, iatom, ilevel, img, ipgf, &
1550 iset, jatom, jpgf, jset
1551 INTEGER(KIND=int_8) :: total_cost_local
1552 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: recv_buf_i, send_buf_i
1555 CALL timeset(routinen, handle)
1557 desc => rs_descs(grid_level)%rs_desc
1560 ALLOCATE (send_buf_i(desc%group_size))
1561 ALLOCATE (recv_buf_i(desc%group_size))
! Accumulate task costs by destination rank (task loop header and the
! "+ cost" continuation lines are not visible in this extraction).
1569 ilevel = tasks(i)%grid_level
1570 img = tasks(i)%image
1571 iatom = tasks(i)%iatom
1572 jatom = tasks(i)%jatom
1573 iset = tasks(i)%iset
1574 jset = tasks(i)%jset
1575 ipgf = tasks(i)%ipgf
1576 jpgf = tasks(i)%jpgf
! Only tasks on the requested level contribute.
1577 IF (ilevel /= grid_level) cycle
1578 IF (use_reordered_ranks)
THEN
1579 send_buf_i(rs_descs(ilevel)%rs_desc%virtual2real(decode_rank(tasks(i)%destination,
SIZE(rs_descs))) + 1) = &
1580 send_buf_i(rs_descs(ilevel)%rs_desc%virtual2real(decode_rank(tasks(i)%destination,
SIZE(rs_descs))) + 1) &
1583 send_buf_i(decode_rank(tasks(i)%destination,
SIZE(rs_descs)) + 1) = &
1584 send_buf_i(decode_rank(tasks(i)%destination,
SIZE(rs_descs)) + 1) &
! Exchange contributions, sum what landed here, and share totals.
1588 CALL desc%group%alltoall(send_buf_i, recv_buf_i, 1)
1591 total_cost_local = sum(recv_buf_i)
1594 CALL desc%group%allgather(total_cost_local, loads)
1596 CALL timestop(handle)
1598 END SUBROUTINE get_current_loads
! ---------------------------------------------------------------------------
! load_balance_replicated: redistributes the cost of replicated tasks
! (dist_type == 0 with destination rank == source rank) from overloaded to
! underloaded ranks by rewriting tasks' destinations with encode_rank().
! Loads are summed over all grid levels, compared to the group average, and
! moved until either the local surplus is shipped or receivers run out.
! NOTE(review): extraction elided several lines (loop headers, END IF/END DO,
! some initializations); code text below is kept byte-identical.
! ---------------------------------------------------------------------------
1610 SUBROUTINE load_balance_replicated(rs_descs, ntasks, tasks)
1615 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1617 CHARACTER(LEN=*),
PARAMETER :: routinen =
'load_balance_replicated'
1619 INTEGER :: handle, i, iatom, ilevel, img, ipgf, &
1620 iset, j, jatom, jpgf, jset, &
1621 no_overloaded, no_underloaded, &
1623 INTEGER(KIND=int_8) :: average_cost, cost_task_rep, count, &
1624 offset, total_cost_global
1625 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: load_imbalance, loads, recv_buf_i
1626 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: index
1629 CALL timeset(routinen, handle)
1631 desc => rs_descs(1)%rs_desc
1634 ALLOCATE (recv_buf_i(desc%group_size))
1635 ALLOCATE (loads(desc%group_size))
! accumulate per-rank load over every grid level
1638 DO i = 1,
SIZE(rs_descs)
1639 CALL get_current_loads(loads, rs_descs, i, ntasks, tasks, use_reordered_ranks=.true.)
1640 recv_buf_i(:) = recv_buf_i + loads
1643 total_cost_global = sum(recv_buf_i)
1644 average_cost = total_cost_global/desc%group_size
1652 ALLOCATE (load_imbalance(desc%group_size))
1653 ALLOCATE (index(desc%group_size))
! positive imbalance = overloaded, negative = underloaded
1655 load_imbalance(:) = recv_buf_i - average_cost
1659 DO i = 1, desc%group_size
1660 IF (load_imbalance(i) > 0) no_overloaded = no_overloaded + 1
1661 IF (load_imbalance(i) < 0) no_underloaded = no_underloaded + 1
! sort ranks by load; 'index' maps sorted position -> rank
1666 CALL sort(recv_buf_i,
SIZE(recv_buf_i), index)
! a task is movable ("replicated") when dist_type==0 and it would run on
! its own source rank
1672 IF (tasks(i)%dist_type == 0 &
1673 .AND. decode_rank(tasks(i)%destination,
SIZE(rs_descs)) == decode_rank(tasks(i)%source,
SIZE(rs_descs)))
THEN
1674 cost_task_rep = cost_task_rep + tasks(i)%cost
1680 CALL desc%group%allgather(cost_task_rep, recv_buf_i)
1682 DO i = 1, desc%group_size
! cannot ship more than the movable (replicated) cost on that rank
1684 IF (load_imbalance(i) > 0) &
1685 load_imbalance(i) = min(load_imbalance(i), recv_buf_i(i))
! only overloaded ranks act as senders
1694 IF (load_imbalance(desc%my_pos + 1) > 0)
THEN
1700 DO i = desc%group_size, desc%group_size - no_overloaded + 1, -1
1701 IF (index(i) == desc%my_pos + 1)
THEN
1704 offset = offset + load_imbalance(index(i))
1709 proc_receiving = huge(proc_receiving)
1710 DO i = 1, no_underloaded
1711 offset = offset + load_imbalance(index(i))
1712 IF (offset <= 0)
THEN
1722 IF (tasks(j)%dist_type == 0 &
1723 .AND. decode_rank(tasks(j)%destination,
SIZE(rs_descs)) == decode_rank(tasks(j)%source,
SIZE(rs_descs)))
THEN
1726 IF (proc_receiving > no_underloaded)
EXIT
1728 ilevel = tasks(j)%grid_level
1729 img = tasks(j)%image
1730 iatom = tasks(j)%iatom
1731 jatom = tasks(j)%jatom
1732 iset = tasks(j)%iset
1733 jset = tasks(j)%jset
1734 ipgf = tasks(j)%ipgf
1735 jpgf = tasks(j)%jpgf
! reassign this task to the current receiver rank (0-based)
1736 tasks(j)%destination = encode_rank(index(proc_receiving) - 1, ilevel,
SIZE(rs_descs))
1737 offset = offset + tasks(j)%cost
1738 count = count + tasks(j)%cost
1739 IF (count >= load_imbalance(desc%my_pos + 1))
EXIT
! receiver is full: advance to the next underloaded rank
1740 IF (offset > 0)
THEN
1741 proc_receiving = proc_receiving + 1
1744 IF (proc_receiving > no_underloaded)
EXIT
1745 offset = load_imbalance(index(proc_receiving))
1752 DEALLOCATE (load_imbalance)
1754 CALL timestop(handle)
1756 END SUBROUTINE load_balance_replicated
! ---------------------------------------------------------------------------
! create_local_tasks: all-to-all exchange of tasks so that every rank ends up
! holding (in tasks_recv) exactly the tasks destined for it. First exchanges
! per-rank counts, then computes displacements and exchanges the task payload.
! The destination rank is derived from each task's encoded destination via
! decode_level()/decode_rank() and the virtual2real mapping.
! NOTE(review): extraction elided lines (packing loops, size initialization,
! the unpacking of recv_buf into tasks_recv); text kept byte-identical.
! ---------------------------------------------------------------------------
1770 SUBROUTINE create_local_tasks(rs_descs, ntasks, tasks, ntasks_recv, tasks_recv)
1775 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1776 INTEGER :: ntasks_recv
1777 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks_recv
1779 CHARACTER(LEN=*),
PARAMETER :: routinen =
'create_local_tasks'
1781 INTEGER :: handle, i, j, k, l, rank
1782 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: recv_buf, send_buf
1783 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: recv_disps, recv_sizes, send_disps, &
1787 CALL timeset(routinen, handle)
1789 desc => rs_descs(1)%rs_desc
1792 ALLOCATE (send_sizes(desc%group_size))
1793 ALLOCATE (recv_sizes(desc%group_size))
1794 ALLOCATE (send_disps(desc%group_size))
1795 ALLOCATE (recv_disps(desc%group_size))
1796 ALLOCATE (send_buf(desc%group_size))
1797 ALLOCATE (recv_buf(desc%group_size))
! count tasks per destination rank (loop header elided in this listing)
1802 rank = rs_descs(decode_level(tasks(i)%destination,
SIZE(rs_descs))) &
1803 %rs_desc%virtual2real(decode_rank(tasks(i)%destination,
SIZE(rs_descs)))
1804 send_buf(rank + 1) = send_buf(rank + 1) + 1
! exchange the counts
1807 CALL desc%group%alltoall(send_buf, recv_buf, 1)
! prefix sums -> displacements
1818 DO i = 2, desc%group_size
1821 send_disps(i) = send_disps(i - 1) + send_sizes(i - 1)
1822 recv_disps(i) = recv_disps(i - 1) + recv_sizes(i - 1)
1826 DEALLOCATE (send_buf)
1827 DEALLOCATE (recv_buf)
! reallocate buffers at payload size
1830 ALLOCATE (send_buf(sum(send_sizes)))
1831 ALLOCATE (recv_buf(sum(recv_sizes)))
1837 i = rs_descs(decode_level(tasks(j)%destination,
SIZE(rs_descs))) &
1838 %rs_desc%virtual2real(decode_rank(tasks(j)%destination,
SIZE(rs_descs))) + 1
1839 l = send_disps(i) + send_sizes(i)
1845 CALL desc%group%alltoall(send_buf, send_sizes, send_disps, recv_buf, recv_sizes, recv_disps)
1847 DEALLOCATE (send_buf)
1850 ALLOCATE (tasks_recv(ntasks_recv))
1854 DO i = 1, desc%group_size
1862 DEALLOCATE (recv_buf)
1863 DEALLOCATE (send_sizes)
1864 DEALLOCATE (recv_sizes)
1865 DEALLOCATE (send_disps)
1866 DEALLOCATE (recv_disps)
1868 CALL timestop(handle)
1870 END SUBROUTINE create_local_tasks
! ---------------------------------------------------------------------------
! distribute_tasks (name grounded by routinen below; the SUBROUTINE header
! line itself was elided by extraction): top-level driver that balances and
! distributes the task list. For distributed grids it measures per-level
! loads, optionally reorders rank <-> virtual-rank mappings per level when
! the projected load gap exceeds ~1.05x the replicated load, balances the
! replicated tasks, exchanges tasks to their destination ranks, builds the
! send/recv atom-pair lists and finally sorts the received tasks.
! NOTE(review): many lines elided by extraction; text kept byte-identical.
! ---------------------------------------------------------------------------
1890 tasks, atom_pair_send, atom_pair_recv, &
1891 symmetric, reorder_rs_grid_ranks, skip_load_balance_distributed)
1895 INTEGER :: ntasks, natoms
1896 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1897 TYPE(
atom_pair_type),
DIMENSION(:),
POINTER :: atom_pair_send, atom_pair_recv
1898 LOGICAL,
INTENT(IN) :: symmetric, reorder_rs_grid_ranks, &
1899 skip_load_balance_distributed
1901 CHARACTER(LEN=*),
PARAMETER :: routinen =
'distribute_tasks'
1903 INTEGER :: handle, igrid_level, irank, ntasks_recv
1904 INTEGER(KIND=int_8) :: load_gap, max_load, replicated_load
1905 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: total_loads, total_loads_tmp, trial_loads
1906 INTEGER(KIND=int_8),
DIMENSION(:, :),
POINTER :: loads
1907 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: indices, real2virtual, total_index
1908 LOGICAL :: distributed_grids, fixed_first_grid
1910 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks_recv
1912 CALL timeset(routinen, handle)
1914 cpassert(
ASSOCIATED(tasks))
! detect whether any level uses a distributed realspace grid
1917 distributed_grids = .false.
1918 DO igrid_level = 1,
SIZE(rs_descs)
1919 IF (rs_descs(igrid_level)%rs_desc%distributed)
THEN
1920 distributed_grids = .true.
1923 desc => rs_descs(1)%rs_desc
1925 IF (distributed_grids)
THEN
1927 ALLOCATE (loads(0:desc%group_size - 1,
SIZE(rs_descs)))
1928 ALLOCATE (total_loads(0:desc%group_size - 1))
! balance and measure each distributed level
1934 DO igrid_level = 1,
SIZE(rs_descs)
1935 IF (rs_descs(igrid_level)%rs_desc%distributed)
THEN
1937 IF (.NOT. skip_load_balance_distributed) &
1938 CALL load_balance_distributed(tasks, ntasks, rs_descs, igrid_level, natoms)
1940 CALL get_current_loads(loads(:, igrid_level), rs_descs, igrid_level, ntasks, &
1941 tasks, use_reordered_ranks=.false.)
1943 total_loads(:) = total_loads + loads(:, igrid_level)
! measure load carried by replicated (non-distributed) levels
1952 DO igrid_level = 1,
SIZE(rs_descs)
1953 IF (.NOT. rs_descs(igrid_level)%rs_desc%distributed)
THEN
1954 CALL get_current_loads(loads(:, igrid_level), rs_descs, igrid_level, ntasks, &
1955 tasks, use_reordered_ranks=.false.)
1956 replicated_load = replicated_load + sum(loads(:, igrid_level))
1966 IF (reorder_rs_grid_ranks)
THEN
1967 fixed_first_grid = .false.
1968 DO igrid_level = 1,
SIZE(rs_descs)
1969 IF (rs_descs(igrid_level)%rs_desc%distributed)
THEN
! the first distributed level fixes the reference ordering
1970 IF (fixed_first_grid .EQV. .false.)
THEN
1971 total_loads(:) = loads(:, igrid_level)
1972 fixed_first_grid = .true.
1974 ALLOCATE (trial_loads(0:desc%group_size - 1))
1976 trial_loads(:) = total_loads + loads(:, igrid_level)
1977 max_load = maxval(trial_loads)
1979 DO irank = 0, desc%group_size - 1
1980 load_gap = load_gap + max_load - trial_loads(irank)
! reorder this level's ranks only if the gap is clearly worth it
1985 IF (load_gap > replicated_load*1.05_dp)
THEN
1987 ALLOCATE (indices(0:desc%group_size - 1))
1988 ALLOCATE (total_index(0:desc%group_size - 1))
1989 ALLOCATE (total_loads_tmp(0:desc%group_size - 1))
1990 ALLOCATE (real2virtual(0:desc%group_size - 1))
1992 total_loads_tmp(:) = total_loads
1993 CALL sort(total_loads_tmp, desc%group_size, total_index)
1994 CALL sort(loads(:, igrid_level), desc%group_size, indices)
! pair least-loaded totals with most-loaded level contributions
1998 DO irank = 0, desc%group_size - 1
1999 total_loads(total_index(irank) - 1) = total_loads(total_index(irank) - 1) + &
2000 loads(desc%group_size - irank - 1, igrid_level)
2001 real2virtual(total_index(irank) - 1) = indices(desc%group_size - irank - 1) - 1
2006 DEALLOCATE (indices)
2007 DEALLOCATE (total_index)
2008 DEALLOCATE (total_loads_tmp)
2009 DEALLOCATE (real2virtual)
2011 total_loads(:) = trial_loads
2014 DEALLOCATE (trial_loads)
2022 CALL load_balance_replicated(rs_descs, ntasks, tasks)
2040 CALL create_local_tasks(rs_descs, ntasks, tasks, ntasks_recv, tasks_recv)
2046 CALL get_atom_pair(atom_pair_send, tasks, ntasks=ntasks, send=.true., symmetric=symmetric, rs_descs=rs_descs)
2055 CALL get_atom_pair(atom_pair_recv, tasks_recv, ntasks=ntasks_recv, send=.false., symmetric=symmetric, rs_descs=rs_descs)
2060 DEALLOCATE (total_loads)
! non-distributed case: every rank keeps its own tasks
2064 ntasks_recv = ntasks
2065 CALL get_atom_pair(atom_pair_recv, tasks_recv, ntasks=ntasks_recv, send=.false., symmetric=symmetric, rs_descs=rs_descs)
2070 ALLOCATE (indices(ntasks_recv))
2071 CALL tasks_sort(tasks_recv, ntasks_recv, indices)
2072 DEALLOCATE (indices)
2079 ntasks = ntasks_recv
2081 CALL timestop(handle)
! ---------------------------------------------------------------------------
! get_atom_pair: builds the deduplicated, sorted list of (rank, row, col,
! image) atom pairs referenced by the tasks, and stores each task's index
! into that list in tasks(i)%pair_index. For symmetric matrices, pairs are
! canonicalized so row <= col. The pair's rank is the (real) destination for
! send=.true., otherwise the decoded source rank.
! NOTE(review): extraction elided loop headers, ELSE branches and the npairs
! initialization/increments; code text kept byte-identical.
! ---------------------------------------------------------------------------
2095 SUBROUTINE get_atom_pair(atom_pair, tasks, ntasks, send, symmetric, rs_descs)
2098 TYPE(
task_type),
DIMENSION(:),
INTENT(INOUT) :: tasks
2099 INTEGER,
INTENT(IN) :: ntasks
2100 LOGICAL,
INTENT(IN) :: send, symmetric
2103 INTEGER :: i, ilevel, iatom, jatom, npairs, virt_rank
2104 INTEGER,
DIMENSION(:),
ALLOCATABLE :: indices
2107 cpassert(.NOT.
ASSOCIATED(atom_pair))
! empty task list -> empty (but allocated) pair list
2108 IF (ntasks == 0)
THEN
2109 ALLOCATE (atom_pair(0))
2115 ALLOCATE (atom_pair_tmp(ntasks))
2117 atom_pair_tmp(i)%image = tasks(i)%image
2118 iatom = tasks(i)%iatom
2119 jatom = tasks(i)%jatom
! canonical ordering for symmetric matrices: row <= col
2120 IF (symmetric .AND. iatom > jatom)
THEN
2122 atom_pair_tmp(i)%row = jatom
2123 atom_pair_tmp(i)%col = iatom
2125 atom_pair_tmp(i)%row = iatom
2126 atom_pair_tmp(i)%col = jatom
2132 ilevel = tasks(i)%grid_level
2133 virt_rank = decode_rank(tasks(i)%destination,
SIZE(rs_descs))
2134 atom_pair_tmp(i)%rank = rs_descs(ilevel)%rs_desc%virtual2real(virt_rank)
2138 atom_pair_tmp(i)%rank = decode_rank(tasks(i)%source,
SIZE(rs_descs))
! sort pairs and compact duplicates, recording each task's pair index
2143 ALLOCATE (indices(ntasks))
2144 CALL atom_pair_sort(atom_pair_tmp, ntasks, indices)
2146 tasks(indices(1))%pair_index = 1
2148 IF (atom_pair_less_than(atom_pair_tmp(i - 1), atom_pair_tmp(i)))
THEN
2150 atom_pair_tmp(npairs) = atom_pair_tmp(i)
2152 tasks(indices(i))%pair_index = npairs
2154 DEALLOCATE (indices)
2157 ALLOCATE (atom_pair(npairs))
2158 atom_pair(:) = atom_pair_tmp(:npairs)
2159 DEALLOCATE (atom_pair_tmp)
2161 END SUBROUTINE get_atom_pair
! ---------------------------------------------------------------------------
! rs_distribute_matrix (name grounded by routinen below; the SUBROUTINE
! header line was elided by extraction): redistributes DBCSR matrix blocks
! between ranks according to the precomputed atom_pair_send/atom_pair_recv
! lists. With scatter=.true. blocks of pmats are packed, alltoall-exchanged
! and inserted on the receiving side (dbcsr_put_block); with scatter=.false.
! received blocks are accumulated into hmats (h_block = h_block + ...),
! including the rank-local contribution handled without communication.
! NOTE(review): extraction elided inner-loop headers, END IF/END DO and the
! size-reset statements; code text below is kept byte-identical.
! ---------------------------------------------------------------------------
2177 nimages, scatter, hmats)
2182 TYPE(
atom_pair_type),
DIMENSION(:),
POINTER :: atom_pair_send, atom_pair_recv
2188 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rs_distribute_matrix'
2190 INTEGER :: acol, arow, handle, i, img, j, k, l, me, &
2191 nblkcols_total, nblkrows_total, ncol, &
2192 nrow, nthread, nthread_left
2193 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: first_col, first_row, last_col, last_row, recv_disps, &
2194 recv_pair_count, recv_pair_disps, recv_sizes, send_disps, send_pair_count, &
2195 send_pair_disps, send_sizes
2196 INTEGER,
DIMENSION(:),
POINTER :: col_blk_size, row_blk_size
2198 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:),
TARGET :: recv_buf_r, send_buf_r
2199 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: h_block, p_block
2202 REAL(kind=
dp),
DIMENSION(:),
POINTER :: vector
2206 CALL timeset(routinen, handle)
! gather mode requires the output matrices
2208 IF (.NOT. scatter)
THEN
2209 cpassert(
PRESENT(hmats))
2212 desc => rs_descs(1)%rs_desc
2213 me = desc%my_pos + 1
2216 ALLOCATE (send_sizes(desc%group_size))
2217 ALLOCATE (recv_sizes(desc%group_size))
2218 ALLOCATE (send_disps(desc%group_size))
2219 ALLOCATE (recv_disps(desc%group_size))
2220 ALLOCATE (send_pair_count(desc%group_size))
2221 ALLOCATE (recv_pair_count(desc%group_size))
2222 ALLOCATE (send_pair_disps(desc%group_size))
2223 ALLOCATE (recv_pair_disps(desc%group_size))
! block layout (row/col offsets) from the first matrix
2225 pmat => pmats(1)%matrix
2227 row_blk_size=row_blk_size, &
2228 col_blk_size=col_blk_size, &
2229 nblkrows_total=nblkrows_total, &
2230 nblkcols_total=nblkcols_total)
2231 ALLOCATE (first_row(nblkrows_total), last_row(nblkrows_total), &
2232 first_col(nblkcols_total), last_col(nblkcols_total))
2233 CALL dbcsr_convert_sizes_to_offsets(row_blk_size, first_row, last_row)
2234 CALL dbcsr_convert_sizes_to_offsets(col_blk_size, first_col, last_col)
! count send volume and pair count per destination rank
2239 DO i = 1,
SIZE(atom_pair_send)
2240 k = atom_pair_send(i)%rank + 1
2241 arow = atom_pair_send(i)%row
2242 acol = atom_pair_send(i)%col
2243 nrow = last_row(arow) - first_row(arow) + 1
2244 ncol = last_col(acol) - first_col(acol) + 1
2245 send_sizes(k) = send_sizes(k) + nrow*ncol
2246 send_pair_count(k) = send_pair_count(k) + 1
2251 DO i = 2, desc%group_size
2252 send_disps(i) = send_disps(i - 1) + send_sizes(i - 1)
2253 send_pair_disps(i) = send_pair_disps(i - 1) + send_pair_count(i - 1)
2256 ALLOCATE (send_buf_r(sum(send_sizes)))
! count receive volume and pair count per source rank
2262 DO i = 1,
SIZE(atom_pair_recv)
2263 k = atom_pair_recv(i)%rank + 1
2264 arow = atom_pair_recv(i)%row
2265 acol = atom_pair_recv(i)%col
2266 nrow = last_row(arow) - first_row(arow) + 1
2267 ncol = last_col(acol) - first_col(acol) + 1
2268 recv_sizes(k) = recv_sizes(k) + nrow*ncol
2269 recv_pair_count(k) = recv_pair_count(k) + 1
2274 DO i = 2, desc%group_size
2275 recv_disps(i) = recv_disps(i - 1) + recv_sizes(i - 1)
2276 recv_pair_disps(i) = recv_pair_disps(i - 1) + recv_pair_count(i - 1)
2278 ALLOCATE (recv_buf_r(sum(recv_sizes)))
! pack outgoing blocks into send_buf_r in column-major order
2297 DO l = 1, desc%group_size
2300 DO i = 1, send_pair_count(l)
2301 arow = atom_pair_send(send_pair_disps(l) + i)%row
2302 acol = atom_pair_send(send_pair_disps(l) + i)%col
2303 img = atom_pair_send(send_pair_disps(l) + i)%image
2304 nrow = last_row(arow) - first_row(arow) + 1
2305 ncol = last_col(acol) - first_col(acol) + 1
2306 pmat => pmats(img)%matrix
2307 CALL dbcsr_get_block_p(matrix=pmat, row=arow, col=acol, block=p_block, found=found)
2312 send_buf_r(send_disps(l) + send_sizes(l) + j + (k - 1)*nrow) = p_block(j, k)
2315 send_sizes(l) = send_sizes(l) + nrow*ncol
2320 IF (.NOT. scatter)
THEN
2335 CALL desc%group%alltoall(send_buf_r, send_sizes, send_disps, &
2336 recv_buf_r, recv_sizes, recv_disps)
! gather mode: fold the rank-local contribution directly into hmats
2341 IF (.NOT. scatter)
THEN
2345 DO i = 1, send_pair_count(me)
2346 arow = atom_pair_send(send_pair_disps(me) + i)%row
2347 acol = atom_pair_send(send_pair_disps(me) + i)%col
2348 img = atom_pair_send(send_pair_disps(me) + i)%image
2349 nrow = last_row(arow) - first_row(arow) + 1
2350 ncol = last_col(acol) - first_col(acol) + 1
2351 hmat => hmats(img)%matrix
2352 pmat => pmats(img)%matrix
2353 CALL dbcsr_get_block_p(matrix=hmat, row=arow, col=acol, block=h_block, found=found)
2355 CALL dbcsr_get_block_p(matrix=pmat, row=arow, col=acol, block=p_block, found=found)
2361 h_block(j, k) = h_block(j, k) + p_block(j, k)
2370 pmat => pmats(img)%matrix
2372 nblks_guess=
SIZE(atom_pair_recv)/nthread, sizedata_guess=
SIZE(recv_buf_r)/nthread, &
! unpack received data per source rank
2382 DO l = 1, desc%group_size
2385 DO i = 1, recv_pair_count(l)
2386 arow = atom_pair_recv(recv_pair_disps(l) + i)%row
2387 acol = atom_pair_recv(recv_pair_disps(l) + i)%col
2388 img = atom_pair_recv(recv_pair_disps(l) + i)%image
2389 nrow = last_row(arow) - first_row(arow) + 1
2390 ncol = last_col(acol) - first_col(acol) + 1
2391 pmat => pmats(img)%matrix
2393 CALL dbcsr_get_block_p(matrix=pmat, row=arow, col=acol, block=p_block, found=found)
2395 IF (
PRESENT(hmats))
THEN
2396 hmat => hmats(img)%matrix
2397 CALL dbcsr_get_block_p(matrix=hmat, row=arow, col=acol, block=h_block, found=found)
! scatter: insert missing blocks; gather: accumulate into h_block
2401 IF (scatter .AND. .NOT.
ASSOCIATED(p_block))
THEN
2402 vector => recv_buf_r(recv_disps(l) + recv_sizes(l) + 1:recv_disps(l) + recv_sizes(l) + nrow*ncol)
2403 CALL dbcsr_put_block(pmat, arow, acol, block=reshape(vector, [nrow, ncol]))
2405 IF (.NOT. scatter)
THEN
2409 h_block(j, k) = h_block(j, k) + recv_buf_r(recv_disps(l) + recv_sizes(l) + j + (k - 1)*nrow)
2414 recv_sizes(l) = recv_sizes(l) + nrow*ncol
2436 pmat => pmats(img)%matrix
2442 DEALLOCATE (send_buf_r)
2443 DEALLOCATE (recv_buf_r)
2445 DEALLOCATE (send_sizes)
2446 DEALLOCATE (recv_sizes)
2447 DEALLOCATE (send_disps)
2448 DEALLOCATE (recv_disps)
2449 DEALLOCATE (send_pair_count)
2450 DEALLOCATE (recv_pair_count)
2451 DEALLOCATE (send_pair_disps)
2452 DEALLOCATE (recv_pair_disps)
2454 DEALLOCATE (first_row, last_row, first_col, last_col)
2456 CALL timestop(handle)
! ---------------------------------------------------------------------------
! rs_calc_offsets: from a rank-sorted atom-pair list and per-atom block sizes
! (nsgf), computes for each pair its offset into a flat transfer buffer, per-
! rank starting offsets, per-rank sizes, and the total buffer size. Requires
! 'pairs' to be non-decreasing in rank (asserted below). Existing pointer
! arrays are deallocated and rebuilt.
! NOTE(review): extraction elided the arow/acol assignments inside the first
! loop and some END statements; code text kept byte-identical.
! ---------------------------------------------------------------------------
2464 SUBROUTINE rs_calc_offsets(pairs, nsgf, group_size, &
2465 pair_offsets, rank_offsets, rank_sizes, buffer_size)
2467 INTEGER,
DIMENSION(:),
INTENT(IN) :: nsgf
2468 INTEGER,
INTENT(IN) :: group_size
2469 INTEGER,
DIMENSION(:),
POINTER :: pair_offsets, rank_offsets, rank_sizes
2470 INTEGER,
INTENT(INOUT) :: buffer_size
2472 INTEGER :: acol, arow, i, block_size, total_size, k, prev_k
! rebuild the output arrays from scratch
2474 IF (
ASSOCIATED(pair_offsets))
DEALLOCATE (pair_offsets)
2475 IF (
ASSOCIATED(rank_offsets))
DEALLOCATE (rank_offsets)
2476 IF (
ASSOCIATED(rank_sizes))
DEALLOCATE (rank_sizes)
2479 ALLOCATE (pair_offsets(
SIZE(pairs)))
! running prefix sum of block sizes -> per-pair offsets
2481 DO i = 1,
SIZE(pairs)
2482 pair_offsets(i) = total_size
2485 block_size = nsgf(arow)*nsgf(acol)
2486 total_size = total_size + block_size
2488 buffer_size = total_size
2491 ALLOCATE (rank_offsets(group_size))
2492 ALLOCATE (rank_sizes(group_size))
2495 IF (
SIZE(pairs) > 0)
THEN
2496 prev_k = pairs(1)%rank + 1
2497 DO i = 1,
SIZE(pairs)
2498 k = pairs(i)%rank + 1
! pairs must be sorted by rank for the offset bookkeeping to hold
2499 cpassert(k >= prev_k)
2500 IF (k > prev_k)
THEN
2501 rank_offsets(k) = pair_offsets(i)
2502 rank_sizes(prev_k) = rank_offsets(k) - rank_offsets(prev_k)
! last rank's size runs to the end of the buffer
2506 rank_sizes(k) = buffer_size - rank_offsets(k)
2509 END SUBROUTINE rs_calc_offsets
! ---------------------------------------------------------------------------
! rs_scatter_matrices (name grounded by routinen below; the SUBROUTINE header
! line was elided by extraction): packs blocks of src_matrices into a send
! buffer using the task list's send pair list/offsets, then alltoall-scatters
! into dest_buffer%host_buffer using the precomputed rank sizes/offsets.
! NOTE(review): text kept byte-identical; some lines elided by extraction.
! ---------------------------------------------------------------------------
2516 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(IN) :: src_matrices
2521 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rs_scatter_matrices'
2524 REAL(kind=
dp),
DIMENSION(:),
ALLOCATABLE :: buffer_send
2526 CALL timeset(routinen, handle)
2527 ALLOCATE (buffer_send(task_list%buffer_size_send))
2530 cpassert(
ASSOCIATED(task_list%atom_pair_send))
2531 CALL rs_pack_buffer(src_matrices=src_matrices, &
2532 dest_buffer=buffer_send, &
2533 atom_pair=task_list%atom_pair_send, &
2534 pair_offsets=task_list%pair_offsets_send)
2537 CALL group%alltoall(buffer_send, task_list%rank_sizes_send, task_list%rank_offsets_send, &
2538 dest_buffer%host_buffer, &
2539 task_list%rank_sizes_recv, task_list%rank_offsets_recv)
2541 DEALLOCATE (buffer_send)
2542 CALL timestop(handle)
! ---------------------------------------------------------------------------
! rs_gather_matrices (name grounded by routinen below; header line elided by
! extraction): inverse of rs_scatter_matrices — alltoall-gathers data from
! src_buffer%host_buffer back using the recv sizes/offsets, then accumulates
! the received blocks into dest_matrices via rs_unpack_buffer.
! NOTE(review): text kept byte-identical; some lines elided by extraction.
! ---------------------------------------------------------------------------
2552 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(INOUT) :: dest_matrices
2556 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rs_gather_matrices'
2559 REAL(kind=
dp),
DIMENSION(:),
ALLOCATABLE :: buffer_send
2561 CALL timeset(routinen, handle)
2564 ALLOCATE (buffer_send(task_list%buffer_size_send))
! note the reversed roles: recv sizes/offsets describe the outgoing side here
2567 CALL group%alltoall(src_buffer%host_buffer, task_list%rank_sizes_recv, task_list%rank_offsets_recv, &
2568 buffer_send, task_list%rank_sizes_send, task_list%rank_offsets_send)
2571 cpassert(
ASSOCIATED(task_list%atom_pair_send))
2572 CALL rs_unpack_buffer(src_buffer=buffer_send, &
2573 dest_matrices=dest_matrices, &
2574 atom_pair=task_list%atom_pair_send, &
2575 pair_offsets=task_list%pair_offsets_send)
2577 DEALLOCATE (buffer_send)
2578 CALL timestop(handle)
! NOTE(review): header line of this wrapper was elided by extraction — it
! appears to pack src_matrices directly into a buffer's host storage using
! the task list's *recv* pair list/offsets (no communication); confirm the
! routine name against the repository source.
2587 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(IN) :: src_matrices
2591 CALL rs_pack_buffer(src_matrices=src_matrices, &
2592 dest_buffer=dest_buffer%host_buffer, &
2593 atom_pair=task_list%atom_pair_recv, &
2594 pair_offsets=task_list%pair_offsets_recv)
! NOTE(review): header line of this wrapper was elided by extraction — it
! appears to unpack a buffer's host storage directly into dest_matrices using
! the task list's *recv* pair list/offsets (no communication); confirm the
! routine name against the repository source.
2604 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(INOUT) :: dest_matrices
2607 CALL rs_unpack_buffer(src_buffer=src_buffer%host_buffer, &
2608 dest_matrices=dest_matrices, &
2609 atom_pair=task_list%atom_pair_recv, &
2610 pair_offsets=task_list%pair_offsets_recv)
! ---------------------------------------------------------------------------
! rs_pack_buffer: copies, for every atom pair, the corresponding DBCSR block
! (looked up by row/col/image) into the flat dest_buffer at the pair's
! precomputed offset, flattened column-major by RESHAPE.
! NOTE(review): the dbcsr_get_block_p call line is partially elided by the
! extraction (only its continuation survives); text kept byte-identical.
! ---------------------------------------------------------------------------
2618 SUBROUTINE rs_pack_buffer(src_matrices, dest_buffer, atom_pair, pair_offsets)
2619 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(IN) :: src_matrices
2620 REAL(kind=
dp),
DIMENSION(:),
INTENT(INOUT) :: dest_buffer
2622 INTEGER,
DIMENSION(:),
INTENT(IN) :: pair_offsets
2624 INTEGER :: acol, arow, img, i, offset, block_size
2626 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: block
2632 DO i = 1,
SIZE(atom_pair)
2633 arow = atom_pair(i)%row
2634 acol = atom_pair(i)%col
2635 img = atom_pair(i)%image
2637 block=block, found=found)
2639 block_size =
SIZE(block)
2640 offset = pair_offsets(i)
! flatten the 2-D block into the 1-D buffer slice
2641 dest_buffer(offset + 1:offset + block_size) = reshape(block, shape=[block_size])
2646 END SUBROUTINE rs_pack_buffer
! ---------------------------------------------------------------------------
! rs_unpack_buffer: inverse of rs_pack_buffer — for every atom pair, adds the
! buffer slice at the pair's offset onto the existing DBCSR block. Updates
! are guarded by a pool of OpenMP locks hashed on the block row, so blocks
! sharing a row are serialized while unrelated updates can proceed.
! NOTE(review): the dbcsr_get_block_p call line and the OpenMP parallel
! directives are partially elided by extraction; text kept byte-identical.
! ---------------------------------------------------------------------------
2652 SUBROUTINE rs_unpack_buffer(src_buffer, dest_matrices, atom_pair, pair_offsets)
2653 REAL(kind=
dp),
DIMENSION(:),
INTENT(IN) :: src_buffer
2654 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(INOUT) :: dest_matrices
2656 INTEGER,
DIMENSION(:),
INTENT(IN) :: pair_offsets
2658 INTEGER :: acol, arow, img, i, offset, &
2659 nrows, ncols, lock_num
2661 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: block
2662 INTEGER(kind=omp_lock_kind),
ALLOCATABLE,
DIMENSION(:) :: locks
! 10 locks per thread reduces hash collisions on the row-based lock index
2665 ALLOCATE (locks(10*omp_get_max_threads()))
2666 DO i = 1,
SIZE(locks)
2667 CALL omp_init_lock(locks(i))
2674 DO i = 1,
SIZE(atom_pair)
2675 arow = atom_pair(i)%row
2676 acol = atom_pair(i)%col
2677 img = atom_pair(i)%image
2679 block=block, found=found)
2681 nrows =
SIZE(block, 1)
2682 ncols =
SIZE(block, 2)
2683 offset = pair_offsets(i)
! lock chosen by row so concurrent updates to one block are serialized
2684 lock_num =
modulo(arow,
SIZE(locks)) + 1
2686 CALL omp_set_lock(locks(lock_num))
2687 block = block + reshape(src_buffer(offset + 1:offset + nrows*ncols), shape=[nrows, ncols])
2688 CALL omp_unset_lock(locks(lock_num))
2694 DO i = 1,
SIZE(locks)
2695 CALL omp_destroy_lock(locks(i))
2699 END SUBROUTINE rs_unpack_buffer
! ---------------------------------------------------------------------------
! rs_find_node: given a cube (center plus lb/ub extents) on a distributed
! realspace grid, determines which rank(s) must process it. If the cube fits
! inside one rank's domain (plus halo border) a single dist_type=1 task is
! emitted; otherwise the cube is spread over a range of neighbouring domain
! coordinates as dist_type=2 tasks, one per (ix,iy,iz) node, with a
! subpatch_pattern bitmask marking non-periodic domain edges (bits 0..5 for
! -x,+x,-y,+y,-z,+z). The task array is grown when it would overflow.
! NOTE(review): extraction elided loop headers, ELSE/END statements and some
! initializations (e.g. of bit_index/bit_pattern, lb_coord/ub_coord and the
! reallocation call); code text below is kept byte-identical.
! ---------------------------------------------------------------------------
2717 SUBROUTINE rs_find_node(rs_desc, igrid_level, n_levels, cube_center, ntasks, tasks, &
2718 lb_cube, ub_cube, added_tasks)
2721 INTEGER,
INTENT(IN) :: igrid_level, n_levels
2722 INTEGER,
DIMENSION(3),
INTENT(IN) :: cube_center
2723 INTEGER,
INTENT(INOUT) :: ntasks
2724 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
2725 INTEGER,
DIMENSION(3),
INTENT(IN) :: lb_cube, ub_cube
2726 INTEGER,
INTENT(OUT) :: added_tasks
2728 INTEGER,
PARAMETER :: add_tasks = 1000
2729 REAL(kind=
dp),
PARAMETER :: mult_tasks = 2.0_dp
2731 INTEGER :: bit_index, coord(3), curr_tasks, dest, i, icoord(3), idest, itask, ix, iy, iz, &
2732 lb_coord(3), lb_domain(3), lbc(3), ub_coord(3), ub_domain(3), ubc(3)
2733 INTEGER :: bit_pattern
2734 LOGICAL :: dir_periodic(3)
! map the cube center onto the process grid and find the owning rank
2736 coord(1) = rs_desc%x2coord(cube_center(1))
2737 coord(2) = rs_desc%y2coord(cube_center(2))
2738 coord(3) = rs_desc%z2coord(cube_center(3))
2739 dest = rs_desc%coord2rank(coord(1), coord(2), coord(3))
! absolute cube bounds
2742 lbc = lb_cube + cube_center
2743 ubc = ub_cube + cube_center
! fast path: cube entirely within the owner's domain (incl. halo border)
2745 IF (all((rs_desc%lb_global(:, dest) - rs_desc%border) <= lbc) .AND. &
2746 all((rs_desc%ub_global(:, dest) + rs_desc%border) >= ubc))
THEN
2748 tasks(ntasks)%destination = encode_rank(dest, igrid_level, n_levels)
2749 tasks(ntasks)%dist_type = 1
2750 tasks(ntasks)%subpatch_pattern = 0
! periodic direction: both edge bits cleared
2769 IF (rs_desc%perd(i) == 1)
THEN
2770 bit_pattern = ibclr(bit_pattern, bit_index)
2771 bit_index = bit_index + 1
2772 bit_pattern = ibclr(bit_pattern, bit_index)
2773 bit_index = bit_index + 1
2776 IF (ubc(i) <= rs_desc%lb_global(i, dest) - 1 + rs_desc%border)
THEN
2777 bit_pattern = ibset(bit_pattern, bit_index)
2778 bit_index = bit_index + 1
2780 bit_pattern = ibclr(bit_pattern, bit_index)
2781 bit_index = bit_index + 1
2784 IF (lbc(i) >= rs_desc%ub_global(i, dest) + 1 - rs_desc%border)
THEN
2785 bit_pattern = ibset(bit_pattern, bit_index)
2786 bit_index = bit_index + 1
2788 bit_pattern = ibclr(bit_pattern, bit_index)
2789 bit_index = bit_index + 1
2793 tasks(ntasks)%subpatch_pattern = bit_pattern
! general case: expand the coordinate range until the cube is covered
2803 lb_domain = rs_desc%lb_global(:, dest) - rs_desc%border
2804 ub_domain = rs_desc%ub_global(:, dest) + rs_desc%border
2807 IF (rs_desc%perd(i) == 0)
THEN
! walk to lower-coordinate neighbours while the cube sticks out below
2810 IF (lb_domain(i) > lbc(i))
THEN
2811 lb_coord(i) = lb_coord(i) - 1
2812 icoord =
modulo(lb_coord, rs_desc%group_dim)
2813 idest = rs_desc%coord2rank(icoord(1), icoord(2), icoord(3))
2814 lb_domain(i) = lb_domain(i) - (rs_desc%ub_global(i, idest) - rs_desc%lb_global(i, idest) + 1)
! walk to higher-coordinate neighbours while the cube sticks out above
2821 IF (ub_domain(i) < ubc(i))
THEN
2822 ub_coord(i) = ub_coord(i) + 1
2823 icoord =
modulo(ub_coord, rs_desc%group_dim)
2824 idest = rs_desc%coord2rank(icoord(1), icoord(2), icoord(3))
2825 ub_domain(i) = ub_domain(i) + (rs_desc%ub_global(i, idest) - rs_desc%lb_global(i, idest) + 1)
! if the accumulated domain wraps the whole grid, treat it as periodic
2835 IF (ub_domain(i) - lb_domain(i) + 1 >= rs_desc%npts(i))
THEN
2836 dir_periodic(i) = .true.
2838 ub_coord(i) = rs_desc%group_dim(i) - 1
2840 dir_periodic(i) = .false.
2844 added_tasks = product(ub_coord - lb_coord + 1)
2846 ntasks = ntasks + added_tasks - 1
! grow the task array when necessary
2847 IF (ntasks >
SIZE(tasks))
THEN
2848 curr_tasks = int((
SIZE(tasks) + add_tasks)*mult_tasks)
! one task per node in the covered coordinate box
2851 DO iz = lb_coord(3), ub_coord(3)
2852 DO iy = lb_coord(2), ub_coord(2)
2853 DO ix = lb_coord(1), ub_coord(1)
2854 icoord =
modulo([ix, iy, iz], rs_desc%group_dim)
2855 idest = rs_desc%coord2rank(icoord(1), icoord(2), icoord(3))
2856 tasks(itask)%destination = encode_rank(idest, igrid_level, n_levels)
2857 tasks(itask)%dist_type = 2
2858 tasks(itask)%subpatch_pattern = 0
! mark non-periodic edges of the patch (bits: 0/1 x, 2/3 y, 4/5 z)
2861 IF (ix == lb_coord(1) .AND. .NOT. dir_periodic(1)) &
2862 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 0)
2863 IF (ix == ub_coord(1) .AND. .NOT. dir_periodic(1)) &
2864 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 1)
2865 IF (iy == lb_coord(2) .AND. .NOT. dir_periodic(2)) &
2866 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 2)
2867 IF (iy == ub_coord(2) .AND. .NOT. dir_periodic(2)) &
2868 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 3)
2869 IF (iz == lb_coord(3) .AND. .NOT. dir_periodic(3)) &
2870 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 4)
2871 IF (iz == ub_coord(3) .AND. .NOT. dir_periodic(3)) &
2872 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 5)
2879 END SUBROUTINE rs_find_node
!> Packs an MPI rank and a (1-based) grid level into a single integer handle.
!> The inverse operations are decode_rank() and decode_level().
FUNCTION encode_rank(rank, grid_level, n_levels) RESULT(encoded_int)
   INTEGER, INTENT(IN)                                :: rank, grid_level, n_levels
   INTEGER                                            :: encoded_int

   ! the level occupies the low "digit" (base n_levels), the rank the rest
   encoded_int = (grid_level - 1) + n_levels*rank

END FUNCTION encode_rank
!> Extracts the MPI rank from a handle produced by encode_rank().
FUNCTION decode_rank(encoded_int, n_levels) RESULT(rank)
   INTEGER, INTENT(IN)                                :: encoded_int
   INTEGER, INTENT(IN)                                :: n_levels
   INTEGER                                            :: rank

   ! integer division discards the level "digit"; the INT() wrapper in the
   ! original was a no-op on an already-integer expression
   rank = encoded_int/n_levels

END FUNCTION decode_rank
!> Extracts the (1-based) grid level from a handle produced by encode_rank().
FUNCTION decode_level(encoded_int, n_levels) RESULT(grid_level)
   INTEGER, INTENT(IN)                                :: encoded_int
   INTEGER, INTENT(IN)                                :: n_levels
   INTEGER                                            :: grid_level

   ! MODULO recovers the level "digit"; +1 restores the 1-based index. The
   ! INT() wrapper in the original was redundant for integer arguments.
   grid_level = modulo(encoded_int, n_levels) + 1

END FUNCTION decode_level
! ---------------------------------------------------------------------------
! tasks_less_than: strict-weak-ordering comparator for task_type used by
! tasks_sort. Compares lexicographically on grid_level, image, iatom, jatom,
! iset, jset, ipgf and finally jpgf.
! NOTE(review): the dummy-argument declarations were elided by extraction;
! code text below is kept byte-identical.
! ---------------------------------------------------------------------------
2950 PURE FUNCTION tasks_less_than(a, b)
RESULT(res)
2954 IF (a%grid_level /= b%grid_level)
THEN
2955 res = a%grid_level < b%grid_level
2957 ELSE IF (a%image /= b%image)
THEN
2958 res = a%image < b%image
2960 ELSE IF (a%iatom /= b%iatom)
THEN
2961 res = a%iatom < b%iatom
2963 ELSE IF (a%jatom /= b%jatom)
THEN
2964 res = a%jatom < b%jatom
2966 ELSE IF (a%iset /= b%iset)
THEN
2967 res = a%iset < b%iset
2969 ELSE IF (a%jset /= b%jset)
THEN
2970 res = a%jset < b%jset
2972 ELSE IF (a%ipgf /= b%ipgf)
THEN
2973 res = a%ipgf < b%ipgf
2976 res = a%jpgf < b%jpgf
2979 END FUNCTION tasks_less_than
! ---------------------------------------------------------------------------
! tasks_sort: sorts arr(1:n) in place by tasks_less_than (merge sort via
! tasks_sort_low) and returns in 'indices' the original position of each
! element. Scratch arrays of size (n+1)/2 are allocated for the merge.
! NOTE(review): the n-threshold IF and the n==1 branch body were elided by
! extraction; code text below is kept byte-identical.
! ---------------------------------------------------------------------------
2992 SUBROUTINE tasks_sort(arr, n, indices)
2993 INTEGER,
INTENT(IN) :: n
2994 TYPE(
task_type),
DIMENSION(1:n),
INTENT(INOUT) :: arr
2995 integer,
DIMENSION(1:n),
INTENT(INOUT) :: indices
2998 TYPE(
task_type),
ALLOCATABLE :: tmp_arr(:)
2999 INTEGER,
ALLOCATABLE :: tmp_idx(:)
3003 ALLOCATE (tmp_arr((n + 1)/2), tmp_idx((n + 1)/2))
! identity permutation before sorting
3005 indices = [(i, i=1, n)]
3007 CALL tasks_sort_low(arr(1:n), indices, tmp_arr, tmp_idx)
3009 DEALLOCATE (tmp_arr, tmp_idx)
3010 ELSE IF (n > 0)
THEN
3014 END SUBROUTINE tasks_sort
! ---------------------------------------------------------------------------
! tasks_sort_low: recursive merge sort on task_type arrays, keeping the
! permutation in 'indices' in step with the data. Arrays of <= 7 elements
! fall back to a swap-based pass with early exit; larger arrays are split at
! the midpoint, sorted recursively and merged through tmp_arr/tmp_idx. The
! merge is skipped when the two halves are already in order.
! NOTE(review): extraction elided swap statements, counters and END blocks;
! code text below is kept byte-identical.
! ---------------------------------------------------------------------------
3026 RECURSIVE SUBROUTINE tasks_sort_low(arr, indices, tmp_arr, tmp_idx)
3027 TYPE(
task_type),
DIMENSION(:),
INTENT(INOUT) :: arr
3028 INTEGER,
DIMENSION(size(arr)),
INTENT(INOUT) :: indices
3029 TYPE(
task_type),
DIMENSION((size(arr) + 1)/2),
INTENT(INOUT) :: tmp_arr
3030 INTEGER,
DIMENSION((size(arr) + 1)/2),
INTENT(INOUT) :: tmp_idx
3032 INTEGER :: t, m, i, j, k
! small arrays: simple exchange sort with early exit
3039 IF (
size(arr) <= 7)
THEN
3040 DO j =
size(arr) - 1, 1, -1
3043 IF (tasks_less_than(arr(i + 1), arr(i)))
THEN
3050 indices(i) = indices(i + 1)
3055 IF (.NOT. swapped)
EXIT
! recursive case: sort both halves, then merge
3061 m = (
size(arr) + 1)/2
3062 CALL tasks_sort_low(arr(1:m), indices(1:m), tmp_arr, tmp_idx)
3063 CALL tasks_sort_low(arr(m + 1:), indices(m + 1:), tmp_arr, tmp_idx)
! merge only needed when the halves are out of order
3067 IF (tasks_less_than(arr(m + 1), arr(m)))
THEN
3070 tmp_arr(1:m) = arr(1:m)
3071 tmp_idx(1:m) = indices(1:m)
3076 DO WHILE (i <= m .and. j <=
size(arr) - m)
3077 IF (tasks_less_than(arr(m + j), tmp_arr(i)))
THEN
3079 indices(k) = indices(m + j)
3083 indices(k) = tmp_idx(i)
3093 indices(k) = tmp_idx(i)
3100 END SUBROUTINE tasks_sort_low
! ---------------------------------------------------------------------------
! atom_pair_less_than: strict-weak-ordering comparator for atom_pair_type
! used by atom_pair_sort. Compares lexicographically on rank, row, col and
! image (the row/col result assignments are elided in this listing).
! NOTE(review): dummy declarations and two branch bodies were elided by
! extraction; code text below is kept byte-identical.
! ---------------------------------------------------------------------------
3110 PURE FUNCTION atom_pair_less_than(a, b)
RESULT(res)
3114 IF (a%rank /= b%rank)
THEN
3115 res = a%rank < b%rank
3117 ELSE IF (a%row /= b%row)
THEN
3120 ELSE IF (a%col /= b%col)
THEN
3124 res = a%image < b%image
3127 END FUNCTION atom_pair_less_than
! ---------------------------------------------------------------------------
! atom_pair_sort: sorts arr(1:n) of atom_pair_type in place (merge sort via
! atom_pair_sort_low) and returns the original positions in 'indices'.
! Mirrors tasks_sort; scratch arrays of size (n+1)/2 are used for merging.
! NOTE(review): the arr declaration, tmp_arr declaration, the n-threshold IF
! and the n==1 branch were elided by extraction; text kept byte-identical.
! ---------------------------------------------------------------------------
3140 SUBROUTINE atom_pair_sort(arr, n, indices)
3141 INTEGER,
INTENT(IN) :: n
3143 integer,
DIMENSION(1:n),
INTENT(INOUT) :: indices
3147 INTEGER,
ALLOCATABLE :: tmp_idx(:)
3151 ALLOCATE (tmp_arr((n + 1)/2), tmp_idx((n + 1)/2))
! identity permutation before sorting
3153 indices = [(i, i=1, n)]
3155 CALL atom_pair_sort_low(arr(1:n), indices, tmp_arr, tmp_idx)
3157 DEALLOCATE (tmp_arr, tmp_idx)
3158 ELSE IF (n > 0)
THEN
3162 END SUBROUTINE atom_pair_sort
! ---------------------------------------------------------------------------
! atom_pair_sort_low: recursive merge sort on atom_pair_type arrays, the
! structural twin of tasks_sort_low but using atom_pair_less_than. Keeps the
! permutation in 'indices' synchronized; arrays of <= 7 elements use a
! swap-based pass, larger ones are split, sorted recursively and merged via
! tmp_arr/tmp_idx (merge skipped when the halves are already ordered).
! NOTE(review): extraction elided swap statements, counters, the arr
! declaration and END blocks; code text below is kept byte-identical.
! ---------------------------------------------------------------------------
3174 RECURSIVE SUBROUTINE atom_pair_sort_low(arr, indices, tmp_arr, tmp_idx)
3176 INTEGER,
DIMENSION(size(arr)),
INTENT(INOUT) :: indices
3177 TYPE(
atom_pair_type),
DIMENSION((size(arr) + 1)/2),
INTENT(INOUT) :: tmp_arr
3178 INTEGER,
DIMENSION((size(arr) + 1)/2),
INTENT(INOUT) :: tmp_idx
3180 INTEGER :: t, m, i, j, k
! small arrays: simple exchange sort with early exit
3187 IF (
size(arr) <= 7)
THEN
3188 DO j =
size(arr) - 1, 1, -1
3191 IF (atom_pair_less_than(arr(i + 1), arr(i)))
THEN
3198 indices(i) = indices(i + 1)
3203 IF (.NOT. swapped)
EXIT
! recursive case: sort both halves, then merge
3209 m = (
size(arr) + 1)/2
3210 CALL atom_pair_sort_low(arr(1:m), indices(1:m), tmp_arr, tmp_idx)
3211 CALL atom_pair_sort_low(arr(m + 1:), indices(m + 1:), tmp_arr, tmp_idx)
3215 IF (atom_pair_less_than(arr(m + 1), arr(m)))
THEN
3218 tmp_arr(1:m) = arr(1:m)
3219 tmp_idx(1:m) = indices(1:m)
3224 DO WHILE (i <= m .and. j <=
size(arr) - m)
3225 IF (atom_pair_less_than(arr(m + j), tmp_arr(i)))
THEN
3227 indices(k) = indices(m + j)
3231 indices(k) = tmp_idx(i)
3241 indices(k) = tmp_idx(i)
3248 END SUBROUTINE atom_pair_sort_low
void grid_create_basis_set(const int nset, const int nsgf, const int maxco, const int maxpgf, const int lmin[nset], const int lmax[nset], const int npgf[nset], const int nsgf_set[nset], const int first_sgf[nset], const double sphi[nsgf][maxco], const double zet[nset][maxpgf], grid_basis_set **basis_set_out)
Allocates a basis set which can be passed to grid_create_task_list. See grid_task_list....
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always returns a positive number. https://gcc....
All kind of helpful little routines.
real(kind=dp) function, public exp_radius_very_extended(la_min, la_max, lb_min, lb_max, pab, o1, o2, ra, rb, rp, zetp, eps, prefactor, cutoff, epsabs)
computes the radius of the Gaussian outside of which it is smaller than eps
subroutine, public get_gto_basis_set(gto_basis_set, name, aliases, norm_type, kind_radius, ncgf, nset, nsgf, cgf_symbol, sgf_symbol, norm_cgf, set_radius, lmax, lmin, lx, ly, lz, m, ncgf_set, npgf, nsgf_set, nshell, cphi, pgf_radius, sphi, scon, zet, first_cgf, first_sgf, l, last_cgf, last_sgf, n, gcc, maxco, maxl, maxpgf, maxsgf_set, maxshell, maxso, nco_sum, npgf_sum, nshell_sum, maxder, short_kind_radius, npgf_seg_sum)
...
Handles all functions related to the CELL.
Defines control structures, which contain the parameters and the settings for the DFT-based calculati...
subroutine, public dbcsr_get_block_p(matrix, row, col, block, found, row_size, col_size)
...
subroutine, public dbcsr_get_info(matrix, nblkrows_total, nblkcols_total, nfullrows_total, nfullcols_total, nblkrows_local, nblkcols_local, nfullrows_local, nfullcols_local, my_prow, my_pcol, local_rows, local_cols, proc_row_dist, proc_col_dist, row_blk_size, col_blk_size, row_blk_offset, col_blk_offset, distribution, name, matrix_type, group)
...
subroutine, public dbcsr_work_create(matrix, nblks_guess, sizedata_guess, n, work_mutable)
...
subroutine, public dbcsr_finalize(matrix)
...
subroutine, public dbcsr_put_block(matrix, row, col, block, summation)
...
for a given dr()/dh(r) this will provide the bounds to be used if one wants to go over a sphere-subre...
subroutine, public compute_cube_center(cube_center, rs_desc, zeta, zetb, ra, rab)
unifies the computation of the cube center, so that differences in implementation,...
subroutine, public return_cube(info, radius, lb_cube, ub_cube, sphere_bounds)
...
subroutine, public return_cube_nonortho(info, radius, lb, ub, rp)
...
integer function, public gaussian_gridlevel(gridlevel_info, exponent)
...
Fortran API for the grid package, which is written in C.
subroutine, public grid_create_task_list(ntasks, natoms, nkinds, nblocks, block_offsets, atom_positions, atom_kinds, basis_sets, level_list, iatom_list, jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list, block_num_list, radius_list, rab_list, rs_grids, task_list)
Allocates a task list which can be passed to grid_collocate_task_list.
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
integer, parameter, public default_string_length
Types and basic routines needed for a kpoint calculation.
subroutine, public get_kpoint_info(kpoint, kp_scheme, nkp_grid, kp_shift, symmetry, verbose, full_grid, use_real_wfn, eps_geo, parallel_group_size, kp_range, nkp, xkp, wkp, para_env, blacs_env_all, para_env_kp, para_env_inter_kp, blacs_env, kp_env, kp_aux_env, mpools, iogrp, nkp_groups, kp_dist, cell_to_index, index_to_cell, sab_nl, sab_nl_nosym)
Retrieve information from a kpoint environment.
An array-based list which grows on demand. When the internal array is full, a new array of twice the ...
Utility routines for the memory handling.
Interface to the message passing library MPI.
Fortran API for the offload package, which is written in C.
subroutine, public offload_create_buffer(length, buffer)
Allocates a buffer of given length, ie. number of elements.
Define methods related to particle_type.
subroutine, public get_particle_set(particle_set, qs_kind_set, first_sgf, last_sgf, nsgf, nmao, basis)
Get the components of a particle set.
Define the data structure for the particle information.
container for various plane-wave related things
subroutine, public pw_env_get(pw_env, pw_pools, cube_info, gridlevel_info, auxbas_pw_pool, auxbas_grid, auxbas_rs_desc, auxbas_rs_grid, rs_descs, rs_grids, xc_pw_pool, vdw_pw_pool, poisson_env, interp_section)
returns the various attributes of the pw env
Define the quickstep kind type and their sub types.
subroutine, public get_qs_kind(qs_kind, basis_set, basis_type, ncgf, nsgf, all_potential, tnadd_potential, gth_potential, sgp_potential, upf_potential, cneo_potential, se_parameter, dftb_parameter, xtb_parameter, dftb3_param, zatom, zeff, elec_conf, mao, lmax_dftb, alpha_core_charge, ccore_charge, core_charge, core_charge_radius, paw_proj_set, paw_atom, hard_radius, hard0_radius, max_rad_local, covalent_radius, vdw_radius, gpw_type_forced, harmonics, max_iso_not0, max_s_harm, grid_atom, ngrid_ang, ngrid_rad, lmax_rho0, dft_plus_u_atom, l_of_dft_plus_u, n_of_dft_plus_u, u_minus_j, u_of_dft_plus_u, j_of_dft_plus_u, alpha_of_dft_plus_u, beta_of_dft_plus_u, j0_of_dft_plus_u, occupation_of_dft_plus_u, dispersion, bs_occupation, magnetization, no_optimize, addel, laddel, naddel, orbitals, max_scf, eps_scf, smear, u_ramping, u_minus_j_target, eps_u_ramping, init_u_ramping_each_scf, reltmat, ghost, monovalent, floating, name, element_symbol, pao_basis_size, pao_model_file, pao_potentials, pao_descriptors, nelec)
Get attributes of an atomic kind.
subroutine, public get_ks_env(ks_env, v_hartree_rspace, s_mstruct_changed, rho_changed, exc_accint, potential_changed, forces_up_to_date, complex_ks, matrix_h, matrix_h_im, matrix_ks, matrix_ks_im, matrix_vxc, kinetic, matrix_s, matrix_s_ri_aux, matrix_w, matrix_p_mp2, matrix_p_mp2_admm, matrix_h_kp, matrix_h_im_kp, matrix_ks_kp, matrix_vxc_kp, kinetic_kp, matrix_s_kp, matrix_w_kp, matrix_s_ri_aux_kp, matrix_ks_im_kp, rho, rho_xc, vppl, xcint_weights, rho_core, rho_nlcc, rho_nlcc_g, vee, neighbor_list_id, sab_orb, sab_all, sac_ae, sac_ppl, sac_lri, sap_ppnl, sap_oce, sab_lrc, sab_se, sab_xtbe, sab_tbe, sab_core, sab_xb, sab_xtb_pp, sab_xtb_nonbond, sab_vdw, sab_scp, sab_almo, sab_kp, sab_kp_nosym, sab_cneo, task_list, task_list_soft, kpoints, do_kpoints, atomic_kind_set, qs_kind_set, cell, cell_ref, use_ref_cell, particle_set, energy, force, local_particles, local_molecules, molecule_kind_set, molecule_set, subsys, cp_subsys, virial, results, atprop, nkind, natom, dft_control, dbcsr_dist, distribution_2d, pw_env, para_env, blacs_env, nelectron_total, nelectron_spin)
...
Define the neighbor list data types and the corresponding functionality.
subroutine, public rs_grid_create(rs, desc)
...
pure integer function, public rs_grid_locate_rank(rs_desc, rank_in, shift)
returns the 1D rank of the task which is a cartesian shift away from 1D rank rank_in only possible if...
pure subroutine, public rs_grid_reorder_ranks(desc, real2virtual)
Defines a new ordering of ranks on this realspace grid, recalculating the data bounds and reallocatin...
subroutine, public rs_grid_release(rs_grid)
releases the given rs grid (see doc/ReferenceCounting.html)
generate the tasks lists used by collocate and integrate routines
subroutine, public rs_copy_to_matrices(src_buffer, dest_matrices, task_list)
Copies from buffer into DBCSR matrices, replaces rs_gather_matrix for non-distributed grids.
subroutine, public rs_scatter_matrices(src_matrices, dest_buffer, task_list, group)
Scatters dbcsr matrix blocks and receives them into a buffer as needed before collocation.
subroutine, public rs_distribute_matrix(rs_descs, pmats, atom_pair_send, atom_pair_recv, nimages, scatter, hmats)
redistributes the matrix so that it can be used in realspace operations i.e. according to the task li...
subroutine, public distribute_tasks(rs_descs, ntasks, natoms, tasks, atom_pair_send, atom_pair_recv, symmetric, reorder_rs_grid_ranks, skip_load_balance_distributed)
Assembles tasks to be performed on local grid.
subroutine, public rs_gather_matrices(src_buffer, dest_matrices, task_list, group)
Gather the dbcsr matrix blocks and receives them into a buffer as needed after integration.
subroutine, public task_list_inner_loop(tasks, ntasks, curr_tasks, rs_descs, dft_control, cube_info, gridlevel_info, cindex, iatom, jatom, rpgfa, rpgfb, zeta, zetb, kind_radius_b, set_radius_a, set_radius_b, ra, rab, la_max, la_min, lb_max, lb_min, npgfa, npgfb, nseta, nsetb)
...
subroutine, public generate_qs_task_list(ks_env, task_list, basis_type, reorder_rs_grid_ranks, skip_load_balance_distributed, pw_env_external, sab_orb_external, ext_kpoints)
...
subroutine, public rs_copy_to_buffer(src_matrices, dest_buffer, task_list)
Copies the DBCSR blocks into buffer, replaces rs_scatter_matrix for non-distributed grids.
subroutine, public serialize_task(task, serialized_task)
Serialize a task into an integer array. Used for MPI communication.
subroutine, public deserialize_task(task, serialized_task)
De-serialize a task from an integer array. Used for MPI communication.
subroutine, public reallocate_tasks(tasks, new_size)
Grow an array of tasks while preserving the existing entries.
integer, parameter, public task_size_in_int8
All kind of helpful little routines.
Type defining parameters related to the simulation cell.
Contains information about kpoints.
contained for different pw related things
Provides all information about a quickstep kind.
calculation environment to calculate the ks matrix, holds all the needed vars. assumes that the core ...