28 USE cp_dbcsr_api,
ONLY: dbcsr_convert_sizes_to_offsets, &
72#include "./base/base_uses.f90"
77 LOGICAL,
PRIVATE,
PARAMETER :: debug_this_module = .false.
81 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'task_list_methods'
117 reorder_rs_grid_ranks, skip_load_balance_distributed, &
118 soft_valid, basis_type, pw_env_external, sab_orb_external)
122 LOGICAL,
INTENT(IN) :: reorder_rs_grid_ranks, &
123 skip_load_balance_distributed
124 LOGICAL,
INTENT(IN),
OPTIONAL :: soft_valid
125 CHARACTER(LEN=*),
INTENT(IN),
OPTIONAL :: basis_type
126 TYPE(
pw_env_type),
OPTIONAL,
POINTER :: pw_env_external
128 OPTIONAL,
POINTER :: sab_orb_external
130 CHARACTER(LEN=*),
PARAMETER :: routinen =
'generate_qs_task_list'
131 INTEGER,
PARAMETER :: max_tasks = 2000
133 CHARACTER(LEN=default_string_length) :: my_basis_type
134 INTEGER :: cindex, curr_tasks, handle, i, iatom, iatom_old, igrid_level, igrid_level_old, &
135 ikind, ilevel, img, img_old, ipair, ipgf, iset, itask, jatom, jatom_old, jkind, jpgf, &
136 jset, maxpgf, maxset, natoms, nimages, nkind, nseta, nsetb, slot
137 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :) :: blocks
138 INTEGER,
DIMENSION(3) :: cellind
139 INTEGER,
DIMENSION(:),
POINTER :: la_max, la_min, lb_max, lb_min, npgfa, &
141 INTEGER,
DIMENSION(:, :, :),
POINTER :: cell_to_index
142 LOGICAL :: dokp, my_soft
143 REAL(kind=
dp) :: kind_radius_a, kind_radius_b
144 REAL(kind=
dp),
DIMENSION(3) :: ra, rab
145 REAL(kind=
dp),
DIMENSION(:),
POINTER :: set_radius_a, set_radius_b
146 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: rpgfa, rpgfb, zeta, zetb
158 TYPE(
qs_kind_type),
DIMENSION(:),
POINTER :: qs_kind_set
163 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
165 CALL timeset(routinen, handle)
168 qs_kind_set=qs_kind_set, &
170 particle_set=particle_set, &
171 dft_control=dft_control)
177 IF (
PRESENT(soft_valid)) my_soft = soft_valid
178 IF (
PRESENT(basis_type))
THEN
179 cpassert(.NOT. my_soft)
180 my_basis_type = basis_type
181 ELSEIF (my_soft)
THEN
182 my_basis_type =
"ORB_SOFT"
184 my_basis_type =
"ORB"
188 IF (
PRESENT(sab_orb_external)) sab_orb => sab_orb_external
191 IF (
PRESENT(pw_env_external)) pw_env => pw_env_external
192 CALL pw_env_get(pw_env, rs_descs=rs_descs, rs_grids=rs_grids)
195 gridlevel_info => pw_env%gridlevel_info
196 cube_info => pw_env%cube_info
199 nkind =
SIZE(qs_kind_set)
200 natoms =
SIZE(particle_set)
204 qs_kind => qs_kind_set(ikind)
206 basis_set=orb_basis_set, basis_type=my_basis_type)
208 IF (.NOT.
ASSOCIATED(orb_basis_set)) cycle
211 maxset = max(nseta, maxset)
212 maxpgf = max(maxval(npgfa), maxpgf)
216 nimages = dft_control%nimages
217 IF (nimages > 1)
THEN
220 CALL get_ks_env(ks_env=ks_env, kpoints=kpoints)
224 NULLIFY (cell_to_index)
228 IF (
ASSOCIATED(task_list%atom_pair_send))
DEALLOCATE (task_list%atom_pair_send)
229 IF (
ASSOCIATED(task_list%atom_pair_recv))
DEALLOCATE (task_list%atom_pair_recv)
232 IF (.NOT.
ASSOCIATED(task_list%tasks))
THEN
236 curr_tasks =
SIZE(task_list%tasks)
238 ALLOCATE (basis_set_list(nkind))
240 qs_kind => qs_kind_set(ikind)
241 CALL get_qs_kind(qs_kind=qs_kind, basis_set=basis_set_a, &
242 basis_type=my_basis_type)
243 IF (
ASSOCIATED(basis_set_a))
THEN
244 basis_set_list(ikind)%gto_basis_set => basis_set_a
246 NULLIFY (basis_set_list(ikind)%gto_basis_set)
258 DO slot = 1, sab_orb(1)%nl_size
259 ikind = sab_orb(1)%nlist_task(slot)%ikind
260 jkind = sab_orb(1)%nlist_task(slot)%jkind
261 iatom = sab_orb(1)%nlist_task(slot)%iatom
262 jatom = sab_orb(1)%nlist_task(slot)%jatom
263 rab(1:3) = sab_orb(1)%nlist_task(slot)%r(1:3)
264 cellind(1:3) = sab_orb(1)%nlist_task(slot)%cell(1:3)
266 basis_set_a => basis_set_list(ikind)%gto_basis_set
267 IF (.NOT.
ASSOCIATED(basis_set_a)) cycle
268 basis_set_b => basis_set_list(jkind)%gto_basis_set
269 IF (.NOT.
ASSOCIATED(basis_set_b)) cycle
270 ra(:) =
pbc(particle_set(iatom)%r, cell)
272 la_max => basis_set_a%lmax
273 la_min => basis_set_a%lmin
274 npgfa => basis_set_a%npgf
275 nseta = basis_set_a%nset
276 rpgfa => basis_set_a%pgf_radius
277 set_radius_a => basis_set_a%set_radius
278 kind_radius_a = basis_set_a%kind_radius
279 zeta => basis_set_a%zet
281 lb_max => basis_set_b%lmax
282 lb_min => basis_set_b%lmin
283 npgfb => basis_set_b%npgf
284 nsetb = basis_set_b%nset
285 rpgfb => basis_set_b%pgf_radius
286 set_radius_b => basis_set_b%set_radius
287 kind_radius_b = basis_set_b%kind_radius
288 zetb => basis_set_b%zet
291 cindex = cell_to_index(cellind(1), cellind(2), cellind(3))
297 rs_descs, dft_control, cube_info, gridlevel_info, cindex, &
298 iatom, jatom, rpgfa, rpgfb, zeta, zetb, kind_radius_b, &
299 set_radius_a, set_radius_b, ra, rab, &
300 la_max, la_min, lb_max, lb_min, npgfa, npgfb, nseta, nsetb)
307 rs_descs=rs_descs, ntasks=task_list%ntasks, natoms=natoms, &
308 tasks=task_list%tasks, atom_pair_send=task_list%atom_pair_send, &
309 atom_pair_recv=task_list%atom_pair_recv, symmetric=.true., &
310 reorder_rs_grid_ranks=reorder_rs_grid_ranks, &
311 skip_load_balance_distributed=skip_load_balance_distributed)
314 ALLOCATE (nsgf(natoms))
315 CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_list, nsgf=nsgf)
316 IF (
ASSOCIATED(task_list%atom_pair_send))
THEN
318 CALL rs_calc_offsets(pairs=task_list%atom_pair_send, &
320 group_size=rs_descs(1)%rs_desc%group_size, &
321 pair_offsets=task_list%pair_offsets_send, &
322 rank_offsets=task_list%rank_offsets_send, &
323 rank_sizes=task_list%rank_sizes_send, &
324 buffer_size=task_list%buffer_size_send)
326 CALL rs_calc_offsets(pairs=task_list%atom_pair_recv, &
328 group_size=rs_descs(1)%rs_desc%group_size, &
329 pair_offsets=task_list%pair_offsets_recv, &
330 rank_offsets=task_list%rank_offsets_recv, &
331 rank_sizes=task_list%rank_sizes_recv, &
332 buffer_size=task_list%buffer_size_recv)
333 DEALLOCATE (basis_set_list, nsgf)
336 IF (reorder_rs_grid_ranks)
THEN
337 DO i = 1, gridlevel_info%ngrid_levels
338 IF (rs_descs(i)%rs_desc%distributed)
THEN
345 CALL create_grid_task_list(task_list=task_list, &
346 qs_kind_set=qs_kind_set, &
347 particle_set=particle_set, &
349 basis_type=my_basis_type, &
355 IF (
ASSOCIATED(task_list%taskstart))
THEN
356 DEALLOCATE (task_list%taskstart)
358 IF (
ASSOCIATED(task_list%taskstop))
THEN
359 DEALLOCATE (task_list%taskstop)
361 IF (
ASSOCIATED(task_list%npairs))
THEN
362 DEALLOCATE (task_list%npairs)
367 ALLOCATE (task_list%npairs(
SIZE(rs_descs)))
369 iatom_old = -1; jatom_old = -1; igrid_level_old = -1; img_old = -1
373 DO i = 1, task_list%ntasks
374 igrid_level = task_list%tasks(i)%grid_level
375 img = task_list%tasks(i)%image
376 iatom = task_list%tasks(i)%iatom
377 jatom = task_list%tasks(i)%jatom
378 iset = task_list%tasks(i)%iset
379 jset = task_list%tasks(i)%jset
380 ipgf = task_list%tasks(i)%ipgf
381 jpgf = task_list%tasks(i)%jpgf
382 IF (igrid_level .NE. igrid_level_old)
THEN
383 IF (igrid_level_old .NE. -1)
THEN
384 task_list%npairs(igrid_level_old) = ipair
387 igrid_level_old = igrid_level
391 ELSE IF (iatom .NE. iatom_old .OR. jatom .NE. jatom_old .OR. img .NE. img_old)
THEN
399 IF (task_list%ntasks /= 0)
THEN
400 task_list%npairs(igrid_level) = ipair
407 ALLOCATE (task_list%taskstart(maxval(task_list%npairs),
SIZE(rs_descs)))
408 ALLOCATE (task_list%taskstop(maxval(task_list%npairs),
SIZE(rs_descs)))
410 iatom_old = -1; jatom_old = -1; igrid_level_old = -1; img_old = -1
412 task_list%taskstart = 0
413 task_list%taskstop = 0
415 DO i = 1, task_list%ntasks
416 igrid_level = task_list%tasks(i)%grid_level
417 img = task_list%tasks(i)%image
418 iatom = task_list%tasks(i)%iatom
419 jatom = task_list%tasks(i)%jatom
420 iset = task_list%tasks(i)%iset
421 jset = task_list%tasks(i)%jset
422 ipgf = task_list%tasks(i)%ipgf
423 jpgf = task_list%tasks(i)%jpgf
424 IF (igrid_level .NE. igrid_level_old)
THEN
425 IF (igrid_level_old .NE. -1)
THEN
426 task_list%taskstop(ipair, igrid_level_old) = i - 1
429 task_list%taskstart(ipair, igrid_level) = i
430 igrid_level_old = igrid_level
434 ELSE IF (iatom .NE. iatom_old .OR. jatom .NE. jatom_old .OR. img .NE. img_old)
THEN
436 task_list%taskstart(ipair, igrid_level) = i
437 task_list%taskstop(ipair - 1, igrid_level) = i - 1
444 IF (task_list%ntasks /= 0)
THEN
445 task_list%taskstop(ipair, igrid_level) = task_list%ntasks
449 IF (debug_this_module)
THEN
450 tasks => task_list%tasks
452 WRITE (6, *)
"Total number of tasks ", task_list%ntasks
453 DO igrid_level = 1, gridlevel_info%ngrid_levels
454 WRITE (6, *)
"Total number of pairs(grid_level) ", &
455 igrid_level, task_list%npairs(igrid_level)
459 DO igrid_level = 1, gridlevel_info%ngrid_levels
461 ALLOCATE (blocks(natoms, natoms, nimages))
463 DO ipair = 1, task_list%npairs(igrid_level)
464 itask = task_list%taskstart(ipair, igrid_level)
465 ilevel = task_list%tasks(itask)%grid_level
466 img = task_list%tasks(itask)%image
467 iatom = task_list%tasks(itask)%iatom
468 jatom = task_list%tasks(itask)%jatom
469 iset = task_list%tasks(itask)%iset
470 jset = task_list%tasks(itask)%jset
471 ipgf = task_list%tasks(itask)%ipgf
472 jpgf = task_list%tasks(itask)%jpgf
473 IF (blocks(iatom, jatom, img) == -1 .AND. blocks(jatom, iatom, img) == -1)
THEN
474 blocks(iatom, jatom, img) = 1
475 blocks(jatom, iatom, img) = 1
477 WRITE (6, *)
"TASK LIST CONFLICT IN PAIR ", ipair
478 WRITE (6, *)
"Reuse of iatom, jatom, image ", iatom, jatom, img
484 DO itask = task_list%taskstart(ipair, igrid_level), task_list%taskstop(ipair, igrid_level)
485 ilevel = task_list%tasks(itask)%grid_level
486 img = task_list%tasks(itask)%image
487 iatom = task_list%tasks(itask)%iatom
488 jatom = task_list%tasks(itask)%jatom
489 iset = task_list%tasks(itask)%iset
490 jset = task_list%tasks(itask)%jset
491 ipgf = task_list%tasks(itask)%ipgf
492 jpgf = task_list%tasks(itask)%jpgf
493 IF (iatom /= iatom_old .OR. jatom /= jatom_old .OR. img /= img_old)
THEN
494 WRITE (6, *)
"TASK LIST CONFLICT IN TASK ", itask
495 WRITE (6, *)
"Inconsistent iatom, jatom, image ", iatom, jatom, img
496 WRITE (6, *)
"Should be iatom, jatom, image ", iatom_old, jatom_old, img_old
507 CALL timestop(handle)
!> \brief Copies the per-task fields of task_list%tasks into flat, per-field arrays and
!>        passes them, with atom kinds/positions and the per-kind grid basis sets, as
!>        keyword arguments to a call that receives task_list%grid_task_list.
!> \param task_list    its ntasks, tasks, grid_basis_sets, pair_offsets_recv and
!>                     grid_task_list components are used in the visible lines
!> \param qs_kind_set  its size is taken as nkinds; entries are queried with get_qs_kind
!> \param particle_set its size is taken as natoms; kind number and position are read per atom
!> \param cell         passed to pbc() together with each atom position
!> \param basis_type   forwarded to get_qs_kind as basis_type
!> \param rs_grids     NOTE(review): not referenced in the lines visible in this listing
!> \note NOTE(review): this listing has gaps (loop headers, END IF/ELSE lines, some
!>       declarations and the name of the final call are not shown). The comments below
!>       describe only the statements that are visible.
515 SUBROUTINE create_grid_task_list(task_list, qs_kind_set, particle_set, cell, basis_type, rs_grids)
517 TYPE(
qs_kind_type),
DIMENSION(:),
POINTER :: qs_kind_set
520 CHARACTER(LEN=default_string_length) :: basis_type
524 INTEGER :: nset, natoms, nkinds, ntasks, &
525 ikind, iatom, itask, nsgf
526 INTEGER,
DIMENSION(:),
ALLOCATABLE :: atom_kinds, level_list, iatom_list, jatom_list, &
527 iset_list, jset_list, ipgf_list, jpgf_list, &
528 border_mask_list, block_num_list
529 REAL(kind=
dp),
DIMENSION(:),
ALLOCATABLE :: radius_list
530 REAL(kind=
dp),
DIMENSION(:, :),
ALLOCATABLE :: rab_list, atom_positions
531 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
532 INTEGER,
DIMENSION(:, :),
POINTER :: first_sgf
533 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: sphi, zet
534 INTEGER,
DIMENSION(:),
POINTER :: lmax, lmin, npgf, nsgf_set
! Problem sizes and a local alias for the task array.
536 nkinds =
SIZE(qs_kind_set)
537 natoms =
SIZE(particle_set)
538 ntasks = task_list%ntasks
539 tasks => task_list%tasks
! The per-kind grid basis sets are only set up when they are not yet associated.
541 IF (.NOT.
ASSOCIATED(task_list%grid_basis_sets))
THEN
543 ALLOCATE (task_list%grid_basis_sets(nkinds))
! NOTE(review): the loop over ikind and the calls these keyword arguments belong to
! are not visible here; the last one stores into task_list%grid_basis_sets(ikind).
545 CALL get_qs_kind(qs_kind_set(ikind), basis_type=basis_type, basis_set=orb_basis_set)
551 first_sgf=first_sgf, &
558 maxco=
SIZE(sphi, 1), &
559 maxpgf=
SIZE(zet, 1), &
564 first_sgf=first_sgf, &
567 basis_set=task_list%grid_basis_sets(ikind))
! Kind number and pbc()-processed position of each atom (loop over iatom not shown).
572 ALLOCATE (atom_kinds(natoms), atom_positions(3, natoms))
574 atom_kinds(iatom) = particle_set(iatom)%atomic_kind%kind_number
575 atom_positions(:, iatom) =
pbc(particle_set(iatom)%r, cell)
! One slot per task in every flat list.
578 ALLOCATE (level_list(ntasks), iatom_list(ntasks), jatom_list(ntasks))
579 ALLOCATE (iset_list(ntasks), jset_list(ntasks), ipgf_list(ntasks), jpgf_list(ntasks))
580 ALLOCATE (border_mask_list(ntasks), block_num_list(ntasks))
581 ALLOCATE (radius_list(ntasks), rab_list(3, ntasks))
! Field-by-field copy of task itask (loop over itask not shown).
584 level_list(itask) = tasks(itask)%grid_level
585 iatom_list(itask) = tasks(itask)%iatom
586 jatom_list(itask) = tasks(itask)%jatom
587 iset_list(itask) = tasks(itask)%iset
588 jset_list(itask) = tasks(itask)%jset
589 ipgf_list(itask) = tasks(itask)%ipgf
590 jpgf_list(itask) = tasks(itask)%jpgf
! For dist_type == 2 the mask is the low six bits (63 = 2**6 - 1) of the bitwise
! complement of subpatch_pattern.
591 IF (tasks(itask)%dist_type == 2)
THEN
592 border_mask_list(itask) = iand(63, not(tasks(itask)%subpatch_pattern))
! NOTE(review): presumably the ELSE branch (mask 0 for all other dist_type values);
! the ELSE line itself is not visible in this listing.
594 border_mask_list(itask) = 0
596 block_num_list(itask) = tasks(itask)%pair_index
597 radius_list(itask) = tasks(itask)%radius
598 rab_list(:, itask) = tasks(itask)%rab(:)
! Keyword arguments of the call that consumes the lists; the call name and its first
! arguments are not visible here. nblocks is the size of task_list%pair_offsets_recv.
604 nblocks=
SIZE(task_list%pair_offsets_recv), &
605 block_offsets=task_list%pair_offsets_recv, &
606 atom_positions=atom_positions, &
607 atom_kinds=atom_kinds, &
608 basis_sets=task_list%grid_basis_sets, &
609 level_list=level_list, &
610 iatom_list=iatom_list, &
611 jatom_list=jatom_list, &
612 iset_list=iset_list, &
613 jset_list=jset_list, &
614 ipgf_list=ipgf_list, &
615 jpgf_list=jpgf_list, &
616 border_mask_list=border_mask_list, &
617 block_num_list=block_num_list, &
618 radius_list=radius_list, &
621 task_list=task_list%grid_task_list)
626 END SUBROUTINE create_grid_task_list
661 cube_info, gridlevel_info, cindex, &
662 iatom, jatom, rpgfa, rpgfb, zeta, zetb, kind_radius_b, set_radius_a, set_radius_b, ra, rab, &
663 la_max, la_min, lb_max, lb_min, npgfa, npgfb, nseta, nsetb)
665 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
666 INTEGER :: ntasks, curr_tasks
672 INTEGER :: cindex, iatom, jatom
673 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: rpgfa, rpgfb, zeta, zetb
674 REAL(kind=
dp) :: kind_radius_b
675 REAL(kind=
dp),
DIMENSION(:),
POINTER :: set_radius_a, set_radius_b
676 REAL(kind=
dp),
DIMENSION(3) :: ra, rab
677 INTEGER,
DIMENSION(:),
POINTER :: la_max, la_min, lb_max, lb_min, npgfa, &
679 INTEGER :: nseta, nsetb
681 INTEGER :: cube_center(3), igrid_level, ipgf, iset, &
682 jpgf, jset, lb_cube(3), ub_cube(3)
683 REAL(kind=
dp) :: dab, rab2, radius, zetp
685 rab2 = rab(1)*rab(1) + rab(2)*rab(2) + rab(3)*rab(3)
688 loop_iset:
DO iset = 1, nseta
690 IF (set_radius_a(iset) + kind_radius_b < dab) cycle
692 loop_jset:
DO jset = 1, nsetb
694 IF (set_radius_a(iset) + set_radius_b(jset) < dab) cycle
696 loop_ipgf:
DO ipgf = 1, npgfa(iset)
698 IF (rpgfa(ipgf, iset) + set_radius_b(jset) < dab) cycle
700 loop_jpgf:
DO jpgf = 1, npgfb(jset)
702 IF (rpgfa(ipgf, iset) + rpgfb(jpgf, jset) < dab) cycle
704 zetp = zeta(ipgf, iset) + zetb(jpgf, jset)
707 CALL compute_pgf_properties(cube_center, lb_cube, ub_cube, radius, &
708 rs_descs(igrid_level)%rs_desc, cube_info(igrid_level), &
709 la_max(iset), zeta(ipgf, iset), la_min(iset), &
710 lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
711 ra, rab, rab2, dft_control%qs_control%eps_rho_rspace)
713 CALL pgf_to_tasks(tasks, ntasks, curr_tasks, &
714 rab, cindex, iatom, jatom, iset, jset, ipgf, jpgf, &
715 la_max(iset), lb_max(jset), rs_descs(igrid_level)%rs_desc, &
716 igrid_level, gridlevel_info%ngrid_levels, cube_center, &
717 lb_cube, ub_cube, radius)
!> \brief Computes, for one pair of primitive Gaussians, the grid point used as cube
!>        center, the cube bounds around it and the radius.
!> \param cube_center [out] wrapped with modulo by rs_desc%npts and then shifted by rs_desc%lb
!> \param lb_cube     [out] lower cube bounds
!> \param ub_cube     [out] upper cube bounds
!> \param radius      [out] passed to return_cube; its assignment is not visible here
!> \param rs_desc     its npts, lb and orthorhombic components are read
!> \param cube_info   passed to return_cube
!> \param la_max, la_min, lb_max, lb_min  NOTE(review): not referenced in the visible lines
!> \param zeta, zetb  exponents of the two primitives
!> \param ra          position of the first centre
!> \param rab         vector added to ra to obtain rb
!> \param rab2        multiplied into the exponent of prefactor
!> \param eps         forwarded as eps= to a call whose name is not visible here
!> \note NOTE(review): the assignments of zetp, f, radius and the initial cube_center are
!>       missing from this listing, as are the ELSE/END IF lines of the IF below.
763 SUBROUTINE compute_pgf_properties(cube_center, lb_cube, ub_cube, radius, &
764 rs_desc, cube_info, la_max, zeta, la_min, lb_max, zetb, lb_min, ra, rab, rab2, eps)
766 INTEGER,
DIMENSION(3),
INTENT(OUT) :: cube_center, lb_cube, ub_cube
767 REAL(kind=
dp),
INTENT(OUT) :: radius
770 INTEGER,
INTENT(IN) :: la_max
771 REAL(kind=
dp),
INTENT(IN) :: zeta
772 INTEGER,
INTENT(IN) :: la_min, lb_max
773 REAL(kind=
dp),
INTENT(IN) :: zetb
774 INTEGER,
INTENT(IN) :: lb_min
775 REAL(kind=
dp),
INTENT(IN) :: ra(3), rab(3), rab2, eps
778 INTEGER,
DIMENSION(:),
POINTER :: sphere_bounds
779 REAL(kind=
dp) :: cutoff, f, prefactor, rb(3), zetp
780 REAL(kind=
dp),
DIMENSION(3) :: rp
! rp lies on the line from ra towards ra + rab, at the fraction zetb/zetp.
! NOTE(review): presumably zetp = zeta + zetb (Gaussian product centre) - the assignment
! of zetp is not visible here, confirm.
785 rp(:) = ra(:) + zetb/zetp*rab(:)
786 rb(:) = ra(:) + rab(:)
! Exponential damping factor in the squared distance rab2 (f is set in a line not shown).
789 prefactor = exp(-zeta*f*rab2)
! Tail of a call (name not visible) that receives zetp, eps, prefactor and cutoff.
791 zetp=zetp, eps=eps, prefactor=prefactor, cutoff=cutoff)
! Fold the centre into 0..npts-1 per direction, then offset by the grid's lower bound.
795 cube_center(:) =
modulo(cube_center(:), rs_desc%npts(:))
796 cube_center(:) = cube_center(:) + rs_desc%lb(:)
798 IF (rs_desc%orthorhombic)
THEN
799 CALL return_cube(cube_info, radius, lb_cube, ub_cube, sphere_bounds)
! Rebuild the bounds from their extent so they span -extent/2 - 1 .. extent/2
! (integer division). NOTE(review): whether these lines sit in an ELSE branch cannot
! be told from this listing.
803 extent(:) = ub_cube(:) - lb_cube(:)
804 lb_cube(:) = -extent(:)/2 - 1
805 ub_cube(:) = extent(:)/2
808 END SUBROUTINE compute_pgf_properties
!> \brief Integer cost estimate for a task, built from the cube size and lmax.
!> \param lb_cube  lower cube bounds
!> \param ub_cube  upper cube bounds
!> \param fraction scales the cube-volume term of the cost
!> \param lmax     enters linearly in the volume term and as lmax**7 in the additive term
!> \param is_ortho NOTE(review): not referenced in the visible lines; presumably selects
!>                 the coefficients v1..v5, whose assignments are missing from this listing
!> \return the cost expression divided by 1000 and rounded up with ceiling
824 INTEGER FUNCTION cost_model(lb_cube, ub_cube, fraction, lmax, is_ortho)
825 INTEGER,
DIMENSION(3),
INTENT(IN) :: lb_cube, ub_cube
826 REAL(kind=
dp),
INTENT(IN) :: fraction
831 REAL(kind=
dp) :: v1, v2, v3, v4, v5
! Half of the largest cube edge length (number of points), using integer division.
833 cmax = maxval(((ub_cube - lb_cube) + 1)/2)
! Volume-like term (cmax + v2)**3 weighted by (lmax + v1), v3 and fraction, plus the
! additive terms v4 and v5*lmax**7.
848 cost_model = ceiling(((lmax + v1)*(cmax + v2)**3*v3*fraction + v4 + v5*lmax**7)/1000.0_dp)
850 END FUNCTION cost_model
!> \brief Appends the task(s) for one pair of primitive Gaussians to the task array and
!>        fills in their source, cost, grid level, image, indices, rab and radius.
!> \param tasks        task array; the last added_tasks entries up to ntasks are written
!> \param ntasks       [inout] index of the last used task
!> \param curr_tasks   [inout] capacity figure, enlarged when ntasks exceeds it
!> \param rab          stored in each new task
!> \param cindex       stored as the task's image
!> \param iatom, jatom, iset, jset, ipgf, jpgf  stored in each new task
!> \param la_max, lb_max  summed to lmax for the cost model
!> \param rs_desc      its distributed, my_pos and orthorhombic components are read
!> \param igrid_level  stored as grid_level and used in encode_rank
!> \param n_levels     passed to encode_rank and rs_find_node
!> \param cube_center  passed to rs_find_node
!> \param lb_cube, ub_cube  passed to rs_find_node and cost_model
!> \param radius       stored in each new task
!> \note NOTE(review): the increment of ntasks, the reallocation call, the ELSE of the
!>       distributed test and the non-distributed value of added_tasks are not visible
!>       in this listing.
884 SUBROUTINE pgf_to_tasks(tasks, ntasks, curr_tasks, &
885 rab, cindex, iatom, jatom, iset, jset, ipgf, jpgf, &
886 la_max, lb_max, rs_desc, igrid_level, n_levels, &
887 cube_center, lb_cube, ub_cube, radius)
889 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
890 INTEGER,
INTENT(INOUT) :: ntasks, curr_tasks
891 REAL(kind=
dp),
DIMENSION(3),
INTENT(IN) :: rab
892 INTEGER,
INTENT(IN) :: cindex, iatom, jatom, iset, jset, ipgf, &
895 INTEGER,
INTENT(IN) :: igrid_level, n_levels
896 INTEGER,
DIMENSION(3),
INTENT(IN) :: cube_center, lb_cube, ub_cube
897 REAL(kind=
dp),
INTENT(IN) :: radius
899 INTEGER,
PARAMETER :: add_tasks = 1000
900 REAL(kind=
dp),
PARAMETER :: mult_tasks = 2.0_dp
902 INTEGER :: added_tasks, cost, j, lmax
904 REAL(kind=
dp) :: tfraction
! Capacity growth: new capacity = (old + add_tasks) * mult_tasks.
908 IF (ntasks > curr_tasks)
THEN
909 curr_tasks = int((curr_tasks + add_tasks)*mult_tasks)
! Distributed grids: rs_find_node is handed the task array and reports added_tasks.
914 IF (rs_desc%distributed)
THEN
918 CALL rs_find_node(rs_desc, igrid_level, n_levels, cube_center, &
919 ntasks=ntasks, tasks=tasks, lb_cube=lb_cube, ub_cube=ub_cube, added_tasks=added_tasks)
! NOTE(review): presumably the ELSE branch (ELSE line not visible): the task stays on
! this rank with dist_type 0 and an empty subpatch pattern.
922 tasks(ntasks)%destination = encode_rank(rs_desc%my_pos, igrid_level, n_levels)
923 tasks(ntasks)%dist_type = 0
924 tasks(ntasks)%subpatch_pattern = 0
928 lmax = la_max + lb_max
! The orthorhombic cost coefficients apply only to dist_type 0 or 1 on an orthorhombic grid.
929 is_ortho = (tasks(ntasks)%dist_type == 0 .OR. tasks(ntasks)%dist_type == 1) .AND. rs_desc%orthorhombic
! The cost is computed once with fraction 1/added_tasks and assigned to every new task.
932 tfraction = 1.0_dp/added_tasks
934 cost = cost_model(lb_cube, ub_cube, tfraction, lmax, is_ortho)
! Fill the last added_tasks entries, i.e. indices ntasks-added_tasks+1 .. ntasks.
936 DO j = 1, added_tasks
937 tasks(ntasks - added_tasks + j)%source = encode_rank(rs_desc%my_pos, igrid_level, n_levels)
938 tasks(ntasks - added_tasks + j)%cost = cost
939 tasks(ntasks - added_tasks + j)%grid_level = igrid_level
940 tasks(ntasks - added_tasks + j)%image = cindex
941 tasks(ntasks - added_tasks + j)%iatom = iatom
942 tasks(ntasks - added_tasks + j)%jatom = jatom
943 tasks(ntasks - added_tasks + j)%iset = iset
944 tasks(ntasks - added_tasks + j)%jset = jset
945 tasks(ntasks - added_tasks + j)%ipgf = ipgf
946 tasks(ntasks - added_tasks + j)%jpgf = jpgf
947 tasks(ntasks - added_tasks + j)%rab = rab
948 tasks(ntasks - added_tasks + j)%radius = radius
951 END SUBROUTINE pgf_to_tasks
!> \brief Driver for balancing the tasks of one grid level: builds a destination list,
!>        fills it from the tasks, optimizes it, and applies it back to the tasks.
!> \param tasks      task array, passed to compute_load_list
!> \param ntasks     number of tasks, passed to compute_load_list
!> \param rs_descs   grid descriptors; entry 1 supplies the group and my_pos for the optimization
!> \param grid_level grid level to balance
!> \param natoms     passed to compute_load_list
!> \note NOTE(review): some declarations and any clean-up of list are not visible in
!>       this listing.
963 SUBROUTINE load_balance_distributed(tasks, ntasks, rs_descs, grid_level, natoms)
965 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
969 INTEGER :: grid_level, natoms
971 CHARACTER(LEN=*),
PARAMETER :: routinen =
'load_balance_distributed'
974 INTEGER,
DIMENSION(:, :, :),
POINTER ::
list
976 CALL timeset(routinen, handle)
! Step 1: allocate and initialise the destination list.
981 CALL create_destination_list(
list, rs_descs, grid_level)
! Step 2: first pass with create_list=.true. accumulates task costs into list.
984 CALL compute_load_list(
list, rs_descs, grid_level, tasks, ntasks, natoms, create_list=.true.)
! Step 3: optimize the list across the group of rs_descs(1).
987 CALL optimize_load_list(
list, rs_descs(1)%rs_desc%group, rs_descs(1)%rs_desc%my_pos)
! Step 4: second pass with create_list=.false. rewrites task destinations from list.
990 CALL compute_load_list(
list, rs_descs, grid_level, tasks, ntasks, natoms, create_list=.false.)
994 CALL timestop(handle)
996 END SUBROUTINE load_balance_distributed
!> \brief Shifts load between pairs of ranks connected by a "flux", within per-flux
!>        lower/upper limits, and writes the resulting loads back into list_global(2,:,:).
!> \param list_global list_global(1, idest, icpu) is read as a destination rank and
!>                    list_global(2, idest, icpu) as the load of that entry; the third
!>                    dimension is indexed from 0
!> \note NOTE(review): the iteration loop headers (over iterations and over iflux), several
!>       ELSE/END lines, the nflux counters' resets and the final deallocations are missing
!>       from this listing; the comments describe only the visible statements.
1005 SUBROUTINE balance_global_list(list_global)
1006 INTEGER,
DIMENSION(:, :, 0:) :: list_global
1008 CHARACTER(LEN=*),
PARAMETER :: routinen =
'balance_global_list'
1009 INTEGER,
PARAMETER :: max_iter = 100
1010 REAL(kind=
dp),
PARAMETER :: tolerance_factor = 0.005_dp
1012 INTEGER :: dest, handle, icpu, idest, iflux, &
1013 ilocal, k, maxdest, ncpu, nflux
1014 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: flux_connections
1015 LOGICAL :: solution_optimal
1016 REAL(kind=
dp) :: average, load_shift, max_load_shift, &
1018 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: load, optimized_flux, optimized_load
1019 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: flux_limits
1021 CALL timeset(routinen, handle)
1023 ncpu =
SIZE(list_global, 3)
1024 maxdest =
SIZE(list_global, 2)
1025 ALLOCATE (load(0:ncpu - 1))
1027 ALLOCATE (optimized_load(0:ncpu - 1))
! Counting pass: one flux per entry whose destination is a valid rank greater than icpu,
! so each connected pair is counted once.
1032 DO icpu = 0, ncpu - 1
1033 DO idest = 1, maxdest
1034 dest = list_global(1, idest, icpu)
1035 IF (dest < ncpu .AND. dest > icpu) nflux = nflux + 1
1038 ALLOCATE (optimized_flux(nflux))
1039 ALLOCATE (flux_limits(2, nflux))
1040 ALLOCATE (flux_connections(2, nflux))
! Fill pass: per-rank load is the sum over its entries; for dest > icpu the entry's load
! becomes the upper limit (row 2) of the flux icpu -> dest.
1045 DO icpu = 0, ncpu - 1
1046 load(icpu) = sum(list_global(2, :, icpu))
1047 DO idest = 1, maxdest
1048 dest = list_global(1, idest, icpu)
1049 IF (dest < ncpu)
THEN
1050 IF (dest .NE. icpu)
THEN
1051 IF (dest > icpu)
THEN
1053 flux_limits(2, nflux) = list_global(2, idest, icpu)
1054 flux_connections(1, nflux) = icpu
1055 flux_connections(2, nflux) = dest
! Reverse direction: find the flux already registered as dest -> icpu and store the
! negated load as its lower limit (row 1).
1058 IF (flux_connections(1, iflux) == dest .AND. flux_connections(2, iflux) == icpu)
THEN
1059 flux_limits(1, iflux) = -list_global(2, idest, icpu)
1069 solution_optimal = .false.
1070 optimized_flux = 0.0_dp
! Convergence threshold is a fixed fraction of the average load.
1077 average = sum(load)/
SIZE(load)
1078 tolerance = tolerance_factor*average
1080 optimized_load(:) = load
1082 max_load_shift = 0.0_dp
! Per flux: aim to move half the load difference between the two ends, clamped so the
! accumulated flux stays within [flux_limits(1), flux_limits(2)].
1084 load_shift = (optimized_load(flux_connections(1, iflux)) - optimized_load(flux_connections(2, iflux)))/2
1085 load_shift = max(flux_limits(1, iflux) - optimized_flux(iflux), load_shift)
1086 load_shift = min(flux_limits(2, iflux) - optimized_flux(iflux), load_shift)
1087 max_load_shift = max(abs(load_shift), max_load_shift)
1088 optimized_load(flux_connections(1, iflux)) = optimized_load(flux_connections(1, iflux)) - load_shift
1089 optimized_load(flux_connections(2, iflux)) = optimized_load(flux_connections(2, iflux)) + load_shift
1090 optimized_flux(iflux) = optimized_flux(iflux) + load_shift
! Converged once the largest shift seen falls below the tolerance.
1092 IF (max_load_shift < tolerance)
THEN
1093 solution_optimal = .true.
! Write-back pass: ilocal is the entry of rank icpu that points to itself; load not
! sent along a flux is added to that local entry.
1101 DO icpu = 0, ncpu - 1
1102 DO idest = 1, maxdest
1103 IF (list_global(1, idest, icpu) == icpu) ilocal = idest
1105 DO idest = 1, maxdest
1106 dest = list_global(1, idest, icpu)
1107 IF (dest < ncpu)
THEN
1108 IF (dest .NE. icpu)
THEN
1109 IF (dest > icpu)
THEN
! Forward flux positive: keep nint(flux) on this entry, return the rest to the local entry.
1111 IF (optimized_flux(nflux) > 0)
THEN
1112 list_global(2, ilocal, icpu) = list_global(2, ilocal, icpu) + &
1113 list_global(2, idest, icpu) - nint(optimized_flux(nflux))
1114 list_global(2, idest, icpu) = nint(optimized_flux(nflux))
! NOTE(review): presumably the ELSE branch (line not visible): nothing is sent, the whole
! entry goes back to the local entry.
1116 list_global(2, ilocal, icpu) = list_global(2, ilocal, icpu) + &
1117 list_global(2, idest, icpu)
1118 list_global(2, idest, icpu) = 0
! Reverse direction (flux registered as dest -> icpu): a positive flux means nothing is
! sent from icpu; otherwise -nint(flux) is sent and the rest returns to the local entry.
1122 IF (flux_connections(1, iflux) == dest .AND. flux_connections(2, iflux) == icpu)
THEN
1123 IF (optimized_flux(iflux) > 0)
THEN
1124 list_global(2, ilocal, icpu) = list_global(2, ilocal, icpu) + &
1125 list_global(2, idest, icpu)
1126 list_global(2, idest, icpu) = 0
1128 list_global(2, ilocal, icpu) = list_global(2, ilocal, icpu) + &
1129 list_global(2, idest, icpu) + nint(optimized_flux(iflux))
1130 list_global(2, idest, icpu) = -nint(optimized_flux(iflux))
1141 CALL timestop(handle)
1143 END SUBROUTINE balance_global_list
!> \brief Sums the per-rank load lists onto a root rank, balances the global list there,
!>        broadcasts it, and then adjusts this rank's list(2,:,:) against the balanced values.
!> \param list   list(1,:,:) is copied as destinations, list(2,:,:) as loads; the third
!>               dimension is indexed from 0
!> \param group  communicator object providing sum, bcast and sum_partial
!> \param my_pos rank of the caller, compared against rank_of_root
!> \note NOTE(review): the declaration of group, the loop over icpu and several ELSE/END
!>       lines are not visible in this listing.
1156 SUBROUTINE optimize_load_list(list, group, my_pos)
1157 INTEGER,
DIMENSION(:, :, 0:) ::
list
1159 INTEGER,
INTENT(IN) :: my_pos
1161 CHARACTER(LEN=*),
PARAMETER :: routinen =
'optimize_load_list'
1162 INTEGER,
PARAMETER :: rank_of_root = 0
1164 INTEGER :: handle, icpu, idest, maxdest, ncpu
1165 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: load_all
1166 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: load_partial
1167 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :) :: list_global
1169 CALL timeset(routinen, handle)
1171 ncpu =
SIZE(
list, 3)
1172 maxdest =
SIZE(
list, 2)
! Flatten the local loads and sum them onto rank_of_root.
1175 ALLOCATE (load_all(maxdest*ncpu))
1176 load_all(:) = reshape(
list(2, :, :), (/maxdest*ncpu/))
1177 CALL group%sum(load_all(:), rank_of_root)
! Only the root assembles and balances the global list; everyone receives it by bcast.
! Note that list_global is allocated with a 1-based third dimension.
1180 ALLOCATE (list_global(2, maxdest, ncpu))
1181 IF (rank_of_root .EQ. my_pos)
THEN
1182 list_global(1, :, :) =
list(1, :, :)
1183 list_global(2, :, :) = reshape(load_all, (/maxdest, ncpu/))
1184 CALL balance_global_list(list_global)
1186 CALL group%bcast(list_global, rank_of_root)
! NOTE(review): sum_partial presumably yields a running sum over ranks - confirm against
! the communicator implementation.
1189 ALLOCATE (load_partial(maxdest, ncpu))
1191 CALL group%sum_partial(reshape(load_all, (/maxdest, ncpu/)), load_partial(:, :))
! list is 0-based in its third dimension while load_partial/list_global are 1-based,
! hence the icpu - 1 index. When the partial sum exceeds the balanced value, this rank
! keeps only the part that still fits below it, or nothing at all.
1194 DO idest = 1, maxdest
1197 IF (load_partial(idest, icpu) > list_global(2, idest, icpu))
THEN
1198 IF (load_partial(idest, icpu) -
list(2, idest, icpu - 1) < list_global(2, idest, icpu))
THEN
1199 list(2, idest, icpu - 1) = list_global(2, idest, icpu) &
1200 - (load_partial(idest, icpu) -
list(2, idest, icpu - 1))
! NOTE(review): presumably the ELSE branch; the ELSE line is not visible here.
1202 list(2, idest, icpu - 1) = 0
1210 DEALLOCATE (load_all)
1211 DEALLOCATE (list_global)
1212 DEALLOCATE (load_partial)
1214 CALL timestop(handle)
1215 END SUBROUTINE optimize_load_list
!> \brief Walks the task array in runs of consecutive tasks that share atom pair and image.
!>        For each task on grid_level it either adds the task cost to list (create_list
!>        true) or rewrites the task destination from the budget left in list (otherwise).
!> \param list        list(1, li, dest) is compared with ranks, list(2, li, dest) holds the
!>                    accumulated or remaining cost; the third dimension is indexed from 0
!> \param rs_descs    grid descriptors; SIZE(rs_descs) is passed to encode_rank/decode_rank
!> \param grid_level  only tasks with this grid_level are processed
!> \param tasks       task array; destination may be modified
!> \param ntasks      number of tasks
!> \param natoms      NOTE(review): presumably the source of natom8 - that assignment is not visible
!> \param create_list selects the accumulate pass versus the assign pass
!> \note NOTE(review): many lines are missing from this listing (loop headers, ELSE/END
!>       lines, CASE labels, the computation of rank for each bit and the resets of
!>       nopt/ndest_pair/itask_stop). The comments describe only the visible statements.
1234 SUBROUTINE compute_load_list(list, rs_descs, grid_level, tasks, ntasks, natoms, create_list)
1235 INTEGER,
DIMENSION(:, :, 0:) ::
list
1238 INTEGER :: grid_level
1239 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1240 INTEGER :: ntasks, natoms
1241 LOGICAL :: create_list
1243 CHARACTER(LEN=*),
PARAMETER :: routinen =
'compute_load_list'
1245 INTEGER :: cost, dest, handle, i, iatom, ilevel, img, img_old, iopt, ipgf, iset, itask, &
1246 itask_start, itask_stop, jatom, jpgf, jset, li, maxdest, ncpu, ndest_pair, nopt, nshort, &
1248 INTEGER(KIND=int_8) :: bit_pattern, ipair, ipair_old, natom8
1249 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: loads
1250 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: all_dests, index
1251 INTEGER,
DIMENSION(6) :: options
1253 CALL timeset(routinen, handle)
! Current per-rank loads for this grid level, indexed from 0, without rank reordering.
1255 ALLOCATE (loads(0:rs_descs(grid_level)%rs_desc%group_size - 1))
1256 CALL get_current_loads(loads, rs_descs, grid_level, ntasks, tasks, use_reordered_ranks=.false.)
1258 maxdest =
SIZE(
list, 2)
1259 ncpu =
SIZE(
list, 3)
1264 ipair_old = huge(ipair_old)
! Start with a zero-size all_dests so the DEALLOCATE inside the loop is always valid.
1266 ALLOCATE (all_dests(0))
! Start a new run right after the previous one; leave when the tasks are exhausted.
1272 itask_start = itask_stop + 1
1273 itask_stop = itask_start
1274 IF (itask_stop > ntasks)
EXIT
1275 ilevel = tasks(itask_stop)%grid_level
1276 img_old = tasks(itask_stop)%image
1277 iatom = tasks(itask_stop)%iatom
1278 jatom = tasks(itask_stop)%jatom
1279 iset = tasks(itask_stop)%iset
1280 jset = tasks(itask_stop)%jset
1281 ipgf = tasks(itask_stop)%ipgf
1282 jpgf = tasks(itask_stop)%jpgf
! Linear index of the atom pair, computed in int_8 arithmetic.
1284 ipair_old = (iatom - 1)*natom8 + (jatom - 1)
! Extend the run while the next task has the same pair index and image.
1286 IF (itask_stop + 1 > ntasks)
EXIT
1287 ilevel = tasks(itask_stop + 1)%grid_level
1288 img = tasks(itask_stop + 1)%image
1289 iatom = tasks(itask_stop + 1)%iatom
1290 jatom = tasks(itask_stop + 1)%jatom
1291 iset = tasks(itask_stop + 1)%iset
1292 jset = tasks(itask_stop + 1)%jset
1293 ipgf = tasks(itask_stop + 1)%ipgf
1294 jpgf = tasks(itask_stop + 1)%jpgf
1296 ipair = (iatom - 1)*natom8 + (jatom - 1)
1297 IF (ipair == ipair_old .AND. img == img_old)
THEN
1298 itask_stop = itask_stop + 1
! Number of tasks in the current run.
1304 nshort = itask_stop - itask_start + 1
1307 DEALLOCATE (all_dests)
1308 ALLOCATE (all_dests(nshort))
1310 ALLOCATE (index(nshort))
! Collect the destination rank of every task of the run (loop over i not shown).
1312 ilevel = tasks(itask_start + i - 1)%grid_level
1313 img = tasks(itask_start + i - 1)%image
1314 iatom = tasks(itask_start + i - 1)%iatom
1315 jatom = tasks(itask_start + i - 1)%jatom
1316 iset = tasks(itask_start + i - 1)%iset
1317 jset = tasks(itask_start + i - 1)%jset
1318 ipgf = tasks(itask_start + i - 1)%ipgf
1319 jpgf = tasks(itask_start + i - 1)%jpgf
! Tasks on other grid levels get huge() as a sentinel so they sort to the end.
1321 IF (ilevel .EQ. grid_level)
THEN
1322 all_dests(i) = decode_rank(tasks(itask_start + i - 1)%destination,
SIZE(rs_descs))
1324 all_dests(i) = huge(all_dests(i))
1327 CALL sort(all_dests, nshort, index)
! Compact the sorted list in place: keep each distinct non-sentinel rank once, so that
! all_dests(1:ndest_pair) holds the distinct destinations of the run.
1330 IF ((all_dests(ndest_pair) .NE. all_dests(i)) .AND. (all_dests(i) .NE. huge(all_dests(i))))
THEN
1331 ndest_pair = ndest_pair + 1
1332 all_dests(ndest_pair) = all_dests(i)
! Process each task of the run.
1336 DO itask = itask_start, itask_stop
1338 dest = decode_rank(tasks(itask)%destination,
SIZE(rs_descs))
1339 ilevel = tasks(itask)%grid_level
1340 img = tasks(itask)%image
1341 iatom = tasks(itask)%iatom
1342 jatom = tasks(itask)%jatom
1343 iset = tasks(itask)%iset
1344 jset = tasks(itask)%jset
1345 ipgf = tasks(itask)%ipgf
1346 jpgf = tasks(itask)%jpgf
1349 IF (ilevel .NE. grid_level) cycle
1350 ipair = (iatom - 1)*natom8 + (jatom - 1)
1351 cost = int(tasks(itask)%cost)
1353 SELECT CASE (tasks(itask)%dist_type)
! For each of the six bits set in subpatch_pattern, a candidate rank (its computation is
! not visible here) is recorded in options when it is already a destination in this run.
1355 bit_pattern = tasks(itask)%subpatch_pattern
1357 IF (btest(bit_pattern, 0))
THEN
1359 IF (any(all_dests(1:ndest_pair) .EQ. rank))
THEN
1361 options(nopt) = rank
1364 IF (btest(bit_pattern, 1))
THEN
1366 IF (any(all_dests(1:ndest_pair) .EQ. rank))
THEN
1368 options(nopt) = rank
1371 IF (btest(bit_pattern, 2))
THEN
1373 IF (any(all_dests(1:ndest_pair) .EQ. rank))
THEN
1375 options(nopt) = rank
1378 IF (btest(bit_pattern, 3))
THEN
1380 IF (any(all_dests(1:ndest_pair) .EQ. rank))
THEN
1382 options(nopt) = rank
1385 IF (btest(bit_pattern, 4))
THEN
1387 IF (any(all_dests(1:ndest_pair) .EQ. rank))
THEN
1389 options(nopt) = rank
1392 IF (btest(bit_pattern, 5))
THEN
1394 IF (any(all_dests(1:ndest_pair) .EQ. rank))
THEN
1396 options(nopt) = rank
! Among the recorded options, switch to one whose current load is lower.
1403 IF (loads(rank) > loads(options(iopt))) rank = options(iopt)
! Slot of the chosen rank in the list of the original destination.
1408 li = list_index(
list, rank, dest)
! Accumulate pass: add the cost to that slot.
1409 IF (create_list)
THEN
1410 list(2, li, dest) =
list(2, li, dest) + cost
! NOTE(review): presumably the assign pass (ELSE line not visible): a slot pointing at
! dest itself keeps the task on dest; otherwise the task moves to list(1, li, dest) only
! while the slot's remaining budget covers its cost, else it stays on dest.
1412 IF (
list(1, li, dest) == dest)
THEN
1413 tasks(itask)%destination = encode_rank(dest, ilevel,
SIZE(rs_descs))
1415 IF (
list(2, li, dest) >= cost)
THEN
1416 list(2, li, dest) =
list(2, li, dest) - cost
1417 tasks(itask)%destination = encode_rank(
list(1, li, dest), ilevel,
SIZE(rs_descs))
1419 tasks(itask)%destination = encode_rank(dest, ilevel,
SIZE(rs_descs))
! Other dist_type branch (CASE label not visible): the slot is the one dest has for itself;
! the accumulate/assign logic is the same as above.
1424 li = list_index(
list, dest, dest)
1425 IF (create_list)
THEN
1426 list(2, li, dest) =
list(2, li, dest) + cost
1428 IF (
list(1, li, dest) == dest)
THEN
1429 tasks(itask)%destination = encode_rank(dest, ilevel,
SIZE(rs_descs))
1431 IF (
list(2, li, dest) >= cost)
THEN
1432 list(2, li, dest) =
list(2, li, dest) - cost
1433 tasks(itask)%destination = encode_rank(
list(1, li, dest), ilevel,
SIZE(rs_descs))
1435 tasks(itask)%destination = encode_rank(dest, ilevel,
SIZE(rs_descs))
1447 CALL timestop(handle)
1449 END SUBROUTINE compute_load_list
!> \brief Searches list(1, :, dest) for rank and returns the matching second-dimension index.
!> \param list third dimension indexed from 0; list(1, i, dest) is compared with rank
!> \param rank value to look for
!> \param dest third-dimension index of the list to search
!> \return the index at which list(1, index, dest) == rank
!> \note NOTE(review): the initial value of list_index and the loop header are not visible
!>       in this listing; no bound check on the index is visible either, so the rank is
!>       presumably expected to be present - confirm.
1460 INTEGER FUNCTION list_index(list, rank, dest)
1461 INTEGER,
DIMENSION(:, :, 0:),
INTENT(IN) ::
list
1462 INTEGER,
INTENT(IN) :: rank, dest
! Stop at the first match, otherwise advance to the next slot.
1466 IF (
list(1, list_index, dest) == rank)
EXIT
1467 list_index = list_index + 1
1469 END FUNCTION list_index
!> \brief Allocates list and fills, for every rank, a sorted list of distinct destination
!>        entries in list(1,:,icpu) with zeroed loads in list(2,:,icpu).
!> \param list       must not be associated on entry; allocated here as
!>                   (2, ultimate_max, 0:ncpu-1)
!> \param rs_descs   the group_size of rs_descs(grid_level) gives ncpu
!> \param grid_level grid level whose descriptor is used
!> \note NOTE(review): the definition of ultimate_max, the code filling sublist for each
!>       rank and the de-duplication loop header are not visible in this listing.
1480 SUBROUTINE create_destination_list(list, rs_descs, grid_level)
1481 INTEGER,
DIMENSION(:, :, :),
POINTER ::
list
1484 INTEGER,
INTENT(IN) :: grid_level
1486 CHARACTER(LEN=*),
PARAMETER :: routinen =
'create_destination_list'
1488 INTEGER :: handle, i, icpu, j, maxcount, ncpu, &
1490 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: index, sublist
1492 CALL timeset(routinen, handle)
! The routine allocates list itself, so an already associated pointer is an error.
1494 cpassert(.NOT.
ASSOCIATED(
list))
1495 ncpu = rs_descs(grid_level)%rs_desc%group_size
1498 ALLOCATE (
list(2, ultimate_max, 0:ncpu - 1))
1500 ALLOCATE (index(ultimate_max))
1501 ALLOCATE (sublist(ultimate_max))
! huge() marks unused slots; it sorts after every real entry.
1502 sublist = huge(sublist)
1505 DO icpu = 0, ncpu - 1
1514 CALL sort(sublist, ultimate_max, index)
! In-place de-duplication of the sorted sublist: j indexes the last distinct entry kept.
1517 IF (sublist(i) .NE. sublist(j))
THEN
1519 sublist(j) = sublist(i)
! Track the largest number of distinct entries and reset the tail to the sentinel.
1522 maxcount = max(maxcount, j)
1523 sublist(j + 1:ultimate_max) = huge(sublist)
1524 list(1, :, icpu) = sublist
1525 list(2, :, icpu) = 0
1530 CALL timestop(handle)
1532 END SUBROUTINE create_destination_list
!> \brief Gathers, for one grid level, the total load assigned to each rank.
!> \param loads       receives the result of the allgather of each rank's local total
!> \param rs_descs    grid descriptors; rs_descs(grid_level)%rs_desc supplies group and group_size
!> \param grid_level  only tasks on this grid level are considered
!> \param ntasks      number of tasks
!> \param tasks       task array; grid_level and destination of each task are read
!> \param use_reordered_ranks when true the decoded destination is mapped through
!>                    virtual2real before being used as buffer index
!> \note NOTE(review): the loop over tasks, the initialisation of send_buf_i, the quantity
!>       added to send_buf_i (continuation lines) and the deallocations are not visible in
!>       this listing; presumably the task cost is accumulated - confirm.
1548 SUBROUTINE get_current_loads(loads, rs_descs, grid_level, ntasks, tasks, use_reordered_ranks)
1549 INTEGER(KIND=int_8),
DIMENSION(:) :: loads
1552 INTEGER :: grid_level, ntasks
1553 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1554 LOGICAL,
INTENT(IN) :: use_reordered_ranks
1556 CHARACTER(LEN=*),
PARAMETER :: routinen =
'get_current_loads'
1558 INTEGER :: handle, i, iatom, ilevel, img, ipgf, &
1559 iset, jatom, jpgf, jset
1560 INTEGER(KIND=int_8) :: total_cost_local
1561 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: recv_buf_i, send_buf_i
1564 CALL timeset(routinen, handle)
1566 desc => rs_descs(grid_level)%rs_desc
! One slot per rank of the group in each buffer (1-based, hence the "+ 1" below).
1569 ALLOCATE (send_buf_i(desc%group_size))
1570 ALLOCATE (recv_buf_i(desc%group_size))
1578 ilevel = tasks(i)%grid_level
1579 img = tasks(i)%image
1580 iatom = tasks(i)%iatom
1581 jatom = tasks(i)%jatom
1582 iset = tasks(i)%iset
1583 jset = tasks(i)%jset
1584 ipgf = tasks(i)%ipgf
1585 jpgf = tasks(i)%jpgf
1586 IF (ilevel .NE. grid_level) cycle
! Accumulate into the slot of the task's destination rank, optionally translated
! through virtual2real.
1587 IF (use_reordered_ranks)
THEN
1588 send_buf_i(rs_descs(ilevel)%rs_desc%virtual2real(decode_rank(tasks(i)%destination,
SIZE(rs_descs))) + 1) = &
1589 send_buf_i(rs_descs(ilevel)%rs_desc%virtual2real(decode_rank(tasks(i)%destination,
SIZE(rs_descs))) + 1) &
1592 send_buf_i(decode_rank(tasks(i)%destination,
SIZE(rs_descs)) + 1) = &
1593 send_buf_i(decode_rank(tasks(i)%destination,
SIZE(rs_descs)) + 1) &
! Exchange one value with every rank: afterwards recv_buf_i holds what each rank has
! destined for this rank.
1597 CALL desc%group%alltoall(send_buf_i, recv_buf_i, 1)
! Load of this rank is the sum of all contributions received.
1600 total_cost_local = sum(recv_buf_i)
! Make every rank's total known to all ranks.
1603 CALL desc%group%allgather(total_cost_local, loads)
1605 CALL timestop(handle)
1607 END SUBROUTINE get_current_loads
!> \brief Redirects tasks with dist_type == 0 whose destination rank equals
!>        their source rank from overloaded ranks to underloaded ranks by
!>        rewriting tasks(j)%destination.
!> \param rs_descs realspace grid descriptors; rs_descs(1)%rs_desc supplies the
!>        group and its size
!> \param ntasks presumably the number of valid entries in tasks (loop headers
!>        are not visible in this listing)
!> \param tasks task array; destination of selected tasks is modified
!> \note NOTE(review): many lines (initialisations, loop headers, END IF/END DO)
!>       are missing from this listing; the comments below describe only the
!>       visible statements.
1619 SUBROUTINE load_balance_replicated(rs_descs, ntasks, tasks)
1624 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1626 CHARACTER(LEN=*),
PARAMETER :: routinen =
'load_balance_replicated'
1628 INTEGER :: handle, i, iatom, ilevel, img, ipgf, &
1629 iset, j, jatom, jpgf, jset, &
1630 no_overloaded, no_underloaded, &
1632 INTEGER(KIND=int_8) :: average_cost, cost_task_rep, count, &
1633 offset, total_cost_global
1634 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: load_imbalance, loads, recv_buf_i
1635 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: index
1638 CALL timeset(routinen, handle)
1640 desc => rs_descs(1)%rs_desc
1643 ALLOCATE (recv_buf_i(desc%group_size))
1644 ALLOCATE (loads(desc%group_size))
! Accumulate the per-rank load over all grid levels (reordered ranks are used,
! so the buffer is indexed by real rank).
1647 DO i = 1,
SIZE(rs_descs)
1648 CALL get_current_loads(loads, rs_descs, i, ntasks, tasks, use_reordered_ranks=.true.)
1649 recv_buf_i(:) = recv_buf_i + loads
! Integer division: average_cost is truncated.
1652 total_cost_global = sum(recv_buf_i)
1653 average_cost = total_cost_global/desc%group_size
1661 ALLOCATE (load_imbalance(desc%group_size))
1662 ALLOCATE (index(desc%group_size))
! Positive entries are above average (overloaded), negative below.
1664 load_imbalance(:) = recv_buf_i - average_cost
1668 DO i = 1, desc%group_size
1669 IF (load_imbalance(i) .GT. 0) no_overloaded = no_overloaded + 1
1670 IF (load_imbalance(i) .LT. 0) no_underloaded = no_underloaded + 1
! Sort the loads; `index` is used below as index(sorted position) -> position
! in load_imbalance, which itself stays in its original order.
1675 CALL sort(recv_buf_i,
SIZE(recv_buf_i), index)
! Sum the cost of the local tasks that are eligible for moving:
! dist_type == 0 and destination rank equal to source rank.
1681 IF (tasks(i)%dist_type .EQ. 0 &
1682 .AND. decode_rank(tasks(i)%destination,
SIZE(rs_descs)) == decode_rank(tasks(i)%source,
SIZE(rs_descs)))
THEN
1683 cost_task_rep = cost_task_rep + tasks(i)%cost
! recv_buf_i is reused: it now holds every rank's movable cost.
1689 CALL desc%group%allgather(cost_task_rep, recv_buf_i)
1691 DO i = 1, desc%group_size
! A rank cannot give away more than its movable cost.
1693 IF (load_imbalance(i) .GT. 0) &
1694 load_imbalance(i) = min(load_imbalance(i), recv_buf_i(i))
! Only overloaded ranks reassign tasks.
1703 IF (load_imbalance(desc%my_pos + 1) .GT. 0)
THEN
! Walk the overloaded ranks from the top of the sorted order down, adding their
! surplus to `offset`; the branch taken on reaching this rank is not visible
! in this listing.
1709 DO i = desc%group_size, desc%group_size - no_overloaded + 1, -1
1710 IF (index(i) .EQ. desc%my_pos + 1)
THEN
1713 offset = offset + load_imbalance(index(i))
! HUGE acts as "no receiver found"; see the EXIT tests against no_underloaded.
1718 proc_receiving = huge(proc_receiving)
! Walk the underloaded ranks (lowest sorted positions), adding their
! (negative) imbalance to `offset`.
1719 DO i = 1, no_underloaded
1720 offset = offset + load_imbalance(index(i))
1721 IF (offset .LE. 0)
THEN
! Same eligibility test as above, now for task j.
1731 IF (tasks(j)%dist_type .EQ. 0 &
1732 .AND. decode_rank(tasks(j)%destination,
SIZE(rs_descs)) == decode_rank(tasks(j)%source,
SIZE(rs_descs)))
THEN
! Stop when no underloaded receiver is left.
1735 IF (proc_receiving .GT. no_underloaded)
EXIT
1737 ilevel = tasks(j)%grid_level
1738 img = tasks(j)%image
1739 iatom = tasks(j)%iatom
1740 jatom = tasks(j)%jatom
1741 iset = tasks(j)%iset
1742 jset = tasks(j)%jset
1743 ipgf = tasks(j)%ipgf
1744 jpgf = tasks(j)%jpgf
! Re-target the task; index(...) is 1-based, hence the "- 1" before encoding.
1745 tasks(j)%destination = encode_rank(index(proc_receiving) - 1, ilevel,
SIZE(rs_descs))
! offset tracks the receiver's filling, count what this rank has given away.
1746 offset = offset + tasks(j)%cost
1747 count = count + tasks(j)%cost
! Done once this rank has shed its (capped) surplus.
1748 IF (count .GE. load_imbalance(desc%my_pos + 1))
EXIT
! Receiver no longer below average: advance to the next underloaded rank.
1749 IF (offset .GT. 0)
THEN
1750 proc_receiving = proc_receiving + 1
1753 IF (proc_receiving .GT. no_underloaded)
EXIT
1754 offset = load_imbalance(index(proc_receiving))
1761 DEALLOCATE (load_imbalance)
1763 CALL timestop(handle)
1765 END SUBROUTINE load_balance_replicated
!> \brief Exchanges tasks between ranks: counts are exchanged with a
!>        fixed-size alltoall, then the task payload with a variable-size
!>        alltoall, and the result is placed in the newly allocated tasks_recv.
!> \param rs_descs realspace grid descriptors; rs_descs(1)%rs_desc supplies the group
!> \param ntasks presumably the number of valid entries in tasks (loop headers
!>        are not visible in this listing)
!> \param tasks tasks to send; the target rank is derived from tasks(i)%destination
!> \param ntasks_recv size used to allocate tasks_recv (its computation is on
!>        lines missing from this listing)
!> \param tasks_recv allocated here with ntasks_recv entries
!> \note NOTE(review): how a task is serialised into send_buf and restored
!>       from recv_buf is on lines not visible here.
1779 SUBROUTINE create_local_tasks(rs_descs, ntasks, tasks, ntasks_recv, tasks_recv)
1784 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1785 INTEGER :: ntasks_recv
1786 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks_recv
1788 CHARACTER(LEN=*),
PARAMETER :: routinen =
'create_local_tasks'
1790 INTEGER :: handle, i, j, k, l, rank
1791 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: recv_buf, send_buf
1792 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: recv_disps, recv_sizes, send_disps, &
1796 CALL timeset(routinen, handle)
1798 desc => rs_descs(1)%rs_desc
! First phase: one counter per rank.
1801 ALLOCATE (send_sizes(desc%group_size))
1802 ALLOCATE (recv_sizes(desc%group_size))
1803 ALLOCATE (send_disps(desc%group_size))
1804 ALLOCATE (recv_disps(desc%group_size))
1805 ALLOCATE (send_buf(desc%group_size))
1806 ALLOCATE (recv_buf(desc%group_size))
! The destination encodes both grid level and (virtual) rank; the real rank is
! looked up in the virtual2real table of that grid level.
1811 rank = rs_descs(decode_level(tasks(i)%destination,
SIZE(rs_descs))) &
1812 %rs_desc%virtual2real(decode_rank(tasks(i)%destination,
SIZE(rs_descs)))
! Count one more task for that rank (rank is 0-based, buffer is 1-based).
1813 send_buf(rank + 1) = send_buf(rank + 1) + 1
! Tell every rank how many tasks it will receive from us.
1816 CALL desc%group%alltoall(send_buf, recv_buf, 1)
! Displacements are the running sums of the preceding sizes.
1827 DO i = 2, desc%group_size
1830 send_disps(i) = send_disps(i - 1) + send_sizes(i - 1)
1831 recv_disps(i) = recv_disps(i - 1) + recv_sizes(i - 1)
! Second phase: reallocate the buffers to hold the actual payload.
1835 DEALLOCATE (send_buf)
1836 DEALLOCATE (recv_buf)
1839 ALLOCATE (send_buf(sum(send_sizes)))
1840 ALLOCATE (recv_buf(sum(recv_sizes)))
! i = 1-based real destination rank of task j.
1846 i = rs_descs(decode_level(tasks(j)%destination,
SIZE(rs_descs))) &
1847 %rs_desc%virtual2real(decode_rank(tasks(j)%destination,
SIZE(rs_descs))) + 1
! l = current write position inside rank i's segment; send_sizes(i) appears to
! be reused as a fill counter here - TODO confirm on the elided lines.
1848 l = send_disps(i) + send_sizes(i)
1854 CALL desc%group%alltoall(send_buf, send_sizes, send_disps, recv_buf, recv_sizes, recv_disps)
1856 DEALLOCATE (send_buf)
1859 ALLOCATE (tasks_recv(ntasks_recv))
1863 DO i = 1, desc%group_size
1871 DEALLOCATE (recv_buf)
1872 DEALLOCATE (send_sizes)
1873 DEALLOCATE (recv_sizes)
1874 DEALLOCATE (send_disps)
1875 DEALLOCATE (recv_disps)
1877 CALL timestop(handle)
1879 END SUBROUTINE create_local_tasks
1899 tasks, atom_pair_send, atom_pair_recv, &
1900 symmetric, reorder_rs_grid_ranks, skip_load_balance_distributed)
1904 INTEGER :: ntasks, natoms
1905 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1906 TYPE(
atom_pair_type),
DIMENSION(:),
POINTER :: atom_pair_send, atom_pair_recv
1907 LOGICAL,
INTENT(IN) :: symmetric, reorder_rs_grid_ranks, &
1908 skip_load_balance_distributed
1910 CHARACTER(LEN=*),
PARAMETER :: routinen =
'distribute_tasks'
1912 INTEGER :: handle, igrid_level, irank, ntasks_recv
1913 INTEGER(KIND=int_8) :: load_gap, max_load, replicated_load
1914 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: total_loads, total_loads_tmp, trial_loads
1915 INTEGER(KIND=int_8),
DIMENSION(:, :),
POINTER :: loads
1916 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: indices, real2virtual, total_index
1917 LOGICAL :: distributed_grids, fixed_first_grid
1919 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks_recv
1921 CALL timeset(routinen, handle)
1923 cpassert(
ASSOCIATED(tasks))
1926 distributed_grids = .false.
1927 DO igrid_level = 1,
SIZE(rs_descs)
1928 IF (rs_descs(igrid_level)%rs_desc%distributed)
THEN
1929 distributed_grids = .true.
1932 desc => rs_descs(1)%rs_desc
1934 IF (distributed_grids)
THEN
1936 ALLOCATE (loads(0:desc%group_size - 1,
SIZE(rs_descs)))
1937 ALLOCATE (total_loads(0:desc%group_size - 1))
1943 DO igrid_level = 1,
SIZE(rs_descs)
1944 IF (rs_descs(igrid_level)%rs_desc%distributed)
THEN
1946 IF (.NOT. skip_load_balance_distributed) &
1947 CALL load_balance_distributed(tasks, ntasks, rs_descs, igrid_level, natoms)
1949 CALL get_current_loads(loads(:, igrid_level), rs_descs, igrid_level, ntasks, &
1950 tasks, use_reordered_ranks=.false.)
1952 total_loads(:) = total_loads + loads(:, igrid_level)
1961 DO igrid_level = 1,
SIZE(rs_descs)
1962 IF (.NOT. rs_descs(igrid_level)%rs_desc%distributed)
THEN
1963 CALL get_current_loads(loads(:, igrid_level), rs_descs, igrid_level, ntasks, &
1964 tasks, use_reordered_ranks=.false.)
1965 replicated_load = replicated_load + sum(loads(:, igrid_level))
1975 IF (reorder_rs_grid_ranks)
THEN
1976 fixed_first_grid = .false.
1977 DO igrid_level = 1,
SIZE(rs_descs)
1978 IF (rs_descs(igrid_level)%rs_desc%distributed)
THEN
1979 IF (fixed_first_grid .EQV. .false.)
THEN
1980 total_loads(:) = loads(:, igrid_level)
1981 fixed_first_grid = .true.
1983 ALLOCATE (trial_loads(0:desc%group_size - 1))
1985 trial_loads(:) = total_loads + loads(:, igrid_level)
1986 max_load = maxval(trial_loads)
1988 DO irank = 0, desc%group_size - 1
1989 load_gap = load_gap + max_load - trial_loads(irank)
1994 IF (load_gap > replicated_load*1.05_dp)
THEN
1996 ALLOCATE (indices(0:desc%group_size - 1))
1997 ALLOCATE (total_index(0:desc%group_size - 1))
1998 ALLOCATE (total_loads_tmp(0:desc%group_size - 1))
1999 ALLOCATE (real2virtual(0:desc%group_size - 1))
2001 total_loads_tmp(:) = total_loads
2002 CALL sort(total_loads_tmp, desc%group_size, total_index)
2003 CALL sort(loads(:, igrid_level), desc%group_size, indices)
2007 DO irank = 0, desc%group_size - 1
2008 total_loads(total_index(irank) - 1) = total_loads(total_index(irank) - 1) + &
2009 loads(desc%group_size - irank - 1, igrid_level)
2010 real2virtual(total_index(irank) - 1) = indices(desc%group_size - irank - 1) - 1
2015 DEALLOCATE (indices)
2016 DEALLOCATE (total_index)
2017 DEALLOCATE (total_loads_tmp)
2018 DEALLOCATE (real2virtual)
2020 total_loads(:) = trial_loads
2023 DEALLOCATE (trial_loads)
2031 CALL load_balance_replicated(rs_descs, ntasks, tasks)
2049 CALL create_local_tasks(rs_descs, ntasks, tasks, ntasks_recv, tasks_recv)
2055 CALL get_atom_pair(atom_pair_send, tasks, ntasks=ntasks, send=.true., symmetric=symmetric, rs_descs=rs_descs)
2064 CALL get_atom_pair(atom_pair_recv, tasks_recv, ntasks=ntasks_recv, send=.false., symmetric=symmetric, rs_descs=rs_descs)
2069 DEALLOCATE (total_loads)
2073 ntasks_recv = ntasks
2074 CALL get_atom_pair(atom_pair_recv, tasks_recv, ntasks=ntasks_recv, send=.false., symmetric=symmetric, rs_descs=rs_descs)
2079 ALLOCATE (indices(ntasks_recv))
2080 CALL tasks_sort(tasks_recv, ntasks_recv, indices)
2081 DEALLOCATE (indices)
2088 ntasks = ntasks_recv
2090 CALL timestop(handle)
!> \brief Builds the sorted list of distinct atom pairs referenced by `tasks`
!>        and stores in each task the index of its pair (tasks(:)%pair_index).
!> \param atom_pair must be disassociated on entry (asserted); allocated here
!>        with one entry per distinct pair (size 0 if ntasks == 0)
!> \param tasks task array; pair_index is written
!> \param ntasks number of tasks considered
!> \param send selects how the pair's rank is derived; presumably .TRUE. uses
!>        the (real) destination rank and .FALSE. the source rank - the
!>        branching lines are missing from this listing, TODO confirm
!> \param symmetric if true, pairs with iatom > jatom are stored with row and
!>        column swapped
!> \param rs_descs realspace grid descriptors (virtual2real lookup, rank decoding)
2104 SUBROUTINE get_atom_pair(atom_pair, tasks, ntasks, send, symmetric, rs_descs)
2107 TYPE(
task_type),
DIMENSION(:),
INTENT(INOUT) :: tasks
2108 INTEGER,
INTENT(IN) :: ntasks
2109 LOGICAL,
INTENT(IN) :: send, symmetric
2112 INTEGER :: i, ilevel, iatom, jatom, npairs, virt_rank
2113 INTEGER,
DIMENSION(:),
ALLOCATABLE :: indices
2116 cpassert(.NOT.
ASSOCIATED(atom_pair))
! No tasks: hand back an allocated, zero-length list.
2117 IF (ntasks == 0)
THEN
2118 ALLOCATE (atom_pair(0))
! One temporary pair per task; duplicates are removed after sorting.
2124 ALLOCATE (atom_pair_tmp(ntasks))
2126 atom_pair_tmp(i)%image = tasks(i)%image
2127 iatom = tasks(i)%iatom
2128 jatom = tasks(i)%jatom
! For symmetric matrices store the pair with row <= col.
2129 IF (symmetric .AND. iatom > jatom)
THEN
2131 atom_pair_tmp(i)%row = jatom
2132 atom_pair_tmp(i)%col = iatom
2134 atom_pair_tmp(i)%row = iatom
2135 atom_pair_tmp(i)%col = jatom
! Rank taken from the destination, translated from virtual to real rank ...
2141 ilevel = tasks(i)%grid_level
2142 virt_rank = decode_rank(tasks(i)%destination,
SIZE(rs_descs))
2143 atom_pair_tmp(i)%rank = rs_descs(ilevel)%rs_desc%virtual2real(virt_rank)
! ... or taken from the source (no virtual2real translation is applied here).
2147 atom_pair_tmp(i)%rank = decode_rank(tasks(i)%source,
SIZE(rs_descs))
! Sort the pairs; indices(i) gives the original task position of sorted entry i.
2152 ALLOCATE (indices(ntasks))
2153 CALL atom_pair_sort(atom_pair_tmp, ntasks, indices)
! The first sorted entry is always pair number 1.
2155 tasks(indices(1))%pair_index = 1
! A strict "less than" between neighbours marks the start of a new distinct
! pair, which is compacted to position npairs.
2157 IF (atom_pair_less_than(atom_pair_tmp(i - 1), atom_pair_tmp(i)))
THEN
2159 atom_pair_tmp(npairs) = atom_pair_tmp(i)
2161 tasks(indices(i))%pair_index = npairs
2163 DEALLOCATE (indices)
! Copy the compacted prefix into the result.
2166 ALLOCATE (atom_pair(npairs))
2167 atom_pair(:) = atom_pair_tmp(:npairs)
2168 DEALLOCATE (atom_pair_tmp)
2170 END SUBROUTINE get_atom_pair
2186 nimages, scatter, hmats)
2191 TYPE(
atom_pair_type),
DIMENSION(:),
POINTER :: atom_pair_send, atom_pair_recv
2197 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rs_distribute_matrix'
2199 INTEGER :: acol, arow, handle, i, img, j, k, l, me, &
2200 nblkcols_total, nblkrows_total, ncol, &
2201 nrow, nthread, nthread_left
2202 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: first_col, first_row, last_col, last_row, recv_disps, &
2203 recv_pair_count, recv_pair_disps, recv_sizes, send_disps, send_pair_count, &
2204 send_pair_disps, send_sizes
2205 INTEGER,
DIMENSION(:),
POINTER :: col_blk_size, row_blk_size
2207 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:),
TARGET :: recv_buf_r, send_buf_r
2208 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: h_block, p_block
2211 REAL(kind=
dp),
DIMENSION(:),
POINTER :: vector
2215 CALL timeset(routinen, handle)
2217 IF (.NOT. scatter)
THEN
2218 cpassert(
PRESENT(hmats))
2221 desc => rs_descs(1)%rs_desc
2222 me = desc%my_pos + 1
2225 ALLOCATE (send_sizes(desc%group_size))
2226 ALLOCATE (recv_sizes(desc%group_size))
2227 ALLOCATE (send_disps(desc%group_size))
2228 ALLOCATE (recv_disps(desc%group_size))
2229 ALLOCATE (send_pair_count(desc%group_size))
2230 ALLOCATE (recv_pair_count(desc%group_size))
2231 ALLOCATE (send_pair_disps(desc%group_size))
2232 ALLOCATE (recv_pair_disps(desc%group_size))
2234 pmat => pmats(1)%matrix
2236 row_blk_size=row_blk_size, &
2237 col_blk_size=col_blk_size, &
2238 nblkrows_total=nblkrows_total, &
2239 nblkcols_total=nblkcols_total)
2240 ALLOCATE (first_row(nblkrows_total), last_row(nblkrows_total), &
2241 first_col(nblkcols_total), last_col(nblkcols_total))
2242 CALL dbcsr_convert_sizes_to_offsets(row_blk_size, first_row, last_row)
2243 CALL dbcsr_convert_sizes_to_offsets(col_blk_size, first_col, last_col)
2248 DO i = 1,
SIZE(atom_pair_send)
2249 k = atom_pair_send(i)%rank + 1
2250 arow = atom_pair_send(i)%row
2251 acol = atom_pair_send(i)%col
2252 nrow = last_row(arow) - first_row(arow) + 1
2253 ncol = last_col(acol) - first_col(acol) + 1
2254 send_sizes(k) = send_sizes(k) + nrow*ncol
2255 send_pair_count(k) = send_pair_count(k) + 1
2260 DO i = 2, desc%group_size
2261 send_disps(i) = send_disps(i - 1) + send_sizes(i - 1)
2262 send_pair_disps(i) = send_pair_disps(i - 1) + send_pair_count(i - 1)
2265 ALLOCATE (send_buf_r(sum(send_sizes)))
2271 DO i = 1,
SIZE(atom_pair_recv)
2272 k = atom_pair_recv(i)%rank + 1
2273 arow = atom_pair_recv(i)%row
2274 acol = atom_pair_recv(i)%col
2275 nrow = last_row(arow) - first_row(arow) + 1
2276 ncol = last_col(acol) - first_col(acol) + 1
2277 recv_sizes(k) = recv_sizes(k) + nrow*ncol
2278 recv_pair_count(k) = recv_pair_count(k) + 1
2283 DO i = 2, desc%group_size
2284 recv_disps(i) = recv_disps(i - 1) + recv_sizes(i - 1)
2285 recv_pair_disps(i) = recv_pair_disps(i - 1) + recv_pair_count(i - 1)
2287 ALLOCATE (recv_buf_r(sum(recv_sizes)))
2306 DO l = 1, desc%group_size
2307 IF (l .EQ. me) cycle
2309 DO i = 1, send_pair_count(l)
2310 arow = atom_pair_send(send_pair_disps(l) + i)%row
2311 acol = atom_pair_send(send_pair_disps(l) + i)%col
2312 img = atom_pair_send(send_pair_disps(l) + i)%image
2313 nrow = last_row(arow) - first_row(arow) + 1
2314 ncol = last_col(acol) - first_col(acol) + 1
2315 pmat => pmats(img)%matrix
2316 CALL dbcsr_get_block_p(matrix=pmat, row=arow, col=acol, block=p_block, found=found)
2321 send_buf_r(send_disps(l) + send_sizes(l) + j + (k - 1)*nrow) = p_block(j, k)
2324 send_sizes(l) = send_sizes(l) + nrow*ncol
2329 IF (.NOT. scatter)
THEN
2344 CALL desc%group%alltoall(send_buf_r, send_sizes, send_disps, &
2345 recv_buf_r, recv_sizes, recv_disps)
2350 IF (.NOT. scatter)
THEN
2354 DO i = 1, send_pair_count(me)
2355 arow = atom_pair_send(send_pair_disps(me) + i)%row
2356 acol = atom_pair_send(send_pair_disps(me) + i)%col
2357 img = atom_pair_send(send_pair_disps(me) + i)%image
2358 nrow = last_row(arow) - first_row(arow) + 1
2359 ncol = last_col(acol) - first_col(acol) + 1
2360 hmat => hmats(img)%matrix
2361 pmat => pmats(img)%matrix
2362 CALL dbcsr_get_block_p(matrix=hmat, row=arow, col=acol, block=h_block, found=found)
2364 CALL dbcsr_get_block_p(matrix=pmat, row=arow, col=acol, block=p_block, found=found)
2370 h_block(j, k) = h_block(j, k) + p_block(j, k)
2379 pmat => pmats(img)%matrix
2381 nblks_guess=
SIZE(atom_pair_recv)/nthread, sizedata_guess=
SIZE(recv_buf_r)/nthread, &
2391 DO l = 1, desc%group_size
2392 IF (l .EQ. me) cycle
2394 DO i = 1, recv_pair_count(l)
2395 arow = atom_pair_recv(recv_pair_disps(l) + i)%row
2396 acol = atom_pair_recv(recv_pair_disps(l) + i)%col
2397 img = atom_pair_recv(recv_pair_disps(l) + i)%image
2398 nrow = last_row(arow) - first_row(arow) + 1
2399 ncol = last_col(acol) - first_col(acol) + 1
2400 pmat => pmats(img)%matrix
2402 CALL dbcsr_get_block_p(matrix=pmat, row=arow, col=acol, block=p_block, found=found)
2404 IF (
PRESENT(hmats))
THEN
2405 hmat => hmats(img)%matrix
2406 CALL dbcsr_get_block_p(matrix=hmat, row=arow, col=acol, block=h_block, found=found)
2410 IF (scatter .AND. .NOT.
ASSOCIATED(p_block))
THEN
2411 vector => recv_buf_r(recv_disps(l) + recv_sizes(l) + 1:recv_disps(l) + recv_sizes(l) + nrow*ncol)
2412 CALL dbcsr_put_block(pmat, arow, acol, block=reshape(vector, [nrow, ncol]))
2414 IF (.NOT. scatter)
THEN
2418 h_block(j, k) = h_block(j, k) + recv_buf_r(recv_disps(l) + recv_sizes(l) + j + (k - 1)*nrow)
2423 recv_sizes(l) = recv_sizes(l) + nrow*ncol
2445 pmat => pmats(img)%matrix
2451 DEALLOCATE (send_buf_r)
2452 DEALLOCATE (recv_buf_r)
2454 DEALLOCATE (send_sizes)
2455 DEALLOCATE (recv_sizes)
2456 DEALLOCATE (send_disps)
2457 DEALLOCATE (recv_disps)
2458 DEALLOCATE (send_pair_count)
2459 DEALLOCATE (recv_pair_count)
2460 DEALLOCATE (send_pair_disps)
2461 DEALLOCATE (recv_pair_disps)
2463 DEALLOCATE (first_row, last_row, first_col, last_col)
2465 CALL timestop(handle)
!> \brief Computes buffer offsets for a list of atom pairs: one offset per
!>        pair, and one offset and size per rank.
!> \param pairs atom pairs; must be ordered by non-decreasing rank (asserted)
!> \param nsgf per-atom sizes; the block size of a pair is nsgf(arow)*nsgf(acol)
!> \param group_size number of ranks; size of rank_offsets and rank_sizes
!> \param pair_offsets (re)allocated; pair_offsets(i) is the running total
!>        size before pair i is added
!> \param rank_offsets (re)allocated; offset of the first pair of each rank
!> \param rank_sizes (re)allocated; amount of buffer belonging to each rank
!> \param buffer_size set to the total size of all blocks
!> \note NOTE(review): the initialisation of total_size, rank_offsets and
!>       rank_sizes, and the assignment of arow/acol are on lines missing
!>       from this listing.
2473 SUBROUTINE rs_calc_offsets(pairs, nsgf, group_size, &
2474 pair_offsets, rank_offsets, rank_sizes, buffer_size)
2476 INTEGER,
DIMENSION(:),
INTENT(IN) :: nsgf
2477 INTEGER,
INTENT(IN) :: group_size
2478 INTEGER,
DIMENSION(:),
POINTER :: pair_offsets, rank_offsets, rank_sizes
2479 INTEGER,
INTENT(INOUT) :: buffer_size
2481 INTEGER :: acol, arow, i, block_size, total_size, k, prev_k
! Drop any previous allocations so the routine can be called repeatedly.
2483 IF (
ASSOCIATED(pair_offsets))
DEALLOCATE (pair_offsets)
2484 IF (
ASSOCIATED(rank_offsets))
DEALLOCATE (rank_offsets)
2485 IF (
ASSOCIATED(rank_sizes))
DEALLOCATE (rank_sizes)
2488 ALLOCATE (pair_offsets(
SIZE(pairs)))
! Prefix sum: the offset of a pair is the total size of all pairs before it.
2490 DO i = 1,
SIZE(pairs)
2491 pair_offsets(i) = total_size
2494 block_size = nsgf(arow)*nsgf(acol)
2495 total_size = total_size + block_size
2497 buffer_size = total_size
2500 ALLOCATE (rank_offsets(group_size))
2501 ALLOCATE (rank_sizes(group_size))
2504 IF (
SIZE(pairs) > 0)
THEN
! k and prev_k are 1-based rank indices (rank + 1).
2505 prev_k = pairs(1)%rank + 1
2506 DO i = 1,
SIZE(pairs)
2507 k = pairs(i)%rank + 1
! The per-rank segments are contiguous only if pairs are sorted by rank.
2508 cpassert(k >= prev_k)
! Rank boundary: segment of k starts here, which also fixes the size of prev_k.
2509 IF (k > prev_k)
THEN
2510 rank_offsets(k) = pair_offsets(i)
2511 rank_sizes(prev_k) = rank_offsets(k) - rank_offsets(prev_k)
! The last rank's segment runs to the end of the buffer.
2515 rank_sizes(k) = buffer_size - rank_offsets(k)
2518 END SUBROUTINE rs_calc_offsets
2525 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(IN) :: src_matrices
2530 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rs_scatter_matrices'
2533 REAL(kind=
dp),
DIMENSION(:),
ALLOCATABLE :: buffer_send
2535 CALL timeset(routinen, handle)
2536 ALLOCATE (buffer_send(task_list%buffer_size_send))
2539 cpassert(
ASSOCIATED(task_list%atom_pair_send))
2540 CALL rs_pack_buffer(src_matrices=src_matrices, &
2541 dest_buffer=buffer_send, &
2542 atom_pair=task_list%atom_pair_send, &
2543 pair_offsets=task_list%pair_offsets_send)
2546 CALL group%alltoall(buffer_send, task_list%rank_sizes_send, task_list%rank_offsets_send, &
2547 dest_buffer%host_buffer, &
2548 task_list%rank_sizes_recv, task_list%rank_offsets_recv)
2550 DEALLOCATE (buffer_send)
2551 CALL timestop(handle)
2561 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(INOUT) :: dest_matrices
2565 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rs_gather_matrices'
2568 REAL(kind=
dp),
DIMENSION(:),
ALLOCATABLE :: buffer_send
2570 CALL timeset(routinen, handle)
2573 ALLOCATE (buffer_send(task_list%buffer_size_send))
2576 CALL group%alltoall(src_buffer%host_buffer, task_list%rank_sizes_recv, task_list%rank_offsets_recv, &
2577 buffer_send, task_list%rank_sizes_send, task_list%rank_offsets_send)
2580 cpassert(
ASSOCIATED(task_list%atom_pair_send))
2581 CALL rs_unpack_buffer(src_buffer=buffer_send, &
2582 dest_matrices=dest_matrices, &
2583 atom_pair=task_list%atom_pair_send, &
2584 pair_offsets=task_list%pair_offsets_send)
2586 DEALLOCATE (buffer_send)
2587 CALL timestop(handle)
2596 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(IN) :: src_matrices
2600 CALL rs_pack_buffer(src_matrices=src_matrices, &
2601 dest_buffer=dest_buffer%host_buffer, &
2602 atom_pair=task_list%atom_pair_recv, &
2603 pair_offsets=task_list%pair_offsets_recv)
2613 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(INOUT) :: dest_matrices
2616 CALL rs_unpack_buffer(src_buffer=src_buffer%host_buffer, &
2617 dest_matrices=dest_matrices, &
2618 atom_pair=task_list%atom_pair_recv, &
2619 pair_offsets=task_list%pair_offsets_recv)
!> \brief Copies the matrix block of every atom pair, flattened to 1-D, into
!>        dest_buffer at that pair's offset.
!> \param src_matrices source matrices; presumably indexed by the pair's
!>        image (the block lookup call is only partly visible in this listing)
!> \param dest_buffer flat buffer; elements offset + 1 .. offset + block_size
!>        are overwritten for each pair
!> \param atom_pair pairs to pack (row, col, image are read)
!> \param pair_offsets offset of each pair's data in dest_buffer (the write
!>        starts at offset + 1)
2627 SUBROUTINE rs_pack_buffer(src_matrices, dest_buffer, atom_pair, pair_offsets)
2628 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(IN) :: src_matrices
2629 REAL(kind=
dp),
DIMENSION(:),
INTENT(INOUT) :: dest_buffer
2631 INTEGER,
DIMENSION(:),
INTENT(IN) :: pair_offsets
2633 INTEGER :: acol, arow, img, i, offset, block_size
2635 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: block
2641 DO i = 1,
SIZE(atom_pair)
2642 arow = atom_pair(i)%row
2643 acol = atom_pair(i)%col
2644 img = atom_pair(i)%image
2646 block=block, found=found)
! Flatten the 2-D block in array element order into the pair's slot.
2648 block_size =
SIZE(block)
2649 offset = pair_offsets(i)
2650 dest_buffer(offset + 1:offset + block_size) = reshape(block, shape=(/block_size/))
2655 END SUBROUTINE rs_pack_buffer
!> \brief Adds the data of every atom pair from src_buffer onto the
!>        corresponding matrix block (accumulation, not overwrite).
!> \param src_buffer flat buffer holding the blocks back to back
!> \param dest_matrices destination matrices; presumably indexed by the pair's
!>        image (the block lookup call is only partly visible in this listing)
!> \param atom_pair pairs to unpack (row, col, image are read)
!> \param pair_offsets offset of each pair's data in src_buffer (the read
!>        starts at offset + 1)
!> \note The update of a block is guarded by an OpenMP lock selected from the
!>       block row. NOTE(review): the parallel directive itself is not visible
!>       in this listing.
2661 SUBROUTINE rs_unpack_buffer(src_buffer, dest_matrices, atom_pair, pair_offsets)
2662 REAL(kind=
dp),
DIMENSION(:),
INTENT(IN) :: src_buffer
2663 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(INOUT) :: dest_matrices
2665 INTEGER,
DIMENSION(:),
INTENT(IN) :: pair_offsets
2667 INTEGER :: acol, arow, img, i, offset, &
2668 nrows, ncols, lock_num
2670 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: block
2671 INTEGER(kind=omp_lock_kind),
ALLOCATABLE,
DIMENSION(:) :: locks
! Pool of locks: ten per thread.
2674 ALLOCATE (locks(10*omp_get_max_threads()))
2675 DO i = 1,
SIZE(locks)
2676 CALL omp_init_lock(locks(i))
2683 DO i = 1,
SIZE(atom_pair)
2684 arow = atom_pair(i)%row
2685 acol = atom_pair(i)%col
2686 img = atom_pair(i)%image
2688 block=block, found=found)
2690 nrows =
SIZE(block, 1)
2691 ncols =
SIZE(block, 2)
2692 offset = pair_offsets(i)
! Pairs in the same block row always map to the same lock, so their updates
! are serialised; modulo(...) + 1 yields a valid 1-based lock index.
2693 lock_num =
modulo(arow,
SIZE(locks)) + 1
2695 CALL omp_set_lock(locks(lock_num))
! Reshape the flat segment back to the block's shape and accumulate.
2696 block = block + reshape(src_buffer(offset + 1:offset + nrows*ncols), shape=(/nrows, ncols/))
2697 CALL omp_unset_lock(locks(lock_num))
! Locks must be destroyed explicitly.
2703 DO i = 1,
SIZE(locks)
2704 CALL omp_destroy_lock(locks(i))
2708 END SUBROUTINE rs_unpack_buffer
!> \brief Determines which rank(s) a cube around cube_center is assigned to
!>        and writes destination, dist_type and subpatch_pattern into tasks.
!>        If the cube fits into the domain (plus border) of the rank owning
!>        the centre, a single task with dist_type 1 is set; otherwise one
!>        task per covered rank is set with dist_type 2.
!> \param rs_desc realspace grid descriptor (coordinate/rank maps, bounds, border)
!> \param igrid_level grid level, encoded into the destination
!> \param n_levels number of grid levels, used by encode_rank
!> \param cube_center grid point at the centre of the cube
!> \param ntasks index of the current task on entry; increased by
!>        added_tasks - 1 in the multi-rank case
!> \param tasks task array; may need to grow (the reallocation call is on
!>        lines missing from this listing)
!> \param lb_cube lower cube bounds relative to cube_center
!> \param ub_cube upper cube bounds relative to cube_center
!> \param added_tasks in the multi-rank case, the number of covered ranks
!>        (product of the per-direction coordinate ranges)
!> \note NOTE(review): the ELSE/END IF/loop lines separating the branches are
!>       missing from this listing; the comments mark the visible stages only.
2726 SUBROUTINE rs_find_node(rs_desc, igrid_level, n_levels, cube_center, ntasks, tasks, &
2727 lb_cube, ub_cube, added_tasks)
2730 INTEGER,
INTENT(IN) :: igrid_level, n_levels
2731 INTEGER,
DIMENSION(3),
INTENT(IN) :: cube_center
2732 INTEGER,
INTENT(INOUT) :: ntasks
2733 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
2734 INTEGER,
DIMENSION(3),
INTENT(IN) :: lb_cube, ub_cube
2735 INTEGER,
INTENT(OUT) :: added_tasks
! Growth policy for the task array: new size = (old size + add_tasks)*mult_tasks.
2737 INTEGER,
PARAMETER :: add_tasks = 1000
2738 REAL(kind=
dp),
PARAMETER :: mult_tasks = 2.0_dp
2740 INTEGER :: bit_index, coord(3), curr_tasks, dest, i, icoord(3), idest, itask, ix, iy, iz, &
2741 lb_coord(3), lb_domain(3), lbc(3), ub_coord(3), ub_domain(3), ubc(3)
2742 INTEGER :: bit_pattern
2743 LOGICAL :: dir_periodic(3)
! Process coordinates and rank owning the cube centre.
2745 coord(1) = rs_desc%x2coord(cube_center(1))
2746 coord(2) = rs_desc%y2coord(cube_center(2))
2747 coord(3) = rs_desc%z2coord(cube_center(3))
2748 dest = rs_desc%coord2rank(coord(1), coord(2), coord(3))
! Absolute cube bounds.
2751 lbc = lb_cube + cube_center
2752 ubc = ub_cube + cube_center
! Case 1: the cube lies entirely inside dest's domain extended by the border.
2754 IF (all((rs_desc%lb_global(:, dest) - rs_desc%border) .LE. lbc) .AND. &
2755 all((rs_desc%ub_global(:, dest) + rs_desc%border) .GE. ubc))
THEN
2757 tasks(ntasks)%destination = encode_rank(dest, igrid_level, n_levels)
2758 tasks(ntasks)%dist_type = 1
2759 tasks(ntasks)%subpatch_pattern = 0
! Build a bit pattern with two consecutive bits per direction i.
! Directions with perd(i) == 1 get both bits cleared.
2778 IF (rs_desc%perd(i) == 1)
THEN
2779 bit_pattern = ibclr(bit_pattern, bit_index)
2780 bit_index = bit_index + 1
2781 bit_pattern = ibclr(bit_pattern, bit_index)
2782 bit_index = bit_index + 1
! First bit of the direction: set when the cube's upper bound does not reach
! beyond the lower border region of dest.
2785 IF (ubc(i) <= rs_desc%lb_global(i, dest) - 1 + rs_desc%border)
THEN
2786 bit_pattern = ibset(bit_pattern, bit_index)
2787 bit_index = bit_index + 1
2789 bit_pattern = ibclr(bit_pattern, bit_index)
2790 bit_index = bit_index + 1
! Second bit of the direction: set when the cube's lower bound starts inside
! the upper border region of dest.
2793 IF (lbc(i) >= rs_desc%ub_global(i, dest) + 1 - rs_desc%border)
THEN
2794 bit_pattern = ibset(bit_pattern, bit_index)
2795 bit_index = bit_index + 1
2797 bit_pattern = ibclr(bit_pattern, bit_index)
2798 bit_index = bit_index + 1
2802 tasks(ntasks)%subpatch_pattern = bit_pattern
! Case 2: the cube spans several ranks. Start from dest's domain (with border)
! and widen it rank by rank until it covers the cube.
2812 lb_domain = rs_desc%lb_global(:, dest) - rs_desc%border
2813 ub_domain = rs_desc%ub_global(:, dest) + rs_desc%border
2816 IF (rs_desc%perd(i) == 0)
THEN
! Extend downwards: step to the lower neighbour (wrapping with modulo) and
! add that neighbour's extent in direction i to the covered domain.
2819 IF (lb_domain(i) > lbc(i))
THEN
2820 lb_coord(i) = lb_coord(i) - 1
2821 icoord =
modulo(lb_coord, rs_desc%group_dim)
2822 idest = rs_desc%coord2rank(icoord(1), icoord(2), icoord(3))
2823 lb_domain(i) = lb_domain(i) - (rs_desc%ub_global(i, idest) - rs_desc%lb_global(i, idest) + 1)
! Extend upwards in the same way.
2830 IF (ub_domain(i) < ubc(i))
THEN
2831 ub_coord(i) = ub_coord(i) + 1
2832 icoord =
modulo(ub_coord, rs_desc%group_dim)
2833 idest = rs_desc%coord2rank(icoord(1), icoord(2), icoord(3))
2834 ub_domain(i) = ub_domain(i) + (rs_desc%ub_global(i, idest) - rs_desc%lb_global(i, idest) + 1)
! If the covered domain spans the full grid in direction i, treat the
! direction as periodic and use the full range of process coordinates.
2844 IF (ub_domain(i) - lb_domain(i) + 1 >= rs_desc%npts(i))
THEN
2845 dir_periodic(i) = .true.
2847 ub_coord(i) = rs_desc%group_dim(i) - 1
2849 dir_periodic(i) = .false.
! One task per covered rank; the entry at the incoming ntasks is reused,
! hence "- 1".
2853 added_tasks = product(ub_coord - lb_coord + 1)
2855 ntasks = ntasks + added_tasks - 1
2856 IF (ntasks >
SIZE(tasks))
THEN
2857 curr_tasks = int((
SIZE(tasks) + add_tasks)*mult_tasks)
! Emit one task per covered rank.
2860 DO iz = lb_coord(3), ub_coord(3)
2861 DO iy = lb_coord(2), ub_coord(2)
2862 DO ix = lb_coord(1), ub_coord(1)
2863 icoord =
modulo((/ix, iy, iz/), rs_desc%group_dim)
2864 idest = rs_desc%coord2rank(icoord(1), icoord(2), icoord(3))
2865 tasks(itask)%destination = encode_rank(idest, igrid_level, n_levels)
2866 tasks(itask)%dist_type = 2
2867 tasks(itask)%subpatch_pattern = 0
! Bits 0..5 flag that this rank sits on the lower/upper edge of the covered
! range in x, y, z; no bit is set for a direction treated as periodic.
2870 IF (ix == lb_coord(1) .AND. .NOT. dir_periodic(1)) &
2871 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 0)
2872 IF (ix == ub_coord(1) .AND. .NOT. dir_periodic(1)) &
2873 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 1)
2874 IF (iy == lb_coord(2) .AND. .NOT. dir_periodic(2)) &
2875 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 2)
2876 IF (iy == ub_coord(2) .AND. .NOT. dir_periodic(2)) &
2877 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 3)
2878 IF (iz == lb_coord(3) .AND. .NOT. dir_periodic(3)) &
2879 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 4)
2880 IF (iz == ub_coord(3) .AND. .NOT. dir_periodic(3)) &
2881 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 5)
2888 END SUBROUTINE rs_find_node
!> \brief Packs a rank and a grid level into one integer.
!> \param rank rank to encode
!> \param grid_level 1-based grid level (stored as grid_level - 1)
!> \param n_levels number of grid levels; must match the value later passed
!>        to decode_rank / decode_level
!> \return rank*n_levels + grid_level - 1
2902 FUNCTION encode_rank(rank, grid_level, n_levels)
RESULT(encoded_int)
2904 INTEGER,
INTENT(IN) :: rank, grid_level, n_levels
2905 INTEGER :: encoded_int
! The rank is the most significant part, so sorting encoded values orders by
! rank first and by grid level second.
2909 encoded_int = rank*n_levels + grid_level - 1
!> \brief Extracts the rank from a value produced by encode_rank.
!> \param encoded_int encoded rank/grid-level value
!> \param n_levels number of grid levels used for encoding
!> \return encoded_int/n_levels (integer division)
2919 FUNCTION decode_rank(encoded_int, n_levels)
RESULT(rank)
2921 INTEGER,
INTENT(IN) :: encoded_int
2922 INTEGER,
INTENT(IN) :: n_levels
! Integer division discards the grid-level part.
2925 rank = int(encoded_int/n_levels)
!> \brief Extracts the 1-based grid level from a value produced by encode_rank.
!> \param encoded_int encoded rank/grid-level value
!> \param n_levels number of grid levels used for encoding
!> \return modulo(encoded_int, n_levels) + 1
2935 FUNCTION decode_level(encoded_int, n_levels)
RESULT(grid_level)
2937 INTEGER,
INTENT(IN) :: encoded_int
2938 INTEGER,
INTENT(IN) :: n_levels
2939 INTEGER :: grid_level
! The remainder is the stored (grid_level - 1); add 1 to undo the shift.
2941 grid_level = int(
modulo(encoded_int, n_levels)) + 1
2943 END FUNCTION decode_level
!> \brief Strict ordering of two tasks: lexicographic comparison on
!>        grid_level, image, iatom, jatom, iset, jset, ipgf, jpgf (in that order).
!> \param a first task
!> \param b second task
!> \return .TRUE. if a sorts before b; the first field that differs decides,
!>         and if all earlier fields are equal the result is a%jpgf < b%jpgf
2959 PURE FUNCTION tasks_less_than(a, b)
RESULT(res)
2963 IF (a%grid_level /= b%grid_level)
THEN
2964 res = a%grid_level < b%grid_level
2966 ELSE IF (a%image /= b%image)
THEN
2967 res = a%image < b%image
2969 ELSE IF (a%iatom /= b%iatom)
THEN
2970 res = a%iatom < b%iatom
2972 ELSE IF (a%jatom /= b%jatom)
THEN
2973 res = a%jatom < b%jatom
2975 ELSE IF (a%iset /= b%iset)
THEN
2976 res = a%iset < b%iset
2978 ELSE IF (a%jset /= b%jset)
THEN
2979 res = a%jset < b%jset
2981 ELSE IF (a%ipgf /= b%ipgf)
THEN
2982 res = a%ipgf < b%ipgf
! Last key: equal tasks yield .FALSE., keeping the relation strict.
2985 res = a%jpgf < b%jpgf
2988 END FUNCTION tasks_less_than
!> \brief Sorts an array of tasks and returns the applied permutation;
!>        driver that allocates scratch space and calls tasks_sort_low.
!> \param arr tasks to sort (in place)
!> \param n number of elements in arr and indices
!> \param indices initialised to 1..n before sorting and permuted along with arr
!> \note NOTE(review): the condition of the first branch and the body of the
!>       "n > 0" branch are on lines missing from this listing.
3001 SUBROUTINE tasks_sort(arr, n, indices)
3002 INTEGER,
INTENT(IN) :: n
3003 TYPE(
task_type),
DIMENSION(1:n),
INTENT(INOUT) :: arr
3004 integer,
DIMENSION(1:n),
INTENT(INOUT) :: indices
3007 TYPE(
task_type),
ALLOCATABLE :: tmp_arr(:)
3008 INTEGER,
ALLOCATABLE :: tmp_idx(:)
! Scratch of half the array size (rounded up); tasks_sort_low copies the
! first half there while merging.
3012 ALLOCATE (tmp_arr((n + 1)/2), tmp_idx((n + 1)/2))
! Start from the identity permutation.
3014 indices = (/(i, i=1, n)/)
3016 CALL tasks_sort_low(arr(1:n), indices, tmp_arr, tmp_idx)
3018 DEALLOCATE (tmp_arr, tmp_idx)
3019 ELSE IF (n > 0)
THEN
3023 END SUBROUTINE tasks_sort
!> \brief Recursive worker of tasks_sort: arrays of at most 7 elements are
!>        sorted by repeated adjacent swaps, larger ones are split in two
!>        halves that are sorted recursively and then merged.
!> \param arr tasks to sort (in place)
!> \param indices permutation, reordered together with arr
!> \param tmp_arr scratch for tasks, at least (size(arr) + 1)/2 elements
!> \param tmp_idx scratch for indices, same size as tmp_arr
!> \note NOTE(review): several statements of the swap and merge loops are
!>       missing from this listing.
3035 RECURSIVE SUBROUTINE tasks_sort_low(arr, indices, tmp_arr, tmp_idx)
3036 TYPE(
task_type),
DIMENSION(:),
INTENT(INOUT) :: arr
3037 INTEGER,
DIMENSION(size(arr)),
INTENT(INOUT) :: indices
3038 TYPE(
task_type),
DIMENSION((size(arr) + 1)/2),
INTENT(INOUT) :: tmp_arr
3039 INTEGER,
DIMENSION((size(arr) + 1)/2),
INTENT(INOUT) :: tmp_idx
3041 INTEGER :: t, m, i, j, k
! Small arrays: swap out-of-order neighbours, pass by pass.
3048 IF (
size(arr) <= 7)
THEN
3049 DO j =
size(arr) - 1, 1, -1
3052 IF (tasks_less_than(arr(i + 1), arr(i)))
THEN
3059 indices(i) = indices(i + 1)
! A pass without swaps means the array is already sorted.
3064 IF (.NOT. swapped)
EXIT
! Larger arrays: sort both halves; the scratch arrays are shared by the
! recursive calls.
3070 m = (
size(arr) + 1)/2
3071 CALL tasks_sort_low(arr(1:m), indices(1:m), tmp_arr, tmp_idx)
3072 CALL tasks_sort_low(arr(m + 1:), indices(m + 1:), tmp_arr, tmp_idx)
! Merging is needed only if the halves are out of order at the seam.
3076 IF (tasks_less_than(arr(m + 1), arr(m)))
THEN
! Save the first half; the merge writes its output into arr from the front.
3079 tmp_arr(1:m) = arr(1:m)
3080 tmp_idx(1:m) = indices(1:m)
! i walks the saved first half, j the second half, k the output position.
3085 DO WHILE (i <= m .and. j <=
size(arr) - m)
! The second-half element wins only if strictly smaller, so on ties the
! first-half element is emitted first.
3086 IF (tasks_less_than(arr(m + j), tmp_arr(i)))
THEN
3088 indices(k) = indices(m + j)
3092 indices(k) = tmp_idx(i)
! Leftover elements of the saved first half are copied back.
3102 indices(k) = tmp_idx(i)
3109 END SUBROUTINE tasks_sort_low
!> \brief Strict ordering of two atom pairs; fields are compared in the order
!>        rank, row, col, image and the first differing field decides.
!> \param a first atom pair
!> \param b second atom pair
!> \return .TRUE. if a sorts before b
!> \note NOTE(review): the result assignments of the row and col branches are
!>       on lines missing from this listing; presumably they mirror the rank
!>       and image branches.
3119 PURE FUNCTION atom_pair_less_than(a, b)
RESULT(res)
3123 IF (a%rank /= b%rank)
THEN
3124 res = a%rank < b%rank
3126 ELSE IF (a%row /= b%row)
THEN
3129 ELSE IF (a%col /= b%col)
THEN
! Last key: identical pairs yield .FALSE.
3133 res = a%image < b%image
3136 END FUNCTION atom_pair_less_than
!> \brief Sorts an array of atom pairs and returns the applied permutation;
!>        driver that allocates scratch space and calls atom_pair_sort_low.
!> \param arr atom pairs to sort (in place; its declaration is on a line
!>        missing from this listing)
!> \param n number of elements in arr and indices
!> \param indices initialised to 1..n before sorting and permuted along with arr
!> \note NOTE(review): the condition of the first branch and the body of the
!>       "n > 0" branch are on lines missing from this listing.
3149 SUBROUTINE atom_pair_sort(arr, n, indices)
3150 INTEGER,
INTENT(IN) :: n
3152 integer,
DIMENSION(1:n),
INTENT(INOUT) :: indices
3156 INTEGER,
ALLOCATABLE :: tmp_idx(:)
! Scratch of half the array size (rounded up), used while merging.
3160 ALLOCATE (tmp_arr((n + 1)/2), tmp_idx((n + 1)/2))
! Start from the identity permutation.
3162 indices = (/(i, i=1, n)/)
3164 CALL atom_pair_sort_low(arr(1:n), indices, tmp_arr, tmp_idx)
3166 DEALLOCATE (tmp_arr, tmp_idx)
3167 ELSE IF (n > 0)
THEN
3171 END SUBROUTINE atom_pair_sort
!> \brief Recursive worker of atom_pair_sort: arrays of at most 7 elements are
!>        sorted by repeated adjacent swaps, larger ones are split in two
!>        halves that are sorted recursively and then merged.
!> \param arr atom pairs to sort (in place; its declaration is on a line
!>        missing from this listing)
!> \param indices permutation, reordered together with arr
!> \param tmp_arr scratch for atom pairs, at least (size(arr) + 1)/2 elements
!> \param tmp_idx scratch for indices, same size as tmp_arr
!> \note NOTE(review): several statements of the swap and merge loops are
!>       missing from this listing.
3183 RECURSIVE SUBROUTINE atom_pair_sort_low(arr, indices, tmp_arr, tmp_idx)
3185 INTEGER,
DIMENSION(size(arr)),
INTENT(INOUT) :: indices
3186 TYPE(
atom_pair_type),
DIMENSION((size(arr) + 1)/2),
INTENT(INOUT) :: tmp_arr
3187 INTEGER,
DIMENSION((size(arr) + 1)/2),
INTENT(INOUT) :: tmp_idx
3189 INTEGER :: t, m, i, j, k
! Small arrays: swap out-of-order neighbours, pass by pass.
3196 IF (
size(arr) <= 7)
THEN
3197 DO j =
size(arr) - 1, 1, -1
3200 IF (atom_pair_less_than(arr(i + 1), arr(i)))
THEN
3207 indices(i) = indices(i + 1)
! A pass without swaps means the array is already sorted.
3212 IF (.NOT. swapped)
EXIT
! Larger arrays: sort both halves; the scratch arrays are shared by the
! recursive calls.
3218 m = (
size(arr) + 1)/2
3219 CALL atom_pair_sort_low(arr(1:m), indices(1:m), tmp_arr, tmp_idx)
3220 CALL atom_pair_sort_low(arr(m + 1:), indices(m + 1:), tmp_arr, tmp_idx)
! Merging is needed only if the halves are out of order at the seam.
3224 IF (atom_pair_less_than(arr(m + 1), arr(m)))
THEN
! Save the first half; the merge writes its output into arr from the front.
3227 tmp_arr(1:m) = arr(1:m)
3228 tmp_idx(1:m) = indices(1:m)
! i walks the saved first half, j the second half, k the output position.
3233 DO WHILE (i <= m .and. j <=
size(arr) - m)
! The second-half element wins only if strictly smaller, so on ties the
! first-half element is emitted first.
3234 IF (atom_pair_less_than(arr(m + j), tmp_arr(i)))
THEN
3236 indices(k) = indices(m + j)
3240 indices(k) = tmp_idx(i)
! Leftover elements of the saved first half are copied back.
3250 indices(k) = tmp_idx(i)
3257 END SUBROUTINE atom_pair_sort_low
void grid_create_basis_set(const int nset, const int nsgf, const int maxco, const int maxpgf, const int lmin[nset], const int lmax[nset], const int npgf[nset], const int nsgf_set[nset], const int first_sgf[nset], const double sphi[nsgf][maxco], const double zet[nset][maxpgf], grid_basis_set **basis_set_out)
Allocates a basis set which can be passed to grid_create_task_list. See grid_task_list....
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always returns a positive number. https://gcc....
All kinds of helpful little routines.
real(kind=dp) function, public exp_radius_very_extended(la_min, la_max, lb_min, lb_max, pab, o1, o2, ra, rb, rp, zetp, eps, prefactor, cutoff, epsabs)
computes the radius of the Gaussian outside of which it is smaller than eps
subroutine, public get_gto_basis_set(gto_basis_set, name, aliases, norm_type, kind_radius, ncgf, nset, nsgf, cgf_symbol, sgf_symbol, norm_cgf, set_radius, lmax, lmin, lx, ly, lz, m, ncgf_set, npgf, nsgf_set, nshell, cphi, pgf_radius, sphi, scon, zet, first_cgf, first_sgf, l, last_cgf, last_sgf, n, gcc, maxco, maxl, maxpgf, maxsgf_set, maxshell, maxso, nco_sum, npgf_sum, nshell_sum, maxder, short_kind_radius, npgf_seg_sum)
...
Handles all functions related to the CELL.
Defines control structures, which contain the parameters and the settings for the DFT-based calculati...
subroutine, public dbcsr_get_block_p(matrix, row, col, block, found, row_size, col_size)
...
subroutine, public dbcsr_get_info(matrix, nblkrows_total, nblkcols_total, nfullrows_total, nfullcols_total, nblkrows_local, nblkcols_local, nfullrows_local, nfullcols_local, my_prow, my_pcol, local_rows, local_cols, proc_row_dist, proc_col_dist, row_blk_size, col_blk_size, row_blk_offset, col_blk_offset, distribution, name, matrix_type, group)
...
subroutine, public dbcsr_work_create(matrix, nblks_guess, sizedata_guess, n, work_mutable)
...
subroutine, public dbcsr_finalize(matrix)
...
subroutine, public dbcsr_put_block(matrix, row, col, block, summation)
...
for a given dr()/dh(r) this will provide the bounds to be used if one wants to go over a sphere-subre...
subroutine, public compute_cube_center(cube_center, rs_desc, zeta, zetb, ra, rab)
unifies the computation of the cube center, so that differences in implementation,...
subroutine, public return_cube(info, radius, lb_cube, ub_cube, sphere_bounds)
...
subroutine, public return_cube_nonortho(info, radius, lb, ub, rp)
...
integer function, public gaussian_gridlevel(gridlevel_info, exponent)
...
Fortran API for the grid package, which is written in C.
subroutine, public grid_create_task_list(ntasks, natoms, nkinds, nblocks, block_offsets, atom_positions, atom_kinds, basis_sets, level_list, iatom_list, jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list, block_num_list, radius_list, rab_list, rs_grids, task_list)
Allocates a task list which can be passed to grid_collocate_task_list.
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
integer, parameter, public default_string_length
Types and basic routines needed for a kpoint calculation.
subroutine, public get_kpoint_info(kpoint, kp_scheme, nkp_grid, kp_shift, symmetry, verbose, full_grid, use_real_wfn, eps_geo, parallel_group_size, kp_range, nkp, xkp, wkp, para_env, blacs_env_all, para_env_kp, para_env_inter_kp, blacs_env, kp_env, kp_aux_env, mpools, iogrp, nkp_groups, kp_dist, cell_to_index, index_to_cell, sab_nl, sab_nl_nosym)
Retrieve information from a kpoint environment.
An array-based list which grows on demand. When the internal array is full, a new array of twice the ...
Utility routines for the memory handling.
Interface to the message passing library MPI.
Fortran API for the offload package, which is written in C.
subroutine, public offload_create_buffer(length, buffer)
Allocates a buffer of given length, i.e. number of elements.
Define methods related to particle_type.
subroutine, public get_particle_set(particle_set, qs_kind_set, first_sgf, last_sgf, nsgf, nmao, basis)
Get the components of a particle set.
Define the data structure for the particle information.
Container for various plane-wave related things
subroutine, public pw_env_get(pw_env, pw_pools, cube_info, gridlevel_info, auxbas_pw_pool, auxbas_grid, auxbas_rs_desc, auxbas_rs_grid, rs_descs, rs_grids, xc_pw_pool, vdw_pw_pool, poisson_env, interp_section)
returns the various attributes of the pw env
Define the quickstep kind type and their sub types.
subroutine, public get_qs_kind(qs_kind, basis_set, basis_type, ncgf, nsgf, all_potential, tnadd_potential, gth_potential, sgp_potential, upf_potential, se_parameter, dftb_parameter, xtb_parameter, dftb3_param, zatom, zeff, elec_conf, mao, lmax_dftb, alpha_core_charge, ccore_charge, core_charge, core_charge_radius, paw_proj_set, paw_atom, hard_radius, hard0_radius, max_rad_local, covalent_radius, vdw_radius, gpw_type_forced, harmonics, max_iso_not0, max_s_harm, grid_atom, ngrid_ang, ngrid_rad, lmax_rho0, dft_plus_u_atom, l_of_dft_plus_u, n_of_dft_plus_u, u_minus_j, u_of_dft_plus_u, j_of_dft_plus_u, alpha_of_dft_plus_u, beta_of_dft_plus_u, j0_of_dft_plus_u, occupation_of_dft_plus_u, dispersion, bs_occupation, magnetization, no_optimize, addel, laddel, naddel, orbitals, max_scf, eps_scf, smear, u_ramping, u_minus_j_target, eps_u_ramping, init_u_ramping_each_scf, reltmat, ghost, floating, name, element_symbol, pao_basis_size, pao_model_file, pao_potentials, pao_descriptors, nelec)
Get attributes of an atomic kind.
subroutine, public get_ks_env(ks_env, v_hartree_rspace, s_mstruct_changed, rho_changed, potential_changed, forces_up_to_date, complex_ks, matrix_h, matrix_h_im, matrix_ks, matrix_ks_im, matrix_vxc, kinetic, matrix_s, matrix_s_ri_aux, matrix_w, matrix_p_mp2, matrix_p_mp2_admm, matrix_h_kp, matrix_h_im_kp, matrix_ks_kp, matrix_vxc_kp, kinetic_kp, matrix_s_kp, matrix_w_kp, matrix_s_ri_aux_kp, matrix_ks_im_kp, rho, rho_xc, vppl, rho_core, rho_nlcc, rho_nlcc_g, vee, neighbor_list_id, sab_orb, sab_all, sac_ae, sac_ppl, sac_lri, sap_ppnl, sap_oce, sab_lrc, sab_se, sab_xtbe, sab_tbe, sab_core, sab_xb, sab_xtb_pp, sab_xtb_nonbond, sab_vdw, sab_scp, sab_almo, sab_kp, sab_kp_nosym, task_list, task_list_soft, kpoints, do_kpoints, atomic_kind_set, qs_kind_set, cell, cell_ref, use_ref_cell, particle_set, energy, force, local_particles, local_molecules, molecule_kind_set, molecule_set, subsys, cp_subsys, virial, results, atprop, nkind, natom, dft_control, dbcsr_dist, distribution_2d, pw_env, para_env, blacs_env, nelectron_total, nelectron_spin)
...
Define the neighbor list data types and the corresponding functionality.
subroutine, public rs_grid_create(rs, desc)
...
pure integer function, public rs_grid_locate_rank(rs_desc, rank_in, shift)
returns the 1D rank of the task which is a cartesian shift away from 1D rank rank_in only possible if...
pure subroutine, public rs_grid_reorder_ranks(desc, real2virtual)
Defines a new ordering of ranks on this realspace grid, recalculating the data bounds and reallocatin...
subroutine, public rs_grid_release(rs_grid)
releases the given rs grid (see doc/ReferenceCounting.html)
generate the tasks lists used by collocate and integrate routines
subroutine, public rs_copy_to_matrices(src_buffer, dest_matrices, task_list)
Copies from buffer into DBCSR matrices, replaces rs_gather_matrix for non-distributed grids.
subroutine, public generate_qs_task_list(ks_env, task_list, reorder_rs_grid_ranks, skip_load_balance_distributed, soft_valid, basis_type, pw_env_external, sab_orb_external)
...
subroutine, public rs_scatter_matrices(src_matrices, dest_buffer, task_list, group)
Scatters dbcsr matrix blocks and receives them into a buffer as needed before collocation.
subroutine, public rs_distribute_matrix(rs_descs, pmats, atom_pair_send, atom_pair_recv, nimages, scatter, hmats)
redistributes the matrix so that it can be used in realspace operations i.e. according to the task li...
subroutine, public distribute_tasks(rs_descs, ntasks, natoms, tasks, atom_pair_send, atom_pair_recv, symmetric, reorder_rs_grid_ranks, skip_load_balance_distributed)
Assembles tasks to be performed on local grid.
subroutine, public rs_gather_matrices(src_buffer, dest_matrices, task_list, group)
Gathers the dbcsr matrix blocks and receives them into a buffer as needed after integration.
subroutine, public task_list_inner_loop(tasks, ntasks, curr_tasks, rs_descs, dft_control, cube_info, gridlevel_info, cindex, iatom, jatom, rpgfa, rpgfb, zeta, zetb, kind_radius_b, set_radius_a, set_radius_b, ra, rab, la_max, la_min, lb_max, lb_min, npgfa, npgfb, nseta, nsetb)
...
subroutine, public rs_copy_to_buffer(src_matrices, dest_buffer, task_list)
Copies the DBCSR blocks into buffer, replaces rs_scatter_matrix for non-distributed grids.
subroutine, public serialize_task(task, serialized_task)
Serialize a task into an integer array. Used for MPI communication.
subroutine, public deserialize_task(task, serialized_task)
De-serialize a task from an integer array. Used for MPI communication.
subroutine, public reallocate_tasks(tasks, new_size)
Grow an array of tasks while preserving the existing entries.
integer, parameter, public task_size_in_int8
All kinds of helpful little routines.
Type defining parameters related to the simulation cell.
Contains information about kpoints.
Container for different pw related things
Provides all information about a quickstep kind.
calculation environment to calculate the ks matrix, holds all the needed vars. assumes that the core ...