28 USE dbcsr_api,
ONLY: dbcsr_convert_sizes_to_offsets, &
72#include "./base/base_uses.f90"
77 LOGICAL,
PRIVATE,
PARAMETER :: debug_this_module = .false.
81 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'task_list_methods'
117 reorder_rs_grid_ranks, skip_load_balance_distributed, &
118 soft_valid, basis_type, pw_env_external, sab_orb_external)
122 LOGICAL,
INTENT(IN) :: reorder_rs_grid_ranks, &
123 skip_load_balance_distributed
124 LOGICAL,
INTENT(IN),
OPTIONAL :: soft_valid
125 CHARACTER(LEN=*),
INTENT(IN),
OPTIONAL :: basis_type
126 TYPE(
pw_env_type),
OPTIONAL,
POINTER :: pw_env_external
128 OPTIONAL,
POINTER :: sab_orb_external
130 CHARACTER(LEN=*),
PARAMETER :: routinen =
'generate_qs_task_list'
131 INTEGER,
PARAMETER :: max_tasks = 2000
133 CHARACTER(LEN=default_string_length) :: my_basis_type
134 INTEGER :: cindex, curr_tasks, handle, i, iatom, iatom_old, igrid_level, igrid_level_old, &
135 ikind, ilevel, img, img_old, ipair, ipgf, iset, itask, jatom, jatom_old, jkind, jpgf, &
136 jset, maxpgf, maxset, natoms, nimages, nkind, nseta, nsetb, slot
137 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :) :: blocks
138 INTEGER,
DIMENSION(3) :: cellind
139 INTEGER,
DIMENSION(:),
POINTER :: la_max, la_min, lb_max, lb_min, npgfa, &
141 INTEGER,
DIMENSION(:, :, :),
POINTER :: cell_to_index
142 LOGICAL :: dokp, my_soft
143 REAL(kind=
dp) :: kind_radius_a, kind_radius_b
144 REAL(kind=
dp),
DIMENSION(3) :: ra, rab
145 REAL(kind=
dp),
DIMENSION(:),
POINTER :: set_radius_a, set_radius_b
146 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: rpgfa, rpgfb, zeta, zetb
158 TYPE(
qs_kind_type),
DIMENSION(:),
POINTER :: qs_kind_set
163 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
165 CALL timeset(routinen, handle)
168 qs_kind_set=qs_kind_set, &
170 particle_set=particle_set, &
171 dft_control=dft_control)
177 IF (
PRESENT(soft_valid)) my_soft = soft_valid
178 IF (
PRESENT(basis_type))
THEN
179 cpassert(.NOT. my_soft)
180 my_basis_type = basis_type
181 ELSEIF (my_soft)
THEN
182 my_basis_type =
"ORB_SOFT"
184 my_basis_type =
"ORB"
188 IF (
PRESENT(sab_orb_external)) sab_orb => sab_orb_external
191 IF (
PRESENT(pw_env_external)) pw_env => pw_env_external
192 CALL pw_env_get(pw_env, rs_descs=rs_descs, rs_grids=rs_grids)
195 gridlevel_info => pw_env%gridlevel_info
196 cube_info => pw_env%cube_info
199 nkind =
SIZE(qs_kind_set)
200 natoms =
SIZE(particle_set)
204 qs_kind => qs_kind_set(ikind)
206 basis_set=orb_basis_set, basis_type=my_basis_type)
208 IF (.NOT.
ASSOCIATED(orb_basis_set)) cycle
211 maxset = max(nseta, maxset)
212 maxpgf = max(maxval(npgfa), maxpgf)
216 nimages = dft_control%nimages
217 IF (nimages > 1)
THEN
220 CALL get_ks_env(ks_env=ks_env, kpoints=kpoints)
224 NULLIFY (cell_to_index)
228 IF (
ASSOCIATED(task_list%atom_pair_send))
DEALLOCATE (task_list%atom_pair_send)
229 IF (
ASSOCIATED(task_list%atom_pair_recv))
DEALLOCATE (task_list%atom_pair_recv)
232 IF (.NOT.
ASSOCIATED(task_list%tasks))
THEN
236 curr_tasks =
SIZE(task_list%tasks)
238 ALLOCATE (basis_set_list(nkind))
240 qs_kind => qs_kind_set(ikind)
241 CALL get_qs_kind(qs_kind=qs_kind, basis_set=basis_set_a, &
242 basis_type=my_basis_type)
243 IF (
ASSOCIATED(basis_set_a))
THEN
244 basis_set_list(ikind)%gto_basis_set => basis_set_a
246 NULLIFY (basis_set_list(ikind)%gto_basis_set)
258 DO slot = 1, sab_orb(1)%nl_size
259 ikind = sab_orb(1)%nlist_task(slot)%ikind
260 jkind = sab_orb(1)%nlist_task(slot)%jkind
261 iatom = sab_orb(1)%nlist_task(slot)%iatom
262 jatom = sab_orb(1)%nlist_task(slot)%jatom
263 rab(1:3) = sab_orb(1)%nlist_task(slot)%r(1:3)
264 cellind(1:3) = sab_orb(1)%nlist_task(slot)%cell(1:3)
266 basis_set_a => basis_set_list(ikind)%gto_basis_set
267 IF (.NOT.
ASSOCIATED(basis_set_a)) cycle
268 basis_set_b => basis_set_list(jkind)%gto_basis_set
269 IF (.NOT.
ASSOCIATED(basis_set_b)) cycle
270 ra(:) =
pbc(particle_set(iatom)%r, cell)
272 la_max => basis_set_a%lmax
273 la_min => basis_set_a%lmin
274 npgfa => basis_set_a%npgf
275 nseta = basis_set_a%nset
276 rpgfa => basis_set_a%pgf_radius
277 set_radius_a => basis_set_a%set_radius
278 kind_radius_a = basis_set_a%kind_radius
279 zeta => basis_set_a%zet
281 lb_max => basis_set_b%lmax
282 lb_min => basis_set_b%lmin
283 npgfb => basis_set_b%npgf
284 nsetb = basis_set_b%nset
285 rpgfb => basis_set_b%pgf_radius
286 set_radius_b => basis_set_b%set_radius
287 kind_radius_b = basis_set_b%kind_radius
288 zetb => basis_set_b%zet
291 cindex = cell_to_index(cellind(1), cellind(2), cellind(3))
297 rs_descs, dft_control, cube_info, gridlevel_info, cindex, &
298 iatom, jatom, rpgfa, rpgfb, zeta, zetb, kind_radius_b, &
299 set_radius_a, set_radius_b, ra, rab, &
300 la_max, la_min, lb_max, lb_min, npgfa, npgfb, nseta, nsetb)
307 rs_descs=rs_descs, ntasks=task_list%ntasks, natoms=natoms, &
308 tasks=task_list%tasks, atom_pair_send=task_list%atom_pair_send, &
309 atom_pair_recv=task_list%atom_pair_recv, symmetric=.true., &
310 reorder_rs_grid_ranks=reorder_rs_grid_ranks, &
311 skip_load_balance_distributed=skip_load_balance_distributed)
314 ALLOCATE (nsgf(natoms))
315 CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_list, nsgf=nsgf)
316 IF (
ASSOCIATED(task_list%atom_pair_send))
THEN
318 CALL rs_calc_offsets(pairs=task_list%atom_pair_send, &
320 group_size=rs_descs(1)%rs_desc%group_size, &
321 pair_offsets=task_list%pair_offsets_send, &
322 rank_offsets=task_list%rank_offsets_send, &
323 rank_sizes=task_list%rank_sizes_send, &
324 buffer_size=task_list%buffer_size_send)
326 CALL rs_calc_offsets(pairs=task_list%atom_pair_recv, &
328 group_size=rs_descs(1)%rs_desc%group_size, &
329 pair_offsets=task_list%pair_offsets_recv, &
330 rank_offsets=task_list%rank_offsets_recv, &
331 rank_sizes=task_list%rank_sizes_recv, &
332 buffer_size=task_list%buffer_size_recv)
333 DEALLOCATE (basis_set_list, nsgf)
336 IF (reorder_rs_grid_ranks)
THEN
337 DO i = 1, gridlevel_info%ngrid_levels
338 IF (rs_descs(i)%rs_desc%distributed)
THEN
345 CALL create_grid_task_list(task_list=task_list, &
346 qs_kind_set=qs_kind_set, &
347 particle_set=particle_set, &
349 basis_type=my_basis_type, &
355 IF (
ASSOCIATED(task_list%taskstart))
THEN
356 DEALLOCATE (task_list%taskstart)
358 IF (
ASSOCIATED(task_list%taskstop))
THEN
359 DEALLOCATE (task_list%taskstop)
361 IF (
ASSOCIATED(task_list%npairs))
THEN
362 DEALLOCATE (task_list%npairs)
367 ALLOCATE (task_list%npairs(
SIZE(rs_descs)))
369 iatom_old = -1; jatom_old = -1; igrid_level_old = -1; img_old = -1
373 DO i = 1, task_list%ntasks
374 igrid_level = task_list%tasks(i)%grid_level
375 img = task_list%tasks(i)%image
376 iatom = task_list%tasks(i)%iatom
377 jatom = task_list%tasks(i)%jatom
378 iset = task_list%tasks(i)%iset
379 jset = task_list%tasks(i)%jset
380 ipgf = task_list%tasks(i)%ipgf
381 jpgf = task_list%tasks(i)%jpgf
382 IF (igrid_level .NE. igrid_level_old)
THEN
383 IF (igrid_level_old .NE. -1)
THEN
384 task_list%npairs(igrid_level_old) = ipair
387 igrid_level_old = igrid_level
391 ELSE IF (iatom .NE. iatom_old .OR. jatom .NE. jatom_old .OR. img .NE. img_old)
THEN
399 IF (task_list%ntasks /= 0)
THEN
400 task_list%npairs(igrid_level) = ipair
407 ALLOCATE (task_list%taskstart(maxval(task_list%npairs),
SIZE(rs_descs)))
408 ALLOCATE (task_list%taskstop(maxval(task_list%npairs),
SIZE(rs_descs)))
410 iatom_old = -1; jatom_old = -1; igrid_level_old = -1; img_old = -1
412 task_list%taskstart = 0
413 task_list%taskstop = 0
415 DO i = 1, task_list%ntasks
416 igrid_level = task_list%tasks(i)%grid_level
417 img = task_list%tasks(i)%image
418 iatom = task_list%tasks(i)%iatom
419 jatom = task_list%tasks(i)%jatom
420 iset = task_list%tasks(i)%iset
421 jset = task_list%tasks(i)%jset
422 ipgf = task_list%tasks(i)%ipgf
423 jpgf = task_list%tasks(i)%jpgf
424 IF (igrid_level .NE. igrid_level_old)
THEN
425 IF (igrid_level_old .NE. -1)
THEN
426 task_list%taskstop(ipair, igrid_level_old) = i - 1
429 task_list%taskstart(ipair, igrid_level) = i
430 igrid_level_old = igrid_level
434 ELSE IF (iatom .NE. iatom_old .OR. jatom .NE. jatom_old .OR. img .NE. img_old)
THEN
436 task_list%taskstart(ipair, igrid_level) = i
437 task_list%taskstop(ipair - 1, igrid_level) = i - 1
444 IF (task_list%ntasks /= 0)
THEN
445 task_list%taskstop(ipair, igrid_level) = task_list%ntasks
449 IF (debug_this_module)
THEN
450 tasks => task_list%tasks
452 WRITE (6, *)
"Total number of tasks ", task_list%ntasks
453 DO igrid_level = 1, gridlevel_info%ngrid_levels
454 WRITE (6, *)
"Total number of pairs(grid_level) ", &
455 igrid_level, task_list%npairs(igrid_level)
459 DO igrid_level = 1, gridlevel_info%ngrid_levels
461 ALLOCATE (blocks(natoms, natoms, nimages))
463 DO ipair = 1, task_list%npairs(igrid_level)
464 itask = task_list%taskstart(ipair, igrid_level)
465 ilevel = task_list%tasks(itask)%grid_level
466 img = task_list%tasks(itask)%image
467 iatom = task_list%tasks(itask)%iatom
468 jatom = task_list%tasks(itask)%jatom
469 iset = task_list%tasks(itask)%iset
470 jset = task_list%tasks(itask)%jset
471 ipgf = task_list%tasks(itask)%ipgf
472 jpgf = task_list%tasks(itask)%jpgf
473 IF (blocks(iatom, jatom, img) == -1 .AND. blocks(jatom, iatom, img) == -1)
THEN
474 blocks(iatom, jatom, img) = 1
475 blocks(jatom, iatom, img) = 1
477 WRITE (6, *)
"TASK LIST CONFLICT IN PAIR ", ipair
478 WRITE (6, *)
"Reuse of iatom, jatom, image ", iatom, jatom, img
484 DO itask = task_list%taskstart(ipair, igrid_level), task_list%taskstop(ipair, igrid_level)
485 ilevel = task_list%tasks(itask)%grid_level
486 img = task_list%tasks(itask)%image
487 iatom = task_list%tasks(itask)%iatom
488 jatom = task_list%tasks(itask)%jatom
489 iset = task_list%tasks(itask)%iset
490 jset = task_list%tasks(itask)%jset
491 ipgf = task_list%tasks(itask)%ipgf
492 jpgf = task_list%tasks(itask)%jpgf
493 IF (iatom /= iatom_old .OR. jatom /= jatom_old .OR. img /= img_old)
THEN
494 WRITE (6, *)
"TASK LIST CONFLICT IN TASK ", itask
495 WRITE (6, *)
"Inconsistent iatom, jatom, image ", iatom, jatom, img
496 WRITE (6, *)
"Should be iatom, jatom, image ", iatom_old, jatom_old, img_old
507 CALL timestop(handle)
!> \brief Copies the per-task fields of task_list%tasks into flat arrays and
!>        passes them, with atom positions, atom kinds and the per-kind grid
!>        basis sets, to the call that fills task_list%grid_task_list.
!> \param task_list    reads ntasks, tasks and pair_offsets_recv; grid_basis_sets
!>                     and grid_task_list are handed on to the calls below
!> \param qs_kind_set  its size gives the number of kinds
!> \param particle_set its size gives the number of atoms
!> \param cell         handed to pbc together with each atom position
!> \param basis_type   forwarded to get_qs_kind
!> \param rs_grids     NOTE(review): not referenced in the visible lines - confirm
!> \note NOTE(review): this excerpt omits several statements of the routine
!>       (loop headers, END IF/END DO, call heads and some declarations).
515 SUBROUTINE create_grid_task_list(task_list, qs_kind_set, particle_set, cell, basis_type, rs_grids)
517 TYPE(
qs_kind_type),
DIMENSION(:),
POINTER :: qs_kind_set
520 CHARACTER(LEN=default_string_length) :: basis_type
524 INTEGER :: nset, natoms, nkinds, ntasks, &
525 ikind, iatom, itask, nsgf
! Flat per-task lists; all of them are allocated with ntasks entries further down.
526 INTEGER,
DIMENSION(:),
ALLOCATABLE :: atom_kinds, level_list, iatom_list, jatom_list, &
527 iset_list, jset_list, ipgf_list, jpgf_list, &
528 border_mask_list, block_num_list
529 REAL(kind=
dp),
DIMENSION(:),
ALLOCATABLE :: radius_list
530 REAL(kind=
dp),
DIMENSION(:, :),
ALLOCATABLE :: rab_list, atom_positions
531 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
532 INTEGER,
DIMENSION(:, :),
POINTER :: first_sgf
533 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: sphi, zet
534 INTEGER,
DIMENSION(:),
POINTER :: lmax, lmin, npgf, nsgf_set
! Problem sizes come from the array extents and from the task list itself.
536 nkinds =
SIZE(qs_kind_set)
537 natoms =
SIZE(particle_set)
538 ntasks = task_list%ntasks
539 tasks => task_list%tasks
! The per-kind grid basis sets are allocated only when not yet associated.
541 IF (.NOT.
ASSOCIATED(task_list%grid_basis_sets))
THEN
543 ALLOCATE (task_list%grid_basis_sets(nkinds))
! Fetch the basis set of the requested type for kind ikind (loop header over ikind not visible).
! NOTE(review): the heads of the calls that own the keyword arguments on the
! following fragments are not visible; the last one targets grid_basis_sets(ikind).
545 CALL get_qs_kind(qs_kind_set(ikind), basis_type=basis_type, basis_set=orb_basis_set)
551 first_sgf=first_sgf, &
558 maxco=
SIZE(sphi, 1), &
559 maxpgf=
SIZE(zet, 1), &
564 first_sgf=first_sgf, &
567 basis_set=task_list%grid_basis_sets(ikind))
! Record for every atom its kind number and the result of pbc applied to its position
! (loop header over iatom not visible).
572 ALLOCATE (atom_kinds(natoms), atom_positions(3, natoms))
574 atom_kinds(iatom) = particle_set(iatom)%atomic_kind%kind_number
575 atom_positions(:, iatom) =
pbc(particle_set(iatom)%r, cell)
! One entry per task in each flat list (three components per task for rab_list).
578 ALLOCATE (level_list(ntasks), iatom_list(ntasks), jatom_list(ntasks))
579 ALLOCATE (iset_list(ntasks), jset_list(ntasks), ipgf_list(ntasks), jpgf_list(ntasks))
580 ALLOCATE (border_mask_list(ntasks), block_num_list(ntasks))
581 ALLOCATE (radius_list(ntasks), rab_list(3, ntasks))
! Copy the fields of task itask into the flat lists (loop header over itask not visible).
584 level_list(itask) = tasks(itask)%grid_level
585 iatom_list(itask) = tasks(itask)%iatom
586 jatom_list(itask) = tasks(itask)%jatom
587 iset_list(itask) = tasks(itask)%iset
588 jset_list(itask) = tasks(itask)%jset
589 ipgf_list(itask) = tasks(itask)%ipgf
590 jpgf_list(itask) = tasks(itask)%jpgf
! For dist_type == 2 the mask keeps the low six bits (63) of the bitwise complement
! of subpatch_pattern. The zero assignment presumably sits in the ELSE branch,
! which is not visible in this excerpt.
591 IF (tasks(itask)%dist_type == 2)
THEN
592 border_mask_list(itask) = iand(63, not(tasks(itask)%subpatch_pattern))
594 border_mask_list(itask) = 0
596 block_num_list(itask) = tasks(itask)%pair_index
597 radius_list(itask) = tasks(itask)%radius
598 rab_list(:, itask) = tasks(itask)%rab(:)
! Keyword arguments of the call that receives task_list%grid_task_list; the call
! head and a few of its arguments are not visible in this excerpt.
604 nblocks=
SIZE(task_list%pair_offsets_recv), &
605 block_offsets=task_list%pair_offsets_recv, &
606 atom_positions=atom_positions, &
607 atom_kinds=atom_kinds, &
608 basis_sets=task_list%grid_basis_sets, &
609 level_list=level_list, &
610 iatom_list=iatom_list, &
611 jatom_list=jatom_list, &
612 iset_list=iset_list, &
613 jset_list=jset_list, &
614 ipgf_list=ipgf_list, &
615 jpgf_list=jpgf_list, &
616 border_mask_list=border_mask_list, &
617 block_num_list=block_num_list, &
618 radius_list=radius_list, &
621 task_list=task_list%grid_task_list)
626 END SUBROUTINE create_grid_task_list
661 cube_info, gridlevel_info, cindex, &
662 iatom, jatom, rpgfa, rpgfb, zeta, zetb, kind_radius_b, set_radius_a, set_radius_b, ra, rab, &
663 la_max, la_min, lb_max, lb_min, npgfa, npgfb, nseta, nsetb)
665 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
666 INTEGER :: ntasks, curr_tasks
672 INTEGER :: cindex, iatom, jatom
673 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: rpgfa, rpgfb, zeta, zetb
674 REAL(kind=
dp) :: kind_radius_b
675 REAL(kind=
dp),
DIMENSION(:),
POINTER :: set_radius_a, set_radius_b
676 REAL(kind=
dp),
DIMENSION(3) :: ra, rab
677 INTEGER,
DIMENSION(:),
POINTER :: la_max, la_min, lb_max, lb_min, npgfa, &
679 INTEGER :: nseta, nsetb
681 INTEGER :: cube_center(3), igrid_level, ipgf, iset, &
682 jpgf, jset, lb_cube(3), ub_cube(3)
683 REAL(kind=
dp) :: dab, rab2, radius, zetp
685 rab2 = rab(1)*rab(1) + rab(2)*rab(2) + rab(3)*rab(3)
688 loop_iset:
DO iset = 1, nseta
690 IF (set_radius_a(iset) + kind_radius_b < dab) cycle
692 loop_jset:
DO jset = 1, nsetb
694 IF (set_radius_a(iset) + set_radius_b(jset) < dab) cycle
696 loop_ipgf:
DO ipgf = 1, npgfa(iset)
698 IF (rpgfa(ipgf, iset) + set_radius_b(jset) < dab) cycle
700 loop_jpgf:
DO jpgf = 1, npgfb(jset)
702 IF (rpgfa(ipgf, iset) + rpgfb(jpgf, jset) < dab) cycle
704 zetp = zeta(ipgf, iset) + zetb(jpgf, jset)
707 CALL compute_pgf_properties(cube_center, lb_cube, ub_cube, radius, &
708 rs_descs(igrid_level)%rs_desc, cube_info(igrid_level), &
709 la_max(iset), zeta(ipgf, iset), la_min(iset), &
710 lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
711 ra, rab, rab2, dft_control%qs_control%eps_rho_rspace)
713 CALL pgf_to_tasks(tasks, ntasks, curr_tasks, &
714 rab, cindex, iatom, jatom, iset, jset, ipgf, jpgf, &
715 la_max(iset), lb_max(jset), rs_descs(igrid_level)%rs_desc, &
716 igrid_level, gridlevel_info%ngrid_levels, cube_center, &
717 lb_cube, ub_cube, radius)
!> \brief Returns the grid cube centre, the cube bounds and the radius for a
!>        pair described by the exponents zeta/zetb, the angular momentum
!>        ranges and the positions ra and ra + rab.
!> \param cube_center, lb_cube, ub_cube  outputs: centre index and lower/upper cube bounds
!> \param radius     output radius
!> \param rs_desc    grid descriptor; npts, lb and orthorhombic are read here
!> \param cube_info  passed to return_cube in the orthorhombic case
!> \param la_max, la_min, lb_max, lb_min  angular momentum limits (use not visible here)
!> \param zeta, zetb exponents of the two functions
!> \param ra, rab    position of the first centre and the vector to the second
!> \param rab2       squared length used in the exponential prefactor
!> \param eps        threshold forwarded as eps to the radius computation
!> \note NOTE(review): the assignments of zetp, f, cutoff, radius and the
!>       initial cube_center are not visible in this excerpt.
763 SUBROUTINE compute_pgf_properties(cube_center, lb_cube, ub_cube, radius, &
764 rs_desc, cube_info, la_max, zeta, la_min, lb_max, zetb, lb_min, ra, rab, rab2, eps)
766 INTEGER,
DIMENSION(3),
INTENT(OUT) :: cube_center, lb_cube, ub_cube
767 REAL(kind=
dp),
INTENT(OUT) :: radius
770 INTEGER,
INTENT(IN) :: la_max
771 REAL(kind=
dp),
INTENT(IN) :: zeta
772 INTEGER,
INTENT(IN) :: la_min, lb_max
773 REAL(kind=
dp),
INTENT(IN) :: zetb
774 INTEGER,
INTENT(IN) :: lb_min
775 REAL(kind=
dp),
INTENT(IN) :: ra(3), rab(3), rab2, eps
778 INTEGER,
DIMENSION(:),
POINTER :: sphere_bounds
779 REAL(kind=
dp) :: cutoff, f, prefactor, rb(3), zetp
780 REAL(kind=
dp),
DIMENSION(3) :: rp
! rp lies on the line from ra towards rb at the fraction zetb/zetp; rb = ra + rab.
! presumably zetp = zeta + zetb (its assignment is not visible) - confirm.
785 rp(:) = ra(:) + zetb/zetp*rab(:)
786 rb(:) = ra(:) + rab(:)
! Exponential prefactor from zeta, f and the squared distance rab2.
789 prefactor = exp(-zeta*f*rab2)
! Tail of a statement whose head is not visible; it receives zetp, eps, prefactor and cutoff.
791 zetp=zetp, eps=eps, prefactor=prefactor, cutoff=cutoff)
! Wrap the centre index with modulo npts per dimension, then add the grid lower bound lb.
795 cube_center(:) =
modulo(cube_center(:), rs_desc%npts(:))
796 cube_center(:) = cube_center(:) + rs_desc%lb(:)
! Orthorhombic grids take their cube bounds from return_cube.
798 IF (rs_desc%orthorhombic)
THEN
799 CALL return_cube(cube_info, radius, lb_cube, ub_cube, sphere_bounds)
! Re-centre the bounds around zero from their extent (integer division by two);
! presumably the non-orthorhombic branch, whose ELSE line is not visible.
803 extent(:) = ub_cube(:) - lb_cube(:)
804 lb_cube(:) = -extent(:)/2 - 1
805 ub_cube(:) = extent(:)/2
808 END SUBROUTINE compute_pgf_properties
!> \brief Integer cost estimate computed from the cube bounds, a work fraction
!>        and lmax.
!> \param lb_cube, ub_cube  cube bounds; only their difference is used
!> \param fraction  multiplies the cubic volume term of the model
!> \param lmax      enters linearly in the volume term and as lmax**7
!> \param is_ortho  NOTE(review): not used in the visible lines; presumably
!>                  selects the coefficient set v1..v5 - confirm
!> \return the model value divided by 1000 and rounded up with ceiling
824 INTEGER FUNCTION cost_model(lb_cube, ub_cube, fraction, lmax, is_ortho)
825 INTEGER,
DIMENSION(3),
INTENT(IN) :: lb_cube, ub_cube
826 REAL(kind=
dp),
INTENT(IN) :: fraction
! Model coefficients; their values are assigned in lines not visible here.
831 REAL(kind=
dp) :: v1, v2, v3, v4, v5
! Largest half extent of the cube over the three dimensions (integer division).
833 cmax = maxval(((ub_cube - lb_cube) + 1)/2)
! (lmax + v1)*(cmax + v2)**3*v3*fraction + v4 + v5*lmax**7, scaled by 1/1000, rounded up.
848 cost_model = ceiling(((lmax + v1)*(cmax + v2)**3*v3*fraction + v4 + v5*lmax**7)/1000.0_dp)
850 END FUNCTION cost_model
!> \brief Appends the task(s) for one pair of primitive functions to the task
!>        array and fills their common fields.
!> \param tasks       task array; entries ntasks-added_tasks+1 .. ntasks are written
!> \param ntasks      number of tasks in use (updated)
!> \param curr_tasks  capacity bookkeeping, enlarged when ntasks exceeds it
!> \param rab, cindex, iatom, jatom, iset, jset, ipgf, jpgf  copied into each new task
!> \param la_max, lb_max  summed into lmax for the cost model
!> \param rs_desc     grid descriptor; distributed, my_pos and orthorhombic are read
!> \param igrid_level, n_levels  used for encode_rank and stored as grid_level
!> \param cube_center, lb_cube, ub_cube  passed to rs_find_node and cost_model
!> \param radius      stored in each new task
!> \note NOTE(review): the increment of ntasks, the reallocation call and the
!>       ELSE/END IF lines are not visible in this excerpt.
884 SUBROUTINE pgf_to_tasks(tasks, ntasks, curr_tasks, &
885 rab, cindex, iatom, jatom, iset, jset, ipgf, jpgf, &
886 la_max, lb_max, rs_desc, igrid_level, n_levels, &
887 cube_center, lb_cube, ub_cube, radius)
889 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
890 INTEGER,
INTENT(INOUT) :: ntasks, curr_tasks
891 REAL(kind=
dp),
DIMENSION(3),
INTENT(IN) :: rab
892 INTEGER,
INTENT(IN) :: cindex, iatom, jatom, iset, jset, ipgf, &
895 INTEGER,
INTENT(IN) :: igrid_level, n_levels
896 INTEGER,
DIMENSION(3),
INTENT(IN) :: cube_center, lb_cube, ub_cube
897 REAL(kind=
dp),
INTENT(IN) :: radius
! Growth policy of the task capacity: new = (old + add_tasks)*mult_tasks.
899 INTEGER,
PARAMETER :: add_tasks = 1000
900 REAL(kind=
dp),
PARAMETER :: mult_tasks = 2.0_dp
902 INTEGER :: added_tasks, cost, j, lmax
904 REAL(kind=
dp) :: tfraction
! Enlarge the capacity once ntasks exceeds it.
908 IF (ntasks > curr_tasks)
THEN
909 curr_tasks = int((curr_tasks + add_tasks)*mult_tasks)
! Distributed grids: rs_find_node receives the task array and the added_tasks argument.
914 IF (rs_desc%distributed)
THEN
918 CALL rs_find_node(rs_desc, igrid_level, n_levels, cube_center, &
919 ntasks=ntasks, tasks=tasks, lb_cube=lb_cube, ub_cube=ub_cube, added_tasks=added_tasks)
! Presumably the non-distributed branch (ELSE not visible): the destination is
! encoded from this rank (my_pos), with dist_type 0 and subpatch_pattern 0.
922 tasks(ntasks)%destination = encode_rank(rs_desc%my_pos, igrid_level, n_levels)
923 tasks(ntasks)%dist_type = 0
924 tasks(ntasks)%subpatch_pattern = 0
! lmax is the sum of both maxima; is_ortho requires dist_type 0 or 1 and an orthorhombic grid.
928 lmax = la_max + lb_max
929 is_ortho = (tasks(ntasks)%dist_type == 0 .OR. tasks(ntasks)%dist_type == 1) .AND. rs_desc%orthorhombic
! The work fraction passed to the cost model is split evenly over the added tasks.
932 tfraction = 1.0_dp/added_tasks
934 cost = cost_model(lb_cube, ub_cube, tfraction, lmax, is_ortho)
! Fill the common fields of the last added_tasks entries; the source is always this rank.
936 DO j = 1, added_tasks
937 tasks(ntasks - added_tasks + j)%source = encode_rank(rs_desc%my_pos, igrid_level, n_levels)
938 tasks(ntasks - added_tasks + j)%cost = cost
939 tasks(ntasks - added_tasks + j)%grid_level = igrid_level
940 tasks(ntasks - added_tasks + j)%image = cindex
941 tasks(ntasks - added_tasks + j)%iatom = iatom
942 tasks(ntasks - added_tasks + j)%jatom = jatom
943 tasks(ntasks - added_tasks + j)%iset = iset
944 tasks(ntasks - added_tasks + j)%jset = jset
945 tasks(ntasks - added_tasks + j)%ipgf = ipgf
946 tasks(ntasks - added_tasks + j)%jpgf = jpgf
947 tasks(ntasks - added_tasks + j)%rab = rab
948 tasks(ntasks - added_tasks + j)%radius = radius
951 END SUBROUTINE pgf_to_tasks
!> \brief Drives the load balancing for one grid level in four steps: build the
!>        destination list, accumulate the loads into it, optimise it, then
!>        apply it to the tasks.
!> \param tasks       task array handed to compute_load_list
!> \param ntasks      number of tasks
!> \param rs_descs    grid descriptors; entry 1 supplies group and my_pos
!> \param grid_level  level that is balanced
!> \param natoms      forwarded to compute_load_list
!> \note NOTE(review): the declarations of handle and rs_descs, the initial
!>       state of the list pointer and its release are not visible here.
963 SUBROUTINE load_balance_distributed(tasks, ntasks, rs_descs, grid_level, natoms)
965 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
969 INTEGER :: grid_level, natoms
971 CHARACTER(LEN=*),
PARAMETER :: routinen =
'load_balance_distributed'
974 INTEGER,
DIMENSION(:, :, :),
POINTER ::
list
976 CALL timeset(routinen, handle)
! Step 1: allocate and fill the destination list for this grid level.
981 CALL create_destination_list(
list, rs_descs, grid_level)
! Step 2: with create_list=.true. the task costs are accumulated into the list.
984 CALL compute_load_list(
list, rs_descs, grid_level, tasks, ntasks, natoms, create_list=.true.)
! Step 3: optimise the list using the group and rank of the first descriptor.
987 CALL optimize_load_list(
list, rs_descs(1)%rs_desc%group, rs_descs(1)%rs_desc%my_pos)
! Step 4: with create_list=.false. the task destinations are rewritten from the list.
990 CALL compute_load_list(
list, rs_descs, grid_level, tasks, ntasks, natoms, create_list=.false.)
994 CALL timestop(handle)
996 END SUBROUTINE load_balance_distributed
!> \brief Evens out the loads stored in list_global by iteratively shifting
!>        load along the connections between ranks, then writes the result
!>        back into list_global.
!> \param list_global  list_global(1, idest, icpu) is a destination rank and
!>                     list_global(2, idest, icpu) the load attached to it;
!>                     the third dimension starts at 0. The load entries are
!>                     modified in place.
!> \note NOTE(review): loop headers, ELSE/END IF lines, counter resets and the
!>       declaration of tolerance are not visible in this excerpt.
1005 SUBROUTINE balance_global_list(list_global)
1006 INTEGER,
DIMENSION(:, :, 0:) :: list_global
1008 CHARACTER(LEN=*),
PARAMETER :: routinen =
'balance_global_list'
! Iteration cap and relative convergence threshold of the relaxation.
1009 INTEGER,
PARAMETER :: max_iter = 100
1010 REAL(kind=
dp),
PARAMETER :: tolerance_factor = 0.005_dp
1012 INTEGER :: dest, handle, icpu, idest, iflux, &
1013 ilocal, k, maxdest, ncpu, nflux
1014 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: flux_connections
1015 LOGICAL :: solution_optimal
1016 REAL(kind=
dp) :: average, load_shift, max_load_shift, &
1018 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: load, optimized_flux, optimized_load
1019 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: flux_limits
1021 CALL timeset(routinen, handle)
! Number of ranks and of destination slots per rank, taken from the array extents.
1023 ncpu =
SIZE(list_global, 3)
1024 maxdest =
SIZE(list_global, 2)
1025 ALLOCATE (load(0:ncpu - 1))
1027 ALLOCATE (optimized_load(0:ncpu - 1))
! First pass: count the connections with icpu < dest < ncpu; each one becomes a flux.
1032 DO icpu = 0, ncpu - 1
1033 DO idest = 1, maxdest
1034 dest = list_global(1, idest, icpu)
1035 IF (dest < ncpu .AND. dest > icpu) nflux = nflux + 1
1038 ALLOCATE (optimized_flux(nflux))
1039 ALLOCATE (flux_limits(2, nflux))
1040 ALLOCATE (flux_connections(2, nflux))
! Second pass: the load of a rank is the sum of its load entries. For dest > icpu
! the entry becomes the upper flux limit and the pair (icpu, dest) is recorded.
! For the reverse direction the matching connection is located and the negated
! entry becomes its lower limit.
1045 DO icpu = 0, ncpu - 1
1046 load(icpu) = sum(list_global(2, :, icpu))
1047 DO idest = 1, maxdest
1048 dest = list_global(1, idest, icpu)
1049 IF (dest < ncpu)
THEN
1050 IF (dest .NE. icpu)
THEN
1051 IF (dest > icpu)
THEN
1053 flux_limits(2, nflux) = list_global(2, idest, icpu)
1054 flux_connections(1, nflux) = icpu
1055 flux_connections(2, nflux) = dest
1058 IF (flux_connections(1, iflux) == dest .AND. flux_connections(2, iflux) == icpu)
THEN
1059 flux_limits(1, iflux) = -list_global(2, idest, icpu)
! Start from zero flux on every connection.
1069 solution_optimal = .false.
1070 optimized_flux = 0.0_dp
! The convergence threshold is a fixed fraction of the average load.
1077 average = sum(load)/
SIZE(load)
1078 tolerance = tolerance_factor*average
1080 optimized_load(:) = load
! One relaxation sweep (loop headers not visible): each flux moves half of the load
! difference between its two endpoints, clipped so that the accumulated flux
! stays within flux_limits.
1082 max_load_shift = 0.0_dp
1084 load_shift = (optimized_load(flux_connections(1, iflux)) - optimized_load(flux_connections(2, iflux)))/2
1085 load_shift = max(flux_limits(1, iflux) - optimized_flux(iflux), load_shift)
1086 load_shift = min(flux_limits(2, iflux) - optimized_flux(iflux), load_shift)
1087 max_load_shift = max(abs(load_shift), max_load_shift)
1088 optimized_load(flux_connections(1, iflux)) = optimized_load(flux_connections(1, iflux)) - load_shift
1089 optimized_load(flux_connections(2, iflux)) = optimized_load(flux_connections(2, iflux)) + load_shift
1090 optimized_flux(iflux) = optimized_flux(iflux) + load_shift
! Converged once the largest shift of a sweep drops below the tolerance.
1092 IF (max_load_shift < tolerance)
THEN
1093 solution_optimal = .true.
! Write back: ilocal is the slot in which a rank lists itself as destination.
1101 DO icpu = 0, ncpu - 1
1102 DO idest = 1, maxdest
1103 IF (list_global(1, idest, icpu) == icpu) ilocal = idest
! For dest > icpu a positive flux leaves nint(flux) on the remote slot and returns
! the remainder to the local slot; otherwise the whole remote entry goes local.
! For the reverse direction the sign is mirrored: a positive flux moves the whole
! entry local, otherwise -nint(flux) stays on the remote slot.
! NOTE(review): the ELSE lines separating these cases are not visible - confirm.
1105 DO idest = 1, maxdest
1106 dest = list_global(1, idest, icpu)
1107 IF (dest < ncpu)
THEN
1108 IF (dest .NE. icpu)
THEN
1109 IF (dest > icpu)
THEN
1111 IF (optimized_flux(nflux) > 0)
THEN
1112 list_global(2, ilocal, icpu) = list_global(2, ilocal, icpu) + &
1113 list_global(2, idest, icpu) - nint(optimized_flux(nflux))
1114 list_global(2, idest, icpu) = nint(optimized_flux(nflux))
1116 list_global(2, ilocal, icpu) = list_global(2, ilocal, icpu) + &
1117 list_global(2, idest, icpu)
1118 list_global(2, idest, icpu) = 0
1122 IF (flux_connections(1, iflux) == dest .AND. flux_connections(2, iflux) == icpu)
THEN
1123 IF (optimized_flux(iflux) > 0)
THEN
1124 list_global(2, ilocal, icpu) = list_global(2, ilocal, icpu) + &
1125 list_global(2, idest, icpu)
1126 list_global(2, idest, icpu) = 0
1128 list_global(2, ilocal, icpu) = list_global(2, ilocal, icpu) + &
1129 list_global(2, idest, icpu) + nint(optimized_flux(iflux))
1130 list_global(2, idest, icpu) = -nint(optimized_flux(iflux))
1141 CALL timestop(handle)
1143 END SUBROUTINE balance_global_list
!> \brief Combines the load entries of all ranks, lets the root rank balance
!>        the global list, distributes the result and trims the local load
!>        entries of list accordingly.
!> \param list    list(1, idest, icpu) destinations and list(2, idest, icpu) loads,
!>                third dimension starting at 0; the load entries are updated
!> \param group   communicator object providing sum, bcast and sum_partial
!> \param my_pos  rank of the caller, compared against rank_of_root
!> \note NOTE(review): the declaration of group, the loop header over icpu and
!>       the ELSE/END IF lines are not visible in this excerpt.
1156 SUBROUTINE optimize_load_list(list, group, my_pos)
1157 INTEGER,
DIMENSION(:, :, 0:) ::
list
1159 INTEGER,
INTENT(IN) :: my_pos
1161 CHARACTER(LEN=*),
PARAMETER :: routinen =
'optimize_load_list'
1162 INTEGER,
PARAMETER :: rank_of_root = 0
1164 INTEGER :: handle, icpu, idest, maxdest, ncpu
1165 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: load_all
1166 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: load_partial
1167 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :) :: list_global
1169 CALL timeset(routinen, handle)
1171 ncpu =
SIZE(
list, 3)
1172 maxdest =
SIZE(
list, 2)
! Flatten the load entries of list and pass them to group%sum with the root rank.
1175 ALLOCATE (load_all(maxdest*ncpu))
1176 load_all(:) = reshape(
list(2, :, :), (/maxdest*ncpu/))
1177 CALL group%sum(load_all(:), rank_of_root)
! Only the root assembles the global list (own destinations plus summed loads)
! and balances it; the result is then broadcast from the root.
! Note that list_global is indexed from 1 in its third dimension here.
1180 ALLOCATE (list_global(2, maxdest, ncpu))
1181 IF (rank_of_root .EQ. my_pos)
THEN
1182 list_global(1, :, :) =
list(1, :, :)
1183 list_global(2, :, :) = reshape(load_all, (/maxdest, ncpu/))
1184 CALL balance_global_list(list_global)
1186 CALL group%bcast(list_global, rank_of_root)
! NOTE(review): semantics of sum_partial are not visible here; presumably a
! running sum over the ranks of the group - confirm.
1189 ALLOCATE (load_partial(maxdest, ncpu))
1191 CALL group%sum_partial(reshape(load_all, (/maxdest, ncpu/)), load_partial(:, :))
! Where the partial sum exceeds the balanced value, the local entry keeps only the
! part that still fits below the balanced value, or zero when nothing fits.
! icpu - 1 converts the 1-based index of list_global/load_partial to the 0-based list.
1194 DO idest = 1, maxdest
1197 IF (load_partial(idest, icpu) > list_global(2, idest, icpu))
THEN
1198 IF (load_partial(idest, icpu) -
list(2, idest, icpu - 1) < list_global(2, idest, icpu))
THEN
1199 list(2, idest, icpu - 1) = list_global(2, idest, icpu) &
1200 - (load_partial(idest, icpu) -
list(2, idest, icpu - 1))
1202 list(2, idest, icpu - 1) = 0
1210 DEALLOCATE (load_all)
1211 DEALLOCATE (list_global)
1212 DEALLOCATE (load_partial)
1214 CALL timestop(handle)
1215 END SUBROUTINE optimize_load_list
!> \brief Walks over the tasks in runs that share atom pair and image and, per
!>        task of the requested grid level, either accumulates its cost into
!>        list (create_list true) or rewrites its destination from list
!>        (create_list false).
!> \param list         list(1, li, dest) candidate ranks and list(2, li, dest) loads,
!>                     third dimension starting at 0
!> \param rs_descs     grid descriptors; its size is the level count used by
!>                     encode_rank and decode_rank
!> \param grid_level   only tasks with this grid_level are processed
!> \param tasks        task array; destination may be rewritten
!> \param ntasks       number of tasks
!> \param natoms       NOTE(review): presumably the source of natom8 - assignment not visible
!> \param create_list  selects accumulate mode or apply mode
!> \note NOTE(review): loop headers, CASE labels, ELSE/END IF lines and the
!>       computation of rank for each pattern bit are not visible in this excerpt.
1234 SUBROUTINE compute_load_list(list, rs_descs, grid_level, tasks, ntasks, natoms, create_list)
1235 INTEGER,
DIMENSION(:, :, 0:) ::
list
1238 INTEGER :: grid_level
1239 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1240 INTEGER :: ntasks, natoms
1241 LOGICAL :: create_list
1243 CHARACTER(LEN=*),
PARAMETER :: routinen =
'compute_load_list'
1245 INTEGER :: cost, dest, handle, i, iatom, ilevel, img, img_old, iopt, ipgf, iset, itask, &
1246 itask_start, itask_stop, jatom, jpgf, jset, li, maxdest, ncpu, ndest_pair, nopt, nshort, &
1248 INTEGER(KIND=int_8) :: bit_pattern, ipair, ipair_old, natom8
1249 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: loads
1250 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: all_dests, index
1251 INTEGER,
DIMENSION(6) :: options
1253 CALL timeset(routinen, handle)
! Current loads of all ranks for this grid level, indexed from rank 0.
1255 ALLOCATE (loads(0:rs_descs(grid_level)%rs_desc%group_size - 1))
1256 CALL get_current_loads(loads, rs_descs, grid_level, ntasks, tasks, use_reordered_ranks=.false.)
1258 maxdest =
SIZE(
list, 2)
1259 ncpu =
SIZE(
list, 3)
1264 ipair_old = huge(ipair_old)
! Zero-sized start value so the DEALLOCATE inside the loop is always valid.
1266 ALLOCATE (all_dests(0))
! Locate the next run itask_start..itask_stop of consecutive tasks that share
! atom pair and image (outer loop header not visible).
1272 itask_start = itask_stop + 1
1273 itask_stop = itask_start
1274 IF (itask_stop > ntasks)
EXIT
1275 ilevel = tasks(itask_stop)%grid_level
1276 img_old = tasks(itask_stop)%image
1277 iatom = tasks(itask_stop)%iatom
1278 jatom = tasks(itask_stop)%jatom
1279 iset = tasks(itask_stop)%iset
1280 jset = tasks(itask_stop)%jset
1281 ipgf = tasks(itask_stop)%ipgf
1282 jpgf = tasks(itask_stop)%jpgf
! Single integer key for the atom pair (natom8 is assigned in a line not visible).
1284 ipair_old = (iatom - 1)*natom8 + (jatom - 1)
! Look one task ahead and extend the run while pair key and image are unchanged.
1286 IF (itask_stop + 1 > ntasks)
EXIT
1287 ilevel = tasks(itask_stop + 1)%grid_level
1288 img = tasks(itask_stop + 1)%image
1289 iatom = tasks(itask_stop + 1)%iatom
1290 jatom = tasks(itask_stop + 1)%jatom
1291 iset = tasks(itask_stop + 1)%iset
1292 jset = tasks(itask_stop + 1)%jset
1293 ipgf = tasks(itask_stop + 1)%ipgf
1294 jpgf = tasks(itask_stop + 1)%jpgf
1296 ipair = (iatom - 1)*natom8 + (jatom - 1)
1297 IF (ipair == ipair_old .AND. img == img_old)
THEN
1298 itask_stop = itask_stop + 1
! Number of tasks in the run.
1304 nshort = itask_stop - itask_start + 1
! Collect the decoded destination rank of every task of the run that belongs to
! grid_level; tasks of other levels get huge() so that they sort to the end.
1307 DEALLOCATE (all_dests)
1308 ALLOCATE (all_dests(nshort))
1310 ALLOCATE (index(nshort))
1312 ilevel = tasks(itask_start + i - 1)%grid_level
1313 img = tasks(itask_start + i - 1)%image
1314 iatom = tasks(itask_start + i - 1)%iatom
1315 jatom = tasks(itask_start + i - 1)%jatom
1316 iset = tasks(itask_start + i - 1)%iset
1317 jset = tasks(itask_start + i - 1)%jset
1318 ipgf = tasks(itask_start + i - 1)%ipgf
1319 jpgf = tasks(itask_start + i - 1)%jpgf
1321 IF (ilevel .EQ. grid_level)
THEN
1322 all_dests(i) = decode_rank(tasks(itask_start + i - 1)%destination,
SIZE(rs_descs))
1324 all_dests(i) = huge(all_dests(i))
! Sort, then compact in place to the distinct real destinations; ndest_pair counts them.
1327 CALL sort(all_dests, nshort, index)
1330 IF ((all_dests(ndest_pair) .NE. all_dests(i)) .AND. (all_dests(i) .NE. huge(all_dests(i))))
THEN
1331 ndest_pair = ndest_pair + 1
1332 all_dests(ndest_pair) = all_dests(i)
! Second walk over the run: handle each task of the requested grid level.
1336 DO itask = itask_start, itask_stop
1338 dest = decode_rank(tasks(itask)%destination,
SIZE(rs_descs))
1339 ilevel = tasks(itask)%grid_level
1340 img = tasks(itask)%image
1341 iatom = tasks(itask)%iatom
1342 jatom = tasks(itask)%jatom
1343 iset = tasks(itask)%iset
1344 jset = tasks(itask)%jset
1345 ipgf = tasks(itask)%ipgf
1346 jpgf = tasks(itask)%jpgf
1349 IF (ilevel .NE. grid_level) cycle
1350 ipair = (iatom - 1)*natom8 + (jatom - 1)
1351 cost = int(tasks(itask)%cost)
! The handling depends on dist_type (CASE labels not visible). Each set bit 0..5 of
! subpatch_pattern leads to a candidate rank (computed in lines not visible) that
! is recorded in options only when it already is a destination of this run.
1353 SELECT CASE (tasks(itask)%dist_type)
1355 bit_pattern = tasks(itask)%subpatch_pattern
1357 IF (btest(bit_pattern, 0))
THEN
1359 IF (any(all_dests(1:ndest_pair) .EQ. rank))
THEN
1361 options(nopt) = rank
1364 IF (btest(bit_pattern, 1))
THEN
1366 IF (any(all_dests(1:ndest_pair) .EQ. rank))
THEN
1368 options(nopt) = rank
1371 IF (btest(bit_pattern, 2))
THEN
1373 IF (any(all_dests(1:ndest_pair) .EQ. rank))
THEN
1375 options(nopt) = rank
1378 IF (btest(bit_pattern, 3))
THEN
1380 IF (any(all_dests(1:ndest_pair) .EQ. rank))
THEN
1382 options(nopt) = rank
1385 IF (btest(bit_pattern, 4))
THEN
1387 IF (any(all_dests(1:ndest_pair) .EQ. rank))
THEN
1389 options(nopt) = rank
1392 IF (btest(bit_pattern, 5))
THEN
1394 IF (any(all_dests(1:ndest_pair) .EQ. rank))
THEN
1396 options(nopt) = rank
! Switch to an option whenever its current load is strictly lower than that of rank.
1403 IF (loads(rank) > loads(options(iopt))) rank = options(iopt)
! Slot of the chosen rank in the list column of dest. In accumulate mode the cost is
! added to that slot. In apply mode the task moves to the listed rank while the
! remaining amount in the slot covers its cost and is reduced by it; otherwise,
! or when the slot names dest itself, the task stays at dest.
1408 li = list_index(
list, rank, dest)
1409 IF (create_list)
THEN
1410 list(2, li, dest) =
list(2, li, dest) + cost
1412 IF (
list(1, li, dest) == dest)
THEN
1413 tasks(itask)%destination = encode_rank(dest, ilevel,
SIZE(rs_descs))
1415 IF (
list(2, li, dest) >= cost)
THEN
1416 list(2, li, dest) =
list(2, li, dest) - cost
1417 tasks(itask)%destination = encode_rank(
list(1, li, dest), ilevel,
SIZE(rs_descs))
1419 tasks(itask)%destination = encode_rank(dest, ilevel,
SIZE(rs_descs))
! Same bookkeeping for the slot of dest in its own column; presumably the branch
! for the remaining dist_type values (CASE label not visible) - confirm.
1424 li = list_index(
list, dest, dest)
1425 IF (create_list)
THEN
1426 list(2, li, dest) =
list(2, li, dest) + cost
1428 IF (
list(1, li, dest) == dest)
THEN
1429 tasks(itask)%destination = encode_rank(dest, ilevel,
SIZE(rs_descs))
1431 IF (
list(2, li, dest) >= cost)
THEN
1432 list(2, li, dest) =
list(2, li, dest) - cost
1433 tasks(itask)%destination = encode_rank(
list(1, li, dest), ilevel,
SIZE(rs_descs))
1435 tasks(itask)%destination = encode_rank(dest, ilevel,
SIZE(rs_descs))
1447 CALL timestop(handle)
1449 END SUBROUTINE compute_load_list
!> \brief Returns the index along the second dimension of list, within column
!>        dest, at which the first entry equals rank.
!> \param list  list(1, :, dest) holds the ranks that are searched; the third
!>              dimension starts at 0
!> \param rank  value that is searched for
!> \param dest  column (third index) that is searched
!> \return index of the matching slot
!> \note NOTE(review): the initial value of list_index and the loop header are
!>       not visible here, and no upper bound check is visible either, so the
!>       search presumably relies on rank being present - confirm.
1460 INTEGER FUNCTION list_index(list, rank, dest)
1461 INTEGER,
DIMENSION(:, :, 0:),
INTENT(IN) ::
list
1462 INTEGER,
INTENT(IN) :: rank, dest
! Linear search: leave the loop at the first slot whose rank entry matches.
1466 IF (
list(1, list_index, dest) == rank)
EXIT
1467 list_index = list_index + 1
1469 END FUNCTION list_index
!> \brief Allocates list and fills, for every rank of the group, the sorted set
!>        of distinct destination ranks (index 1) with zeroed loads (index 2).
!> \param list        pointer that must be unassociated on entry; allocated here
!>                    as list(2, ultimate_max, 0:ncpu-1)
!> \param rs_descs    grid descriptors; group_size of the requested level gives ncpu
!> \param grid_level  level whose descriptor is used
!> \note NOTE(review): the value of ultimate_max, the code that fills sublist
!>       for each rank and the loop headers of the compaction are not visible
!>       in this excerpt.
1480 SUBROUTINE create_destination_list(list, rs_descs, grid_level)
1481 INTEGER,
DIMENSION(:, :, :),
POINTER ::
list
1484 INTEGER,
INTENT(IN) :: grid_level
1486 CHARACTER(LEN=*),
PARAMETER :: routinen =
'create_destination_list'
1488 INTEGER :: handle, i, icpu, j, maxcount, ncpu, &
1490 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: index, sublist
1492 CALL timeset(routinen, handle)
! The caller must hand in an unassociated pointer.
1494 cpassert(.NOT.
ASSOCIATED(
list))
1495 ncpu = rs_descs(grid_level)%rs_desc%group_size
! Third dimension is indexed by rank, starting at 0.
1498 ALLOCATE (
list(2, ultimate_max, 0:ncpu - 1))
1500 ALLOCATE (index(ultimate_max))
1501 ALLOCATE (sublist(ultimate_max))
! huge() acts as the filler value for unused slots.
1502 sublist = huge(sublist)
! Per rank: sort the candidate destinations, compact them to distinct values in
! slots 1..j, refill the tail with huge() and store the result with zero loads.
1505 DO icpu = 0, ncpu - 1
1514 CALL sort(sublist, ultimate_max, index)
1517 IF (sublist(i) .NE. sublist(j))
THEN
1519 sublist(j) = sublist(i)
! maxcount tracks the largest number of distinct entries seen over all ranks.
1522 maxcount = max(maxcount, j)
1523 sublist(j + 1:ultimate_max) = huge(sublist)
1524 list(1, :, icpu) = sublist
1525 list(2, :, icpu) = 0
1530 CALL timestop(handle)
1532 END SUBROUTINE create_destination_list
!> \brief Gathers into loads one total per rank of the group for the tasks of
!>        one grid level, based on the destination rank of each task.
!> \param loads       output, filled by the final allgather
!> \param rs_descs    grid descriptors; its size is the level count for decode_rank
!> \param grid_level  only tasks with this grid_level contribute
!> \param ntasks      number of tasks
!> \param tasks       task array; grid_level and destination are read
!> \param use_reordered_ranks  when true the decoded destination is mapped
!>                    through virtual2real of the task level descriptor
!> \note NOTE(review): the loop header over the tasks, the initialisation of
!>       send_buf_i and the term added to it (presumably the task cost) are
!>       not visible in this excerpt.
1548 SUBROUTINE get_current_loads(loads, rs_descs, grid_level, ntasks, tasks, use_reordered_ranks)
1549 INTEGER(KIND=int_8),
DIMENSION(:) :: loads
1552 INTEGER :: grid_level, ntasks
1553 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1554 LOGICAL,
INTENT(IN) :: use_reordered_ranks
1556 CHARACTER(LEN=*),
PARAMETER :: routinen =
'get_current_loads'
1558 INTEGER :: handle, i, iatom, ilevel, img, ipgf, &
1559 iset, jatom, jpgf, jset
1560 INTEGER(KIND=int_8) :: total_cost_local
1561 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: recv_buf_i, send_buf_i
1564 CALL timeset(routinen, handle)
1566 desc => rs_descs(grid_level)%rs_desc
! One buffer slot per rank of the group.
1569 ALLOCATE (send_buf_i(desc%group_size))
1570 ALLOCATE (recv_buf_i(desc%group_size))
! Per task (loop header not visible): read its fields and skip other grid levels.
1578 ilevel = tasks(i)%grid_level
1579 img = tasks(i)%image
1580 iatom = tasks(i)%iatom
1581 jatom = tasks(i)%jatom
1582 iset = tasks(i)%iset
1583 jset = tasks(i)%jset
1584 ipgf = tasks(i)%ipgf
1585 jpgf = tasks(i)%jpgf
1586 IF (ilevel .NE. grid_level) cycle
! Accumulate into the slot of the destination rank; + 1 converts the 0-based rank
! to the 1-based buffer index. With reordered ranks the rank is first mapped
! through virtual2real.
1587 IF (use_reordered_ranks)
THEN
1588 send_buf_i(rs_descs(ilevel)%rs_desc%virtual2real(decode_rank(tasks(i)%destination,
SIZE(rs_descs))) + 1) = &
1589 send_buf_i(rs_descs(ilevel)%rs_desc%virtual2real(decode_rank(tasks(i)%destination,
SIZE(rs_descs))) + 1) &
1592 send_buf_i(decode_rank(tasks(i)%destination,
SIZE(rs_descs)) + 1) = &
1593 send_buf_i(decode_rank(tasks(i)%destination,
SIZE(rs_descs)) + 1) &
! Exchange one value with every rank of the group.
1597 CALL desc%group%alltoall(send_buf_i, recv_buf_i, 1)
! The local total is the sum of what all ranks sent to this rank.
1600 total_cost_local = sum(recv_buf_i)
! Collect the local totals of all ranks into loads.
1603 CALL desc%group%allgather(total_cost_local, loads)
1605 CALL timestop(handle)
1607 END SUBROUTINE get_current_loads
!> \brief Moves replicated tasks from overloaded to underloaded ranks.
!>        The loads of all grid levels are summed per rank and compared with the average.
!>        Tasks with dist_type 0 whose decoded destination rank equals their decoded source
!>        rank are the only ones considered; on an overloaded rank their destination is
!>        rewritten to point at one of the underloaded ranks.
!> \param rs_descs realspace grid descriptors (entry 1 supplies the communicator)
!> \param ntasks number of tasks (NOTE(review): the task loop headers are not visible here)
!> \param tasks task array; %destination of re-assigned tasks is modified
!> \note NOTE(review): this listing omits lines (initialisations, loop headers, END IF/DO);
!>       comments below describe the visible statements only.
1619 SUBROUTINE load_balance_replicated(rs_descs, ntasks, tasks)
1624 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1626 CHARACTER(LEN=*),
PARAMETER :: routinen =
'load_balance_replicated'
1628 INTEGER :: handle, i, iatom, ilevel, img, ipgf, &
1629 iset, j, jatom, jpgf, jset, &
1630 no_overloaded, no_underloaded, &
1632 INTEGER(KIND=int_8) :: average_cost, cost_task_rep, count, &
1633 offset, total_cost_global
1634 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: load_imbalance, loads, recv_buf_i
1635 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: index
1638 CALL timeset(routinen, handle)
1640 desc => rs_descs(1)%rs_desc
1643 ALLOCATE (recv_buf_i(desc%group_size))
1644 ALLOCATE (loads(desc%group_size))
! Accumulate the per-rank load over all grid levels into recv_buf_i.
1647 DO i = 1,
SIZE(rs_descs)
1648 CALL get_current_loads(loads, rs_descs, i, ntasks, tasks, use_reordered_ranks=.true.)
1649 recv_buf_i(:) = recv_buf_i + loads
1652 total_cost_global = sum(recv_buf_i)
! Integer division: the average is truncated.
1653 average_cost = total_cost_global/desc%group_size
1661 ALLOCATE (load_imbalance(desc%group_size))
1662 ALLOCATE (index(desc%group_size))
! Positive entries are overloaded ranks, negative entries underloaded ones.
1664 load_imbalance(:) = recv_buf_i - average_cost
1668 DO i = 1, desc%group_size
1669 IF (load_imbalance(i) .GT. 0) no_overloaded = no_overloaded + 1
1670 IF (load_imbalance(i) .LT. 0) no_underloaded = no_underloaded + 1
! NOTE(review): presumably sorts ascending and returns the permutation in index,
! which would put underloaded ranks first and overloaded ranks last - confirm.
1675 CALL sort(recv_buf_i,
SIZE(recv_buf_i), index)
! Sum the cost of the tasks that are eligible for moving (dist_type 0, destination == source).
1681 IF (tasks(i)%dist_type .EQ. 0 &
1682 .AND. decode_rank(tasks(i)%destination,
SIZE(rs_descs)) == decode_rank(tasks(i)%source,
SIZE(rs_descs)))
THEN
1683 cost_task_rep = cost_task_rep + tasks(i)%cost
! recv_buf_i is reused: it now holds the movable cost of every rank.
1689 CALL desc%group%allgather(cost_task_rep, recv_buf_i)
1691 DO i = 1, desc%group_size
! An overloaded rank cannot give away more than the cost of its movable tasks.
1693 IF (load_imbalance(i) .GT. 0) &
1694 load_imbalance(i) = min(load_imbalance(i), recv_buf_i(i))
! Only overloaded ranks change their task list.
1703 IF (load_imbalance(desc%my_pos + 1) .GT. 0)
THEN
! Walk the overloaded ranks (the last no_overloaded entries of index) until this rank is found,
! accumulating in offset the surplus of the ranks visited.
1709 DO i = desc%group_size, desc%group_size - no_overloaded + 1, -1
1710 IF (index(i) .EQ. desc%my_pos + 1)
THEN
1713 offset = offset + load_imbalance(index(i))
! huge() means "no receiver found yet".
1718 proc_receiving = huge(proc_receiving)
! Add the (negative) imbalance of the underloaded ranks until the running total is no
! longer positive.
1719 DO i = 1, no_underloaded
1720 offset = offset + load_imbalance(index(i))
1721 IF (offset .LE. 0)
THEN
1731 IF (tasks(j)%dist_type .EQ. 0 &
1732 .AND. decode_rank(tasks(j)%destination,
SIZE(rs_descs)) == decode_rank(tasks(j)%source,
SIZE(rs_descs)))
THEN
! No underloaded rank left to receive work.
1735 IF (proc_receiving .GT. no_underloaded)
EXIT
1737 ilevel = tasks(j)%grid_level
1738 img = tasks(j)%image
1739 iatom = tasks(j)%iatom
1740 jatom = tasks(j)%jatom
1741 iset = tasks(j)%iset
1742 jset = tasks(j)%jset
1743 ipgf = tasks(j)%ipgf
1744 jpgf = tasks(j)%jpgf
! index holds 1-based positions, encode_rank is called with a 0-based rank.
1745 tasks(j)%destination = encode_rank(index(proc_receiving) - 1, ilevel,
SIZE(rs_descs))
1746 offset = offset + tasks(j)%cost
1747 count = count + tasks(j)%cost
! Stop once this rank has handed over its whole surplus.
1748 IF (count .GE. load_imbalance(desc%my_pos + 1))
EXIT
! The current receiver is full: continue with the next underloaded rank.
1749 IF (offset .GT. 0)
THEN
1750 proc_receiving = proc_receiving + 1
1753 IF (proc_receiving .GT. no_underloaded)
EXIT
1754 offset = load_imbalance(index(proc_receiving))
1761 DEALLOCATE (load_imbalance)
1763 CALL timestop(handle)
1765 END SUBROUTINE load_balance_replicated
!> \brief Ships every task to the real rank of its destination and collects the tasks
!>        addressed to this rank in `tasks_recv`.
!>        Step 1 exchanges per-rank counts (alltoall with one element per rank), step 2
!>        builds displacements as running sums, step 3 exchanges the payload with the
!>        variable-size alltoall, step 4 allocates tasks_recv(ntasks_recv).
!> \param rs_descs realspace grid descriptors (entry 1 supplies the communicator)
!> \param ntasks number of local tasks (NOTE(review): the task loops are not visible here)
!> \param tasks local tasks to be sent
!> \param ntasks_recv number of received tasks, used to size tasks_recv
!> \param tasks_recv allocated here
!> \note NOTE(review): the packing/unpacking of task fields into the int_8 buffers happens
!>       on lines missing from this listing.
1779 SUBROUTINE create_local_tasks(rs_descs, ntasks, tasks, ntasks_recv, tasks_recv)
1784 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1785 INTEGER :: ntasks_recv
1786 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks_recv
1788 CHARACTER(LEN=*),
PARAMETER :: routinen =
'create_local_tasks'
1790 INTEGER :: handle, i, j, k, l, rank
1791 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: recv_buf, send_buf
1792 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: recv_disps, recv_sizes, send_disps, &
1796 CALL timeset(routinen, handle)
1798 desc => rs_descs(1)%rs_desc
1801 ALLOCATE (send_sizes(desc%group_size))
1802 ALLOCATE (recv_sizes(desc%group_size))
1803 ALLOCATE (send_disps(desc%group_size))
1804 ALLOCATE (recv_disps(desc%group_size))
! First use of the buffers: one counter per rank.
1805 ALLOCATE (send_buf(desc%group_size))
1806 ALLOCATE (recv_buf(desc%group_size))
! Real rank = virtual2real (of the task's grid level) applied to the decoded destination rank.
1811 rank = rs_descs(decode_level(tasks(i)%destination,
SIZE(rs_descs))) &
1812 %rs_desc%virtual2real(decode_rank(tasks(i)%destination,
SIZE(rs_descs)))
1813 send_buf(rank + 1) = send_buf(rank + 1) + 1
1816 CALL desc%group%alltoall(send_buf, recv_buf, 1)
! Displacements are the running sums of the sizes of the preceding ranks.
1827 DO i = 2, desc%group_size
1830 send_disps(i) = send_disps(i - 1) + send_sizes(i - 1)
1831 recv_disps(i) = recv_disps(i - 1) + recv_sizes(i - 1)
! Second use of the buffers: re-allocate them with the payload size.
1835 DEALLOCATE (send_buf)
1836 DEALLOCATE (recv_buf)
1839 ALLOCATE (send_buf(sum(send_sizes)))
1840 ALLOCATE (recv_buf(sum(recv_sizes)))
1846 i = rs_descs(decode_level(tasks(j)%destination,
SIZE(rs_descs))) &
1847 %rs_desc%virtual2real(decode_rank(tasks(j)%destination,
SIZE(rs_descs))) + 1
! l is the current end of rank i's segment in send_buf.
1848 l = send_disps(i) + send_sizes(i)
1854 CALL desc%group%alltoall(send_buf, send_sizes, send_disps, recv_buf, recv_sizes, recv_disps)
1856 DEALLOCATE (send_buf)
1859 ALLOCATE (tasks_recv(ntasks_recv))
1863 DO i = 1, desc%group_size
1871 DEALLOCATE (recv_buf)
1872 DEALLOCATE (send_sizes)
1873 DEALLOCATE (recv_sizes)
1874 DEALLOCATE (send_disps)
1875 DEALLOCATE (recv_disps)
1877 CALL timestop(handle)
1879 END SUBROUTINE create_local_tasks
1899 tasks, atom_pair_send, atom_pair_recv, &
1900 symmetric, reorder_rs_grid_ranks, skip_load_balance_distributed)
!> \brief Body of the routine that reports itself as 'distribute_tasks' (its SUBROUTINE
!>        line is not part of this listing). It balances the task list over the ranks and
!>        builds the atom-pair lists used for sending and receiving.
!>        Visible flow when at least one grid level is distributed:
!>          1. optionally call load_balance_distributed per distributed level and record
!>             the per-rank loads of every level,
!>          2. if reorder_rs_grid_ranks: for every further distributed level compare the
!>             load gap of the unreordered assignment with the replicated load and, when
!>             the gap is larger, combine the level's loads with the running totals in
!>             opposite sorted order,
!>          3. load_balance_replicated, create_local_tasks, get_atom_pair (send and recv).
!>        Afterwards tasks_recv is sorted with tasks_sort and ntasks is set to ntasks_recv.
!> \param ntasks number of tasks; overwritten with ntasks_recv at the end
!> \param natoms forwarded to load_balance_distributed
!> \param tasks task array; must be associated (asserted)
!> \param atom_pair_send built by get_atom_pair from tasks with send=.true.
!> \param atom_pair_recv built by get_atom_pair from tasks_recv with send=.false.
!> \param symmetric forwarded to get_atom_pair
!> \param reorder_rs_grid_ranks enables step 2
!> \param skip_load_balance_distributed suppresses the load_balance_distributed call
!> \note NOTE(review): ELSE/END IF lines are missing from this listing; the statements from
!>       `ntasks_recv = ntasks` onwards presumably form the branch without distributed
!>       grids - confirm against the full source.
1904 INTEGER :: ntasks, natoms
1905 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1906 TYPE(
atom_pair_type),
DIMENSION(:),
POINTER :: atom_pair_send, atom_pair_recv
1907 LOGICAL,
INTENT(IN) :: symmetric, reorder_rs_grid_ranks, &
1908 skip_load_balance_distributed
1910 CHARACTER(LEN=*),
PARAMETER :: routinen =
'distribute_tasks'
1912 INTEGER :: handle, igrid_level, irank, ntasks_recv
1913 INTEGER(KIND=int_8) :: load_gap, max_load, replicated_load
1914 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: total_loads, total_loads_tmp, trial_loads
1915 INTEGER(KIND=int_8),
DIMENSION(:, :),
POINTER :: loads
1916 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: indices, real2virtual, total_index
1917 LOGICAL :: distributed_grids, fixed_first_grid
1919 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks_recv
1921 CALL timeset(routinen, handle)
1923 cpassert(
ASSOCIATED(tasks))
! Is any grid level distributed?
1926 distributed_grids = .false.
1927 DO igrid_level = 1,
SIZE(rs_descs)
1928 IF (rs_descs(igrid_level)%rs_desc%distributed)
THEN
1929 distributed_grids = .true.
1932 desc => rs_descs(1)%rs_desc
1934 IF (distributed_grids)
THEN
! loads(rank, level) with a 0-based rank index.
1936 ALLOCATE (loads(0:desc%group_size - 1,
SIZE(rs_descs)))
1937 ALLOCATE (total_loads(0:desc%group_size - 1))
! Distributed levels: optionally balance, then record their loads.
1943 DO igrid_level = 1,
SIZE(rs_descs)
1944 IF (rs_descs(igrid_level)%rs_desc%distributed)
THEN
1946 IF (.NOT. skip_load_balance_distributed) &
1947 CALL load_balance_distributed(tasks, ntasks, rs_descs, igrid_level, natoms)
1949 CALL get_current_loads(loads(:, igrid_level), rs_descs, igrid_level, ntasks, &
1950 tasks, use_reordered_ranks=.false.)
1952 total_loads(:) = total_loads + loads(:, igrid_level)
! Replicated levels: only their summed load is needed.
1961 DO igrid_level = 1,
SIZE(rs_descs)
1962 IF (.NOT. rs_descs(igrid_level)%rs_desc%distributed)
THEN
1963 CALL get_current_loads(loads(:, igrid_level), rs_descs, igrid_level, ntasks, &
1964 tasks, use_reordered_ranks=.false.)
1965 replicated_load = replicated_load + sum(loads(:, igrid_level))
1975 IF (reorder_rs_grid_ranks)
THEN
1976 fixed_first_grid = .false.
1977 DO igrid_level = 1,
SIZE(rs_descs)
1978 IF (rs_descs(igrid_level)%rs_desc%distributed)
THEN
! The first distributed level keeps its ordering and seeds total_loads.
1979 IF (fixed_first_grid .EQV. .false.)
THEN
1980 total_loads(:) = loads(:, igrid_level)
1981 fixed_first_grid = .true.
1983 ALLOCATE (trial_loads(0:desc%group_size - 1))
! Loads that would result from adding this level without reordering.
1985 trial_loads(:) = total_loads + loads(:, igrid_level)
1986 max_load = maxval(trial_loads)
! load_gap = total distance of all ranks from the most loaded one.
1988 DO irank = 0, desc%group_size - 1
1989 load_gap = load_gap + max_load - trial_loads(irank)
! Reorder only if the gap exceeds the replicated load by more than 5 %.
1994 IF (load_gap > replicated_load*1.05_dp)
THEN
1996 ALLOCATE (indices(0:desc%group_size - 1))
1997 ALLOCATE (total_index(0:desc%group_size - 1))
1998 ALLOCATE (total_loads_tmp(0:desc%group_size - 1))
1999 ALLOCATE (real2virtual(0:desc%group_size - 1))
! Sort a copy so that total_loads itself keeps its rank order.
2001 total_loads_tmp(:) = total_loads
2002 CALL sort(total_loads_tmp, desc%group_size, total_index)
2003 CALL sort(loads(:, igrid_level), desc%group_size, indices)
! Pair position irank of one sorted sequence with position group_size - irank - 1
! of the other; the "- 1" converts the 1-based sort indices to 0-based ranks.
2007 DO irank = 0, desc%group_size - 1
2008 total_loads(total_index(irank) - 1) = total_loads(total_index(irank) - 1) + &
2009 loads(desc%group_size - irank - 1, igrid_level)
2010 real2virtual(total_index(irank) - 1) = indices(desc%group_size - irank - 1) - 1
2015 DEALLOCATE (indices)
2016 DEALLOCATE (total_index)
2017 DEALLOCATE (total_loads_tmp)
2018 DEALLOCATE (real2virtual)
2020 total_loads(:) = trial_loads
2023 DEALLOCATE (trial_loads)
2031 CALL load_balance_replicated(rs_descs, ntasks, tasks)
2049 CALL create_local_tasks(rs_descs, ntasks, tasks, ntasks_recv, tasks_recv)
2055 CALL get_atom_pair(atom_pair_send, tasks, ntasks=ntasks, send=.true., symmetric=symmetric, rs_descs=rs_descs)
2064 CALL get_atom_pair(atom_pair_recv, tasks_recv, ntasks=ntasks_recv, send=.false., symmetric=symmetric, rs_descs=rs_descs)
2069 DEALLOCATE (total_loads)
2073 ntasks_recv = ntasks
2074 CALL get_atom_pair(atom_pair_recv, tasks_recv, ntasks=ntasks_recv, send=.false., symmetric=symmetric, rs_descs=rs_descs)
! indices is only scratch space for tasks_sort; the permutation is discarded.
2079 ALLOCATE (indices(ntasks_recv))
2080 CALL tasks_sort(tasks_recv, ntasks_recv, indices)
2081 DEALLOCATE (indices)
2088 ntasks = ntasks_recv
2090 CALL timestop(handle)
!> \brief Builds the list of distinct atom pairs referenced by `tasks` and stores in every
!>        task the position of its pair in that list (tasks%pair_index).
!>        One temporary pair is created per task, the pairs are sorted with atom_pair_sort,
!>        runs of equal pairs are collapsed and the first npairs entries are copied out.
!> \param atom_pair result; must not be associated on entry (asserted); allocated with
!>        size 0 when ntasks == 0
!> \param tasks tasks to scan; pair_index is written
!> \param ntasks number of tasks
!> \param send selects how the pair's rank is derived (NOTE(review): the IF/ELSE lines are
!>        missing here; presumably the destination is used when sending and the source
!>        otherwise - confirm)
!> \param symmetric if true, row/col are swapped so that row <= col
!> \param rs_descs realspace grid descriptors, used for virtual2real and the rank decoding
2104 SUBROUTINE get_atom_pair(atom_pair, tasks, ntasks, send, symmetric, rs_descs)
2107 TYPE(
task_type),
DIMENSION(:),
INTENT(INOUT) :: tasks
2108 INTEGER,
INTENT(IN) :: ntasks
2109 LOGICAL,
INTENT(IN) :: send, symmetric
2112 INTEGER :: i, ilevel, iatom, jatom, npairs, virt_rank
2113 INTEGER,
DIMENSION(:),
ALLOCATABLE :: indices
2116 cpassert(.NOT.
ASSOCIATED(atom_pair))
! No tasks: hand back an empty (but associated) list.
2117 IF (ntasks == 0)
THEN
2118 ALLOCATE (atom_pair(0))
2124 ALLOCATE (atom_pair_tmp(ntasks))
2126 atom_pair_tmp(i)%image = tasks(i)%image
2127 iatom = tasks(i)%iatom
2128 jatom = tasks(i)%jatom
! For symmetric matrices always address the block with row <= col.
2129 IF (symmetric .AND. iatom > jatom)
THEN
2131 atom_pair_tmp(i)%row = jatom
2132 atom_pair_tmp(i)%col = iatom
2134 atom_pair_tmp(i)%row = iatom
2135 atom_pair_tmp(i)%col = jatom
! Rank variant 1: real rank of the task's destination.
2141 ilevel = tasks(i)%grid_level
2142 virt_rank = decode_rank(tasks(i)%destination,
SIZE(rs_descs))
2143 atom_pair_tmp(i)%rank = rs_descs(ilevel)%rs_desc%virtual2real(virt_rank)
! Rank variant 2: decoded source rank.
2147 atom_pair_tmp(i)%rank = decode_rank(tasks(i)%source,
SIZE(rs_descs))
! indices(k) is the original task position of the k-th sorted pair.
2152 ALLOCATE (indices(ntasks))
2153 CALL atom_pair_sort(atom_pair_tmp, ntasks, indices)
2155 tasks(indices(1))%pair_index = 1
! A pair that compares greater than its predecessor starts a new distinct entry.
2157 IF (atom_pair_less_than(atom_pair_tmp(i - 1), atom_pair_tmp(i)))
THEN
2159 atom_pair_tmp(npairs) = atom_pair_tmp(i)
2161 tasks(indices(i))%pair_index = npairs
2163 DEALLOCATE (indices)
! Shrink to the number of distinct pairs.
2166 ALLOCATE (atom_pair(npairs))
2167 atom_pair(:) = atom_pair_tmp(:npairs)
2168 DEALLOCATE (atom_pair_tmp)
2170 END SUBROUTINE get_atom_pair
2186 nimages, scatter, hmats)
!> \brief Body of the routine that reports itself as 'rs_distribute_matrix' (its
!>        SUBROUTINE line is not part of this listing). It exchanges matrix blocks between
!>        ranks according to the atom-pair lists.
!>        Visible steps: block sizes are taken from pmats(1); per-rank element counts,
!>        pair counts and their displacements are built for the send and the receive side;
!>        p blocks are packed into send_buf_r; the buffers are exchanged with alltoall;
!>        received data is either inserted as new p blocks (scatter) or added to h blocks
!>        (.NOT. scatter); finally the p matrices are finalized.
!> \param pmats matrices supplying (and, when scattering, receiving) the p blocks
!> \param atom_pair_send pairs this rank sends, addressed by %rank, %row, %col, %image
!> \param atom_pair_recv pairs this rank receives
!> \param scatter direction switch; hmats must be present when it is false (asserted)
!> \param hmats optional matrices whose blocks are accumulated when .NOT. scatter
!> \note NOTE(review): many lines (zero initialisation, OpenMP directives, inner loops,
!>       ELSE/END IF) are missing from this listing; comments cover visible code only.
2190 TYPE(dbcsr_p_type),
DIMENSION(:),
POINTER :: pmats
2191 TYPE(
atom_pair_type),
DIMENSION(:),
POINTER :: atom_pair_send, atom_pair_recv
2194 TYPE(dbcsr_p_type),
DIMENSION(:),
OPTIONAL, &
2197 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rs_distribute_matrix'
2199 INTEGER :: acol, arow, handle, i, img, j, k, l, me, &
2200 nblkcols_total, nblkrows_total, ncol, &
2201 nrow, nthread, nthread_left
2202 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: first_col, first_row, last_col, last_row, recv_disps, &
2203 recv_pair_count, recv_pair_disps, recv_sizes, send_disps, send_pair_count, &
2204 send_pair_disps, send_sizes
2205 INTEGER,
DIMENSION(:),
POINTER :: col_blk_size, row_blk_size
2207 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: recv_buf_r, send_buf_r
2208 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: h_block, p_block
2209 TYPE(dbcsr_type),
POINTER :: hmat, pmat
2214 CALL timeset(routinen, handle)
2216 IF (.NOT. scatter)
THEN
2217 cpassert(
PRESENT(hmats))
2220 desc => rs_descs(1)%rs_desc
! 1-based index of this rank.
2221 me = desc%my_pos + 1
2224 ALLOCATE (send_sizes(desc%group_size))
2225 ALLOCATE (recv_sizes(desc%group_size))
2226 ALLOCATE (send_disps(desc%group_size))
2227 ALLOCATE (recv_disps(desc%group_size))
2228 ALLOCATE (send_pair_count(desc%group_size))
2229 ALLOCATE (recv_pair_count(desc%group_size))
2230 ALLOCATE (send_pair_disps(desc%group_size))
2231 ALLOCATE (recv_pair_disps(desc%group_size))
! The blocking is read from the first matrix only.
2233 pmat => pmats(1)%matrix
2234 CALL dbcsr_get_info(pmat, &
2235 row_blk_size=row_blk_size, &
2236 col_blk_size=col_blk_size, &
2237 nblkrows_total=nblkrows_total, &
2238 nblkcols_total=nblkcols_total)
2239 ALLOCATE (first_row(nblkrows_total), last_row(nblkrows_total), &
2240 first_col(nblkcols_total), last_col(nblkcols_total))
2241 CALL dbcsr_convert_sizes_to_offsets(row_blk_size, first_row, last_row)
2242 CALL dbcsr_convert_sizes_to_offsets(col_blk_size, first_col, last_col)
! Send side: elements and pairs per target rank.
2247 DO i = 1,
SIZE(atom_pair_send)
2248 k = atom_pair_send(i)%rank + 1
2249 arow = atom_pair_send(i)%row
2250 acol = atom_pair_send(i)%col
2251 nrow = last_row(arow) - first_row(arow) + 1
2252 ncol = last_col(acol) - first_col(acol) + 1
2253 send_sizes(k) = send_sizes(k) + nrow*ncol
2254 send_pair_count(k) = send_pair_count(k) + 1
2259 DO i = 2, desc%group_size
2260 send_disps(i) = send_disps(i - 1) + send_sizes(i - 1)
2261 send_pair_disps(i) = send_pair_disps(i - 1) + send_pair_count(i - 1)
2264 ALLOCATE (send_buf_r(sum(send_sizes)))
! Receive side: same bookkeeping.
2270 DO i = 1,
SIZE(atom_pair_recv)
2271 k = atom_pair_recv(i)%rank + 1
2272 arow = atom_pair_recv(i)%row
2273 acol = atom_pair_recv(i)%col
2274 nrow = last_row(arow) - first_row(arow) + 1
2275 ncol = last_col(acol) - first_col(acol) + 1
2276 recv_sizes(k) = recv_sizes(k) + nrow*ncol
2277 recv_pair_count(k) = recv_pair_count(k) + 1
2282 DO i = 2, desc%group_size
2283 recv_disps(i) = recv_disps(i - 1) + recv_sizes(i - 1)
2284 recv_pair_disps(i) = recv_pair_disps(i - 1) + recv_pair_count(i - 1)
2286 ALLOCATE (recv_buf_r(sum(recv_sizes)))
! Pack the p blocks for every other rank; blocks for this rank itself are skipped here.
2305 DO l = 1, desc%group_size
2306 IF (l .EQ. me) cycle
2308 DO i = 1, send_pair_count(l)
2309 arow = atom_pair_send(send_pair_disps(l) + i)%row
2310 acol = atom_pair_send(send_pair_disps(l) + i)%col
2311 img = atom_pair_send(send_pair_disps(l) + i)%image
2312 nrow = last_row(arow) - first_row(arow) + 1
2313 ncol = last_col(acol) - first_col(acol) + 1
2314 pmat => pmats(img)%matrix
2315 CALL dbcsr_get_block_p(matrix=pmat, row=arow, col=acol, block=p_block, found=found)
! Column-major flattening; send_sizes(l) is used as the running fill level of rank l's
! segment (NOTE(review): it is presumably reset on a line not shown - confirm).
2320 send_buf_r(send_disps(l) + send_sizes(l) + j + (k - 1)*nrow) = p_block(j, k)
2323 send_sizes(l) = send_sizes(l) + nrow*ncol
2328 IF (.NOT. scatter)
THEN
2343 CALL desc%group%alltoall(send_buf_r, send_sizes, send_disps, &
2344 recv_buf_r, recv_sizes, recv_disps)
2349 IF (.NOT. scatter)
THEN
! Pairs that stay on this rank: add p to h directly, no communication.
2353 DO i = 1, send_pair_count(me)
2354 arow = atom_pair_send(send_pair_disps(me) + i)%row
2355 acol = atom_pair_send(send_pair_disps(me) + i)%col
2356 img = atom_pair_send(send_pair_disps(me) + i)%image
2357 nrow = last_row(arow) - first_row(arow) + 1
2358 ncol = last_col(acol) - first_col(acol) + 1
2359 hmat => hmats(img)%matrix
2360 pmat => pmats(img)%matrix
2361 CALL dbcsr_get_block_p(matrix=hmat, row=arow, col=acol, block=h_block, found=found)
2363 CALL dbcsr_get_block_p(matrix=pmat, row=arow, col=acol, block=p_block, found=found)
2369 h_block(j, k) = h_block(j, k) + p_block(j, k)
2378 pmat => pmats(img)%matrix
2379 CALL dbcsr_work_create(pmat, work_mutable=.true., &
2380 nblks_guess=
SIZE(atom_pair_recv)/nthread, sizedata_guess=
SIZE(recv_buf_r)/nthread, &
! Unpack what arrived from the other ranks.
2390 DO l = 1, desc%group_size
2391 IF (l .EQ. me) cycle
2393 DO i = 1, recv_pair_count(l)
2394 arow = atom_pair_recv(recv_pair_disps(l) + i)%row
2395 acol = atom_pair_recv(recv_pair_disps(l) + i)%col
2396 img = atom_pair_recv(recv_pair_disps(l) + i)%image
2397 nrow = last_row(arow) - first_row(arow) + 1
2398 ncol = last_col(acol) - first_col(acol) + 1
2399 pmat => pmats(img)%matrix
2401 CALL dbcsr_get_block_p(matrix=pmat, row=arow, col=acol, block=p_block, found=found)
2403 IF (
PRESENT(hmats))
THEN
2404 hmat => hmats(img)%matrix
2405 CALL dbcsr_get_block_p(matrix=hmat, row=arow, col=acol, block=h_block, found=found)
! Scatter: only blocks that do not exist locally yet are inserted.
2409 IF (scatter .AND. .NOT.
ASSOCIATED(p_block))
THEN
2410 CALL dbcsr_put_block(pmat, arow, acol, &
2411 block=recv_buf_r(recv_disps(l) + recv_sizes(l) + 1:recv_disps(l) + recv_sizes(l) + nrow*ncol))
! Gather: accumulate the received values into the h block.
2413 IF (.NOT. scatter)
THEN
2417 h_block(j, k) = h_block(j, k) + recv_buf_r(recv_disps(l) + recv_sizes(l) + j + (k - 1)*nrow)
! recv_sizes(l) serves as the running read position within rank l's segment.
2422 recv_sizes(l) = recv_sizes(l) + nrow*ncol
2444 pmat => pmats(img)%matrix
2445 CALL dbcsr_finalize(pmat)
2450 DEALLOCATE (send_buf_r)
2451 DEALLOCATE (recv_buf_r)
2453 DEALLOCATE (send_sizes)
2454 DEALLOCATE (recv_sizes)
2455 DEALLOCATE (send_disps)
2456 DEALLOCATE (recv_disps)
2457 DEALLOCATE (send_pair_count)
2458 DEALLOCATE (recv_pair_count)
2459 DEALLOCATE (send_pair_disps)
2460 DEALLOCATE (recv_pair_disps)
2462 DEALLOCATE (first_row, last_row, first_col, last_col)
2464 CALL timestop(handle)
!> \brief Computes buffer offsets for a list of atom pairs.
!>        pair_offsets(i) is the number of elements in front of pair i, where a pair
!>        occupies nsgf(arow)*nsgf(acol) elements; buffer_size is the grand total.
!>        rank_offsets/rank_sizes describe the contiguous segment that belongs to each rank.
!> \param pairs atom pairs; their ranks must be non-decreasing (asserted)
!> \param nsgf number of functions per atom, indexed by the pair's row/col
!> \param group_size number of ranks, i.e. the size of rank_offsets and rank_sizes
!> \param pair_offsets (re)allocated with SIZE(pairs) entries; 0-based offsets
!> \param rank_offsets (re)allocated with group_size entries
!> \param rank_sizes (re)allocated with group_size entries
!> \param buffer_size set to the total number of elements
!> \note NOTE(review): the initialisation of total_size, rank_offsets and rank_sizes and
!>       the assignment of arow/acol are on lines missing from this listing.
2472 SUBROUTINE rs_calc_offsets(pairs, nsgf, group_size, &
2473 pair_offsets, rank_offsets, rank_sizes, buffer_size)
2475 INTEGER,
DIMENSION(:),
INTENT(IN) :: nsgf
2476 INTEGER,
INTENT(IN) :: group_size
2477 INTEGER,
DIMENSION(:),
POINTER :: pair_offsets, rank_offsets, rank_sizes
2478 INTEGER,
INTENT(INOUT) :: buffer_size
2480 INTEGER :: acol, arow, i, block_size, total_size, k, prev_k
! Drop results of a previous call before allocating new ones.
2482 IF (
ASSOCIATED(pair_offsets))
DEALLOCATE (pair_offsets)
2483 IF (
ASSOCIATED(rank_offsets))
DEALLOCATE (rank_offsets)
2484 IF (
ASSOCIATED(rank_sizes))
DEALLOCATE (rank_sizes)
2487 ALLOCATE (pair_offsets(
SIZE(pairs)))
! Running sum: the offset of a pair is the total size of all pairs before it.
2489 DO i = 1,
SIZE(pairs)
2490 pair_offsets(i) = total_size
2493 block_size = nsgf(arow)*nsgf(acol)
2494 total_size = total_size + block_size
2496 buffer_size = total_size
2499 ALLOCATE (rank_offsets(group_size))
2500 ALLOCATE (rank_sizes(group_size))
2503 IF (
SIZE(pairs) > 0)
THEN
2504 prev_k = pairs(1)%rank + 1
2505 DO i = 1,
SIZE(pairs)
2506 k = pairs(i)%rank + 1
! The segment computation below relies on the pairs being grouped by rank.
2507 cpassert(k >= prev_k)
! First pair of a new rank: it opens segment k and thereby closes segment prev_k.
2508 IF (k > prev_k)
THEN
2509 rank_offsets(k) = pair_offsets(i)
2510 rank_sizes(prev_k) = rank_offsets(k) - rank_offsets(prev_k)
! The last segment extends to the end of the buffer.
2514 rank_sizes(k) = buffer_size - rank_offsets(k)
2517 END SUBROUTINE rs_calc_offsets
!> \brief Body of the routine that reports itself as 'rs_scatter_matrices' (its SUBROUTINE
!>        line is not part of this listing). The blocks listed in
!>        task_list%atom_pair_send are packed from src_matrices into a temporary send
!>        buffer, which is then exchanged with alltoall into dest_buffer%host_buffer using
!>        the rank sizes/offsets stored in task_list.
!> \param src_matrices matrices to read the blocks from
1 2524 TYPE(dbcsr_p_type),
!> \brief Body of the routine that reports itself as 'rs_gather_matrices' (its SUBROUTINE
!>        line is not part of this listing). It runs the exchange in the opposite
!>        direction to the 'rs_scatter_matrices' body: src_buffer%host_buffer is sent with
!>        the *recv* sizes/offsets, the data arrives in a temporary buffer laid out by the
!>        *send* sizes/offsets and is then unpacked into dest_matrices with
!>        rs_unpack_buffer.
!> \param dest_matrices matrices that receive the unpacked blocks
2560 TYPE(dbcsr_p_type),
DIMENSION(:),
INTENT(INOUT) :: dest_matrices
2564 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rs_gather_matrices'
2567 REAL(kind=
dp),
DIMENSION(:),
ALLOCATABLE :: buffer_send
2569 CALL timeset(routinen, handle)
! Despite its name buffer_send is the receiving side of this exchange.
2572 ALLOCATE (buffer_send(task_list%buffer_size_send))
2575 CALL group%alltoall(src_buffer%host_buffer, task_list%rank_sizes_recv, task_list%rank_offsets_recv, &
2576 buffer_send, task_list%rank_sizes_send, task_list%rank_offsets_send)
2579 cpassert(
ASSOCIATED(task_list%atom_pair_send))
2580 CALL rs_unpack_buffer(src_buffer=buffer_send, &
2581 dest_matrices=dest_matrices, &
2582 atom_pair=task_list%atom_pair_send, &
2583 pair_offsets=task_list%pair_offsets_send)
2585 DEALLOCATE (buffer_send)
2586 CALL timestop(handle)
!> \brief Fragment of a routine whose header is not part of this listing. It packs the
!>        blocks listed in task_list%atom_pair_recv from src_matrices straight into
!>        dest_buffer%host_buffer; no communication call is visible.
!> \param src_matrices matrices to read the blocks from
2595 TYPE(dbcsr_p_type),
DIMENSION(:),
INTENT(IN) :: src_matrices
2599 CALL rs_pack_buffer(src_matrices=src_matrices, &
2600 dest_buffer=dest_buffer%host_buffer, &
2601 atom_pair=task_list%atom_pair_recv, &
2602 pair_offsets=task_list%pair_offsets_recv)
!> \brief Fragment of a routine whose header is not part of this listing. It unpacks
!>        src_buffer%host_buffer into dest_matrices for the blocks listed in
!>        task_list%atom_pair_recv; no communication call is visible.
!> \param dest_matrices matrices that receive the unpacked blocks
2612 TYPE(dbcsr_p_type),
DIMENSION(:),
INTENT(INOUT) :: dest_matrices
2615 CALL rs_unpack_buffer(src_buffer=src_buffer%host_buffer, &
2616 dest_matrices=dest_matrices, &
2617 atom_pair=task_list%atom_pair_recv, &
2618 pair_offsets=task_list%pair_offsets_recv)
!> \brief Copies matrix blocks into a flat buffer.
!>        For every atom pair the block (row, col) of src_matrices(image) is flattened in
!>        array element order and written to dest_buffer(offset + 1 : offset + SIZE(block)).
!> \param src_matrices matrices to read from, indexed by the pair's image
!> \param dest_buffer flat target buffer
!> \param atom_pair pairs to pack (row, col and image are read)
!> \param pair_offsets 0-based start offset of every pair inside dest_buffer
!> \note NOTE(review): how a block that is not found is handled is not visible in this
!>       listing (the line after the lookup is missing).
2626 SUBROUTINE rs_pack_buffer(src_matrices, dest_buffer, atom_pair, pair_offsets)
2627 TYPE(dbcsr_p_type),
DIMENSION(:),
INTENT(IN) :: src_matrices
2628 REAL(kind=
dp),
DIMENSION(:),
INTENT(INOUT) :: dest_buffer
2630 INTEGER,
DIMENSION(:),
INTENT(IN) :: pair_offsets
2632 INTEGER :: acol, arow, img, i, offset, block_size
2634 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: block
2640 DO i = 1,
SIZE(atom_pair)
2641 arow = atom_pair(i)%row
2642 acol = atom_pair(i)%col
2643 img = atom_pair(i)%image
2644 CALL dbcsr_get_block_p(matrix=src_matrices(img)%matrix, row=arow, col=acol, &
2645 block=block, found=found)
2647 block_size =
SIZE(block)
2648 offset = pair_offsets(i)
! RESHAPE to rank 1 keeps the column-major element order of the block.
2649 dest_buffer(offset + 1:offset + block_size) = reshape(block, shape=(/block_size/))
2654 END SUBROUTINE rs_pack_buffer
!> \brief Adds the contents of a flat buffer to matrix blocks.
!>        For every atom pair the slice src_buffer(offset + 1 : offset + nrows*ncols) is
!>        reshaped to the block's shape and ADDED to block (row, col) of
!>        dest_matrices(image); existing values are accumulated, not overwritten.
!>        Each update is guarded by an OpenMP lock selected from the block row.
!> \param src_buffer flat source buffer
!> \param dest_matrices matrices to accumulate into, indexed by the pair's image
!> \param atom_pair pairs to unpack (row, col and image are read)
!> \param pair_offsets 0-based start offset of every pair inside src_buffer
!> \note NOTE(review): the OpenMP directives and the handling of blocks that are not found
!>       are on lines missing from this listing.
2660 SUBROUTINE rs_unpack_buffer(src_buffer, dest_matrices, atom_pair, pair_offsets)
2661 REAL(kind=
dp),
DIMENSION(:),
INTENT(IN) :: src_buffer
2662 TYPE(dbcsr_p_type),
DIMENSION(:),
INTENT(INOUT) :: dest_matrices
2664 INTEGER,
DIMENSION(:),
INTENT(IN) :: pair_offsets
2666 INTEGER :: acol, arow, img, i, offset, &
2667 nrows, ncols, lock_num
2669 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: block
2670 INTEGER(kind=omp_lock_kind),
ALLOCATABLE,
DIMENSION(:) :: locks
! Ten locks per thread; several block rows share one lock (see lock_num below).
2673 ALLOCATE (locks(10*omp_get_max_threads()))
2674 DO i = 1,
SIZE(locks)
2675 CALL omp_init_lock(locks(i))
2682 DO i = 1,
SIZE(atom_pair)
2683 arow = atom_pair(i)%row
2684 acol = atom_pair(i)%col
2685 img = atom_pair(i)%image
2686 CALL dbcsr_get_block_p(matrix=dest_matrices(img)%matrix, row=arow, col=acol, &
2687 block=block, found=found)
2689 nrows =
SIZE(block, 1)
2690 ncols =
SIZE(block, 2)
2691 offset = pair_offsets(i)
! Map the block row onto one of the locks (1-based).
2692 lock_num =
modulo(arow,
SIZE(locks)) + 1
2694 CALL omp_set_lock(locks(lock_num))
2695 block = block + reshape(src_buffer(offset + 1:offset + nrows*ncols), shape=(/nrows, ncols/))
2696 CALL omp_unset_lock(locks(lock_num))
2702 DO i = 1,
SIZE(locks)
2703 CALL omp_destroy_lock(locks(i))
2707 END SUBROUTINE rs_unpack_buffer
!> \brief Determines which rank(s) of a realspace grid must process a cube and writes
!>        destination, dist_type and subpatch_pattern of the affected task entries.
!>        The rank owning cube_center is looked up first. If the cube
!>        [lb_cube, ub_cube] + cube_center lies inside that rank's bounds enlarged by the
!>        border, the task tasks(ntasks) goes there with dist_type 1. Otherwise the range
!>        of process coordinates covering the cube is determined and one task per covered
!>        process is written with dist_type 2.
!> \param rs_desc realspace grid descriptor (coordinate maps, bounds, border, perd, npts)
!> \param igrid_level grid level encoded into the destination
!> \param n_levels number of grid levels, needed by encode_rank
!> \param cube_center grid point at the cube centre
!> \param ntasks index of the current task; increased by added_tasks - 1 in the
!>        multi-process case
!> \param tasks task array (NOTE(review): presumably enlarged when ntasks exceeds its
!>        size; the reallocation call itself is not visible here)
!> \param lb_cube lower cube bounds relative to cube_center
!> \param ub_cube upper cube bounds relative to cube_center
!> \param added_tasks number of processes covered by the cube in the multi-process case
!> \note NOTE(review): loop headers, ELSE branches and several assignments are on lines
!>       missing from this listing.
2725 SUBROUTINE rs_find_node(rs_desc, igrid_level, n_levels, cube_center, ntasks, tasks, &
2726 lb_cube, ub_cube, added_tasks)
2729 INTEGER,
INTENT(IN) :: igrid_level, n_levels
2730 INTEGER,
DIMENSION(3),
INTENT(IN) :: cube_center
2731 INTEGER,
INTENT(INOUT) :: ntasks
2732 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
2733 INTEGER,
DIMENSION(3),
INTENT(IN) :: lb_cube, ub_cube
2734 INTEGER,
INTENT(OUT) :: added_tasks
2736 INTEGER,
PARAMETER :: add_tasks = 1000
2737 REAL(kind=
dp),
PARAMETER :: mult_tasks = 2.0_dp
2739 INTEGER :: bit_index, coord(3), curr_tasks, dest, i, icoord(3), idest, itask, ix, iy, iz, &
2740 lb_coord(3), lb_domain(3), lbc(3), ub_coord(3), ub_domain(3), ubc(3)
2741 INTEGER :: bit_pattern
2742 LOGICAL :: dir_periodic(3)
! Process coordinates and rank that own the cube centre.
2744 coord(1) = rs_desc%x2coord(cube_center(1))
2745 coord(2) = rs_desc%y2coord(cube_center(2))
2746 coord(3) = rs_desc%z2coord(cube_center(3))
2747 dest = rs_desc%coord2rank(coord(1), coord(2), coord(3))
! Absolute cube bounds.
2750 lbc = lb_cube + cube_center
2751 ubc = ub_cube + cube_center
! Case 1: the cube fits into the owner's local grid including its border.
2753 IF (all((rs_desc%lb_global(:, dest) - rs_desc%border) .LE. lbc) .AND. &
2754 all((rs_desc%ub_global(:, dest) + rs_desc%border) .GE. ubc))
THEN
2756 tasks(ntasks)%destination = encode_rank(dest, igrid_level, n_levels)
2757 tasks(ntasks)%dist_type = 1
2758 tasks(ntasks)%subpatch_pattern = 0
! Two bits per direction. Directions with perd == 1 get both bits cleared.
2777 IF (rs_desc%perd(i) == 1)
THEN
2778 bit_pattern = ibclr(bit_pattern, bit_index)
2779 bit_index = bit_index + 1
2780 bit_pattern = ibclr(bit_pattern, bit_index)
2781 bit_index = bit_index + 1
! First bit of the direction: set when the cube's upper bound lies in the lower border region.
2784 IF (ubc(i) <= rs_desc%lb_global(i, dest) - 1 + rs_desc%border)
THEN
2785 bit_pattern = ibset(bit_pattern, bit_index)
2786 bit_index = bit_index + 1
2788 bit_pattern = ibclr(bit_pattern, bit_index)
2789 bit_index = bit_index + 1
! Second bit of the direction: set when the cube's lower bound lies in the upper border region.
2792 IF (lbc(i) >= rs_desc%ub_global(i, dest) + 1 - rs_desc%border)
THEN
2793 bit_pattern = ibset(bit_pattern, bit_index)
2794 bit_index = bit_index + 1
2796 bit_pattern = ibclr(bit_pattern, bit_index)
2797 bit_index = bit_index + 1
2801 tasks(ntasks)%subpatch_pattern = bit_pattern
! Case 2: grow the covered domain process by process until it contains the cube.
2811 lb_domain = rs_desc%lb_global(:, dest) - rs_desc%border
2812 ub_domain = rs_desc%ub_global(:, dest) + rs_desc%border
2815 IF (rs_desc%perd(i) == 0)
THEN
! Extend towards lower coordinates by the extent of the neighbouring process.
2818 IF (lb_domain(i) > lbc(i))
THEN
2819 lb_coord(i) = lb_coord(i) - 1
2820 icoord =
modulo(lb_coord, rs_desc%group_dim)
2821 idest = rs_desc%coord2rank(icoord(1), icoord(2), icoord(3))
2822 lb_domain(i) = lb_domain(i) - (rs_desc%ub_global(i, idest) - rs_desc%lb_global(i, idest) + 1)
! Extend towards higher coordinates likewise.
2829 IF (ub_domain(i) < ubc(i))
THEN
2830 ub_coord(i) = ub_coord(i) + 1
2831 icoord =
modulo(ub_coord, rs_desc%group_dim)
2832 idest = rs_desc%coord2rank(icoord(1), icoord(2), icoord(3))
2833 ub_domain(i) = ub_domain(i) + (rs_desc%ub_global(i, idest) - rs_desc%lb_global(i, idest) + 1)
! The domain spans the whole grid in this direction: flag it as periodic.
2843 IF (ub_domain(i) - lb_domain(i) + 1 >= rs_desc%npts(i))
THEN
2844 dir_periodic(i) = .true.
2846 ub_coord(i) = rs_desc%group_dim(i) - 1
2848 dir_periodic(i) = .false.
! One task per covered process; the current task entry is reused, hence the "- 1".
2852 added_tasks = product(ub_coord - lb_coord + 1)
2854 ntasks = ntasks + added_tasks - 1
2855 IF (ntasks >
SIZE(tasks))
THEN
2856 curr_tasks = int((
SIZE(tasks) + add_tasks)*mult_tasks)
2859 DO iz = lb_coord(3), ub_coord(3)
2860 DO iy = lb_coord(2), ub_coord(2)
2861 DO ix = lb_coord(1), ub_coord(1)
! Coordinates may have left the process grid, so wrap them.
2862 icoord =
modulo((/ix, iy, iz/), rs_desc%group_dim)
2863 idest = rs_desc%coord2rank(icoord(1), icoord(2), icoord(3))
2864 tasks(itask)%destination = encode_rank(idest, igrid_level, n_levels)
2865 tasks(itask)%dist_type = 2
2866 tasks(itask)%subpatch_pattern = 0
! Bits 0..5 flag the lower/upper end of the covered range in x, y, z,
! but only for directions that were not found periodic.
2869 IF (ix == lb_coord(1) .AND. .NOT. dir_periodic(1)) &
2870 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 0)
2871 IF (ix == ub_coord(1) .AND. .NOT. dir_periodic(1)) &
2872 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 1)
2873 IF (iy == lb_coord(2) .AND. .NOT. dir_periodic(2)) &
2874 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 2)
2875 IF (iy == ub_coord(2) .AND. .NOT. dir_periodic(2)) &
2876 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 3)
2877 IF (iz == lb_coord(3) .AND. .NOT. dir_periodic(3)) &
2878 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 4)
2879 IF (iz == ub_coord(3) .AND. .NOT. dir_periodic(3)) &
2880 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 5)
2887 END SUBROUTINE rs_find_node
!> \brief Packs a rank and a grid level into a single integer.
!>        The rank is the most significant part, so encoded values still order by rank
!>        first and by grid level second.
!> \param rank rank to encode
!> \param grid_level grid level to encode; 1-based, stored as grid_level - 1
!> \param n_levels number of grid levels (the radix of the encoding)
!> \return rank*n_levels + grid_level - 1
2901 FUNCTION encode_rank(rank, grid_level, n_levels)
RESULT(encoded_int)
2903 INTEGER,
INTENT(IN) :: rank, grid_level, n_levels
2904 INTEGER :: encoded_int
2908 encoded_int = rank*n_levels + grid_level - 1
!> \brief Recovers the rank from an integer produced by encode_rank.
!> \param encoded_int encoded value, rank*n_levels + grid_level - 1
!> \param n_levels number of grid levels used for the encoding
!> \return encoded_int/n_levels using integer division, which discards the grid-level part
2918 FUNCTION decode_rank(encoded_int, n_levels)
RESULT(rank)
2920 INTEGER,
INTENT(IN) :: encoded_int
2921 INTEGER,
INTENT(IN) :: n_levels
! Both operands are integers, so the division already truncates; INT() changes nothing.
2924 rank = int(encoded_int/n_levels)
!> \brief Recovers the grid level from an integer produced by encode_rank.
!> \param encoded_int encoded value, rank*n_levels + grid_level - 1
!> \param n_levels number of grid levels used for the encoding
!> \return the 1-based grid level, i.e. the remainder of the encoding plus one
function decode_level(encoded_int, n_levels) result(grid_level)
   integer, intent(in) :: encoded_int, n_levels
   integer :: grid_level

   ! The remainder holds grid_level - 1; MODULO of two default integers is already
   ! a default integer, so no conversion is required.
   grid_level = 1 + modulo(encoded_int, n_levels)
end function decode_level
!> \brief Strict ordering of two tasks, used by the task sort routines.
!>        The fields are compared lexicographically in the order grid_level, image, iatom,
!>        jatom, iset, jset, ipgf, jpgf: the first field that differs decides.
!> \param a first task
!> \param b second task
!> \return .TRUE. if a sorts before b; tasks equal in all eight fields compare .FALSE.
2958 PURE FUNCTION tasks_less_than(a, b)
RESULT(res)
2962 IF (a%grid_level /= b%grid_level)
THEN
2963 res = a%grid_level < b%grid_level
2965 ELSE IF (a%image /= b%image)
THEN
2966 res = a%image < b%image
2968 ELSE IF (a%iatom /= b%iatom)
THEN
2969 res = a%iatom < b%iatom
2971 ELSE IF (a%jatom /= b%jatom)
THEN
2972 res = a%jatom < b%jatom
2974 ELSE IF (a%iset /= b%iset)
THEN
2975 res = a%iset < b%iset
2977 ELSE IF (a%jset /= b%jset)
THEN
2978 res = a%jset < b%jset
2980 ELSE IF (a%ipgf /= b%ipgf)
THEN
2981 res = a%ipgf < b%ipgf
! Last key: no inequality test is needed any more.
2984 res = a%jpgf < b%jpgf
2987 END FUNCTION tasks_less_than
!> \brief Sorts an array of tasks and returns the permutation that was applied.
!>        Driver for tasks_sort_low: it allocates the scratch arrays (half the input size,
!>        rounded up), initialises indices to 1..n and starts the recursion.
!> \param arr tasks to sort in place
!> \param n number of elements
!> \param indices on exit the original position of every element of the sorted array
!> \note NOTE(review): the guarding IF and the body of the `ELSE IF (n > 0)` branch are on
!>       lines missing from this listing.
3000 SUBROUTINE tasks_sort(arr, n, indices)
3001 INTEGER,
INTENT(IN) :: n
3002 TYPE(
task_type),
DIMENSION(1:n),
INTENT(INOUT) :: arr
3003 integer,
DIMENSION(1:n),
INTENT(INOUT) :: indices
3006 TYPE(
task_type),
ALLOCATABLE :: tmp_arr(:)
3007 INTEGER,
ALLOCATABLE :: tmp_idx(:)
! The merge step only buffers the first half, so (n + 1)/2 elements suffice.
3011 ALLOCATE (tmp_arr((n + 1)/2), tmp_idx((n + 1)/2))
! Start from the identity permutation.
3013 indices = (/(i, i=1, n)/)
3015 CALL tasks_sort_low(arr(1:n), indices, tmp_arr, tmp_idx)
3017 DEALLOCATE (tmp_arr, tmp_idx)
3018 ELSE IF (n > 0)
THEN
3022 END SUBROUTINE tasks_sort
!> \brief Recursive worker of tasks_sort.
!>        Arrays with at most 7 elements are sorted by repeated passes that swap
!>        neighbouring elements which are out of order, stopping after a pass without any
!>        swap. Longer arrays are split at m = (size + 1)/2, both halves are sorted
!>        recursively and then merged, with the first half buffered in tmp_arr/tmp_idx.
!>        indices is permuted together with arr.
!> \param arr tasks to sort in place
!> \param indices permutation carried along with arr
!> \param tmp_arr scratch space for the first half of arr
!> \param tmp_idx scratch space for the first half of indices
!> \note NOTE(review): the swap statements and most of the merge loop body are on lines
!>       missing from this listing.
3034 RECURSIVE SUBROUTINE tasks_sort_low(arr, indices, tmp_arr, tmp_idx)
3035 TYPE(
task_type),
DIMENSION(:),
INTENT(INOUT) :: arr
3036 INTEGER,
DIMENSION(size(arr)),
INTENT(INOUT) :: indices
3037 TYPE(
task_type),
DIMENSION((size(arr) + 1)/2),
INTENT(INOUT) :: tmp_arr
3038 INTEGER,
DIMENSION((size(arr) + 1)/2),
INTENT(INOUT) :: tmp_idx
3040 INTEGER :: t, m, i, j, k
! Small input: simple exchange sort.
3047 IF (
size(arr) <= 7)
THEN
3048 DO j =
size(arr) - 1, 1, -1
3051 IF (tasks_less_than(arr(i + 1), arr(i)))
THEN
3058 indices(i) = indices(i + 1)
! A pass without swaps means the array is sorted.
3063 IF (.NOT. swapped)
EXIT
! Large input: sort both halves; they can share the scratch arrays because the
! recursive calls run one after the other.
3069 m = (
size(arr) + 1)/2
3070 CALL tasks_sort_low(arr(1:m), indices(1:m), tmp_arr, tmp_idx)
3071 CALL tasks_sort_low(arr(m + 1:), indices(m + 1:), tmp_arr, tmp_idx)
! Merging is only necessary if the halves are not already in order.
3075 IF (tasks_less_than(arr(m + 1), arr(m)))
THEN
3078 tmp_arr(1:m) = arr(1:m)
3079 tmp_idx(1:m) = indices(1:m)
! Take from the second half only when its element is strictly smaller.
3084 DO WHILE (i <= m .and. j <=
size(arr) - m)
3085 IF (tasks_less_than(arr(m + j), tmp_arr(i)))
THEN
3087 indices(k) = indices(m + j)
3091 indices(k) = tmp_idx(i)
3101 indices(k) = tmp_idx(i)
3108 END SUBROUTINE tasks_sort_low
!> \brief Strict ordering of two atom pairs, used by the atom pair sort routines.
!>        The fields are compared in the order rank, row, col, image: the first field that
!>        differs decides.
!> \param a first atom pair
!> \param b second atom pair
!> \return .TRUE. if a sorts before b
!> \note NOTE(review): the result assignments of the row and col branches are on lines
!>       missing from this listing; presumably they mirror the rank and image branches.
3118 PURE FUNCTION atom_pair_less_than(a, b)
RESULT(res)
3122 IF (a%rank /= b%rank)
THEN
3123 res = a%rank < b%rank
3125 ELSE IF (a%row /= b%row)
THEN
3128 ELSE IF (a%col /= b%col)
THEN
3132 res = a%image < b%image
3135 END FUNCTION atom_pair_less_than
!> \brief Sorts an array of atom pairs and returns the permutation that was applied.
!>        Driver for atom_pair_sort_low: it allocates the scratch arrays (half the input
!>        size, rounded up), initialises indices to 1..n and starts the recursion.
!> \param arr atom pairs to sort in place
!> \param n number of elements
!> \param indices on exit the original position of every element of the sorted array
!> \note NOTE(review): the declaration of arr, the guarding IF and the body of the
!>       `ELSE IF (n > 0)` branch are on lines missing from this listing.
3148 SUBROUTINE atom_pair_sort(arr, n, indices)
3149 INTEGER,
INTENT(IN) :: n
3151 integer,
DIMENSION(1:n),
INTENT(INOUT) :: indices
3155 INTEGER,
ALLOCATABLE :: tmp_idx(:)
! The merge step only buffers the first half, so (n + 1)/2 elements suffice.
3159 ALLOCATE (tmp_arr((n + 1)/2), tmp_idx((n + 1)/2))
! Start from the identity permutation.
3161 indices = (/(i, i=1, n)/)
3163 CALL atom_pair_sort_low(arr(1:n), indices, tmp_arr, tmp_idx)
3165 DEALLOCATE (tmp_arr, tmp_idx)
3166 ELSE IF (n > 0)
THEN
3170 END SUBROUTINE atom_pair_sort
!> \brief Recursive worker of atom_pair_sort.
!>        Arrays with at most 7 elements are sorted by repeated passes that swap
!>        neighbouring elements which are out of order, stopping after a pass without any
!>        swap. Longer arrays are split at m = (size + 1)/2, both halves are sorted
!>        recursively and then merged, with the first half buffered in tmp_arr/tmp_idx.
!>        indices is permuted together with arr.
!> \param arr atom pairs to sort in place
!> \param indices permutation carried along with arr
!> \param tmp_arr scratch space for the first half of arr
!> \param tmp_idx scratch space for the first half of indices
!> \note NOTE(review): the declaration of arr, the swap statements and most of the merge
!>       loop body are on lines missing from this listing.
3182 RECURSIVE SUBROUTINE atom_pair_sort_low(arr, indices, tmp_arr, tmp_idx)
3184 INTEGER,
DIMENSION(size(arr)),
INTENT(INOUT) :: indices
3185 TYPE(
atom_pair_type),
DIMENSION((size(arr) + 1)/2),
INTENT(INOUT) :: tmp_arr
3186 INTEGER,
DIMENSION((size(arr) + 1)/2),
INTENT(INOUT) :: tmp_idx
3188 INTEGER :: t, m, i, j, k
! Small input: simple exchange sort.
3195 IF (
size(arr) <= 7)
THEN
3196 DO j =
size(arr) - 1, 1, -1
3199 IF (atom_pair_less_than(arr(i + 1), arr(i)))
THEN
3206 indices(i) = indices(i + 1)
! A pass without swaps means the array is sorted.
3211 IF (.NOT. swapped)
EXIT
! Large input: sort both halves; they can share the scratch arrays because the
! recursive calls run one after the other.
3217 m = (
size(arr) + 1)/2
3218 CALL atom_pair_sort_low(arr(1:m), indices(1:m), tmp_arr, tmp_idx)
3219 CALL atom_pair_sort_low(arr(m + 1:), indices(m + 1:), tmp_arr, tmp_idx)
! Merging is only necessary if the halves are not already in order.
3223 IF (atom_pair_less_than(arr(m + 1), arr(m)))
THEN
3226 tmp_arr(1:m) = arr(1:m)
3227 tmp_idx(1:m) = indices(1:m)
! Take from the second half only when its element is strictly smaller.
3232 DO WHILE (i <= m .and. j <=
size(arr) - m)
3233 IF (atom_pair_less_than(arr(m + j), tmp_arr(i)))
THEN
3235 indices(k) = indices(m + j)
3239 indices(k) = tmp_idx(i)
3249 indices(k) = tmp_idx(i)
3256 END SUBROUTINE atom_pair_sort_low
void grid_create_basis_set(const int nset, const int nsgf, const int maxco, const int maxpgf, const int lmin[nset], const int lmax[nset], const int npgf[nset], const int nsgf_set[nset], const int first_sgf[nset], const double sphi[nsgf][maxco], const double zet[nset][maxpgf], grid_basis_set **basis_set_out)
Allocates a basis set which can be passed to grid_create_task_list. See grid_task_list....
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always returns a positive number. https://gcc....
All kinds of helpful little routines.
real(kind=dp) function, public exp_radius_very_extended(la_min, la_max, lb_min, lb_max, pab, o1, o2, ra, rb, rp, zetp, eps, prefactor, cutoff, epsabs)
computes the radius of the Gaussian outside of which it is smaller than eps
subroutine, public get_gto_basis_set(gto_basis_set, name, aliases, norm_type, kind_radius, ncgf, nset, nsgf, cgf_symbol, sgf_symbol, norm_cgf, set_radius, lmax, lmin, lx, ly, lz, m, ncgf_set, npgf, nsgf_set, nshell, cphi, pgf_radius, sphi, scon, zet, first_cgf, first_sgf, l, last_cgf, last_sgf, n, gcc, maxco, maxl, maxpgf, maxsgf_set, maxshell, maxso, nco_sum, npgf_sum, nshell_sum, maxder, short_kind_radius)
...
Handles all functions related to the CELL.
Defines control structures, which contain the parameters and the settings for the DFT-based calculati...
for a given dr()/dh(r) this will provide the bounds to be used if one wants to go over a sphere-subre...
subroutine, public compute_cube_center(cube_center, rs_desc, zeta, zetb, ra, rab)
unifies the computation of the cube center, so that differences in implementation,...
subroutine, public return_cube(info, radius, lb_cube, ub_cube, sphere_bounds)
...
subroutine, public return_cube_nonortho(info, radius, lb, ub, rp)
...
integer function, public gaussian_gridlevel(gridlevel_info, exponent)
...
Fortran API for the grid package, which is written in C.
subroutine, public grid_create_task_list(ntasks, natoms, nkinds, nblocks, block_offsets, atom_positions, atom_kinds, basis_sets, level_list, iatom_list, jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list, block_num_list, radius_list, rab_list, rs_grids, task_list)
Allocates a task list which can be passed to grid_collocate_task_list.
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
integer, parameter, public default_string_length
Types and basic routines needed for a kpoint calculation.
subroutine, public get_kpoint_info(kpoint, kp_scheme, nkp_grid, kp_shift, symmetry, verbose, full_grid, use_real_wfn, eps_geo, parallel_group_size, kp_range, nkp, xkp, wkp, para_env, blacs_env_all, para_env_kp, para_env_inter_kp, blacs_env, kp_env, kp_aux_env, mpools, iogrp, nkp_groups, kp_dist, cell_to_index, index_to_cell, sab_nl, sab_nl_nosym)
Retrieve information from a kpoint environment.
An array-based list which grows on demand. When the internal array is full, a new array of twice the ...
Utility routines for the memory handling.
Interface to the message passing library MPI.
Fortran API for the offload package, which is written in C.
subroutine, public offload_create_buffer(length, buffer)
Allocates a buffer of the given length, i.e. number of elements.
Define methods related to particle_type.
subroutine, public get_particle_set(particle_set, qs_kind_set, first_sgf, last_sgf, nsgf, nmao, basis)
Get the components of a particle set.
Define the data structure for the particle information.
Container for various plane-wave related things.
subroutine, public pw_env_get(pw_env, pw_pools, cube_info, gridlevel_info, auxbas_pw_pool, auxbas_grid, auxbas_rs_desc, auxbas_rs_grid, rs_descs, rs_grids, xc_pw_pool, vdw_pw_pool, poisson_env, interp_section)
returns the various attributes of the pw env
Define the quickstep kind type and their sub types.
subroutine, public get_qs_kind(qs_kind, basis_set, basis_type, ncgf, nsgf, all_potential, tnadd_potential, gth_potential, sgp_potential, upf_potential, se_parameter, dftb_parameter, xtb_parameter, dftb3_param, zeff, elec_conf, mao, lmax_dftb, alpha_core_charge, ccore_charge, core_charge, core_charge_radius, paw_proj_set, paw_atom, hard_radius, hard0_radius, max_rad_local, covalent_radius, vdw_radius, gpw_r3d_rs_type_forced, harmonics, max_iso_not0, max_s_harm, grid_atom, ngrid_ang, ngrid_rad, lmax_rho0, dft_plus_u_atom, l_of_dft_plus_u, n_of_dft_plus_u, u_minus_j, u_of_dft_plus_u, j_of_dft_plus_u, alpha_of_dft_plus_u, beta_of_dft_plus_u, j0_of_dft_plus_u, occupation_of_dft_plus_u, dispersion, bs_occupation, magnetization, no_optimize, addel, laddel, naddel, orbitals, max_scf, eps_scf, smear, u_ramping, u_minus_j_target, eps_u_ramping, init_u_ramping_each_scf, reltmat, ghost, floating, name, element_symbol, pao_basis_size, pao_potentials, pao_descriptors, nelec)
Get attributes of an atomic kind.
subroutine, public get_ks_env(ks_env, v_hartree_rspace, s_mstruct_changed, rho_changed, potential_changed, forces_up_to_date, complex_ks, matrix_h, matrix_h_im, matrix_ks, matrix_ks_im, matrix_vxc, kinetic, matrix_s, matrix_s_ri_aux, matrix_w, matrix_p_mp2, matrix_p_mp2_admm, matrix_h_kp, matrix_h_im_kp, matrix_ks_kp, matrix_vxc_kp, kinetic_kp, matrix_s_kp, matrix_w_kp, matrix_s_ri_aux_kp, matrix_ks_im_kp, rho, rho_xc, vppl, rho_core, rho_nlcc, rho_nlcc_g, vee, neighbor_list_id, sab_orb, sab_all, sac_ae, sac_ppl, sac_lri, sap_ppnl, sap_oce, sab_lrc, sab_se, sab_xtbe, sab_tbe, sab_core, sab_xb, sab_xtb_nonbond, sab_vdw, sab_scp, sab_almo, sab_kp, sab_kp_nosym, task_list, task_list_soft, kpoints, do_kpoints, atomic_kind_set, qs_kind_set, cell, cell_ref, use_ref_cell, particle_set, energy, force, local_particles, local_molecules, molecule_kind_set, molecule_set, subsys, cp_subsys, virial, results, atprop, nkind, natom, dft_control, dbcsr_dist, distribution_2d, pw_env, para_env, blacs_env, nelectron_total, nelectron_spin)
...
Define the neighbor list data types and the corresponding functionality.
subroutine, public rs_grid_create(rs, desc)
...
pure integer function, public rs_grid_locate_rank(rs_desc, rank_in, shift)
returns the 1D rank of the task which is a cartesian shift away from 1D rank rank_in only possible if...
pure subroutine, public rs_grid_reorder_ranks(desc, real2virtual)
Defines a new ordering of ranks on this realspace grid, recalculating the data bounds and reallocatin...
subroutine, public rs_grid_release(rs_grid)
releases the given rs grid (see doc/ReferenceCounting.html)
Generates the task lists used by the collocate and integrate routines.
subroutine, public rs_copy_to_matrices(src_buffer, dest_matrices, task_list)
Copies from buffer into DBCSR matrices, replaces rs_gather_matrix for non-distributed grids.
subroutine, public generate_qs_task_list(ks_env, task_list, reorder_rs_grid_ranks, skip_load_balance_distributed, soft_valid, basis_type, pw_env_external, sab_orb_external)
...
subroutine, public rs_scatter_matrices(src_matrices, dest_buffer, task_list, group)
Scatters dbcsr matrix blocks and receives them into a buffer as needed before collocation.
subroutine, public rs_distribute_matrix(rs_descs, pmats, atom_pair_send, atom_pair_recv, nimages, scatter, hmats)
redistributes the matrix so that it can be used in realspace operations i.e. according to the task li...
subroutine, public distribute_tasks(rs_descs, ntasks, natoms, tasks, atom_pair_send, atom_pair_recv, symmetric, reorder_rs_grid_ranks, skip_load_balance_distributed)
Assembles tasks to be performed on local grid.
subroutine, public rs_gather_matrices(src_buffer, dest_matrices, task_list, group)
Gathers the DBCSR matrix blocks and receives them into a buffer as needed after integration.
subroutine, public task_list_inner_loop(tasks, ntasks, curr_tasks, rs_descs, dft_control, cube_info, gridlevel_info, cindex, iatom, jatom, rpgfa, rpgfb, zeta, zetb, kind_radius_b, set_radius_a, set_radius_b, ra, rab, la_max, la_min, lb_max, lb_min, npgfa, npgfb, nseta, nsetb)
...
subroutine, public rs_copy_to_buffer(src_matrices, dest_buffer, task_list)
Copies the DBCSR blocks into buffer, replaces rs_scatter_matrix for non-distributed grids.
subroutine, public serialize_task(task, serialized_task)
Serialize a task into an integer array. Used for MPI communication.
subroutine, public deserialize_task(task, serialized_task)
De-serialize a task from an integer array. Used for MPI communication.
subroutine, public reallocate_tasks(tasks, new_size)
Grow an array of tasks while preserving the existing entries.
integer, parameter, public task_size_in_int8
All kinds of helpful little routines.
Type defining parameters related to the simulation cell.
Contains information about kpoints.
Container for different pw related things.
Provides all information about a quickstep kind.
calculation environment to calculate the ks matrix, holds all the needed vars. assumes that the core ...