28 USE cp_dbcsr_api,
ONLY: dbcsr_convert_sizes_to_offsets, &
72#include "./base/base_uses.f90"
77 LOGICAL,
PRIVATE,
PARAMETER :: debug_this_module = .false.
81 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'task_list_methods'
116 reorder_rs_grid_ranks, skip_load_balance_distributed, &
117 pw_env_external, sab_orb_external)
121 CHARACTER(LEN=*),
INTENT(IN) :: basis_type
122 LOGICAL,
INTENT(IN) :: reorder_rs_grid_ranks, &
123 skip_load_balance_distributed
124 TYPE(
pw_env_type),
OPTIONAL,
POINTER :: pw_env_external
126 OPTIONAL,
POINTER :: sab_orb_external
128 CHARACTER(LEN=*),
PARAMETER :: routinen =
'generate_qs_task_list'
129 INTEGER,
PARAMETER :: max_tasks = 2000
131 INTEGER :: cindex, curr_tasks, handle, i, iatom, iatom_old, igrid_level, igrid_level_old, &
132 ikind, ilevel, img, img_old, ipair, ipgf, iset, itask, jatom, jatom_old, jkind, jpgf, &
133 jset, maxpgf, maxset, natoms, nimages, nkind, nseta, nsetb, slot
134 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :) :: blocks
135 INTEGER,
DIMENSION(3) :: cellind
136 INTEGER,
DIMENSION(:),
POINTER :: la_max, la_min, lb_max, lb_min, npgfa, &
138 INTEGER,
DIMENSION(:, :, :),
POINTER :: cell_to_index
140 REAL(kind=
dp) :: kind_radius_a, kind_radius_b
141 REAL(kind=
dp),
DIMENSION(3) :: ra, rab
142 REAL(kind=
dp),
DIMENSION(:),
POINTER :: set_radius_a, set_radius_b
143 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: rpgfa, rpgfb, zeta, zetb
155 TYPE(
qs_kind_type),
DIMENSION(:),
POINTER :: qs_kind_set
160 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
162 CALL timeset(routinen, handle)
165 qs_kind_set=qs_kind_set, &
167 particle_set=particle_set, &
168 dft_control=dft_control)
171 IF (
PRESENT(sab_orb_external)) sab_orb => sab_orb_external
174 IF (
PRESENT(pw_env_external)) pw_env => pw_env_external
175 CALL pw_env_get(pw_env, rs_descs=rs_descs, rs_grids=rs_grids)
178 gridlevel_info => pw_env%gridlevel_info
179 cube_info => pw_env%cube_info
182 nkind =
SIZE(qs_kind_set)
183 natoms =
SIZE(particle_set)
187 qs_kind => qs_kind_set(ikind)
189 basis_set=orb_basis_set, basis_type=basis_type)
191 IF (.NOT.
ASSOCIATED(orb_basis_set)) cycle
194 maxset = max(nseta, maxset)
195 maxpgf = max(maxval(npgfa), maxpgf)
199 nimages = dft_control%nimages
200 IF (nimages > 1)
THEN
203 CALL get_ks_env(ks_env=ks_env, kpoints=kpoints)
207 NULLIFY (cell_to_index)
211 IF (
ASSOCIATED(task_list%atom_pair_send))
DEALLOCATE (task_list%atom_pair_send)
212 IF (
ASSOCIATED(task_list%atom_pair_recv))
DEALLOCATE (task_list%atom_pair_recv)
215 IF (.NOT.
ASSOCIATED(task_list%tasks))
THEN
219 curr_tasks =
SIZE(task_list%tasks)
221 ALLOCATE (basis_set_list(nkind))
223 qs_kind => qs_kind_set(ikind)
224 CALL get_qs_kind(qs_kind=qs_kind, basis_set=basis_set_a, &
225 basis_type=basis_type)
226 IF (
ASSOCIATED(basis_set_a))
THEN
227 basis_set_list(ikind)%gto_basis_set => basis_set_a
229 NULLIFY (basis_set_list(ikind)%gto_basis_set)
241 DO slot = 1, sab_orb(1)%nl_size
242 ikind = sab_orb(1)%nlist_task(slot)%ikind
243 jkind = sab_orb(1)%nlist_task(slot)%jkind
244 iatom = sab_orb(1)%nlist_task(slot)%iatom
245 jatom = sab_orb(1)%nlist_task(slot)%jatom
246 rab(1:3) = sab_orb(1)%nlist_task(slot)%r(1:3)
247 cellind(1:3) = sab_orb(1)%nlist_task(slot)%cell(1:3)
249 basis_set_a => basis_set_list(ikind)%gto_basis_set
250 IF (.NOT.
ASSOCIATED(basis_set_a)) cycle
251 basis_set_b => basis_set_list(jkind)%gto_basis_set
252 IF (.NOT.
ASSOCIATED(basis_set_b)) cycle
253 ra(:) =
pbc(particle_set(iatom)%r, cell)
255 la_max => basis_set_a%lmax
256 la_min => basis_set_a%lmin
257 npgfa => basis_set_a%npgf
258 nseta = basis_set_a%nset
259 rpgfa => basis_set_a%pgf_radius
260 set_radius_a => basis_set_a%set_radius
261 kind_radius_a = basis_set_a%kind_radius
262 zeta => basis_set_a%zet
264 lb_max => basis_set_b%lmax
265 lb_min => basis_set_b%lmin
266 npgfb => basis_set_b%npgf
267 nsetb = basis_set_b%nset
268 rpgfb => basis_set_b%pgf_radius
269 set_radius_b => basis_set_b%set_radius
270 kind_radius_b = basis_set_b%kind_radius
271 zetb => basis_set_b%zet
274 cindex = cell_to_index(cellind(1), cellind(2), cellind(3))
280 rs_descs, dft_control, cube_info, gridlevel_info, cindex, &
281 iatom, jatom, rpgfa, rpgfb, zeta, zetb, kind_radius_b, &
282 set_radius_a, set_radius_b, ra, rab, &
283 la_max, la_min, lb_max, lb_min, npgfa, npgfb, nseta, nsetb)
290 rs_descs=rs_descs, ntasks=task_list%ntasks, natoms=natoms, &
291 tasks=task_list%tasks, atom_pair_send=task_list%atom_pair_send, &
292 atom_pair_recv=task_list%atom_pair_recv, symmetric=.true., &
293 reorder_rs_grid_ranks=reorder_rs_grid_ranks, &
294 skip_load_balance_distributed=skip_load_balance_distributed)
297 ALLOCATE (nsgf(natoms))
298 CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_list, nsgf=nsgf)
299 IF (
ASSOCIATED(task_list%atom_pair_send))
THEN
301 CALL rs_calc_offsets(pairs=task_list%atom_pair_send, &
303 group_size=rs_descs(1)%rs_desc%group_size, &
304 pair_offsets=task_list%pair_offsets_send, &
305 rank_offsets=task_list%rank_offsets_send, &
306 rank_sizes=task_list%rank_sizes_send, &
307 buffer_size=task_list%buffer_size_send)
309 CALL rs_calc_offsets(pairs=task_list%atom_pair_recv, &
311 group_size=rs_descs(1)%rs_desc%group_size, &
312 pair_offsets=task_list%pair_offsets_recv, &
313 rank_offsets=task_list%rank_offsets_recv, &
314 rank_sizes=task_list%rank_sizes_recv, &
315 buffer_size=task_list%buffer_size_recv)
316 DEALLOCATE (basis_set_list, nsgf)
319 IF (reorder_rs_grid_ranks)
THEN
320 DO i = 1, gridlevel_info%ngrid_levels
321 IF (rs_descs(i)%rs_desc%distributed)
THEN
328 CALL create_grid_task_list(task_list=task_list, &
329 qs_kind_set=qs_kind_set, &
330 particle_set=particle_set, &
332 basis_type=basis_type, &
338 IF (
ASSOCIATED(task_list%taskstart))
THEN
339 DEALLOCATE (task_list%taskstart)
341 IF (
ASSOCIATED(task_list%taskstop))
THEN
342 DEALLOCATE (task_list%taskstop)
344 IF (
ASSOCIATED(task_list%npairs))
THEN
345 DEALLOCATE (task_list%npairs)
350 ALLOCATE (task_list%npairs(
SIZE(rs_descs)))
352 iatom_old = -1; jatom_old = -1; igrid_level_old = -1; img_old = -1
356 DO i = 1, task_list%ntasks
357 igrid_level = task_list%tasks(i)%grid_level
358 img = task_list%tasks(i)%image
359 iatom = task_list%tasks(i)%iatom
360 jatom = task_list%tasks(i)%jatom
361 iset = task_list%tasks(i)%iset
362 jset = task_list%tasks(i)%jset
363 ipgf = task_list%tasks(i)%ipgf
364 jpgf = task_list%tasks(i)%jpgf
365 IF (igrid_level /= igrid_level_old)
THEN
366 IF (igrid_level_old /= -1)
THEN
367 task_list%npairs(igrid_level_old) = ipair
370 igrid_level_old = igrid_level
374 ELSE IF (iatom /= iatom_old .OR. jatom /= jatom_old .OR. img /= img_old)
THEN
382 IF (task_list%ntasks /= 0)
THEN
383 task_list%npairs(igrid_level) = ipair
390 ALLOCATE (task_list%taskstart(maxval(task_list%npairs),
SIZE(rs_descs)))
391 ALLOCATE (task_list%taskstop(maxval(task_list%npairs),
SIZE(rs_descs)))
393 iatom_old = -1; jatom_old = -1; igrid_level_old = -1; img_old = -1
395 task_list%taskstart = 0
396 task_list%taskstop = 0
398 DO i = 1, task_list%ntasks
399 igrid_level = task_list%tasks(i)%grid_level
400 img = task_list%tasks(i)%image
401 iatom = task_list%tasks(i)%iatom
402 jatom = task_list%tasks(i)%jatom
403 iset = task_list%tasks(i)%iset
404 jset = task_list%tasks(i)%jset
405 ipgf = task_list%tasks(i)%ipgf
406 jpgf = task_list%tasks(i)%jpgf
407 IF (igrid_level /= igrid_level_old)
THEN
408 IF (igrid_level_old /= -1)
THEN
409 task_list%taskstop(ipair, igrid_level_old) = i - 1
412 task_list%taskstart(ipair, igrid_level) = i
413 igrid_level_old = igrid_level
417 ELSE IF (iatom /= iatom_old .OR. jatom /= jatom_old .OR. img /= img_old)
THEN
419 task_list%taskstart(ipair, igrid_level) = i
420 task_list%taskstop(ipair - 1, igrid_level) = i - 1
427 IF (task_list%ntasks /= 0)
THEN
428 task_list%taskstop(ipair, igrid_level) = task_list%ntasks
432 IF (debug_this_module)
THEN
433 tasks => task_list%tasks
435 WRITE (6, *)
"Total number of tasks ", task_list%ntasks
436 DO igrid_level = 1, gridlevel_info%ngrid_levels
437 WRITE (6, *)
"Total number of pairs(grid_level) ", &
438 igrid_level, task_list%npairs(igrid_level)
442 DO igrid_level = 1, gridlevel_info%ngrid_levels
444 ALLOCATE (blocks(natoms, natoms, nimages))
446 DO ipair = 1, task_list%npairs(igrid_level)
447 itask = task_list%taskstart(ipair, igrid_level)
448 ilevel = task_list%tasks(itask)%grid_level
449 img = task_list%tasks(itask)%image
450 iatom = task_list%tasks(itask)%iatom
451 jatom = task_list%tasks(itask)%jatom
452 iset = task_list%tasks(itask)%iset
453 jset = task_list%tasks(itask)%jset
454 ipgf = task_list%tasks(itask)%ipgf
455 jpgf = task_list%tasks(itask)%jpgf
456 IF (blocks(iatom, jatom, img) == -1 .AND. blocks(jatom, iatom, img) == -1)
THEN
457 blocks(iatom, jatom, img) = 1
458 blocks(jatom, iatom, img) = 1
460 WRITE (6, *)
"TASK LIST CONFLICT IN PAIR ", ipair
461 WRITE (6, *)
"Reuse of iatom, jatom, image ", iatom, jatom, img
467 DO itask = task_list%taskstart(ipair, igrid_level), task_list%taskstop(ipair, igrid_level)
468 ilevel = task_list%tasks(itask)%grid_level
469 img = task_list%tasks(itask)%image
470 iatom = task_list%tasks(itask)%iatom
471 jatom = task_list%tasks(itask)%jatom
472 iset = task_list%tasks(itask)%iset
473 jset = task_list%tasks(itask)%jset
474 ipgf = task_list%tasks(itask)%ipgf
475 jpgf = task_list%tasks(itask)%jpgf
476 IF (iatom /= iatom_old .OR. jatom /= jatom_old .OR. img /= img_old)
THEN
477 WRITE (6, *)
"TASK LIST CONFLICT IN TASK ", itask
478 WRITE (6, *)
"Inconsistent iatom, jatom, image ", iatom, jatom, img
479 WRITE (6, *)
"Should be iatom, jatom, image ", iatom_old, jatom_old, img_old
490 CALL timestop(handle)
!> Copies the fields of task_list%tasks into flat per-field work arrays and collects
!> per-atom kinds/positions and per-kind grid basis sets; these are then passed as
!> keyword arguments to a call whose result is stored in task_list%grid_task_list.
!> NOTE(review): this listing omits source lines (see gaps in the embedded line
!> numbers); loop headers and some call statements are not visible here.
!> \param task_list    provides ntasks/tasks/pair_offsets_recv; grid_basis_sets and
!>                     grid_task_list are filled
!> \param qs_kind_set  per-kind data; its size gives nkinds
!> \param particle_set per-atom data; its size gives natoms
!> \param cell         passed to pbc() when atom positions are computed
!> \param basis_type   forwarded to get_qs_kind
!> \param rs_grids     not referenced in the visible lines - TODO confirm usage
498 SUBROUTINE create_grid_task_list(task_list, qs_kind_set, particle_set, cell, basis_type, rs_grids)
500 TYPE(
qs_kind_type),
DIMENSION(:),
POINTER :: qs_kind_set
503 CHARACTER(LEN=*) :: basis_type
507 INTEGER :: nset, natoms, nkinds, ntasks, &
508 ikind, iatom, itask, nsgf
! Flat, per-task (or per-atom) work arrays handed to the grid task list call below.
509 INTEGER,
DIMENSION(:),
ALLOCATABLE :: atom_kinds, level_list, iatom_list, jatom_list, &
510 iset_list, jset_list, ipgf_list, jpgf_list, &
511 border_mask_list, block_num_list
512 REAL(kind=
dp),
DIMENSION(:),
ALLOCATABLE :: radius_list
513 REAL(kind=
dp),
DIMENSION(:, :),
ALLOCATABLE :: rab_list, atom_positions
514 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
515 INTEGER,
DIMENSION(:, :),
POINTER :: first_sgf
516 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: sphi, zet
517 INTEGER,
DIMENSION(:),
POINTER :: lmax, lmin, npgf, nsgf_set
! Problem sizes and the task array are taken from the input containers.
519 nkinds =
SIZE(qs_kind_set)
520 natoms =
SIZE(particle_set)
521 ntasks = task_list%ntasks
522 tasks => task_list%tasks
! The per-kind grid basis sets are only allocated when not yet associated.
524 IF (.NOT.
ASSOCIATED(task_list%grid_basis_sets))
THEN
526 ALLOCATE (task_list%grid_basis_sets(nkinds))
! Presumably executed once per kind (loop header over ikind not visible).
528 CALL get_qs_kind(qs_kind_set(ikind), basis_type=basis_type, basis_set=orb_basis_set)
534 first_sgf=first_sgf, &
541 maxco=
SIZE(sphi, 1), &
542 maxpgf=
SIZE(zet, 1), &
547 first_sgf=first_sgf, &
550 basis_set=task_list%grid_basis_sets(ikind))
! Kind number and pbc()-processed position for each atom.
555 ALLOCATE (atom_kinds(natoms), atom_positions(3, natoms))
557 atom_kinds(iatom) = particle_set(iatom)%atomic_kind%kind_number
558 atom_positions(:, iatom) =
pbc(particle_set(iatom)%r, cell)
! One entry per task in every flat list.
561 ALLOCATE (level_list(ntasks), iatom_list(ntasks), jatom_list(ntasks))
562 ALLOCATE (iset_list(ntasks), jset_list(ntasks), ipgf_list(ntasks), jpgf_list(ntasks))
563 ALLOCATE (border_mask_list(ntasks), block_num_list(ntasks))
564 ALLOCATE (radius_list(ntasks), rab_list(3, ntasks))
567 level_list(itask) = tasks(itask)%grid_level
568 iatom_list(itask) = tasks(itask)%iatom
569 jatom_list(itask) = tasks(itask)%jatom
570 iset_list(itask) = tasks(itask)%iset
571 jset_list(itask) = tasks(itask)%jset
572 ipgf_list(itask) = tasks(itask)%ipgf
573 jpgf_list(itask) = tasks(itask)%jpgf
! For dist_type 2 the mask is the bitwise complement of subpatch_pattern limited to
! its six lowest bits (63); the assignment to 0 is presumably the ELSE branch.
574 IF (tasks(itask)%dist_type == 2)
THEN
575 border_mask_list(itask) = iand(63, not(tasks(itask)%subpatch_pattern))
577 border_mask_list(itask) = 0
579 block_num_list(itask) = tasks(itask)%pair_index
580 radius_list(itask) = tasks(itask)%radius
581 rab_list(:, itask) = tasks(itask)%rab(:)
! Keyword arguments of the call that produces task_list%grid_task_list
! (the CALL line itself is not visible in this listing).
587 nblocks=
SIZE(task_list%pair_offsets_recv), &
588 block_offsets=task_list%pair_offsets_recv, &
589 atom_positions=atom_positions, &
590 atom_kinds=atom_kinds, &
591 basis_sets=task_list%grid_basis_sets, &
592 level_list=level_list, &
593 iatom_list=iatom_list, &
594 jatom_list=jatom_list, &
595 iset_list=iset_list, &
596 jset_list=jset_list, &
597 ipgf_list=ipgf_list, &
598 jpgf_list=jpgf_list, &
599 border_mask_list=border_mask_list, &
600 block_num_list=block_num_list, &
601 radius_list=radius_list, &
604 task_list=task_list%grid_task_list)
609 END SUBROUTINE create_grid_task_list
644 cube_info, gridlevel_info, cindex, &
645 iatom, jatom, rpgfa, rpgfb, zeta, zetb, kind_radius_b, set_radius_a, set_radius_b, ra, rab, &
646 la_max, la_min, lb_max, lb_min, npgfa, npgfb, nseta, nsetb)
648 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
649 INTEGER :: ntasks, curr_tasks
655 INTEGER :: cindex, iatom, jatom
656 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: rpgfa, rpgfb, zeta, zetb
657 REAL(kind=
dp) :: kind_radius_b
658 REAL(kind=
dp),
DIMENSION(:),
POINTER :: set_radius_a, set_radius_b
659 REAL(kind=
dp),
DIMENSION(3) :: ra, rab
660 INTEGER,
DIMENSION(:),
POINTER :: la_max, la_min, lb_max, lb_min, npgfa, &
662 INTEGER :: nseta, nsetb
664 INTEGER :: cube_center(3), igrid_level, ipgf, iset, &
665 jpgf, jset, lb_cube(3), ub_cube(3)
666 REAL(kind=
dp) :: dab, rab2, radius, zetp
668 rab2 = rab(1)*rab(1) + rab(2)*rab(2) + rab(3)*rab(3)
671 loop_iset:
DO iset = 1, nseta
673 IF (set_radius_a(iset) + kind_radius_b < dab) cycle loop_iset
675 loop_jset:
DO jset = 1, nsetb
677 IF (set_radius_a(iset) + set_radius_b(jset) < dab) cycle loop_jset
679 loop_ipgf:
DO ipgf = 1, npgfa(iset)
681 IF (rpgfa(ipgf, iset) + set_radius_b(jset) < dab) cycle loop_ipgf
683 loop_jpgf:
DO jpgf = 1, npgfb(jset)
685 IF (rpgfa(ipgf, iset) + rpgfb(jpgf, jset) < dab) cycle loop_jpgf
687 zetp = zeta(ipgf, iset) + zetb(jpgf, jset)
690 CALL compute_pgf_properties(cube_center, lb_cube, ub_cube, radius, &
691 rs_descs(igrid_level)%rs_desc, cube_info(igrid_level), &
692 la_max(iset), zeta(ipgf, iset), la_min(iset), &
693 lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
694 ra, rab, rab2, dft_control%qs_control%eps_rho_rspace)
696 CALL pgf_to_tasks(tasks, ntasks, curr_tasks, &
697 rab, cindex, iatom, jatom, iset, jset, ipgf, jpgf, &
698 la_max(iset), lb_max(jset), rs_descs(igrid_level)%rs_desc, &
699 igrid_level, gridlevel_info%ngrid_levels, cube_center, &
700 lb_cube, ub_cube, radius)
!> Computes the grid cube centre, the cube bounds and the radius for one pair of
!> primitive Gaussians with exponents zeta and zetb.
!> NOTE(review): several statements (e.g. the assignments of zetp, f, radius and the
!> first value of cube_center) are not visible in this listing.
!> \param cube_center (out) centre index, wrapped with modulo npts and shifted by rs_desc%lb
!> \param lb_cube     (out) lower cube bounds
!> \param ub_cube     (out) upper cube bounds
!> \param radius      (out) radius, passed to return_cube in the orthorhombic case
!> \param rs_desc     grid descriptor; npts, lb and orthorhombic are read
!> \param cube_info   passed to return_cube
!> \param la_max,la_min,lb_max,lb_min (in) integer inputs, not used in the visible lines
!> \param zeta,zetb   (in) exponents of the two primitives
!> \param ra          (in) position of the first centre
!> \param rab         (in) vector added to ra to obtain rb
!> \param rab2        (in) used as the squared length of rab in the prefactor
!> \param eps         (in) forwarded as eps= to a call not visible here
746 SUBROUTINE compute_pgf_properties(cube_center, lb_cube, ub_cube, radius, &
747 rs_desc, cube_info, la_max, zeta, la_min, lb_max, zetb, lb_min, ra, rab, rab2, eps)
749 INTEGER,
DIMENSION(3),
INTENT(OUT) :: cube_center, lb_cube, ub_cube
750 REAL(kind=
dp),
INTENT(OUT) :: radius
753 INTEGER,
INTENT(IN) :: la_max
754 REAL(kind=
dp),
INTENT(IN) :: zeta
755 INTEGER,
INTENT(IN) :: la_min, lb_max
756 REAL(kind=
dp),
INTENT(IN) :: zetb
757 INTEGER,
INTENT(IN) :: lb_min
758 REAL(kind=
dp),
INTENT(IN) :: ra(3), rab(3), rab2, eps
761 INTEGER,
DIMENSION(:),
POINTER :: sphere_bounds
762 REAL(kind=
dp) :: cutoff, f, prefactor, rb(3), zetp
763 REAL(kind=
dp),
DIMENSION(3) :: rp
! rp: ra displaced along rab by the fraction zetb/zetp; rb: second centre ra + rab.
768 rp(:) = ra(:) + zetb/zetp*rab(:)
769 rb(:) = ra(:) + rab(:)
! Exponential prefactor built from zeta, f and rab2 (f is set in a line not shown).
772 prefactor = exp(-zeta*f*rab2)
774 zetp=zetp, eps=eps, prefactor=prefactor, cutoff=cutoff)
! Wrap the centre into the range [0, npts) per dimension, then add the lower bound.
778 cube_center(:) =
modulo(cube_center(:), rs_desc%npts(:))
779 cube_center(:) = cube_center(:) + rs_desc%lb(:)
! Orthorhombic grids obtain the cube bounds from return_cube.
781 IF (rs_desc%orthorhombic)
THEN
782 CALL return_cube(cube_info, radius, lb_cube, ub_cube, sphere_bounds)
! Bounds recentred around zero from the extent, using integer division; presumably
! the non-orthorhombic branch (the ELSE line is not visible).
786 extent(:) = ub_cube(:) - lb_cube(:)
787 lb_cube(:) = -extent(:)/2 - 1
788 ub_cube(:) = extent(:)/2
791 END SUBROUTINE compute_pgf_properties
!> Integer cost estimate for a task from its cube bounds, a fraction and lmax.
!> The result is ceiling(((lmax + v1)*(cmax + v2)**3*v3*fraction + v4 + v5*lmax**7)/1000).
!> NOTE(review): the values of v1..v5 are assigned in lines not visible in this
!> listing (presumably depending on is_ortho - confirm).
!> \param lb_cube,ub_cube (in) cube bounds; only their difference is used
!> \param fraction        (in) multiplies the cube-size dependent term
!> \param lmax            enters linearly and as lmax**7
!> \param is_ortho        not referenced in the visible lines
807 INTEGER FUNCTION cost_model(lb_cube, ub_cube, fraction, lmax, is_ortho)
808 INTEGER,
DIMENSION(3),
INTENT(IN) :: lb_cube, ub_cube
809 REAL(kind=
dp),
INTENT(IN) :: fraction
814 REAL(kind=
dp) :: v1, v2, v3, v4, v5
! Largest value over the three dimensions of (edge length)/2, with integer division.
816 cmax = maxval(((ub_cube - lb_cube) + 1)/2)
831 cost_model = ceiling(((lmax + v1)*(cmax + v2)**3*v3*fraction + v4 + v5*lmax**7)/1000.0_dp)
833 END FUNCTION cost_model
!> Fills the entries of tasks belonging to one primitive pair (ipgf, jpgf) of the
!> sets (iset, jset) on atoms (iatom, jatom): destination/source rank, cost and the
!> pair indices, rab and radius.
!> NOTE(review): the increment of ntasks and the reallocation of tasks are not
!> visible in this listing.
!> \param tasks       task array that is written
!> \param ntasks      (inout) number of tasks in use; the last added_tasks entries are filled
!> \param curr_tasks  (inout) capacity counter, enlarged when ntasks exceeds it
!> \param rab,cindex,iatom,jatom,iset,jset,ipgf,jpgf (in) copied into each filled task
!>                    (cindex is stored as the task's image)
!> \param la_max,lb_max their sum is the lmax passed to cost_model
!> \param rs_desc     grid descriptor; distributed, orthorhombic and my_pos are read
!> \param igrid_level,n_levels (in) grid level and number of levels, used in encode_rank
!> \param cube_center,lb_cube,ub_cube (in) cube data for rs_find_node and cost_model
!> \param radius      (in) stored in each filled task
867 SUBROUTINE pgf_to_tasks(tasks, ntasks, curr_tasks, &
868 rab, cindex, iatom, jatom, iset, jset, ipgf, jpgf, &
869 la_max, lb_max, rs_desc, igrid_level, n_levels, &
870 cube_center, lb_cube, ub_cube, radius)
872 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
873 INTEGER,
INTENT(INOUT) :: ntasks, curr_tasks
874 REAL(kind=
dp),
DIMENSION(3),
INTENT(IN) :: rab
875 INTEGER,
INTENT(IN) :: cindex, iatom, jatom, iset, jset, ipgf, &
878 INTEGER,
INTENT(IN) :: igrid_level, n_levels
879 INTEGER,
DIMENSION(3),
INTENT(IN) :: cube_center, lb_cube, ub_cube
880 REAL(kind=
dp),
INTENT(IN) :: radius
! Growth policy of the capacity counter: new = int((old + add_tasks)*mult_tasks).
882 INTEGER,
PARAMETER :: add_tasks = 1000
883 REAL(kind=
dp),
PARAMETER :: mult_tasks = 2.0_dp
885 INTEGER :: added_tasks, cost, j, lmax
887 REAL(kind=
dp) :: tfraction
891 IF (ntasks > curr_tasks)
THEN
892 curr_tasks = int((curr_tasks + add_tasks)*mult_tasks)
! Distributed grids: rs_find_node receives the cube data together with ntasks, tasks
! and added_tasks.
897 IF (rs_desc%distributed)
THEN
901 CALL rs_find_node(rs_desc, igrid_level, n_levels, cube_center, &
902 ntasks=ntasks, tasks=tasks, lb_cube=lb_cube, ub_cube=ub_cube, added_tasks=added_tasks)
! Presumably the non-distributed branch (ELSE not visible): the destination is this
! process' own position and dist_type/subpatch_pattern are zero.
905 tasks(ntasks)%destination = encode_rank(rs_desc%my_pos, igrid_level, n_levels)
906 tasks(ntasks)%dist_type = 0
907 tasks(ntasks)%subpatch_pattern = 0
911 lmax = la_max + lb_max
912 is_ortho = (tasks(ntasks)%dist_type == 0 .OR. tasks(ntasks)%dist_type == 1) .AND. rs_desc%orthorhombic
! Every one of the added_tasks entries gets the same cost, computed for the
! fraction 1/added_tasks.
915 tfraction = 1.0_dp/added_tasks
917 cost = cost_model(lb_cube, ub_cube, tfraction, lmax, is_ortho)
! Fill the last added_tasks entries of tasks(1:ntasks); the source is always this
! process' own position.
919 DO j = 1, added_tasks
920 tasks(ntasks - added_tasks + j)%source = encode_rank(rs_desc%my_pos, igrid_level, n_levels)
921 tasks(ntasks - added_tasks + j)%cost = cost
922 tasks(ntasks - added_tasks + j)%grid_level = igrid_level
923 tasks(ntasks - added_tasks + j)%image = cindex
924 tasks(ntasks - added_tasks + j)%iatom = iatom
925 tasks(ntasks - added_tasks + j)%jatom = jatom
926 tasks(ntasks - added_tasks + j)%iset = iset
927 tasks(ntasks - added_tasks + j)%jset = jset
928 tasks(ntasks - added_tasks + j)%ipgf = ipgf
929 tasks(ntasks - added_tasks + j)%jpgf = jpgf
930 tasks(ntasks - added_tasks + j)%rab = rab
931 tasks(ntasks - added_tasks + j)%radius = radius
934 END SUBROUTINE pgf_to_tasks
!> Driver for load balancing on one grid level. In the visible lines it runs four
!> steps on a destination list: create it, accumulate loads into it, optimize it, and
!> call compute_load_list again with create_list=.false.
!> NOTE(review): some lines (e.g. declarations and cleanup of list) are not visible here.
!> \param tasks      task array passed to compute_load_list
!> \param ntasks     number of tasks, passed to compute_load_list
!> \param rs_descs   grid descriptors; the first one supplies group and my_pos
!> \param grid_level grid level handled by this call
!> \param natoms     passed through to compute_load_list
946 SUBROUTINE load_balance_distributed(tasks, ntasks, rs_descs, grid_level, natoms)
948 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
952 INTEGER :: grid_level, natoms
954 CHARACTER(LEN=*),
PARAMETER :: routinen =
'load_balance_distributed'
957 INTEGER,
DIMENSION(:, :, :),
POINTER ::
list
959 CALL timeset(routinen, handle)
! Step 1: build the destination list for this grid level.
964 CALL create_destination_list(
list, rs_descs, grid_level)
! Step 2: first pass with create_list=.true.
967 CALL compute_load_list(
list, rs_descs, grid_level, tasks, ntasks, natoms, create_list=.true.)
! Step 3: optimize the list using the group and position of the first descriptor.
970 CALL optimize_load_list(
list, rs_descs(1)%rs_desc%group, rs_descs(1)%rs_desc%my_pos)
! Step 4: second pass with create_list=.false.
973 CALL compute_load_list(
list, rs_descs, grid_level, tasks, ntasks, natoms, create_list=.false.)
977 CALL timestop(handle)
979 END SUBROUTINE load_balance_distributed
!> Rebalances the loads stored in list_global(2, :, :) by computing load shifts
!> ("fluxes") between pairs of ranks and writing the adjusted loads back.
!> list_global(1, idest, icpu) is read as a destination rank and
!> list_global(2, idest, icpu) as the load associated with it.
!> NOTE(review): loop headers, counter resets and the iteration loop are partly
!> missing from this listing.
!> \param list_global list to balance; the third dimension is indexed from 0 (one slice per rank)
988 SUBROUTINE balance_global_list(list_global)
989 INTEGER,
DIMENSION(:, :, 0:) :: list_global
991 CHARACTER(LEN=*),
PARAMETER :: routinen =
'balance_global_list'
992 INTEGER,
PARAMETER :: max_iter = 100
! The convergence tolerance is this fraction of the average load.
993 REAL(kind=
dp),
PARAMETER :: tolerance_factor = 0.005_dp
995 INTEGER :: dest, handle, icpu, idest, iflux, &
996 ilocal, k, maxdest, ncpu, nflux
997 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: flux_connections
998 LOGICAL :: solution_optimal
999 REAL(kind=
dp) :: average, load_shift, max_load_shift, &
1001 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: load, optimized_flux, optimized_load
1002 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: flux_limits
1004 CALL timeset(routinen, handle)
1006 ncpu =
SIZE(list_global, 3)
1007 maxdest =
SIZE(list_global, 2)
1008 ALLOCATE (load(0:ncpu - 1))
1010 ALLOCATE (optimized_load(0:ncpu - 1))
! Count one flux per pair (icpu, dest) with icpu < dest < ncpu.
1015 DO icpu = 0, ncpu - 1
1016 DO idest = 1, maxdest
1017 dest = list_global(1, idest, icpu)
1018 IF (dest < ncpu .AND. dest > icpu) nflux = nflux + 1
1021 ALLOCATE (optimized_flux(nflux))
1022 ALLOCATE (flux_limits(2, nflux))
1023 ALLOCATE (flux_connections(2, nflux))
! Total load per rank, plus the limits of each flux: the upper limit is the load
! listed from icpu towards dest (dest > icpu); the lower limit is minus the load
! listed in the opposite direction. Entries with dest >= ncpu are skipped.
1028 DO icpu = 0, ncpu - 1
1029 load(icpu) = sum(list_global(2, :, icpu))
1030 DO idest = 1, maxdest
1031 dest = list_global(1, idest, icpu)
1032 IF (dest < ncpu)
THEN
1033 IF (dest /= icpu)
THEN
1034 IF (dest > icpu)
THEN
1036 flux_limits(2, nflux) = list_global(2, idest, icpu)
1037 flux_connections(1, nflux) = icpu
1038 flux_connections(2, nflux) = dest
1041 IF (flux_connections(1, iflux) == dest .AND. flux_connections(2, iflux) == icpu)
THEN
1042 flux_limits(1, iflux) = -list_global(2, idest, icpu)
1052 solution_optimal = .false.
1053 optimized_flux = 0.0_dp
1060 average = sum(load)/
SIZE(load)
1061 tolerance = tolerance_factor*average
1063 optimized_load(:) = load
1065 max_load_shift = 0.0_dp
! For each flux: shift half of the load difference between its two ranks, clamped so
! that the accumulated flux stays within [flux_limits(1), flux_limits(2)].
1067 load_shift = (optimized_load(flux_connections(1, iflux)) - optimized_load(flux_connections(2, iflux)))/2
1068 load_shift = max(flux_limits(1, iflux) - optimized_flux(iflux), load_shift)
1069 load_shift = min(flux_limits(2, iflux) - optimized_flux(iflux), load_shift)
1070 max_load_shift = max(abs(load_shift), max_load_shift)
1071 optimized_load(flux_connections(1, iflux)) = optimized_load(flux_connections(1, iflux)) - load_shift
1072 optimized_load(flux_connections(2, iflux)) = optimized_load(flux_connections(2, iflux)) + load_shift
1073 optimized_flux(iflux) = optimized_flux(iflux) + load_shift
! Converged once the largest shift is below the tolerance.
1075 IF (max_load_shift < tolerance)
THEN
1076 solution_optimal = .true.
! Write the result back: ilocal is the slot of rank icpu in its own list. Load that
! is not shipped according to the optimized flux is added to the local slot.
1084 DO icpu = 0, ncpu - 1
1085 DO idest = 1, maxdest
1086 IF (list_global(1, idest, icpu) == icpu) ilocal = idest
1088 DO idest = 1, maxdest
1089 dest = list_global(1, idest, icpu)
1090 IF (dest < ncpu)
THEN
1091 IF (dest /= icpu)
THEN
1092 IF (dest > icpu)
THEN
! dest > icpu: a positive flux keeps nint(flux) in the dest slot; the branch that
! zeroes the dest slot is presumably the ELSE (not visible).
1094 IF (optimized_flux(nflux) > 0)
THEN
1095 list_global(2, ilocal, icpu) = list_global(2, ilocal, icpu) + &
1096 list_global(2, idest, icpu) - nint(optimized_flux(nflux))
1097 list_global(2, idest, icpu) = nint(optimized_flux(nflux))
1099 list_global(2, ilocal, icpu) = list_global(2, ilocal, icpu) + &
1100 list_global(2, idest, icpu)
1101 list_global(2, idest, icpu) = 0
! dest < icpu: the matching flux was registered as (dest, icpu), so the sign is
! mirrored; a positive flux zeroes the dest slot, otherwise -nint(flux) is kept.
1105 IF (flux_connections(1, iflux) == dest .AND. flux_connections(2, iflux) == icpu)
THEN
1106 IF (optimized_flux(iflux) > 0)
THEN
1107 list_global(2, ilocal, icpu) = list_global(2, ilocal, icpu) + &
1108 list_global(2, idest, icpu)
1109 list_global(2, idest, icpu) = 0
1111 list_global(2, ilocal, icpu) = list_global(2, ilocal, icpu) + &
1112 list_global(2, idest, icpu) + nint(optimized_flux(iflux))
1113 list_global(2, idest, icpu) = -nint(optimized_flux(iflux))
1124 CALL timestop(handle)
1126 END SUBROUTINE balance_global_list
!> Combines the per-rank loads in list(2, :, :) over the group, lets the root rank
!> balance the combined list, broadcasts the result and then adjusts this rank's own
!> entries list(2, :, :) against the balanced values.
!> NOTE(review): some loop headers and branches are not visible in this listing.
!> \param list   local load list; the third dimension is indexed from 0
!> \param group  communicator object providing sum, bcast and sum_partial
!> \param my_pos (in) position of this rank, compared against rank_of_root
1139 SUBROUTINE optimize_load_list(list, group, my_pos)
1140 INTEGER,
DIMENSION(:, :, 0:) ::
list
1142 INTEGER,
INTENT(IN) :: my_pos
1144 CHARACTER(LEN=*),
PARAMETER :: routinen =
'optimize_load_list'
1145 INTEGER,
PARAMETER :: rank_of_root = 0
1147 INTEGER :: handle, icpu, idest, maxdest, ncpu
1148 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: load_all
1149 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: load_partial
1150 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :) :: list_global
1152 CALL timeset(routinen, handle)
1154 ncpu =
SIZE(
list, 3)
1155 maxdest =
SIZE(
list, 2)
! Flatten the local loads and sum them over the group, with rank_of_root as target.
1158 ALLOCATE (load_all(maxdest*ncpu))
1159 load_all(:) = reshape(
list(2, :, :), [maxdest*ncpu])
1160 CALL group%sum(load_all(:), rank_of_root)
! Only the root builds and balances the global list; it is then broadcast.
! list_global is indexed from 1 in its third dimension, unlike list.
1163 ALLOCATE (list_global(2, maxdest, ncpu))
1164 IF (rank_of_root == my_pos)
THEN
1165 list_global(1, :, :) =
list(1, :, :)
1166 list_global(2, :, :) = reshape(load_all, [maxdest, ncpu])
1167 CALL balance_global_list(list_global)
1169 CALL group%bcast(list_global, rank_of_root)
! NOTE(review): sum_partial presumably returns a running sum over ranks up to and
! including this one - confirm against the communicator implementation.
1172 ALLOCATE (load_partial(maxdest, ncpu))
1174 CALL group%sum_partial(reshape(load_all, [maxdest, ncpu]), load_partial(:, :))
! When the partial sum exceeds the balanced value, this rank's share is cut: to the
! remaining budget if the partial sum without the own share is still below the
! balanced value, otherwise to zero. icpu - 1 maps the 1-based index onto list.
1177 DO idest = 1, maxdest
1180 IF (load_partial(idest, icpu) > list_global(2, idest, icpu))
THEN
1181 IF (load_partial(idest, icpu) -
list(2, idest, icpu - 1) < list_global(2, idest, icpu))
THEN
1182 list(2, idest, icpu - 1) = list_global(2, idest, icpu) &
1183 - (load_partial(idest, icpu) -
list(2, idest, icpu - 1))
1185 list(2, idest, icpu - 1) = 0
1193 DEALLOCATE (load_all)
1194 DEALLOCATE (list_global)
1195 DEALLOCATE (load_partial)
1197 CALL timestop(handle)
1198 END SUBROUTINE optimize_load_list
!> Walks through the tasks in runs that share the same atom pair and image and, for
!> tasks on grid_level, either accumulates their cost into list (create_list true)
!> or uses the budgets stored in list to rewrite the task destinations (create_list false).
!> NOTE(review): loop headers, the computation of rank from the subpatch bits and
!> several ELSE/END lines are not visible in this listing.
!> \param list        list(1, :, d) holds ranks, list(2, :, d) the loads/budgets for
!>                    destination d; the third dimension is indexed from 0
!> \param rs_descs    grid descriptors; their number is used by encode_rank/decode_rank
!> \param grid_level  only tasks on this level are processed
!> \param tasks       task array; destination may be rewritten
!> \param ntasks      number of tasks
!> \param natoms      number of atoms (natom8 is presumably its int_8 copy - not visible)
!> \param create_list selects accumulation (.true.) or redistribution (.false.)
1217 SUBROUTINE compute_load_list(list, rs_descs, grid_level, tasks, ntasks, natoms, create_list)
1218 INTEGER,
DIMENSION(:, :, 0:) ::
list
1221 INTEGER :: grid_level
1222 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1223 INTEGER :: ntasks, natoms
1224 LOGICAL :: create_list
1226 CHARACTER(LEN=*),
PARAMETER :: routinen =
'compute_load_list'
1228 INTEGER :: cost, dest, handle, i, iatom, ilevel, img, img_old, iopt, ipgf, iset, itask, &
1229 itask_start, itask_stop, jatom, jpgf, jset, li, maxdest, ncpu, ndest_pair, nopt, nshort, &
1231 INTEGER(KIND=int_8) :: bit_pattern, ipair, ipair_old, natom8
1232 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: loads
1233 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: all_dests, index
1234 INTEGER,
DIMENSION(6) :: options
1236 CALL timeset(routinen, handle)
! Current load of every rank on this grid level, without rank reordering.
1238 ALLOCATE (loads(0:rs_descs(grid_level)%rs_desc%group_size - 1))
1239 CALL get_current_loads(loads, rs_descs, grid_level, ntasks, tasks, use_reordered_ranks=.false.)
1241 maxdest =
SIZE(
list, 2)
1242 ncpu =
SIZE(
list, 3)
1247 ipair_old = huge(ipair_old)
1249 ALLOCATE (all_dests(0))
! Start of a new run: the first task defines the reference pair index and image.
1255 itask_start = itask_stop + 1
1256 itask_stop = itask_start
1257 IF (itask_stop > ntasks)
EXIT
1258 ilevel = tasks(itask_stop)%grid_level
1259 img_old = tasks(itask_stop)%image
1260 iatom = tasks(itask_stop)%iatom
1261 jatom = tasks(itask_stop)%jatom
1262 iset = tasks(itask_stop)%iset
1263 jset = tasks(itask_stop)%jset
1264 ipgf = tasks(itask_stop)%ipgf
1265 jpgf = tasks(itask_stop)%jpgf
! Linear index of the atom pair.
1267 ipair_old = (iatom - 1)*natom8 + (jatom - 1)
! Extend the run while the next task has the same pair index and image.
1269 IF (itask_stop + 1 > ntasks)
EXIT
1270 ilevel = tasks(itask_stop + 1)%grid_level
1271 img = tasks(itask_stop + 1)%image
1272 iatom = tasks(itask_stop + 1)%iatom
1273 jatom = tasks(itask_stop + 1)%jatom
1274 iset = tasks(itask_stop + 1)%iset
1275 jset = tasks(itask_stop + 1)%jset
1276 ipgf = tasks(itask_stop + 1)%ipgf
1277 jpgf = tasks(itask_stop + 1)%jpgf
1279 ipair = (iatom - 1)*natom8 + (jatom - 1)
1280 IF (ipair == ipair_old .AND. img == img_old)
THEN
1281 itask_stop = itask_stop + 1
! Number of tasks in the run.
1287 nshort = itask_stop - itask_start + 1
! Collect the decoded destination of every task of the run that lies on grid_level;
! tasks on other levels are marked with huge().
1290 DEALLOCATE (all_dests)
1291 ALLOCATE (all_dests(nshort))
1293 ALLOCATE (index(nshort))
1295 ilevel = tasks(itask_start + i - 1)%grid_level
1296 img = tasks(itask_start + i - 1)%image
1297 iatom = tasks(itask_start + i - 1)%iatom
1298 jatom = tasks(itask_start + i - 1)%jatom
1299 iset = tasks(itask_start + i - 1)%iset
1300 jset = tasks(itask_start + i - 1)%jset
1301 ipgf = tasks(itask_start + i - 1)%ipgf
1302 jpgf = tasks(itask_start + i - 1)%jpgf
1304 IF (ilevel == grid_level)
THEN
1305 all_dests(i) = decode_rank(tasks(itask_start + i - 1)%destination,
SIZE(rs_descs))
1307 all_dests(i) = huge(all_dests(i))
! Sort, then compact the distinct valid destinations into all_dests(1:ndest_pair).
1310 CALL sort(all_dests, nshort, index)
1313 IF ((all_dests(ndest_pair) /= all_dests(i)) .AND. (all_dests(i) /= huge(all_dests(i))))
THEN
1314 ndest_pair = ndest_pair + 1
1315 all_dests(ndest_pair) = all_dests(i)
1319 DO itask = itask_start, itask_stop
1321 dest = decode_rank(tasks(itask)%destination,
SIZE(rs_descs))
1322 ilevel = tasks(itask)%grid_level
1323 img = tasks(itask)%image
1324 iatom = tasks(itask)%iatom
1325 jatom = tasks(itask)%jatom
1326 iset = tasks(itask)%iset
1327 jset = tasks(itask)%jset
1328 ipgf = tasks(itask)%ipgf
1329 jpgf = tasks(itask)%jpgf
! Tasks on other grid levels are left untouched.
1332 IF (ilevel /= grid_level) cycle
1333 ipair = (iatom - 1)*natom8 + (jatom - 1)
1334 cost = int(tasks(itask)%cost)
1336 SELECT CASE (tasks(itask)%dist_type)
! Each set bit 0..5 of the subpatch pattern proposes a rank (computed in lines not
! visible); it becomes an option only if that rank already is a destination of this pair.
1338 bit_pattern = tasks(itask)%subpatch_pattern
1340 IF (btest(bit_pattern, 0))
THEN
1342 IF (any(all_dests(1:ndest_pair) == rank))
THEN
1344 options(nopt) = rank
1347 IF (btest(bit_pattern, 1))
THEN
1349 IF (any(all_dests(1:ndest_pair) == rank))
THEN
1351 options(nopt) = rank
1354 IF (btest(bit_pattern, 2))
THEN
1356 IF (any(all_dests(1:ndest_pair) == rank))
THEN
1358 options(nopt) = rank
1361 IF (btest(bit_pattern, 3))
THEN
1363 IF (any(all_dests(1:ndest_pair) == rank))
THEN
1365 options(nopt) = rank
1368 IF (btest(bit_pattern, 4))
THEN
1370 IF (any(all_dests(1:ndest_pair) == rank))
THEN
1372 options(nopt) = rank
1375 IF (btest(bit_pattern, 5))
THEN
1377 IF (any(all_dests(1:ndest_pair) == rank))
THEN
1379 options(nopt) = rank
! Among the options, prefer the rank with the lower current load.
1386 IF (loads(rank) > loads(options(iopt))) rank = options(iopt)
! Slot of the chosen rank in the list of destination dest.
1391 li = list_index(
list, rank, dest)
! create_list: add the cost to the slot. Otherwise: keep dest if the slot refers to
! dest itself; else redirect to list(1, li, dest) while its budget covers the cost,
! and fall back to dest when it does not.
1392 IF (create_list)
THEN
1393 list(2, li, dest) =
list(2, li, dest) + cost
1395 IF (
list(1, li, dest) == dest)
THEN
1396 tasks(itask)%destination = encode_rank(dest, ilevel,
SIZE(rs_descs))
1398 IF (
list(2, li, dest) >= cost)
THEN
1399 list(2, li, dest) =
list(2, li, dest) - cost
1400 tasks(itask)%destination = encode_rank(
list(1, li, dest), ilevel,
SIZE(rs_descs))
1402 tasks(itask)%destination = encode_rank(dest, ilevel,
SIZE(rs_descs))
! Other dist_type case (CASE line not visible): the slot of dest in its own list is
! used, with the same accumulate/redistribute logic as above.
1407 li = list_index(
list, dest, dest)
1408 IF (create_list)
THEN
1409 list(2, li, dest) =
list(2, li, dest) + cost
1411 IF (
list(1, li, dest) == dest)
THEN
1412 tasks(itask)%destination = encode_rank(dest, ilevel,
SIZE(rs_descs))
1414 IF (
list(2, li, dest) >= cost)
THEN
1415 list(2, li, dest) =
list(2, li, dest) - cost
1416 tasks(itask)%destination = encode_rank(
list(1, li, dest), ilevel,
SIZE(rs_descs))
1418 tasks(itask)%destination = encode_rank(dest, ilevel,
SIZE(rs_descs))
1430 CALL timestop(handle)
1432 END SUBROUTINE compute_load_list
!> Searches the list of destination dest for the slot whose rank entry
!> list(1, slot, dest) equals rank and returns that slot index.
!> NOTE(review): the initial value of list_index and the loop header are not visible
!> in this listing, so the behaviour when rank is absent cannot be confirmed here.
!> \param list (in) destination list; the third dimension is indexed from 0
!> \param rank (in) rank to look for
!> \param dest (in) destination whose list is searched
1443 INTEGER FUNCTION list_index(list, rank, dest)
1444 INTEGER,
DIMENSION(:, :, 0:),
INTENT(IN) ::
list
1445 INTEGER,
INTENT(IN) :: rank, dest
! Stop at the first matching slot, otherwise advance to the next one.
1449 IF (
list(1, list_index, dest) == rank)
EXIT
1450 list_index = list_index + 1
1452 END FUNCTION list_index
!> Allocates list(2, ultimate_max, 0:ncpu-1) and fills, for every rank icpu, row 1
!> with a sorted, duplicate-free list of ranks (padded with huge()) and row 2 with zeros.
!> NOTE(review): the lines that set ultimate_max and fill sublist for each icpu are
!> not visible in this listing.
!> \param list       must not be associated on entry (asserted); allocated here
!> \param rs_descs   grid descriptors; group_size of level grid_level gives ncpu
!> \param grid_level (in) grid level whose descriptor is used
1463 SUBROUTINE create_destination_list(list, rs_descs, grid_level)
1464 INTEGER,
DIMENSION(:, :, :),
POINTER ::
list
1467 INTEGER,
INTENT(IN) :: grid_level
1469 CHARACTER(LEN=*),
PARAMETER :: routinen =
'create_destination_list'
1471 INTEGER :: handle, i, icpu, j, maxcount, ncpu, &
1473 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: index, sublist
1475 CALL timeset(routinen, handle)
1477 cpassert(.NOT.
ASSOCIATED(
list))
1478 ncpu = rs_descs(grid_level)%rs_desc%group_size
1481 ALLOCATE (
list(2, ultimate_max, 0:ncpu - 1))
1483 ALLOCATE (index(ultimate_max))
1484 ALLOCATE (sublist(ultimate_max))
! huge() marks unused slots; after sorting they end up at the tail.
1485 sublist = huge(sublist)
1488 DO icpu = 0, ncpu - 1
1497 CALL sort(sublist, ultimate_max, index)
! Compact distinct values to the front; j is the number of distinct entries kept.
1500 IF (sublist(i) /= sublist(j))
THEN
1502 sublist(j) = sublist(i)
1505 maxcount = max(maxcount, j)
! Reset the tail to huge() and store the result with all loads zeroed.
1506 sublist(j + 1:ultimate_max) = huge(sublist)
1507 list(1, :, icpu) = sublist
1508 list(2, :, icpu) = 0
1513 CALL timestop(handle)
1515 END SUBROUTINE create_destination_list
!> Determines the load of every rank for one grid level: per-destination values are
!> accumulated locally in send_buf_i, exchanged with alltoall, summed, and the
!> per-rank totals are gathered into loads.
!> NOTE(review): the loop header over the tasks, the initialisation of send_buf_i and
!> the right-hand tails of the accumulation statements are not visible in this listing.
!> \param loads      receives one total per rank (result of the allgather); indexed
!>                   from 1 inside this routine
!> \param rs_descs   grid descriptors; entry grid_level supplies group and group_size
!> \param grid_level only tasks on this level contribute
!> \param ntasks     number of tasks
!> \param tasks      task array; grid_level and destination are read
!> \param use_reordered_ranks (in) if true, the decoded destination is mapped through
!>                   virtual2real before it is used as buffer index
1531 SUBROUTINE get_current_loads(loads, rs_descs, grid_level, ntasks, tasks, use_reordered_ranks)
1532 INTEGER(KIND=int_8),
DIMENSION(:) :: loads
1535 INTEGER :: grid_level, ntasks
1536 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1537 LOGICAL,
INTENT(IN) :: use_reordered_ranks
1539 CHARACTER(LEN=*),
PARAMETER :: routinen =
'get_current_loads'
1541 INTEGER :: handle, i, iatom, ilevel, img, ipgf, &
1542 iset, jatom, jpgf, jset
1543 INTEGER(KIND=int_8) :: total_cost_local
1544 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: recv_buf_i, send_buf_i
1547 CALL timeset(routinen, handle)
1549 desc => rs_descs(grid_level)%rs_desc
! One buffer slot per rank of the group.
1552 ALLOCATE (send_buf_i(desc%group_size))
1553 ALLOCATE (recv_buf_i(desc%group_size))
1561 ilevel = tasks(i)%grid_level
1562 img = tasks(i)%image
1563 iatom = tasks(i)%iatom
1564 jatom = tasks(i)%jatom
1565 iset = tasks(i)%iset
1566 jset = tasks(i)%jset
1567 ipgf = tasks(i)%ipgf
1568 jpgf = tasks(i)%jpgf
1569 IF (ilevel /= grid_level) cycle
! Accumulate into the slot of the task's destination rank; "+ 1" converts the
! decoded rank into a 1-based buffer index.
1570 IF (use_reordered_ranks)
THEN
1571 send_buf_i(rs_descs(ilevel)%rs_desc%virtual2real(decode_rank(tasks(i)%destination,
SIZE(rs_descs))) + 1) = &
1572 send_buf_i(rs_descs(ilevel)%rs_desc%virtual2real(decode_rank(tasks(i)%destination,
SIZE(rs_descs))) + 1) &
1575 send_buf_i(decode_rank(tasks(i)%destination,
SIZE(rs_descs)) + 1) = &
1576 send_buf_i(decode_rank(tasks(i)%destination,
SIZE(rs_descs)) + 1) &
! Exchange the buffers with alltoall (third argument 1).
1580 CALL desc%group%alltoall(send_buf_i, recv_buf_i, 1)
! Sum what was received, then gather the totals of all ranks into loads.
1583 total_cost_local = sum(recv_buf_i)
1586 CALL desc%group%allgather(total_cost_local, loads)
1588 CALL timestop(handle)
1590 END SUBROUTINE get_current_loads
!> \brief Reassigns the destination of tasks on overloaded ranks to underloaded ranks.
!>        Only tasks with dist_type == 0 whose decoded destination rank equals their decoded
!>        source rank are candidates for being moved.
!> \param rs_descs grid descriptors; rs_descs(1)%rs_desc supplies the group used here
!> \param ntasks NOTE(review): presumably the number of valid entries in tasks - confirm
!> \param tasks task list; tasks(j)%destination is overwritten for tasks that are moved
1602 SUBROUTINE load_balance_replicated(rs_descs, ntasks, tasks)
1607 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1609 CHARACTER(LEN=*),
PARAMETER :: routinen =
'load_balance_replicated'
1611 INTEGER :: handle, i, iatom, ilevel, img, ipgf, &
1612 iset, j, jatom, jpgf, jset, &
1613 no_overloaded, no_underloaded, &
1615 INTEGER(KIND=int_8) :: average_cost, cost_task_rep, count, &
1616 offset, total_cost_global
1617 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: load_imbalance, loads, recv_buf_i
1618 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: index
1621 CALL timeset(routinen, handle)
1623 desc => rs_descs(1)%rs_desc
1626 ALLOCATE (recv_buf_i(desc%group_size))
1627 ALLOCATE (loads(desc%group_size))
! accumulate the per-rank loads of all grid levels (reordered ranks) in recv_buf_i
1630 DO i = 1,
SIZE(rs_descs)
1631 CALL get_current_loads(loads, rs_descs, i, ntasks, tasks, use_reordered_ranks=.true.)
1632 recv_buf_i(:) = recv_buf_i + loads
1635 total_cost_global = sum(recv_buf_i)
! integer division, i.e. the average is truncated
1636 average_cost = total_cost_global/desc%group_size
1644 ALLOCATE (load_imbalance(desc%group_size))
1645 ALLOCATE (index(desc%group_size))
! positive entries: rank is above the average, negative entries: rank is below it
1647 load_imbalance(:) = recv_buf_i - average_cost
1651 DO i = 1, desc%group_size
1652 IF (load_imbalance(i) > 0) no_overloaded = no_overloaded + 1
1653 IF (load_imbalance(i) < 0) no_underloaded = no_underloaded + 1
! NOTE(review): presumably sort orders recv_buf_i ascending and returns the original
! (1-based) positions in index - confirm against the sort interface
1658 CALL sort(recv_buf_i,
SIZE(recv_buf_i), index)
! cost of the tasks of this rank that are allowed to move
1664 IF (tasks(i)%dist_type == 0 &
1665 .AND. decode_rank(tasks(i)%destination,
SIZE(rs_descs)) == decode_rank(tasks(i)%source,
SIZE(rs_descs)))
THEN
1666 cost_task_rep = cost_task_rep + tasks(i)%cost
! recv_buf_i is reused: it now receives the movable cost of every rank
1672 CALL desc%group%allgather(cost_task_rep, recv_buf_i)
1674 DO i = 1, desc%group_size
! a rank cannot hand over more than its movable cost
1676 IF (load_imbalance(i) > 0) &
1677 load_imbalance(i) = min(load_imbalance(i), recv_buf_i(i))
! only a rank with a surplus redirects tasks
1686 IF (load_imbalance(desc%my_pos + 1) > 0)
THEN
! walk over the last no_overloaded positions of the sorted order
1692 DO i = desc%group_size, desc%group_size - no_overloaded + 1, -1
1693 IF (index(i) == desc%my_pos + 1)
THEN
1696 offset = offset + load_imbalance(index(i))
1701 proc_receiving = huge(proc_receiving)
! walk over the first no_underloaded positions of the sorted order
1702 DO i = 1, no_underloaded
1703 offset = offset + load_imbalance(index(i))
1704 IF (offset <= 0)
THEN
1714 IF (tasks(j)%dist_type == 0 &
1715 .AND. decode_rank(tasks(j)%destination,
SIZE(rs_descs)) == decode_rank(tasks(j)%source,
SIZE(rs_descs)))
THEN
! no receiving position left
1718 IF (proc_receiving > no_underloaded)
EXIT
1720 ilevel = tasks(j)%grid_level
1721 img = tasks(j)%image
1722 iatom = tasks(j)%iatom
1723 jatom = tasks(j)%jatom
1724 iset = tasks(j)%iset
1725 jset = tasks(j)%jset
1726 ipgf = tasks(j)%ipgf
1727 jpgf = tasks(j)%jpgf
! redirect the task; index holds 1-based positions, encode_rank is given position - 1
1728 tasks(j)%destination = encode_rank(index(proc_receiving) - 1, ilevel,
SIZE(rs_descs))
1729 offset = offset + tasks(j)%cost
1730 count = count + tasks(j)%cost
! stop once the cost handed over reaches the surplus of this rank
1731 IF (count >= load_imbalance(desc%my_pos + 1))
EXIT
! the current receiver is filled up, continue with the next position
1732 IF (offset > 0)
THEN
1733 proc_receiving = proc_receiving + 1
1736 IF (proc_receiving > no_underloaded)
EXIT
1737 offset = load_imbalance(index(proc_receiving))
1744 DEALLOCATE (load_imbalance)
1746 CALL timestop(handle)
1748 END SUBROUTINE load_balance_replicated
!> \brief Sends every task to the rank it is destined for and receives the tasks destined
!>        for this rank into tasks_recv. Counts are exchanged with alltoall first, the
!>        payload is then exchanged with an alltoall that takes sizes and displacements.
!> \param rs_descs grid descriptors; rs_descs(1)%rs_desc supplies the group
!> \param ntasks NOTE(review): presumably the number of valid entries in tasks - confirm
!> \param tasks tasks to be sent
!> \param ntasks_recv size with which tasks_recv is allocated
!> \param tasks_recv allocated here with ntasks_recv entries
1762 SUBROUTINE create_local_tasks(rs_descs, ntasks, tasks, ntasks_recv, tasks_recv)
1767 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1768 INTEGER :: ntasks_recv
1769 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks_recv
1771 CHARACTER(LEN=*),
PARAMETER :: routinen =
'create_local_tasks'
1773 INTEGER :: handle, i, j, k, l, rank
1774 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: recv_buf, send_buf
1775 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: recv_disps, recv_sizes, send_disps, &
1779 CALL timeset(routinen, handle)
1781 desc => rs_descs(1)%rs_desc
! all bookkeeping arrays have one entry per rank of the group
1784 ALLOCATE (send_sizes(desc%group_size))
1785 ALLOCATE (recv_sizes(desc%group_size))
1786 ALLOCATE (send_disps(desc%group_size))
1787 ALLOCATE (recv_disps(desc%group_size))
1788 ALLOCATE (send_buf(desc%group_size))
1789 ALLOCATE (recv_buf(desc%group_size))
! target rank: decoded rank mapped through virtual2real of the decoded grid level
1794 rank = rs_descs(decode_level(tasks(i)%destination,
SIZE(rs_descs))) &
1795 %rs_desc%virtual2real(decode_rank(tasks(i)%destination,
SIZE(rs_descs)))
! count the tasks going to that rank
1796 send_buf(rank + 1) = send_buf(rank + 1) + 1
! exchange the counts
1799 CALL desc%group%alltoall(send_buf, recv_buf, 1)
! displacements are running sums over the sizes of the preceding ranks
1810 DO i = 2, desc%group_size
1813 send_disps(i) = send_disps(i - 1) + send_sizes(i - 1)
1814 recv_disps(i) = recv_disps(i - 1) + recv_sizes(i - 1)
! the count buffers are released and reallocated to hold the payload
1818 DEALLOCATE (send_buf)
1819 DEALLOCATE (recv_buf)
1822 ALLOCATE (send_buf(sum(send_sizes)))
1823 ALLOCATE (recv_buf(sum(recv_sizes)))
! i is the 1-based target rank of task j
1829 i = rs_descs(decode_level(tasks(j)%destination,
SIZE(rs_descs))) &
1830 %rs_desc%virtual2real(decode_rank(tasks(j)%destination,
SIZE(rs_descs))) + 1
! NOTE(review): presumably send_sizes(i) acts as a fill counter here so that l is the
! current write position in the segment of rank i - confirm
1831 l = send_disps(i) + send_sizes(i)
1837 CALL desc%group%alltoall(send_buf, send_sizes, send_disps, recv_buf, recv_sizes, recv_disps)
1839 DEALLOCATE (send_buf)
1842 ALLOCATE (tasks_recv(ntasks_recv))
1846 DO i = 1, desc%group_size
1854 DEALLOCATE (recv_buf)
1855 DEALLOCATE (send_sizes)
1856 DEALLOCATE (recv_sizes)
1857 DEALLOCATE (send_disps)
1858 DEALLOCATE (recv_disps)
1860 CALL timestop(handle)
1862 END SUBROUTINE create_local_tasks
1882 tasks, atom_pair_send, atom_pair_recv, &
1883 symmetric, reorder_rs_grid_ranks, skip_load_balance_distributed)
! Steps visible in this routine: detect whether any grid level is distributed; for
! distributed grids optionally balance the distributed levels, collect the loads,
! optionally reorder ranks, balance the replicated tasks, exchange the tasks and build
! the send and recv atom pair lists; finally the received tasks are sorted and ntasks
! is replaced by the number of received tasks.
1887 INTEGER :: ntasks, natoms
1888 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
1889 TYPE(
atom_pair_type),
DIMENSION(:),
POINTER :: atom_pair_send, atom_pair_recv
1890 LOGICAL,
INTENT(IN) :: symmetric, reorder_rs_grid_ranks, &
1891 skip_load_balance_distributed
1893 CHARACTER(LEN=*),
PARAMETER :: routinen =
'distribute_tasks'
1895 INTEGER :: handle, igrid_level, irank, ntasks_recv
1896 INTEGER(KIND=int_8) :: load_gap, max_load, replicated_load
1897 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: total_loads, total_loads_tmp, trial_loads
1898 INTEGER(KIND=int_8),
DIMENSION(:, :),
POINTER :: loads
1899 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: indices, real2virtual, total_index
1900 LOGICAL :: distributed_grids, fixed_first_grid
1902 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks_recv
1904 CALL timeset(routinen, handle)
1906 cpassert(
ASSOCIATED(tasks))
! one distributed level is enough to take the distributed code path
1909 distributed_grids = .false.
1910 DO igrid_level = 1,
SIZE(rs_descs)
1911 IF (rs_descs(igrid_level)%rs_desc%distributed)
THEN
1912 distributed_grids = .true.
1915 desc => rs_descs(1)%rs_desc
1917 IF (distributed_grids)
THEN
! loads and total_loads are indexed by 0-based rank
1919 ALLOCATE (loads(0:desc%group_size - 1,
SIZE(rs_descs)))
1920 ALLOCATE (total_loads(0:desc%group_size - 1))
! distributed levels: optional balancing, then their loads are added to total_loads
1926 DO igrid_level = 1,
SIZE(rs_descs)
1927 IF (rs_descs(igrid_level)%rs_desc%distributed)
THEN
1929 IF (.NOT. skip_load_balance_distributed) &
1930 CALL load_balance_distributed(tasks, ntasks, rs_descs, igrid_level, natoms)
1932 CALL get_current_loads(loads(:, igrid_level), rs_descs, igrid_level, ntasks, &
1933 tasks, use_reordered_ranks=.false.)
1935 total_loads(:) = total_loads + loads(:, igrid_level)
! levels that are not distributed only contribute to replicated_load
1944 DO igrid_level = 1,
SIZE(rs_descs)
1945 IF (.NOT. rs_descs(igrid_level)%rs_desc%distributed)
THEN
1946 CALL get_current_loads(loads(:, igrid_level), rs_descs, igrid_level, ntasks, &
1947 tasks, use_reordered_ranks=.false.)
1948 replicated_load = replicated_load + sum(loads(:, igrid_level))
1958 IF (reorder_rs_grid_ranks)
THEN
1959 fixed_first_grid = .false.
1960 DO igrid_level = 1,
SIZE(rs_descs)
1961 IF (rs_descs(igrid_level)%rs_desc%distributed)
THEN
! the first distributed level only initialises total_loads
1962 IF (fixed_first_grid .EQV. .false.)
THEN
1963 total_loads(:) = loads(:, igrid_level)
1964 fixed_first_grid = .true.
1966 ALLOCATE (trial_loads(0:desc%group_size - 1))
! loads that would result without reordering this level
1968 trial_loads(:) = total_loads + loads(:, igrid_level)
1969 max_load = maxval(trial_loads)
! load_gap sums, over all ranks, the distance to the most loaded rank
1971 DO irank = 0, desc%group_size - 1
1972 load_gap = load_gap + max_load - trial_loads(irank)
! reorder only if the gap exceeds the replicated load by more than 5 percent
1977 IF (load_gap > replicated_load*1.05_dp)
THEN
1979 ALLOCATE (indices(0:desc%group_size - 1))
1980 ALLOCATE (total_index(0:desc%group_size - 1))
1981 ALLOCATE (total_loads_tmp(0:desc%group_size - 1))
1982 ALLOCATE (real2virtual(0:desc%group_size - 1))
! total_loads is sorted through a copy so that the original order is kept
1984 total_loads_tmp(:) = total_loads
1985 CALL sort(total_loads_tmp, desc%group_size, total_index)
1986 CALL sort(loads(:, igrid_level), desc%group_size, indices)
! entry irank of the sorted totals is paired with entry (group_size - irank - 1) of the
! sorted level loads. NOTE(review): presumably sort is ascending, so the least loaded
! rank receives the largest load of this level - confirm
1990 DO irank = 0, desc%group_size - 1
1991 total_loads(total_index(irank) - 1) = total_loads(total_index(irank) - 1) + &
1992 loads(desc%group_size - irank - 1, igrid_level)
1993 real2virtual(total_index(irank) - 1) = indices(desc%group_size - irank - 1) - 1
1998 DEALLOCATE (indices)
1999 DEALLOCATE (total_index)
2000 DEALLOCATE (total_loads_tmp)
2001 DEALLOCATE (real2virtual)
! NOTE(review): presumably the branch without reordering - the trial loads are adopted
2003 total_loads(:) = trial_loads
2006 DEALLOCATE (trial_loads)
2014 CALL load_balance_replicated(rs_descs, ntasks, tasks)
! exchange the tasks, then build the pair lists for the sent and the received tasks
2032 CALL create_local_tasks(rs_descs, ntasks, tasks, ntasks_recv, tasks_recv)
2038 CALL get_atom_pair(atom_pair_send, tasks, ntasks=ntasks, send=.true., symmetric=symmetric, rs_descs=rs_descs)
2047 CALL get_atom_pair(atom_pair_recv, tasks_recv, ntasks=ntasks_recv, send=.false., symmetric=symmetric, rs_descs=rs_descs)
2052 DEALLOCATE (total_loads)
! NOTE(review): presumably the branch without distributed grids - nothing is exchanged
! and only the recv pair list is built; confirm against the full source
2056 ntasks_recv = ntasks
2057 CALL get_atom_pair(atom_pair_recv, tasks_recv, ntasks=ntasks_recv, send=.false., symmetric=symmetric, rs_descs=rs_descs)
! the received tasks are sorted; the permutation in indices is discarded right away
2062 ALLOCATE (indices(ntasks_recv))
2063 CALL tasks_sort(tasks_recv, ntasks_recv, indices)
2064 DEALLOCATE (indices)
! from here on ntasks is the number of received tasks
2071 ntasks = ntasks_recv
2073 CALL timestop(handle)
!> \brief Builds the list of distinct atom pairs referenced by the tasks and stores in each
!>        task the position of its pair in that list (pair_index).
!> \param atom_pair must not be associated on entry; allocated here (size 0 if ntasks == 0)
!> \param tasks tasks that are read and whose pair_index is set
!> \param ntasks number of tasks that are considered
!> \param send NOTE(review): presumably selects whether the rank of a pair is taken from the
!>             task destination (send) or from the task source - confirm
!> \param symmetric if .TRUE. and iatom > jatom the pair is stored as row = jatom, col = iatom
!> \param rs_descs used to translate the decoded destination rank with virtual2real
2087 SUBROUTINE get_atom_pair(atom_pair, tasks, ntasks, send, symmetric, rs_descs)
2090 TYPE(
task_type),
DIMENSION(:),
INTENT(INOUT) :: tasks
2091 INTEGER,
INTENT(IN) :: ntasks
2092 LOGICAL,
INTENT(IN) :: send, symmetric
2095 INTEGER :: i, ilevel, iatom, jatom, npairs, virt_rank
2096 INTEGER,
DIMENSION(:),
ALLOCATABLE :: indices
2099 cpassert(.NOT.
ASSOCIATED(atom_pair))
! without tasks an empty list is returned
2100 IF (ntasks == 0)
THEN
2101 ALLOCATE (atom_pair(0))
! one temporary pair per task; duplicates are removed further below
2107 ALLOCATE (atom_pair_tmp(ntasks))
2109 atom_pair_tmp(i)%image = tasks(i)%image
2110 iatom = tasks(i)%iatom
2111 jatom = tasks(i)%jatom
! for symmetric storage the smaller atom index becomes the row
2112 IF (symmetric .AND. iatom > jatom)
THEN
2114 atom_pair_tmp(i)%row = jatom
2115 atom_pair_tmp(i)%col = iatom
2117 atom_pair_tmp(i)%row = iatom
2118 atom_pair_tmp(i)%col = jatom
! rank from the destination: decoded virtual rank mapped to the real rank of the level
2124 ilevel = tasks(i)%grid_level
2125 virt_rank = decode_rank(tasks(i)%destination,
SIZE(rs_descs))
2126 atom_pair_tmp(i)%rank = rs_descs(ilevel)%rs_desc%virtual2real(virt_rank)
! rank from the source
2130 atom_pair_tmp(i)%rank = decode_rank(tasks(i)%source,
SIZE(rs_descs))
! sort the pairs; indices(i) is the task that sorted pair i came from
2135 ALLOCATE (indices(ntasks))
2136 CALL atom_pair_sort(atom_pair_tmp, ntasks, indices)
2138 tasks(indices(1))%pair_index = 1
! a pair that orders after its predecessor is a new distinct pair and is compacted
! to position npairs; every task records the position of its pair
2140 IF (atom_pair_less_than(atom_pair_tmp(i - 1), atom_pair_tmp(i)))
THEN
2142 atom_pair_tmp(npairs) = atom_pair_tmp(i)
2144 tasks(indices(i))%pair_index = npairs
2146 DEALLOCATE (indices)
! only the compacted part of the temporary list is returned
2149 ALLOCATE (atom_pair(npairs))
2150 atom_pair(:) = atom_pair_tmp(:npairs)
2151 DEALLOCATE (atom_pair_tmp)
2153 END SUBROUTINE get_atom_pair
2169 nimages, scatter, hmats)
! Exchanges matrix blocks between ranks according to the send and recv atom pair lists.
! Visible behaviour: sizes and displacements are derived per rank from the block
! dimensions, blocks are packed column by column into a real buffer, exchanged with
! alltoall and unpacked either into new p blocks (scatter) or added onto h blocks
! (.NOT. scatter).
2174 TYPE(
atom_pair_type),
DIMENSION(:),
POINTER :: atom_pair_send, atom_pair_recv
2180 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rs_distribute_matrix'
2182 INTEGER :: acol, arow, handle, i, img, j, k, l, me, &
2183 nblkcols_total, nblkrows_total, ncol, &
2184 nrow, nthread, nthread_left
2185 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: first_col, first_row, last_col, last_row, recv_disps, &
2186 recv_pair_count, recv_pair_disps, recv_sizes, send_disps, send_pair_count, &
2187 send_pair_disps, send_sizes
2188 INTEGER,
DIMENSION(:),
POINTER :: col_blk_size, row_blk_size
2190 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:),
TARGET :: recv_buf_r, send_buf_r
2191 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: h_block, p_block
2194 REAL(kind=
dp),
DIMENSION(:),
POINTER :: vector
2198 CALL timeset(routinen, handle)
! the gather direction needs hmats
2200 IF (.NOT. scatter)
THEN
2201 cpassert(
PRESENT(hmats))
2204 desc => rs_descs(1)%rs_desc
! 1-based position of this rank
2205 me = desc%my_pos + 1
2208 ALLOCATE (send_sizes(desc%group_size))
2209 ALLOCATE (recv_sizes(desc%group_size))
2210 ALLOCATE (send_disps(desc%group_size))
2211 ALLOCATE (recv_disps(desc%group_size))
2212 ALLOCATE (send_pair_count(desc%group_size))
2213 ALLOCATE (recv_pair_count(desc%group_size))
2214 ALLOCATE (send_pair_disps(desc%group_size))
2215 ALLOCATE (recv_pair_disps(desc%group_size))
! block sizes are taken from the first matrix and converted to first/last indices
2217 pmat => pmats(1)%matrix
2219 row_blk_size=row_blk_size, &
2220 col_blk_size=col_blk_size, &
2221 nblkrows_total=nblkrows_total, &
2222 nblkcols_total=nblkcols_total)
2223 ALLOCATE (first_row(nblkrows_total), last_row(nblkrows_total), &
2224 first_col(nblkcols_total), last_col(nblkcols_total))
2225 CALL dbcsr_convert_sizes_to_offsets(row_blk_size, first_row, last_row)
2226 CALL dbcsr_convert_sizes_to_offsets(col_blk_size, first_col, last_col)
! per target rank: number of reals and number of pairs to send
2231 DO i = 1,
SIZE(atom_pair_send)
2232 k = atom_pair_send(i)%rank + 1
2233 arow = atom_pair_send(i)%row
2234 acol = atom_pair_send(i)%col
2235 nrow = last_row(arow) - first_row(arow) + 1
2236 ncol = last_col(acol) - first_col(acol) + 1
2237 send_sizes(k) = send_sizes(k) + nrow*ncol
2238 send_pair_count(k) = send_pair_count(k) + 1
! displacements are running sums over the preceding ranks
2243 DO i = 2, desc%group_size
2244 send_disps(i) = send_disps(i - 1) + send_sizes(i - 1)
2245 send_pair_disps(i) = send_pair_disps(i - 1) + send_pair_count(i - 1)
2248 ALLOCATE (send_buf_r(sum(send_sizes)))
! the same bookkeeping for the receiving side
2254 DO i = 1,
SIZE(atom_pair_recv)
2255 k = atom_pair_recv(i)%rank + 1
2256 arow = atom_pair_recv(i)%row
2257 acol = atom_pair_recv(i)%col
2258 nrow = last_row(arow) - first_row(arow) + 1
2259 ncol = last_col(acol) - first_col(acol) + 1
2260 recv_sizes(k) = recv_sizes(k) + nrow*ncol
2261 recv_pair_count(k) = recv_pair_count(k) + 1
2266 DO i = 2, desc%group_size
2267 recv_disps(i) = recv_disps(i - 1) + recv_sizes(i - 1)
2268 recv_pair_disps(i) = recv_pair_disps(i - 1) + recv_pair_count(i - 1)
2270 ALLOCATE (recv_buf_r(sum(recv_sizes)))
! pack the p blocks rank by rank; send_sizes(l) serves as running offset inside the
! segment of rank l and is advanced by one block after each pair
2289 DO l = 1, desc%group_size
2292 DO i = 1, send_pair_count(l)
2293 arow = atom_pair_send(send_pair_disps(l) + i)%row
2294 acol = atom_pair_send(send_pair_disps(l) + i)%col
2295 img = atom_pair_send(send_pair_disps(l) + i)%image
2296 nrow = last_row(arow) - first_row(arow) + 1
2297 ncol = last_col(acol) - first_col(acol) + 1
2298 pmat => pmats(img)%matrix
2299 CALL dbcsr_get_block_p(matrix=pmat, row=arow, col=acol, block=p_block, found=found)
! element (j, k) is stored at position j + (k - 1)*nrow of the block, i.e. column by column
2304 send_buf_r(send_disps(l) + send_sizes(l) + j + (k - 1)*nrow) = p_block(j, k)
2307 send_sizes(l) = send_sizes(l) + nrow*ncol
2312 IF (.NOT. scatter)
THEN
2327 CALL desc%group%alltoall(send_buf_r, send_sizes, send_disps, &
2328 recv_buf_r, recv_sizes, recv_disps)
2333 IF (.NOT. scatter)
THEN
! pairs addressed to this rank itself: the p block is added onto the h block
2337 DO i = 1, send_pair_count(me)
2338 arow = atom_pair_send(send_pair_disps(me) + i)%row
2339 acol = atom_pair_send(send_pair_disps(me) + i)%col
2340 img = atom_pair_send(send_pair_disps(me) + i)%image
2341 nrow = last_row(arow) - first_row(arow) + 1
2342 ncol = last_col(acol) - first_col(acol) + 1
2343 hmat => hmats(img)%matrix
2344 pmat => pmats(img)%matrix
2345 CALL dbcsr_get_block_p(matrix=hmat, row=arow, col=acol, block=h_block, found=found)
2347 CALL dbcsr_get_block_p(matrix=pmat, row=arow, col=acol, block=p_block, found=found)
2353 h_block(j, k) = h_block(j, k) + p_block(j, k)
2362 pmat => pmats(img)%matrix
2364 nblks_guess=
SIZE(atom_pair_recv)/nthread, sizedata_guess=
SIZE(recv_buf_r)/nthread, &
! unpack rank by rank; recv_sizes(l) serves as running offset inside the segment of rank l
2374 DO l = 1, desc%group_size
2377 DO i = 1, recv_pair_count(l)
2378 arow = atom_pair_recv(recv_pair_disps(l) + i)%row
2379 acol = atom_pair_recv(recv_pair_disps(l) + i)%col
2380 img = atom_pair_recv(recv_pair_disps(l) + i)%image
2381 nrow = last_row(arow) - first_row(arow) + 1
2382 ncol = last_col(acol) - first_col(acol) + 1
2383 pmat => pmats(img)%matrix
2385 CALL dbcsr_get_block_p(matrix=pmat, row=arow, col=acol, block=p_block, found=found)
2387 IF (
PRESENT(hmats))
THEN
2388 hmat => hmats(img)%matrix
2389 CALL dbcsr_get_block_p(matrix=hmat, row=arow, col=acol, block=h_block, found=found)
! scatter: a p block that does not exist yet is created from the received data
2393 IF (scatter .AND. .NOT.
ASSOCIATED(p_block))
THEN
2394 vector => recv_buf_r(recv_disps(l) + recv_sizes(l) + 1:recv_disps(l) + recv_sizes(l) + nrow*ncol)
2395 CALL dbcsr_put_block(pmat, arow, acol, block=reshape(vector, [nrow, ncol]))
! gather: the received data is added onto the h block
2397 IF (.NOT. scatter)
THEN
2401 h_block(j, k) = h_block(j, k) + recv_buf_r(recv_disps(l) + recv_sizes(l) + j + (k - 1)*nrow)
2406 recv_sizes(l) = recv_sizes(l) + nrow*ncol
2428 pmat => pmats(img)%matrix
2434 DEALLOCATE (send_buf_r)
2435 DEALLOCATE (recv_buf_r)
2437 DEALLOCATE (send_sizes)
2438 DEALLOCATE (recv_sizes)
2439 DEALLOCATE (send_disps)
2440 DEALLOCATE (recv_disps)
2441 DEALLOCATE (send_pair_count)
2442 DEALLOCATE (recv_pair_count)
2443 DEALLOCATE (send_pair_disps)
2444 DEALLOCATE (recv_pair_disps)
2446 DEALLOCATE (first_row, last_row, first_col, last_col)
2448 CALL timestop(handle)
!> \brief Computes, for a list of atom pairs, the offset of every pair and the offset and
!>        size of every rank inside one flat buffer.
!> \param pairs atom pairs; asserted to be ordered by non-decreasing rank
!> \param nsgf the block of a pair has nsgf(arow)*nsgf(acol) elements
!> \param group_size length of rank_offsets and rank_sizes
!> \param pair_offsets deallocated if associated, then allocated with SIZE(pairs) entries;
!>                     entry i is the accumulated size of all pairs before i
!> \param rank_offsets deallocated if associated, then allocated with group_size entries
!> \param rank_sizes deallocated if associated, then allocated with group_size entries
!> \param buffer_size set to the accumulated size of all blocks
2456 SUBROUTINE rs_calc_offsets(pairs, nsgf, group_size, &
2457 pair_offsets, rank_offsets, rank_sizes, buffer_size)
2459 INTEGER,
DIMENSION(:),
INTENT(IN) :: nsgf
2460 INTEGER,
INTENT(IN) :: group_size
2461 INTEGER,
DIMENSION(:),
POINTER :: pair_offsets, rank_offsets, rank_sizes
2462 INTEGER,
INTENT(INOUT) :: buffer_size
2464 INTEGER :: acol, arow, i, block_size, total_size, k, prev_k
! results of an earlier call are discarded
2466 IF (
ASSOCIATED(pair_offsets))
DEALLOCATE (pair_offsets)
2467 IF (
ASSOCIATED(rank_offsets))
DEALLOCATE (rank_offsets)
2468 IF (
ASSOCIATED(rank_sizes))
DEALLOCATE (rank_sizes)
2471 ALLOCATE (pair_offsets(
SIZE(pairs)))
! offsets are 0-based: pair i starts right after the blocks of pairs 1..i-1
2473 DO i = 1,
SIZE(pairs)
2474 pair_offsets(i) = total_size
2477 block_size = nsgf(arow)*nsgf(acol)
2478 total_size = total_size + block_size
2480 buffer_size = total_size
2483 ALLOCATE (rank_offsets(group_size))
2484 ALLOCATE (rank_sizes(group_size))
2487 IF (
SIZE(pairs) > 0)
THEN
2488 prev_k = pairs(1)%rank + 1
2489 DO i = 1,
SIZE(pairs)
2490 k = pairs(i)%rank + 1
! the pairs have to be grouped by rank in ascending order
2491 cpassert(k >= prev_k)
! first pair of a new rank: its offset starts here and closes the previous rank
2492 IF (k > prev_k)
THEN
2493 rank_offsets(k) = pair_offsets(i)
2494 rank_sizes(prev_k) = rank_offsets(k) - rank_offsets(prev_k)
! the last rank that was seen extends to the end of the buffer
2498 rank_sizes(k) = buffer_size - rank_offsets(k)
2501 END SUBROUTINE rs_calc_offsets
! Packs the blocks listed in task_list%atom_pair_send into a temporary send buffer and
! exchanges it with alltoall, receiving into dest_buffer%host_buffer.
2508 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(IN) :: src_matrices
2513 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rs_scatter_matrices'
2516 REAL(kind=
dp),
DIMENSION(:),
ALLOCATABLE :: buffer_send
2518 CALL timeset(routinen, handle)
2519 ALLOCATE (buffer_send(task_list%buffer_size_send))
! the send pair list has to exist before packing
2522 cpassert(
ASSOCIATED(task_list%atom_pair_send))
2523 CALL rs_pack_buffer(src_matrices=src_matrices, &
2524 dest_buffer=buffer_send, &
2525 atom_pair=task_list%atom_pair_send, &
2526 pair_offsets=task_list%pair_offsets_send)
! send sizes/offsets describe buffer_send, recv sizes/offsets describe the host buffer
2529 CALL group%alltoall(buffer_send, task_list%rank_sizes_send, task_list%rank_offsets_send, &
2530 dest_buffer%host_buffer, &
2531 task_list%rank_sizes_recv, task_list%rank_offsets_recv)
2533 DEALLOCATE (buffer_send)
2534 CALL timestop(handle)
! Exchanges src_buffer%host_buffer with alltoall into a temporary buffer and unpacks that
! buffer into dest_matrices using task_list%atom_pair_send.
2544 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(INOUT) :: dest_matrices
2548 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rs_gather_matrices'
2551 REAL(kind=
dp),
DIMENSION(:),
ALLOCATABLE :: buffer_send
2553 CALL timeset(routinen, handle)
2556 ALLOCATE (buffer_send(task_list%buffer_size_send))
! here the recv sizes/offsets describe the outgoing host buffer and the send
! sizes/offsets describe the incoming buffer_send
2559 CALL group%alltoall(src_buffer%host_buffer, task_list%rank_sizes_recv, task_list%rank_offsets_recv, &
2560 buffer_send, task_list%rank_sizes_send, task_list%rank_offsets_send)
2563 cpassert(
ASSOCIATED(task_list%atom_pair_send))
2564 CALL rs_unpack_buffer(src_buffer=buffer_send, &
2565 dest_matrices=dest_matrices, &
2566 atom_pair=task_list%atom_pair_send, &
2567 pair_offsets=task_list%pair_offsets_send)
2569 DEALLOCATE (buffer_send)
2570 CALL timestop(handle)
! Packs src_matrices straight into dest_buffer%host_buffer using the recv pair list and
! the recv pair offsets of the task list.
2579 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(IN) :: src_matrices
2583 CALL rs_pack_buffer(src_matrices=src_matrices, &
2584 dest_buffer=dest_buffer%host_buffer, &
2585 atom_pair=task_list%atom_pair_recv, &
2586 pair_offsets=task_list%pair_offsets_recv)
! Unpacks src_buffer%host_buffer straight into dest_matrices using the recv pair list and
! the recv pair offsets of the task list.
2596 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(INOUT) :: dest_matrices
2599 CALL rs_unpack_buffer(src_buffer=src_buffer%host_buffer, &
2600 dest_matrices=dest_matrices, &
2601 atom_pair=task_list%atom_pair_recv, &
2602 pair_offsets=task_list%pair_offsets_recv)
!> \brief Copies matrix blocks into a flat buffer. The block of pair i is flattened and
!>        stored at dest_buffer(pair_offsets(i) + 1 : pair_offsets(i) + SIZE(block)).
!> \param src_matrices matrices the blocks are read from
!> \param dest_buffer flat buffer that is written
!> \param atom_pair list of (row, col, image) entries that select the blocks
!> \param pair_offsets 0-based start of every pair inside dest_buffer
2610 SUBROUTINE rs_pack_buffer(src_matrices, dest_buffer, atom_pair, pair_offsets)
2611 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(IN) :: src_matrices
2612 REAL(kind=
dp),
DIMENSION(:),
INTENT(INOUT) :: dest_buffer
2614 INTEGER,
DIMENSION(:),
INTENT(IN) :: pair_offsets
2616 INTEGER :: acol, arow, img, i, offset, block_size
2618 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: block
2624 DO i = 1,
SIZE(atom_pair)
2625 arow = atom_pair(i)%row
2626 acol = atom_pair(i)%col
2627 img = atom_pair(i)%image
2629 block=block, found=found)
! flatten the 2d block into its segment of the buffer
2631 block_size =
SIZE(block)
2632 offset = pair_offsets(i)
2633 dest_buffer(offset + 1:offset + block_size) = reshape(block, shape=[block_size])
2638 END SUBROUTINE rs_pack_buffer
!> \brief Adds the contents of a flat buffer onto matrix blocks. The segment of pair i is
!>        reshaped to the shape of the block and added to it while an OpenMP lock, selected
!>        from the block row, is held.
!> \param src_buffer flat buffer that is read
!> \param dest_matrices matrices whose blocks are updated
!> \param atom_pair list of (row, col, image) entries that select the blocks
!> \param pair_offsets 0-based start of every pair inside src_buffer
2644 SUBROUTINE rs_unpack_buffer(src_buffer, dest_matrices, atom_pair, pair_offsets)
2645 REAL(kind=
dp),
DIMENSION(:),
INTENT(IN) :: src_buffer
2646 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(INOUT) :: dest_matrices
2648 INTEGER,
DIMENSION(:),
INTENT(IN) :: pair_offsets
2650 INTEGER :: acol, arow, img, i, offset, &
2651 nrows, ncols, lock_num
2653 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: block
2654 INTEGER(kind=omp_lock_kind),
ALLOCATABLE,
DIMENSION(:) :: locks
! ten locks per thread are created and initialised
2657 ALLOCATE (locks(10*omp_get_max_threads()))
2658 DO i = 1,
SIZE(locks)
2659 CALL omp_init_lock(locks(i))
2666 DO i = 1,
SIZE(atom_pair)
2667 arow = atom_pair(i)%row
2668 acol = atom_pair(i)%col
2669 img = atom_pair(i)%image
2671 block=block, found=found)
2673 nrows =
SIZE(block, 1)
2674 ncols =
SIZE(block, 2)
2675 offset = pair_offsets(i)
! blocks of the same row always map to the same lock
2676 lock_num =
modulo(arow,
SIZE(locks)) + 1
! the accumulation onto the block happens while the lock is held
2678 CALL omp_set_lock(locks(lock_num))
2679 block = block + reshape(src_buffer(offset + 1:offset + nrows*ncols), shape=[nrows, ncols])
2680 CALL omp_unset_lock(locks(lock_num))
2686 DO i = 1,
SIZE(locks)
2687 CALL omp_destroy_lock(locks(i))
2691 END SUBROUTINE rs_unpack_buffer
!> \brief Assigns the destination rank(s) of a task from the cube it covers on a grid level.
!>        If the cube fits into the bordered domain of the rank that owns the cube center,
!>        the task gets that single destination (dist_type = 1). In the multi-rank case one
!>        task per process coordinate of a coordinate box is written (dist_type = 2).
!> \param rs_desc grid descriptor (coordinate maps, global bounds, border, perd, group_dim, npts)
!> \param igrid_level grid level that is encoded into the destination
!> \param n_levels number of grid levels passed to encode_rank
!> \param cube_center grid coordinates of the cube center
!> \param ntasks task counter; increased by added_tasks - 1 in the multi-rank case
!> \param tasks task list; destination, dist_type and subpatch_pattern are set
!> \param lb_cube lower cube bounds relative to cube_center
!> \param ub_cube upper cube bounds relative to cube_center
!> \param added_tasks in the multi-rank case the number of process coordinates in the box
2709 SUBROUTINE rs_find_node(rs_desc, igrid_level, n_levels, cube_center, ntasks, tasks, &
2710 lb_cube, ub_cube, added_tasks)
2713 INTEGER,
INTENT(IN) :: igrid_level, n_levels
2714 INTEGER,
DIMENSION(3),
INTENT(IN) :: cube_center
2715 INTEGER,
INTENT(INOUT) :: ntasks
2716 TYPE(
task_type),
DIMENSION(:),
POINTER :: tasks
2717 INTEGER,
DIMENSION(3),
INTENT(IN) :: lb_cube, ub_cube
2718 INTEGER,
INTENT(OUT) :: added_tasks
2720 INTEGER,
PARAMETER :: add_tasks = 1000
2721 REAL(kind=
dp),
PARAMETER :: mult_tasks = 2.0_dp
2723 INTEGER :: bit_index, coord(3), curr_tasks, dest, i, icoord(3), idest, itask, ix, iy, iz, &
2724 lb_coord(3), lb_domain(3), lbc(3), ub_coord(3), ub_domain(3), ubc(3)
2725 INTEGER :: bit_pattern
2726 LOGICAL :: dir_periodic(3)
! process coordinates and rank that own the cube center
2728 coord(1) = rs_desc%x2coord(cube_center(1))
2729 coord(2) = rs_desc%y2coord(cube_center(2))
2730 coord(3) = rs_desc%z2coord(cube_center(3))
2731 dest = rs_desc%coord2rank(coord(1), coord(2), coord(3))
! absolute cube bounds
2734 lbc = lb_cube + cube_center
2735 ubc = ub_cube + cube_center
! the cube lies completely inside the domain of dest enlarged by the border
2737 IF (all((rs_desc%lb_global(:, dest) - rs_desc%border) <= lbc) .AND. &
2738 all((rs_desc%ub_global(:, dest) + rs_desc%border) >= ubc))
THEN
2740 tasks(ntasks)%destination = encode_rank(dest, igrid_level, n_levels)
2741 tasks(ntasks)%dist_type = 1
2742 tasks(ntasks)%subpatch_pattern = 0
! two bits per direction are written into bit_pattern; for perd(i) == 1 both are cleared
2761 IF (rs_desc%perd(i) == 1)
THEN
2762 bit_pattern = ibclr(bit_pattern, bit_index)
2763 bit_index = bit_index + 1
2764 bit_pattern = ibclr(bit_pattern, bit_index)
2765 bit_index = bit_index + 1
! first bit of the direction: set when ubc(i) stays within the lower border of dest
2768 IF (ubc(i) <= rs_desc%lb_global(i, dest) - 1 + rs_desc%border)
THEN
2769 bit_pattern = ibset(bit_pattern, bit_index)
2770 bit_index = bit_index + 1
2772 bit_pattern = ibclr(bit_pattern, bit_index)
2773 bit_index = bit_index + 1
! second bit of the direction: set when lbc(i) stays within the upper border of dest
2776 IF (lbc(i) >= rs_desc%ub_global(i, dest) + 1 - rs_desc%border)
THEN
2777 bit_pattern = ibset(bit_pattern, bit_index)
2778 bit_index = bit_index + 1
2780 bit_pattern = ibclr(bit_pattern, bit_index)
2781 bit_index = bit_index + 1
2785 tasks(ntasks)%subpatch_pattern = bit_pattern
! start from the bordered domain of dest and grow it by whole neighbouring domains
2795 lb_domain = rs_desc%lb_global(:, dest) - rs_desc%border
2796 ub_domain = rs_desc%ub_global(:, dest) + rs_desc%border
2799 IF (rs_desc%perd(i) == 0)
THEN
! extend towards lower coordinates by the extent of the neighbouring domain
2802 IF (lb_domain(i) > lbc(i))
THEN
2803 lb_coord(i) = lb_coord(i) - 1
2804 icoord =
modulo(lb_coord, rs_desc%group_dim)
2805 idest = rs_desc%coord2rank(icoord(1), icoord(2), icoord(3))
2806 lb_domain(i) = lb_domain(i) - (rs_desc%ub_global(i, idest) - rs_desc%lb_global(i, idest) + 1)
! extend towards higher coordinates in the same way
2813 IF (ub_domain(i) < ubc(i))
THEN
2814 ub_coord(i) = ub_coord(i) + 1
2815 icoord =
modulo(ub_coord, rs_desc%group_dim)
2816 idest = rs_desc%coord2rank(icoord(1), icoord(2), icoord(3))
2817 ub_domain(i) = ub_domain(i) + (rs_desc%ub_global(i, idest) - rs_desc%lb_global(i, idest) + 1)
! the grown domain covers all grid points of direction i
2827 IF (ub_domain(i) - lb_domain(i) + 1 >= rs_desc%npts(i))
THEN
2828 dir_periodic(i) = .true.
2830 ub_coord(i) = rs_desc%group_dim(i) - 1
2832 dir_periodic(i) = .false.
! one task per process coordinate inside the box lb_coord..ub_coord
2836 added_tasks = product(ub_coord - lb_coord + 1)
2838 ntasks = ntasks + added_tasks - 1
! NOTE(review): presumably the task array is enlarged to curr_tasks entries when it is
! too small - the reallocation itself is not visible here, confirm
2839 IF (ntasks >
SIZE(tasks))
THEN
2840 curr_tasks = int((
SIZE(tasks) + add_tasks)*mult_tasks)
2843 DO iz = lb_coord(3), ub_coord(3)
2844 DO iy = lb_coord(2), ub_coord(2)
2845 DO ix = lb_coord(1), ub_coord(1)
! coordinates are wrapped into the process grid before the rank lookup
2846 icoord =
modulo([ix, iy, iz], rs_desc%group_dim)
2847 idest = rs_desc%coord2rank(icoord(1), icoord(2), icoord(3))
2848 tasks(itask)%destination = encode_rank(idest, igrid_level, n_levels)
2849 tasks(itask)%dist_type = 2
2850 tasks(itask)%subpatch_pattern = 0
! bits 0..5 flag the lower/upper end of the box in x, y and z, unless the direction
! is flagged in dir_periodic
2853 IF (ix == lb_coord(1) .AND. .NOT. dir_periodic(1)) &
2854 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 0)
2855 IF (ix == ub_coord(1) .AND. .NOT. dir_periodic(1)) &
2856 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 1)
2857 IF (iy == lb_coord(2) .AND. .NOT. dir_periodic(2)) &
2858 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 2)
2859 IF (iy == ub_coord(2) .AND. .NOT. dir_periodic(2)) &
2860 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 3)
2861 IF (iz == lb_coord(3) .AND. .NOT. dir_periodic(3)) &
2862 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 4)
2863 IF (iz == ub_coord(3) .AND. .NOT. dir_periodic(3)) &
2864 tasks(itask)%subpatch_pattern = ibset(tasks(itask)%subpatch_pattern, 5)
2871 END SUBROUTINE rs_find_node
!> \brief Packs a rank and a grid level into one integer as rank*n_levels + (grid_level - 1).
!>        decode_rank and decode_level recover both parts for non-negative ranks and
!>        1 <= grid_level <= n_levels.
!> \param rank rank to encode
!> \param grid_level grid level to encode (1-based)
!> \param n_levels total number of grid levels
!> \return the encoded integer
PURE FUNCTION encode_rank(rank, grid_level, n_levels) RESULT(encoded_int)
   INTEGER, INTENT(IN) :: rank, grid_level, n_levels
   INTEGER :: encoded_int

   ! the grid level occupies the remainder modulo n_levels, hence the shift to 0-based
   encoded_int = rank*n_levels + grid_level - 1

END FUNCTION encode_rank
!> \brief Extracts the rank from an integer produced by encode_rank.
!> \param encoded_int encoded rank and grid level
!> \param n_levels total number of grid levels used for the encoding
!> \return encoded_int divided by n_levels (integer division)
PURE FUNCTION decode_rank(encoded_int, n_levels) RESULT(rank)
   INTEGER, INTENT(IN) :: encoded_int
   INTEGER, INTENT(IN) :: n_levels
   INTEGER :: rank

   ! integer division already yields an integer, no conversion is needed
   rank = encoded_int/n_levels

END FUNCTION decode_rank
!> \brief Extracts the grid level from an integer produced by encode_rank.
!> \param encoded_int encoded rank and grid level
!> \param n_levels total number of grid levels used for the encoding
!> \return MODULO(encoded_int, n_levels) + 1, i.e. a 1-based grid level
PURE FUNCTION decode_level(encoded_int, n_levels) RESULT(grid_level)
   INTEGER, INTENT(IN) :: encoded_int
   INTEGER, INTENT(IN) :: n_levels
   INTEGER :: grid_level

   ! MODULO of two integers is an integer, no conversion is needed
   grid_level = MODULO(encoded_int, n_levels) + 1

END FUNCTION decode_level
!> \brief Ordering of two tasks: the fields grid_level, image, iatom, jatom, iset, jset,
!>        ipgf and jpgf are compared in this order and the first field that differs decides.
!> \param a first task
!> \param b second task
!> \return result of the less-than comparison on the deciding field; if all fields before
!>         jpgf are equal the result is a%jpgf < b%jpgf
2942 PURE FUNCTION tasks_less_than(a, b)
RESULT(res)
2946 IF (a%grid_level /= b%grid_level)
THEN
2947 res = a%grid_level < b%grid_level
2949 ELSE IF (a%image /= b%image)
THEN
2950 res = a%image < b%image
2952 ELSE IF (a%iatom /= b%iatom)
THEN
2953 res = a%iatom < b%iatom
2955 ELSE IF (a%jatom /= b%jatom)
THEN
2956 res = a%jatom < b%jatom
2958 ELSE IF (a%iset /= b%iset)
THEN
2959 res = a%iset < b%iset
2961 ELSE IF (a%jset /= b%jset)
THEN
2962 res = a%jset < b%jset
2964 ELSE IF (a%ipgf /= b%ipgf)
THEN
2965 res = a%ipgf < b%ipgf
2968 res = a%jpgf < b%jpgf
2971 END FUNCTION tasks_less_than
!> \brief Sorts an array of tasks by calling tasks_sort_low and returns the applied
!>        permutation in indices.
!> \param arr the n tasks to be sorted
!> \param n number of elements
!> \param indices initialised to 1..n and handed to tasks_sort_low together with arr
2984 SUBROUTINE tasks_sort(arr, n, indices)
2985 INTEGER,
INTENT(IN) :: n
2986 TYPE(
task_type),
DIMENSION(1:n),
INTENT(INOUT) :: arr
2987 integer,
DIMENSION(1:n),
INTENT(INOUT) :: indices
2990 TYPE(
task_type),
ALLOCATABLE :: tmp_arr(:)
2991 INTEGER,
ALLOCATABLE :: tmp_idx(:)
! scratch space for half of the array (rounded up)
2995 ALLOCATE (tmp_arr((n + 1)/2), tmp_idx((n + 1)/2))
! start from the identity permutation
2997 indices = [(i, i=1, n)]
2999 CALL tasks_sort_low(arr(1:n), indices, tmp_arr, tmp_idx)
3001 DEALLOCATE (tmp_arr, tmp_idx)
3002 ELSE IF (n > 0)
THEN
3006 END SUBROUTINE tasks_sort
!> \brief Recursive worker of tasks_sort. Arrays with at most 7 elements are handled by
!>        exchange passes that stop once a pass made no swap; larger arrays are split at
!>        m = (size + 1)/2, both halves are sorted recursively and then merged.
!> \param arr tasks to be sorted
!> \param indices permutation that is reordered together with arr
!> \param tmp_arr scratch space that receives a copy of the first half during the merge
!> \param tmp_idx scratch space that receives the indices of the first half
3018 RECURSIVE SUBROUTINE tasks_sort_low(arr, indices, tmp_arr, tmp_idx)
3019 TYPE(
task_type),
DIMENSION(:),
INTENT(INOUT) :: arr
3020 INTEGER,
DIMENSION(size(arr)),
INTENT(INOUT) :: indices
3021 TYPE(
task_type),
DIMENSION((size(arr) + 1)/2),
INTENT(INOUT) :: tmp_arr
3022 INTEGER,
DIMENSION((size(arr) + 1)/2),
INTENT(INOUT) :: tmp_idx
3024 INTEGER :: t, m, i, j, k
! small arrays: neighbouring elements that are out of order are exchanged
3031 IF (
size(arr) <= 7)
THEN
3032 DO j =
size(arr) - 1, 1, -1
3035 IF (tasks_less_than(arr(i + 1), arr(i)))
THEN
3042 indices(i) = indices(i + 1)
! a pass without any swap means the array is sorted
3047 IF (.NOT. swapped)
EXIT
! larger arrays: sort both halves with the same scratch space
3053 m = (
size(arr) + 1)/2
3054 CALL tasks_sort_low(arr(1:m), indices(1:m), tmp_arr, tmp_idx)
3055 CALL tasks_sort_low(arr(m + 1:), indices(m + 1:), tmp_arr, tmp_idx)
! a merge is only needed if the halves are not already in order at the seam
3059 IF (tasks_less_than(arr(m + 1), arr(m)))
THEN
! the first half is saved so that arr can be overwritten while merging
3062 tmp_arr(1:m) = arr(1:m)
3063 tmp_idx(1:m) = indices(1:m)
! i runs over the saved first half, j over the second half, k over the output
3068 DO WHILE (i <= m .and. j <=
size(arr) - m)
3069 IF (tasks_less_than(arr(m + j), tmp_arr(i)))
THEN
3071 indices(k) = indices(m + j)
3075 indices(k) = tmp_idx(i)
3085 indices(k) = tmp_idx(i)
3092 END SUBROUTINE tasks_sort_low
!> \brief Ordering of two atom pairs: rank, row and col are compared in this order and the
!>        image is compared last.
!> \param a first atom pair
!> \param b second atom pair
!> \return a%rank < b%rank if the ranks differ; a%image < b%image if rank, row and col are
!>         all equal. NOTE(review): the results for differing row or col are on lines not
!>         shown here - presumably the analogous less-than comparisons, confirm
3102 PURE FUNCTION atom_pair_less_than(a, b)
RESULT(res)
3106 IF (a%rank /= b%rank)
THEN
3107 res = a%rank < b%rank
3109 ELSE IF (a%row /= b%row)
THEN
3112 ELSE IF (a%col /= b%col)
THEN
3116 res = a%image < b%image
3119 END FUNCTION atom_pair_less_than
!> \brief Sorts an array of atom pairs by calling atom_pair_sort_low and returns the applied
!>        permutation in indices.
!> \param arr the n atom pairs to be sorted
!> \param n number of elements
!> \param indices initialised to 1..n and handed to atom_pair_sort_low together with arr
3132 SUBROUTINE atom_pair_sort(arr, n, indices)
3133 INTEGER,
INTENT(IN) :: n
3135 integer,
DIMENSION(1:n),
INTENT(INOUT) :: indices
3139 INTEGER,
ALLOCATABLE :: tmp_idx(:)
! scratch space for half of the array (rounded up)
3143 ALLOCATE (tmp_arr((n + 1)/2), tmp_idx((n + 1)/2))
! start from the identity permutation
3145 indices = [(i, i=1, n)]
3147 CALL atom_pair_sort_low(arr(1:n), indices, tmp_arr, tmp_idx)
3149 DEALLOCATE (tmp_arr, tmp_idx)
3150 ELSE IF (n > 0)
THEN
3154 END SUBROUTINE atom_pair_sort
!> \brief Recursive worker of atom_pair_sort. Arrays with at most 7 elements are handled by
!>        exchange passes that stop once a pass made no swap; larger arrays are split at
!>        m = (size + 1)/2, both halves are sorted recursively and then merged.
!> \param arr atom pairs to be sorted
!> \param indices permutation that is reordered together with arr
!> \param tmp_arr scratch space that receives a copy of the first half during the merge
!> \param tmp_idx scratch space that receives the indices of the first half
3166 RECURSIVE SUBROUTINE atom_pair_sort_low(arr, indices, tmp_arr, tmp_idx)
3168 INTEGER,
DIMENSION(size(arr)),
INTENT(INOUT) :: indices
3169 TYPE(
atom_pair_type),
DIMENSION((size(arr) + 1)/2),
INTENT(INOUT) :: tmp_arr
3170 INTEGER,
DIMENSION((size(arr) + 1)/2),
INTENT(INOUT) :: tmp_idx
3172 INTEGER :: t, m, i, j, k
! small arrays: neighbouring elements that are out of order are exchanged
3179 IF (
size(arr) <= 7)
THEN
3180 DO j =
size(arr) - 1, 1, -1
3183 IF (atom_pair_less_than(arr(i + 1), arr(i)))
THEN
3190 indices(i) = indices(i + 1)
! a pass without any swap means the array is sorted
3195 IF (.NOT. swapped)
EXIT
! larger arrays: sort both halves with the same scratch space
3201 m = (
size(arr) + 1)/2
3202 CALL atom_pair_sort_low(arr(1:m), indices(1:m), tmp_arr, tmp_idx)
3203 CALL atom_pair_sort_low(arr(m + 1:), indices(m + 1:), tmp_arr, tmp_idx)
! a merge is only needed if the halves are not already in order at the seam
3207 IF (atom_pair_less_than(arr(m + 1), arr(m)))
THEN
! the first half is saved so that arr can be overwritten while merging
3210 tmp_arr(1:m) = arr(1:m)
3211 tmp_idx(1:m) = indices(1:m)
! i runs over the saved first half, j over the second half, k over the output
3216 DO WHILE (i <= m .and. j <=
size(arr) - m)
3217 IF (atom_pair_less_than(arr(m + j), tmp_arr(i)))
THEN
3219 indices(k) = indices(m + j)
3223 indices(k) = tmp_idx(i)
3233 indices(k) = tmp_idx(i)
3240 END SUBROUTINE atom_pair_sort_low
void grid_create_basis_set(const int nset, const int nsgf, const int maxco, const int maxpgf, const int lmin[nset], const int lmax[nset], const int npgf[nset], const int nsgf_set[nset], const int first_sgf[nset], const double sphi[nsgf][maxco], const double zet[nset][maxpgf], grid_basis_set **basis_set_out)
Allocates a basis set which can be passed to grid_create_task_list. See grid_task_list....
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always returns a positive number. https://gcc....
All kinds of helpful little routines.
real(kind=dp) function, public exp_radius_very_extended(la_min, la_max, lb_min, lb_max, pab, o1, o2, ra, rb, rp, zetp, eps, prefactor, cutoff, epsabs)
computes the radius of the Gaussian outside of which it is smaller than eps
subroutine, public get_gto_basis_set(gto_basis_set, name, aliases, norm_type, kind_radius, ncgf, nset, nsgf, cgf_symbol, sgf_symbol, norm_cgf, set_radius, lmax, lmin, lx, ly, lz, m, ncgf_set, npgf, nsgf_set, nshell, cphi, pgf_radius, sphi, scon, zet, first_cgf, first_sgf, l, last_cgf, last_sgf, n, gcc, maxco, maxl, maxpgf, maxsgf_set, maxshell, maxso, nco_sum, npgf_sum, nshell_sum, maxder, short_kind_radius, npgf_seg_sum)
...
Handles all functions related to the CELL.
Defines control structures, which contain the parameters and the settings for the DFT-based calculati...
subroutine, public dbcsr_get_block_p(matrix, row, col, block, found, row_size, col_size)
...
subroutine, public dbcsr_get_info(matrix, nblkrows_total, nblkcols_total, nfullrows_total, nfullcols_total, nblkrows_local, nblkcols_local, nfullrows_local, nfullcols_local, my_prow, my_pcol, local_rows, local_cols, proc_row_dist, proc_col_dist, row_blk_size, col_blk_size, row_blk_offset, col_blk_offset, distribution, name, matrix_type, group)
...
subroutine, public dbcsr_work_create(matrix, nblks_guess, sizedata_guess, n, work_mutable)
...
subroutine, public dbcsr_finalize(matrix)
...
subroutine, public dbcsr_put_block(matrix, row, col, block, summation)
...
for a given dr()/dh(r) this will provide the bounds to be used if one wants to go over a sphere-subre...
subroutine, public compute_cube_center(cube_center, rs_desc, zeta, zetb, ra, rab)
unifies the computation of the cube center, so that differences in implementation,...
subroutine, public return_cube(info, radius, lb_cube, ub_cube, sphere_bounds)
...
subroutine, public return_cube_nonortho(info, radius, lb, ub, rp)
...
integer function, public gaussian_gridlevel(gridlevel_info, exponent)
...
Fortran API for the grid package, which is written in C.
subroutine, public grid_create_task_list(ntasks, natoms, nkinds, nblocks, block_offsets, atom_positions, atom_kinds, basis_sets, level_list, iatom_list, jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list, block_num_list, radius_list, rab_list, rs_grids, task_list)
Allocates a task list which can be passed to grid_collocate_task_list.
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
integer, parameter, public default_string_length
Types and basic routines needed for a kpoint calculation.
subroutine, public get_kpoint_info(kpoint, kp_scheme, nkp_grid, kp_shift, symmetry, verbose, full_grid, use_real_wfn, eps_geo, parallel_group_size, kp_range, nkp, xkp, wkp, para_env, blacs_env_all, para_env_kp, para_env_inter_kp, blacs_env, kp_env, kp_aux_env, mpools, iogrp, nkp_groups, kp_dist, cell_to_index, index_to_cell, sab_nl, sab_nl_nosym)
Retrieve information from a kpoint environment.
An array-based list which grows on demand. When the internal array is full, a new array of twice the ...
Utility routines for the memory handling.
Interface to the message passing library MPI.
Fortran API for the offload package, which is written in C.
subroutine, public offload_create_buffer(length, buffer)
Allocates a buffer of given length, i.e. the number of elements.
Define methods related to particle_type.
subroutine, public get_particle_set(particle_set, qs_kind_set, first_sgf, last_sgf, nsgf, nmao, basis)
Get the components of a particle set.
Define the data structure for the particle information.
Container for various plane-wave related things
subroutine, public pw_env_get(pw_env, pw_pools, cube_info, gridlevel_info, auxbas_pw_pool, auxbas_grid, auxbas_rs_desc, auxbas_rs_grid, rs_descs, rs_grids, xc_pw_pool, vdw_pw_pool, poisson_env, interp_section)
returns the various attributes of the pw env
Define the quickstep kind type and their sub types.
subroutine, public get_qs_kind(qs_kind, basis_set, basis_type, ncgf, nsgf, all_potential, tnadd_potential, gth_potential, sgp_potential, upf_potential, cneo_potential, se_parameter, dftb_parameter, xtb_parameter, dftb3_param, zatom, zeff, elec_conf, mao, lmax_dftb, alpha_core_charge, ccore_charge, core_charge, core_charge_radius, paw_proj_set, paw_atom, hard_radius, hard0_radius, max_rad_local, covalent_radius, vdw_radius, gpw_type_forced, harmonics, max_iso_not0, max_s_harm, grid_atom, ngrid_ang, ngrid_rad, lmax_rho0, dft_plus_u_atom, l_of_dft_plus_u, n_of_dft_plus_u, u_minus_j, u_of_dft_plus_u, j_of_dft_plus_u, alpha_of_dft_plus_u, beta_of_dft_plus_u, j0_of_dft_plus_u, occupation_of_dft_plus_u, dispersion, bs_occupation, magnetization, no_optimize, addel, laddel, naddel, orbitals, max_scf, eps_scf, smear, u_ramping, u_minus_j_target, eps_u_ramping, init_u_ramping_each_scf, reltmat, ghost, monovalent, floating, name, element_symbol, pao_basis_size, pao_model_file, pao_potentials, pao_descriptors, nelec)
Get attributes of an atomic kind.
subroutine, public get_ks_env(ks_env, v_hartree_rspace, s_mstruct_changed, rho_changed, potential_changed, forces_up_to_date, complex_ks, matrix_h, matrix_h_im, matrix_ks, matrix_ks_im, matrix_vxc, kinetic, matrix_s, matrix_s_ri_aux, matrix_w, matrix_p_mp2, matrix_p_mp2_admm, matrix_h_kp, matrix_h_im_kp, matrix_ks_kp, matrix_vxc_kp, kinetic_kp, matrix_s_kp, matrix_w_kp, matrix_s_ri_aux_kp, matrix_ks_im_kp, rho, rho_xc, vppl, rho_core, rho_nlcc, rho_nlcc_g, vee, neighbor_list_id, sab_orb, sab_all, sac_ae, sac_ppl, sac_lri, sap_ppnl, sap_oce, sab_lrc, sab_se, sab_xtbe, sab_tbe, sab_core, sab_xb, sab_xtb_pp, sab_xtb_nonbond, sab_vdw, sab_scp, sab_almo, sab_kp, sab_kp_nosym, sab_cneo, task_list, task_list_soft, kpoints, do_kpoints, atomic_kind_set, qs_kind_set, cell, cell_ref, use_ref_cell, particle_set, energy, force, local_particles, local_molecules, molecule_kind_set, molecule_set, subsys, cp_subsys, virial, results, atprop, nkind, natom, dft_control, dbcsr_dist, distribution_2d, pw_env, para_env, blacs_env, nelectron_total, nelectron_spin)
...
Define the neighbor list data types and the corresponding functionality.
subroutine, public rs_grid_create(rs, desc)
...
pure integer function, public rs_grid_locate_rank(rs_desc, rank_in, shift)
returns the 1D rank of the task which is a cartesian shift away from 1D rank rank_in only possible if...
pure subroutine, public rs_grid_reorder_ranks(desc, real2virtual)
Defines a new ordering of ranks on this realspace grid, recalculating the data bounds and reallocatin...
subroutine, public rs_grid_release(rs_grid)
releases the given rs grid (see doc/ReferenceCounting.html)
Generates the task lists used by the collocate and integrate routines
subroutine, public rs_copy_to_matrices(src_buffer, dest_matrices, task_list)
Copies from buffer into DBCSR matrices, replaces rs_gather_matrix for non-distributed grids.
subroutine, public rs_scatter_matrices(src_matrices, dest_buffer, task_list, group)
Scatters dbcsr matrix blocks and receives them into a buffer as needed before collocation.
subroutine, public rs_distribute_matrix(rs_descs, pmats, atom_pair_send, atom_pair_recv, nimages, scatter, hmats)
redistributes the matrix so that it can be used in realspace operations i.e. according to the task li...
subroutine, public distribute_tasks(rs_descs, ntasks, natoms, tasks, atom_pair_send, atom_pair_recv, symmetric, reorder_rs_grid_ranks, skip_load_balance_distributed)
Assembles tasks to be performed on local grid.
subroutine, public generate_qs_task_list(ks_env, task_list, basis_type, reorder_rs_grid_ranks, skip_load_balance_distributed, pw_env_external, sab_orb_external)
...
subroutine, public rs_gather_matrices(src_buffer, dest_matrices, task_list, group)
Gathers the DBCSR matrix blocks and receives them into a buffer as needed after integration.
subroutine, public task_list_inner_loop(tasks, ntasks, curr_tasks, rs_descs, dft_control, cube_info, gridlevel_info, cindex, iatom, jatom, rpgfa, rpgfb, zeta, zetb, kind_radius_b, set_radius_a, set_radius_b, ra, rab, la_max, la_min, lb_max, lb_min, npgfa, npgfb, nseta, nsetb)
...
subroutine, public rs_copy_to_buffer(src_matrices, dest_buffer, task_list)
Copies the DBCSR blocks into buffer, replaces rs_scatter_matrix for non-distributed grids.
subroutine, public serialize_task(task, serialized_task)
Serialize a task into an integer array. Used for MPI communication.
subroutine, public deserialize_task(task, serialized_task)
De-serialize a task from an integer array. Used for MPI communication.
subroutine, public reallocate_tasks(tasks, new_size)
Grow an array of tasks while preserving the existing entries.
integer, parameter, public task_size_in_int8
All kinds of helpful little routines.
Type defining parameters related to the simulation cell.
Contains information about kpoints.
Container for different pw related things
Provides all information about a quickstep kind.
calculation environment to calculate the ks matrix, holds all the needed vars. assumes that the core ...