27#include "./base/base_uses.f90"
33 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'skala_gpw_features'
34 REAL(KIND=
dp),
PARAMETER,
PRIVATE :: layout_tol = 1.0e-12_dp
35 INTEGER,
PARAMETER,
PRIVATE :: ndynamic_per_point = 10, nstatic_per_point = 4, &
! Cache type of module skala_gpw_features.
! Visible components: integer chunk/atom/point counters, 2x3 integer arrays
! (bo, bounds), allocatable per-rank count and displacement arrays (dynamic_*,
! feature_*, chunk_* and route_* families), INTEGER(int_8) atomic grid size
! arrays, REAL(dp) coordinate and weight arrays, torch dict/tensor handles, and
! LOGICAL flags recording which groups of data or tensors are currently active.
! All scalar components shown here carry default initial values.
40 TYPE skala_gpw_layout_cache_type
41 INTEGER :: chunk_atom_begin = 1, chunk_atom_end = 0, &
42 chunk_feature_begin = 1, &
43 chunk_feature_count = 0, chunk_natom = 0, &
44 natom = 0, nflat = 0, nflat_local = 0, &
46 INTEGER,
DIMENSION(2, 3) :: bo = 0, bounds = 0
47 INTEGER,
DIMENSION(3) :: npts = 0
48 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: dynamic_counts, dynamic_displs, &
49 chunk_feature_counts, chunk_feature_displs, &
50 chunk_grad_counts, chunk_grad_displs, &
51 feature_counts, feature_displs, &
52 global_to_feature, route_dynamic_recv_counts, &
53 route_dynamic_recv_displs, &
54 route_dynamic_send_counts, &
55 route_dynamic_send_displs, &
56 route_grad_return_recv_counts, &
57 route_grad_return_recv_displs, &
58 route_grad_return_send_counts, &
59 route_grad_return_send_displs, &
60 route_local_dest, route_meta_recv_counts, &
61 route_meta_recv_displs, &
62 route_meta_send_counts, &
63 route_meta_send_displs, &
64 route_point_recv_counts, &
65 route_point_recv_displs, &
66 route_point_send_counts, &
67 route_point_send_displs, &
69 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :) :: feature_index
70 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: atomic_grid_sizes, chunk_atomic_grid_sizes, &
72 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: local_feature_indices
73 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:, :) :: atomic_grid_size_bound_shape, &
74 chunk_atomic_grid_size_bound_shape
75 TYPE(torch_dict_type) :: chunk_static_inputs
76 TYPE(torch_dict_type) :: static_inputs
77 TYPE(torch_tensor_type) :: atomic_grid_size_bound_shape_t
78 TYPE(torch_tensor_type) :: atomic_grid_sizes_t
79 TYPE(torch_tensor_type) :: atomic_grid_weights_t
80 TYPE(torch_tensor_type) :: chunk_atomic_grid_size_bound_shape_t
81 TYPE(torch_tensor_type) :: chunk_atomic_grid_sizes_t
82 TYPE(torch_tensor_type) :: chunk_atomic_grid_weights_t
83 TYPE(torch_tensor_type) :: chunk_coarse_0_atomic_coords_t
84 TYPE(torch_tensor_type) :: chunk_density_t
85 TYPE(torch_tensor_type) :: chunk_feature_indices_t
86 TYPE(torch_tensor_type) :: chunk_grad_t
87 TYPE(torch_tensor_type) :: chunk_grid_coords_t
88 TYPE(torch_tensor_type) :: chunk_grid_weights_t
89 TYPE(torch_tensor_type) :: chunk_kin_t
90 TYPE(torch_tensor_type) :: coarse_0_atomic_coords_t
91 TYPE(torch_tensor_type) :: density_t
92 TYPE(torch_tensor_type) :: grid_coords_t
93 TYPE(torch_tensor_type) :: grid_weights_t
94 TYPE(torch_tensor_type) :: grad_t
95 TYPE(torch_tensor_type) :: kin_t
96 TYPE(torch_tensor_type) :: local_feature_indices_t
97 REAL(KIND=
dp) :: dvol = 0.0_dp, weight_sum = 0.0_dp, &
99 REAL(KIND=
dp),
DIMENSION(3, 3) :: cell_hmat = 0.0_dp, dh = 0.0_dp
100 REAL(KIND=
dp),
ALLOCATABLE,
DIMENSION(:) :: atomic_grid_weights, chunk_atomic_grid_weights, &
101 chunk_grid_weights, grid_weights
102 REAL(KIND=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: atom_coords, chunk_coarse_0_atomic_coords, &
103 chunk_grid_coords, coarse_0_atomic_coords, &
105 LOGICAL :: active = .false., has_weights = .false., &
106 chunk_dynamic_tensors_active = .false., &
107 chunk_static_tensors_active = .false., &
108 dynamic_tensors_active = .false., &
109 static_tensors_active = .false.
110 END TYPE skala_gpw_layout_cache_type
113 INTEGER :: chunk_feature_count = 0, nflat = 0, &
126 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: chunk_grad_counts, chunk_grad_displs, &
127 chunk_return_positions, &
128 chunk_return_ranks, chunk_return_rows, &
129 route_grad_return_recv_counts, &
130 route_grad_return_recv_displs, &
131 route_grad_return_send_counts, &
132 route_grad_return_send_displs, &
133 route_point_recv_counts, &
134 route_point_recv_displs, &
135 route_point_send_counts, &
136 route_point_send_displs, &
137 route_send_local_rows
138 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :) :: feature_index
139 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:) :: atomic_grid_sizes
140 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:, :) :: atomic_grid_size_bound_shape
141 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: atomic_grid_weights, grid_weights
142 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: chunk_density, chunk_kin, &
143 coarse_0_atomic_coords, density, &
145 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: chunk_grad, grad
146 REAL(kind=
dp) :: electron_count = 0.0_dp, &
147 grid_weight_sum = 0.0_dp, &
149 LOGICAL :: active = .false., owns_coordinate_tensor = .false., &
150 owns_dynamic_tensors = .true., &
151 owns_static_tensors = .true., &
152 uses_atom_chunk_routing = .false., &
153 uses_atom_chunks = .false.
174 requires_grad, weights, requires_coordinate_grad, &
175 use_atom_chunks, route_atom_chunks)
181 LOGICAL,
INTENT(IN),
OPTIONAL :: requires_grad
183 LOGICAL,
INTENT(IN),
OPTIONAL :: requires_coordinate_grad, &
184 use_atom_chunks, route_atom_chunks
186 INTEGER :: handle, i, ipt, ispin, j, k, local_row, &
187 nflat, nflat_local, nspins, &
188 phase_handle, real_base, row
189 INTEGER,
DIMENSION(2, 3) :: bo
190 LOGICAL :: can_use_atom_chunks, my_requires_coordinate_grad, my_requires_grad, &
191 my_route_atom_chunks, my_use_atom_chunks
192 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: global_dynamic, local_dynamic
193 REAL(kind=
dp),
DIMENSION(:, :, :),
POINTER :: rho, rhoa, rhob, tau_a, tau_b, tau_total
197 CALL timeset(
"skala_gpw_feature_build", handle)
199 my_requires_grad = .false.
200 IF (
PRESENT(requires_grad)) my_requires_grad = requires_grad
201 my_requires_coordinate_grad = .false.
202 IF (
PRESENT(requires_coordinate_grad)) &
203 my_requires_coordinate_grad = requires_coordinate_grad
204 my_use_atom_chunks = .false.
205 IF (
PRESENT(use_atom_chunks)) my_use_atom_chunks = use_atom_chunks
206 my_route_atom_chunks = .false.
207 IF (
PRESENT(route_atom_chunks)) my_route_atom_chunks = route_atom_chunks
209 cpassert(
ASSOCIATED(cell))
210 cpassert(
ASSOCIATED(particle_set))
211 cpassert(
SIZE(rho_r) == 1 .OR.
SIZE(rho_r) == 2)
212 cpassert(
ASSOCIATED(rho_r(1)%pw_grid))
213 pw_grid => rho_r(1)%pw_grid
216 bo = pw_grid%bounds_local
217 nflat_local = pw_grid%ngpts_local
219 CALL timeset(
"skala_gpw_pre_release", phase_handle)
221 CALL timestop(phase_handle)
223 CALL timeset(
"skala_gpw_layout_cache", phase_handle)
224 CALL ensure_layout_cache(pw_grid, particle_set, cell, weights)
225 CALL timestop(phase_handle)
227 can_use_atom_chunks = my_use_atom_chunks .AND.
cached_layout%nproc > 1 .AND. &
229 ALLOCATE (local_dynamic(ndynamic_per_point*nflat_local))
230 local_dynamic = 0.0_dp
232 CALL timeset(
"skala_gpw_pack_local", phase_handle)
233 IF (nspins == 1)
THEN
236 CALL xc_rho_set_get(rho_set, rhoa=rhoa, rhob=rhob, drhoa=drhoa, drhob=drhob, &
237 tau_a=tau_a, tau_b=tau_b)
241 DO k = bo(1, 3), bo(2, 3)
242 DO j = bo(1, 2), bo(2, 2)
243 DO i = bo(1, 1), bo(2, 1)
244 local_row = local_row + 1
245 real_base = ndynamic_per_point*(local_row - 1)
247 IF (nspins == 1)
THEN
248 local_dynamic(real_base + 1) = 0.5_dp*rho(i, j, k)
249 local_dynamic(real_base + 2) = 0.5_dp*rho(i, j, k)
251 local_dynamic(real_base + 2 + 3*(ispin - 1) + 1) = 0.5_dp*drho(1)%array(i, j, k)
252 local_dynamic(real_base + 2 + 3*(ispin - 1) + 2) = 0.5_dp*drho(2)%array(i, j, k)
253 local_dynamic(real_base + 2 + 3*(ispin - 1) + 3) = 0.5_dp*drho(3)%array(i, j, k)
254 local_dynamic(real_base + 8 + ispin) = 0.5_dp*tau_total(i, j, k)
257 local_dynamic(real_base + 1) = rhoa(i, j, k)
258 local_dynamic(real_base + 2) = rhob(i, j, k)
259 local_dynamic(real_base + 3) = drhoa(1)%array(i, j, k)
260 local_dynamic(real_base + 4) = drhoa(2)%array(i, j, k)
261 local_dynamic(real_base + 5) = drhoa(3)%array(i, j, k)
262 local_dynamic(real_base + 6) = drhob(1)%array(i, j, k)
263 local_dynamic(real_base + 7) = drhob(2)%array(i, j, k)
264 local_dynamic(real_base + 8) = drhob(3)%array(i, j, k)
265 local_dynamic(real_base + 9) = tau_a(i, j, k)
266 local_dynamic(real_base + 10) = tau_b(i, j, k)
271 CALL timestop(phase_handle)
273 CALL timeset(
"skala_gpw_copy_layout", phase_handle)
274 CALL copy_cached_layout(features, my_requires_coordinate_grad)
275 CALL timestop(phase_handle)
277 IF (can_use_atom_chunks .AND. my_route_atom_chunks)
THEN
278 CALL timeset(
"skala_gpw_route_dyn", phase_handle)
279 CALL route_atom_chunk_dynamics(features, local_dynamic, pw_grid%para%group)
280 features%uses_atom_chunk_routing = .true.
281 features%uses_atom_chunks = .true.
282 CALL timestop(phase_handle)
284 ALLOCATE (global_dynamic(ndynamic_per_point*nflat))
285 CALL timeset(
"skala_gpw_allgatherv", phase_handle)
286 CALL pw_grid%para%group%allgatherv(local_dynamic, global_dynamic, &
289 CALL timestop(phase_handle)
291 CALL timeset(
"skala_gpw_reorder_dyn", phase_handle)
292 ALLOCATE (features%density(nflat, 2), features%grad(nflat, 3, 2), &
293 features%kin(nflat, 2))
294 features%density = 0.0_dp
295 features%grad = 0.0_dp
296 features%kin = 0.0_dp
300 real_base = ndynamic_per_point*(ipt - 1)
301 features%density(row, :) = global_dynamic(real_base + 1:real_base + 2)
302 features%grad(row, 1, 1) = global_dynamic(real_base + 3)
303 features%grad(row, 2, 1) = global_dynamic(real_base + 4)
304 features%grad(row, 3, 1) = global_dynamic(real_base + 5)
305 features%grad(row, 1, 2) = global_dynamic(real_base + 6)
306 features%grad(row, 2, 2) = global_dynamic(real_base + 7)
307 features%grad(row, 3, 2) = global_dynamic(real_base + 8)
308 features%kin(row, :) = global_dynamic(real_base + 9:real_base + 10)
310 CALL timestop(phase_handle)
313 CALL timeset(
"skala_gpw_feature_sums", phase_handle)
314 IF (features%uses_atom_chunks)
THEN
315 features%electron_count = sum((features%chunk_density(:, 1) + &
316 features%chunk_density(:, 2))* &
318 features%spin_moment = sum((features%chunk_density(:, 1) - &
319 features%chunk_density(:, 2))* &
321 CALL pw_grid%para%group%sum(features%electron_count)
322 CALL pw_grid%para%group%sum(features%spin_moment)
324 features%electron_count = sum((features%density(:, 1) + features%density(:, 2))* &
325 features%grid_weights)
326 features%spin_moment = sum((features%density(:, 1) - features%density(:, 2))* &
327 features%grid_weights)
329 features%grid_weight_sum = sum(features%grid_weights)
330 CALL timestop(phase_handle)
332 CALL timeset(
"skala_gpw_tensor_update", phase_handle)
333 IF (can_use_atom_chunks .AND. .NOT. features%uses_atom_chunks)
THEN
334 CALL extract_atom_chunk_dynamics(features)
335 features%uses_atom_chunks = .true.
337 CALL add_feature_tensors(features, my_requires_grad, my_requires_coordinate_grad, &
338 features%uses_atom_chunks)
339 CALL timestop(phase_handle)
340 features%active = .true.
342 IF (
ALLOCATED(global_dynamic))
DEALLOCATE (global_dynamic)
343 DEALLOCATE (local_dynamic)
344 CALL timestop(handle)
! Make sure the layout cache is valid for the given grid, atoms and cell.
! The cache is tested with layout_cache_matches; rebuild_layout_cache is called
! only when that test reports a mismatch. The match test and the rebuild are
! timed separately ("skala_gpw_layout_match" / "skala_gpw_layout_rebuild").
! weights is forwarded to both helpers only when it is PRESENT.
! NOTE(review): both helpers test PRESENT(weights) themselves, so an absent
! weights could presumably be forwarded directly and the two branches merged
! - confirm against the dummy argument declarations before changing.
355 SUBROUTINE ensure_layout_cache(pw_grid, particle_set, cell, weights)
361 INTEGER :: phase_handle
362 LOGICAL :: cache_matches
! Variant with weights: match test first, rebuild only on mismatch.
364 IF (
PRESENT(weights))
THEN
365 CALL timeset(
"skala_gpw_layout_match", phase_handle)
366 cache_matches = layout_cache_matches(pw_grid, particle_set, cell, weights)
367 CALL timestop(phase_handle)
368 IF (cache_matches)
RETURN
369 CALL timeset(
"skala_gpw_layout_rebuild", phase_handle)
370 CALL rebuild_layout_cache(pw_grid, particle_set, cell, weights)
371 CALL timestop(phase_handle)
! Variant without weights: same match-then-rebuild sequence.
373 CALL timeset(
"skala_gpw_layout_match", phase_handle)
374 cache_matches = layout_cache_matches(pw_grid, particle_set, cell)
375 CALL timestop(phase_handle)
376 IF (cache_matches)
RETURN
377 CALL timeset(
"skala_gpw_layout_rebuild", phase_handle)
378 CALL rebuild_layout_cache(pw_grid, particle_set, cell)
379 CALL timestop(phase_handle)
382 END SUBROUTINE ensure_layout_cache
! Test whether cached_layout still describes the given grid, cell and atoms.
! Each visible check returns early as soon as a cached value differs from the
! current one by more than layout_tol (absolute difference):
!   dvol, dh (from pw_grid), cell_hmat (from cell%hmat), and every atom position
!   (cached_layout%atom_coords(:, iatom) vs. particle_set(iatom)%r).
! Finally the weights are compared through layout_weights_match.
! NOTE(review): the early RETURNs presumably leave matches = .false. and the
! fall-through sets it to .true.; those assignments are outside the lines shown
! here - confirm.
392 FUNCTION layout_cache_matches(pw_grid, particle_set, cell, weights)
RESULT(matches)
400 LOGICAL :: weights_match
! Grid and cell geometry.
410 IF (abs(
cached_layout%dvol - pw_grid%dvol) > layout_tol)
RETURN
411 IF (any(abs(
cached_layout%dh - pw_grid%dh) > layout_tol))
RETURN
412 IF (any(abs(
cached_layout%cell_hmat - cell%hmat) > layout_tol))
RETURN
! Atom positions, compared component-wise.
415 DO iatom = 1,
SIZE(particle_set)
416 IF (any(abs(
cached_layout%atom_coords(:, iatom) - particle_set(iatom)%r) > layout_tol))
RETURN
! Weights: forwarded only when the optional argument is PRESENT.
419 IF (
PRESENT(weights))
THEN
420 weights_match = layout_weights_match(pw_grid, weights)
422 weights_match = layout_weights_match(pw_grid)
424 IF (.NOT. weights_match)
RETURN
428 END FUNCTION layout_cache_matches
! Compare the signature of the current weights with the one stored in
! cached_layout. The signature (has_weights, weight_sum, weight_sumsq) is
! computed by weights_signature; weight_sum and weight_sumsq must agree with the
! cached values within layout_tol (absolute difference), otherwise the function
! returns early.
! NOTE(review): any comparison of has_weights and the assignments to matches are
! outside the lines shown here - confirm before relying on them.
436 FUNCTION layout_weights_match(pw_grid, weights)
RESULT(matches)
441 LOGICAL :: has_weights
442 REAL(kind=
dp) :: weight_sum, weight_sumsq
! weights is passed on only when PRESENT; otherwise weights_signature is called
! without it.
446 IF (
PRESENT(weights))
THEN
447 CALL weights_signature(weights, has_weights, weight_sum, weight_sumsq)
449 CALL weights_signature(has_weights=has_weights, weight_sum=weight_sum, &
450 weight_sumsq=weight_sumsq)
454 IF (abs(
cached_layout%weight_sum - weight_sum) > layout_tol)
RETURN
455 IF (abs(
cached_layout%weight_sumsq - weight_sumsq) > layout_tol)
RETURN
459 END FUNCTION layout_weights_match
! Recompute cached_layout for the given grid, atoms and cell.
! Steps visible in this routine:
!   1. Wrap atom positions with pbc(..., positive_range=.true.).
!   2. For every local grid point (k, j, i loops over pw_grid%bounds_local):
!      find the owning atom with nearest_atom and pack nstatic_per_point values
!      into local_static - three values from nearest_image_coordinate and the
!      point weight pw_grid%dvol (multiplied by weights%array(i, j, k) when
!      weights is PRESENT and ASSOCIATED).
!   3. Gather per-rank point counts, owners and static data from all ranks
!      (allgather / allgatherv) and derive displacements.
!   4. Place each global point at the next free row of its owning atom
!      (atom_position), filling cached_layout%grid_coords / grid_weights, and
!      record the resulting row for the points that belong to this rank.
!   5. Split the atoms into per-rank chunks (build_atom_chunks) and build the
!      exchange routes (build_atom_chunk_routes).
! All work arrays are deallocated at the end.
468 SUBROUTINE rebuild_layout_cache(pw_grid, particle_set, cell, weights)
474 INTEGER :: i, iatom, ipt, j, k, local_row, max_grid_size, natom, nflat, nflat_local, nproc, &
475 owner, pe, pe_index, phase_handle, row, static_base
476 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: atom_offset, atom_position, chunk_atom_begin, &
477 chunk_atom_end, feature_counts, feature_displs, global_owner, local_owner, &
478 local_to_global, static_counts, static_displs
479 INTEGER,
DIMENSION(2, 3) :: bo
480 LOGICAL :: has_weights
481 REAL(kind=
dp) :: weight_sum, weight_sumsq
482 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: global_static, local_static
483 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: atom_coords_pbc
484 REAL(kind=
dp),
DIMENSION(3) :: grid_point, owner_coord
488 natom =
SIZE(particle_set)
489 bo = pw_grid%bounds_local
490 nflat_local = pw_grid%ngpts_local
491 nproc = pw_grid%para%group%num_pe
! mepos is shifted by one so that pe_index can index the 1-based per-rank arrays.
492 pe_index = pw_grid%para%group%mepos + 1
494 IF (
PRESENT(weights))
THEN
495 CALL weights_signature(weights, has_weights, weight_sum, weight_sumsq)
497 CALL weights_signature(has_weights=has_weights, weight_sum=weight_sum, &
498 weight_sumsq=weight_sumsq)
501 ALLOCATE (local_owner(nflat_local), local_static(nstatic_per_point*nflat_local), &
502 feature_counts(nproc), feature_displs(nproc), static_counts(nproc), &
503 static_displs(nproc), atom_coords_pbc(3, natom))
508 local_static = 0.0_dp
510 atom_coords_pbc(:, iatom) =
pbc(particle_set(iatom)%r, cell, positive_range=.true.)
! Local pass: owner atom, coordinate and weight for every local grid point.
513 CALL timeset(
"skala_gpw_layout_local", phase_handle)
515 DO k = bo(1, 3), bo(2, 3)
516 DO j = bo(1, 2), bo(2, 2)
517 DO i = bo(1, 1), bo(2, 1)
518 local_row = local_row + 1
519 static_base = nstatic_per_point*(local_row - 1)
520 grid_point = grid_coordinate(pw_grid, [i, j, k])
521 owner = nearest_atom(grid_point, atom_coords_pbc, cell)
522 local_owner(local_row) = owner
525 owner_coord = atom_coords_pbc(:, owner)
526 local_static(static_base + 1:static_base + 3) = &
527 nearest_image_coordinate(owner_coord, grid_point, cell)
528 local_static(static_base + 4) = pw_grid%dvol
529 IF (
PRESENT(weights))
THEN
530 IF (
ASSOCIATED(weights)) local_static(static_base + 4) = &
531 pw_grid%dvol*weights%array(i, j, k)
536 CALL timestop(phase_handle)
! Global pass: collect counts, owners and static data from all ranks.
540 CALL timeset(
"skala_gpw_layout_gather", phase_handle)
541 CALL pw_grid%para%group%allgather(nflat_local, feature_counts)
542 feature_displs(1) = 0
544 feature_displs(pe) = feature_displs(pe - 1) + feature_counts(pe - 1)
547 static_counts(pe) = nstatic_per_point*feature_counts(pe)
548 static_displs(pe) = nstatic_per_point*feature_displs(pe)
550 nflat = sum(feature_counts)
551 ALLOCATE (global_owner(nflat), global_static(nstatic_per_point*nflat))
552 CALL pw_grid%para%group%allgatherv(local_owner, global_owner, feature_counts, &
554 CALL pw_grid%para%group%allgatherv(local_static, global_static, static_counts, &
556 CALL timestop(phase_handle)
580 cached_layout%local_feature_indices(nflat_local), atom_offset(natom + 1), &
581 atom_position(natom), chunk_atom_begin(nproc), chunk_atom_end(nproc), &
582 local_to_global(nflat_local))
585 cached_layout%dynamic_counts(:) = ndynamic_per_point*feature_counts
586 cached_layout%dynamic_displs(:) = ndynamic_per_point*feature_displs
! Atom-sorted ordering: atom_offset accumulates the per-atom grid sizes and
! atom_position tracks the next free row of each atom.
589 CALL timeset(
"skala_gpw_layout_atom_sort", phase_handle)
596 atom_offset(iatom + 1) = atom_offset(iatom) + int(
cached_layout%atomic_grid_sizes(iatom))
599 atom_position(iatom) = atom_offset(iatom)
602 CALL build_atom_chunks(
cached_layout%atomic_grid_sizes, atom_offset, nproc, &
603 chunk_atom_begin, chunk_atom_end, &
618 cached_layout%atomic_grid_size_bound_shape(0, max_grid_size), &
627 cached_layout%coarse_0_atomic_coords(:, iatom) = atom_coords_pbc(:, iatom)
631 owner = global_owner(ipt)
632 row = atom_position(owner)
633 atom_position(owner) = atom_position(owner) + 1
635 static_base = nstatic_per_point*(ipt - 1)
636 cached_layout%grid_coords(:, row) = global_static(static_base + 1:static_base + 3)
637 cached_layout%grid_weights(row) = global_static(static_base + 4)
639 IF (ipt > feature_displs(pe_index) .AND. &
640 ipt <= feature_displs(pe_index) + nflat_local)
THEN
641 local_to_global(ipt - feature_displs(pe_index)) = row
645 DO k = bo(1, 3), bo(2, 3)
646 DO j = bo(1, 2), bo(2, 2)
647 DO i = bo(1, 1), bo(2, 1)
653 DO local_row = 1, nflat_local
655 int(local_to_global(local_row) - 1, kind=
int_8)
657 CALL timestop(phase_handle)
! Exchange routes between the grid distribution and the atom chunks.
658 CALL timeset(
"skala_gpw_layout_chunk_routes", phase_handle)
659 CALL build_atom_chunk_routes(
cached_layout, local_to_global, pw_grid%para%group)
661 CALL timestop(phase_handle)
676 CALL timeset(
"skala_gpw_layout_tensors", phase_handle)
678 CALL timestop(phase_handle)
681 DEALLOCATE (atom_coords_pbc, atom_offset, atom_position, chunk_atom_begin, chunk_atom_end, &
682 feature_counts, feature_displs, global_owner, global_static, local_owner, &
683 local_static, local_to_global, static_counts, static_displs)
685 END SUBROUTINE rebuild_layout_cache
! Build the torch tensors and input dictionaries for the static layout data held
! in cache.
! The routine asserts that the static tensors are not already active, inserts
! tensors into cache%static_inputs (the "grid_weights" insertion is visible
! here) and then sets cache%static_tensors_active.
! When cache%chunk_feature_count > 0 the same is done for the chunk_* arrays,
! filling cache%chunk_static_inputs and setting
! cache%chunk_static_tensors_active.
! \param cache layout cache whose tensors and dictionaries are populated
691 SUBROUTINE build_static_layout_tensors(cache)
692 TYPE(skala_gpw_layout_cache_type),
INTENT(INOUT) :: cache
694 cpassert(.NOT. cache%static_tensors_active)
707 cache%atomic_grid_size_bound_shape)
714 CALL torch_dict_insert(cache%static_inputs,
"grid_weights", cache%grid_weights_t)
716 cache%atomic_grid_weights_t)
718 cache%atomic_grid_sizes_t)
720 cache%atomic_grid_size_bound_shape_t)
721 cache%static_tensors_active = .true.
! Chunk-local tensors exist only when this rank has chunk rows.
723 IF (cache%chunk_feature_count > 0)
THEN
724 cpassert(.NOT. cache%chunk_static_tensors_active)
730 cache%chunk_atomic_grid_weights)
733 cache%chunk_atomic_grid_sizes)
736 cache%chunk_coarse_0_atomic_coords)
739 cache%chunk_atomic_grid_size_bound_shape)
746 cache%chunk_grid_coords_t)
748 cache%chunk_grid_weights_t)
750 cache%chunk_atomic_grid_weights_t)
752 cache%chunk_atomic_grid_sizes_t)
753 CALL torch_dict_insert(cache%chunk_static_inputs,
"atomic_grid_size_bound_shape", &
754 cache%chunk_atomic_grid_size_bound_shape_t)
755 cache%chunk_static_tensors_active = .true.
758 END SUBROUTINE build_static_layout_tensors
! Copy layout data from cached_layout into features.
! Visible copies: feature_index (allocated with the lower bounds of the cached
! array), chunk_feature_count, the chunk_grad and route_* count/displacement
! arrays (the route_grad_return_* arrays are sized by cached_layout%nproc), and
! route_send_local_rows.
! coarse_0_atomic_coords (3 x natom) is allocated and copied only when
! needs_coordinate_array is .true.
! \param needs_coordinate_array whether to copy the atomic coordinate array too
765 SUBROUTINE copy_cached_layout(features, needs_coordinate_array)
767 LOGICAL,
INTENT(IN) :: needs_coordinate_array
771 ALLOCATE (features%feature_index(lbound(
cached_layout%feature_index, 1): &
779 features%feature_index(:, :, :) =
cached_layout%feature_index
783 features%chunk_feature_count =
cached_layout%chunk_feature_count
786 features%route_grad_return_recv_counts(
cached_layout%nproc), &
787 features%route_grad_return_recv_displs(
cached_layout%nproc), &
788 features%route_grad_return_send_counts(
cached_layout%nproc), &
789 features%route_grad_return_send_displs(
cached_layout%nproc), &
! The (:) sections on the left-hand side assign into the already allocated
! arrays, so shapes must match.
795 features%chunk_grad_counts(:) =
cached_layout%chunk_grad_counts
796 features%chunk_grad_displs(:) =
cached_layout%chunk_grad_displs
797 features%route_grad_return_recv_counts(:) =
cached_layout%route_grad_return_recv_counts
798 features%route_grad_return_recv_displs(:) =
cached_layout%route_grad_return_recv_displs
799 features%route_grad_return_send_counts(:) =
cached_layout%route_grad_return_send_counts
800 features%route_grad_return_send_displs(:) =
cached_layout%route_grad_return_send_displs
801 features%route_point_recv_counts(:) =
cached_layout%route_point_recv_counts
802 features%route_point_recv_displs(:) =
cached_layout%route_point_recv_displs
803 features%route_point_send_counts(:) =
cached_layout%route_point_send_counts
804 features%route_point_send_displs(:) =
cached_layout%route_point_send_displs
805 features%route_send_local_rows(:) =
cached_layout%route_send_local_rows
806 IF (needs_coordinate_array)
THEN
807 ALLOCATE (features%coarse_0_atomic_coords(3,
cached_layout%natom))
808 features%coarse_0_atomic_coords(:, :) =
cached_layout%coarse_0_atomic_coords
811 END SUBROUTINE copy_cached_layout
! Split the atoms into contiguous chunks, one chunk per rank, aiming at an even
! number of grid points per chunk.
! \param atomic_grid_sizes number of grid points of each atom
! \param atom_offset start offset of each atom's points; atom_offset(natom + 1)
!        is the end marker (size natom + 1)
! \param nproc number of ranks
! \param chunk_atom_begin first atom of each rank's chunk (natom + 1 if empty)
! \param chunk_atom_end last atom of each rank's chunk (natom if empty)
! \param chunk_feature_counts number of grid points in each rank's chunk
! \param chunk_feature_displs number of grid points in all preceding chunks
! For each rank the target size is the remaining point count divided by the
! remaining rank count (at least 1). Atoms are appended while the chunk is
! below the target or while adding the next atom brings it closer to the
! target; max_end_atom limits how far a chunk may extend.
! The final assertion requires displ == atom_offset(natom + 1) - 1.
! NOTE(review): this presumably means atom_offset starts at 1 - confirm.
823 SUBROUTINE build_atom_chunks(atomic_grid_sizes, atom_offset, nproc, chunk_atom_begin, &
824 chunk_atom_end, chunk_feature_counts, chunk_feature_displs)
825 INTEGER(KIND=int_8),
DIMENSION(:),
INTENT(IN) :: atomic_grid_sizes
826 INTEGER,
DIMENSION(:),
INTENT(IN) :: atom_offset
827 INTEGER,
INTENT(IN) :: nproc
828 INTEGER,
DIMENSION(:),
INTENT(OUT) :: chunk_atom_begin, chunk_atom_end, &
829 chunk_feature_counts, &
832 INTEGER :: atoms_left, count, displ, end_atom, max_end_atom, natom, next_atom, next_count, &
833 pe, ranks_left, target_count, total_left
835 natom =
SIZE(atomic_grid_sizes)
! Defaults describe an empty chunk (begin > end, zero points).
836 chunk_atom_begin = natom + 1
837 chunk_atom_end = natom
838 chunk_feature_counts = 0
839 chunk_feature_displs = 0
844 chunk_feature_displs(pe) = displ
! No atoms left: this rank keeps the empty defaults.
845 IF (next_atom > natom) cycle
847 ranks_left = nproc - pe + 1
848 atoms_left = natom - next_atom + 1
849 chunk_atom_begin(pe) = next_atom
850 IF (ranks_left >= atoms_left)
THEN
853 max_end_atom = natom - ranks_left + 1
854 total_left = atom_offset(natom + 1) - atom_offset(next_atom)
855 target_count = max(1, nint(real(total_left, kind=
dp)/real(ranks_left, kind=
dp)))
857 count = int(atomic_grid_sizes(end_atom))
858 DO WHILE (end_atom < max_end_atom)
859 next_count = count + int(atomic_grid_sizes(end_atom + 1))
! Stop once the target is reached and one more atom would not bring the chunk
! closer to it.
860 IF (count >= target_count .AND. &
861 abs(count - target_count) <= abs(next_count - target_count))
EXIT
862 IF (count < target_count .OR. &
863 abs(next_count - target_count) < abs(count - target_count))
THEN
864 end_atom = end_atom + 1
872 chunk_atom_end(pe) = end_atom
873 chunk_feature_counts(pe) = atom_offset(end_atom + 1) - atom_offset(next_atom)
874 displ = displ + chunk_feature_counts(pe)
875 next_atom = end_atom + 1
878 cpassert(displ == atom_offset(natom + 1) - 1)
880 END SUBROUTINE build_atom_chunks
! Find the rank whose chunk contains a given row.
! Rank pe matches when displs(pe) < row <= displs(pe) + counts(pe), i.e. row is
! 1-based while displs holds 0-based offsets.
! \param row row index to look up
! \param counts number of rows per rank
! \param displs number of rows preceding each rank's range
! NOTE(review): the value returned when no rank matches is set outside the
! lines shown here - confirm.
889 FUNCTION feature_row_chunk_owner(row, counts, displs)
RESULT(owner)
890 INTEGER,
INTENT(IN) :: row
891 INTEGER,
DIMENSION(:),
INTENT(IN) :: counts, displs
897 DO pe = 1,
SIZE(counts)
898 IF (row > displs(pe) .AND. row <= displs(pe) + counts(pe))
THEN
904 END FUNCTION feature_row_chunk_owner
! Convert per-rank counts into displacements (exclusive running sum): for
! pe >= 2, displs(pe) = displs(pe - 1) + counts(pe - 1).
! \param counts number of items per rank
! \param displs resulting offsets, same size as counts
! NOTE(review): displs(1) is presumably initialised to 0 before the loop; that
! statement is outside the lines shown here - confirm.
911 SUBROUTINE counts_to_displs(counts, displs)
912 INTEGER,
DIMENSION(:),
INTENT(IN) :: counts
913 INTEGER,
DIMENSION(:),
INTENT(OUT) :: displs
918 DO pe = 2,
SIZE(counts)
919 displs(pe) = displs(pe - 1) + counts(pe - 1)
922 END SUBROUTINE counts_to_displs
! Build the exchange routes that move local grid points to the ranks owning the
! corresponding atom chunks.
! \param cache layout cache; its route_* arrays are filled here
! \param local_to_global row of each local point, as passed to
!        feature_row_chunk_owner
! \param group communicator used for the alltoall of the send counts
! Steps:
!   1. For every local row, look up the destination rank with
!      feature_row_chunk_owner and count the points per destination.
!   2. Turn the counts into displacements and fill route_send_local_rows so that
!      the local rows are grouped by destination (cursor holds the next free
!      slot of each destination).
!   3. Exchange the send counts with alltoall to obtain the receive counts.
!   4. Derive the meta, dynamic and grad-return counts/displacements by scaling
!      the point values with 2, ndynamic_per_point and ngrad_per_point. The
!      grad-return arrays swap roles: what was received is sent back.
930 SUBROUTINE build_atom_chunk_routes(cache, local_to_global, group)
931 TYPE(skala_gpw_layout_cache_type),
INTENT(INOUT) :: cache
932 INTEGER,
DIMENSION(:),
INTENT(IN) :: local_to_global
936 INTEGER :: dest, local_row, point_pos
937 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: cursor
939 ALLOCATE (cache%route_local_dest(
SIZE(local_to_global)), &
940 cache%route_send_local_rows(
SIZE(local_to_global)), &
941 cursor(
SIZE(cache%route_point_send_counts)))
942 cache%route_point_send_counts = 0
! Zero marks an unfilled slot; the final assertion checks that none remain.
943 cache%route_send_local_rows = 0
! Pass 1: destination of every local row and number of points per destination.
944 DO local_row = 1,
SIZE(local_to_global)
945 dest = feature_row_chunk_owner(local_to_global(local_row), &
946 cache%chunk_feature_counts, &
947 cache%chunk_feature_displs)
949 cache%route_local_dest(local_row) = dest
950 cache%route_point_send_counts(dest) = cache%route_point_send_counts(dest) + 1
952 CALL counts_to_displs(cache%route_point_send_counts, cache%route_point_send_displs)
! Pass 2: place each local row at the next free position of its destination.
953 cursor(:) = cache%route_point_send_displs + 1
954 DO local_row = 1,
SIZE(local_to_global)
955 dest = cache%route_local_dest(local_row)
956 point_pos = cursor(dest)
957 cursor(dest) = cursor(dest) + 1
958 cache%route_send_local_rows(point_pos) = local_row
960 CALL group%alltoall(cache%route_point_send_counts, cache%route_point_recv_counts, 1)
961 CALL counts_to_displs(cache%route_point_recv_counts, cache%route_point_recv_displs)
! Per-point counts scaled to the payload size of each exchange.
963 cache%route_meta_send_counts(:) = 2*cache%route_point_send_counts
964 cache%route_meta_send_displs(:) = 2*cache%route_point_send_displs
965 cache%route_meta_recv_counts(:) = 2*cache%route_point_recv_counts
966 cache%route_meta_recv_displs(:) = 2*cache%route_point_recv_displs
967 cache%route_dynamic_send_counts(:) = ndynamic_per_point*cache%route_point_send_counts
968 cache%route_dynamic_send_displs(:) = ndynamic_per_point*cache%route_point_send_displs
969 cache%route_dynamic_recv_counts(:) = ndynamic_per_point*cache%route_point_recv_counts
970 cache%route_dynamic_recv_displs(:) = ndynamic_per_point*cache%route_point_recv_displs
! Gradient return: send side uses the point receive layout and vice versa.
971 cache%route_grad_return_send_counts(:) = ngrad_per_point*cache%route_point_recv_counts
972 cache%route_grad_return_send_displs(:) = ngrad_per_point*cache%route_point_recv_displs
973 cache%route_grad_return_recv_counts(:) = ngrad_per_point*cache%route_point_send_counts
974 cache%route_grad_return_recv_displs(:) = ngrad_per_point*cache%route_point_send_displs
! Consistency checks: every local point is sent once, and this rank receives
! exactly its chunk size.
976 cpassert(sum(cache%route_point_send_counts) ==
SIZE(local_to_global))
977 cpassert(sum(cache%route_point_recv_counts) == cache%chunk_feature_count)
978 cpassert(all(cache%route_send_local_rows > 0))
982 END SUBROUTINE build_atom_chunk_routes
! Extract this rank's chunk from the global layout arrays held in cache.
! Returns immediately when the chunk has no rows or no atoms.
! Rows chunk_feature_begin .. chunk_feature_begin + chunk_feature_count - 1 of
! grid_coords, grid_weights and atomic_grid_weights, and atoms
! chunk_atom_begin .. chunk_atom_end of atomic_grid_sizes and
! coarse_0_atomic_coords, are copied into the corresponding chunk_* arrays.
! chunk_feature_indices is filled with 0 .. chunk_feature_count - 1.
! \param cache layout cache whose chunk_* arrays are allocated and filled
988 SUBROUTINE build_atom_chunk_layout(cache)
989 TYPE(skala_gpw_layout_cache_type),
INTENT(INOUT) :: cache
991 INTEGER :: irow, max_grid_size, row_begin, row_end
993 IF (cache%chunk_feature_count <= 0 .OR. cache%chunk_natom <= 0)
RETURN
995 row_begin = cache%chunk_feature_begin
996 row_end = row_begin + cache%chunk_feature_count - 1
997 ALLOCATE (cache%chunk_grid_coords(3, cache%chunk_feature_count), &
998 cache%chunk_grid_weights(cache%chunk_feature_count), &
999 cache%chunk_atomic_grid_weights(cache%chunk_feature_count), &
1000 cache%chunk_atomic_grid_sizes(cache%chunk_natom), &
1001 cache%chunk_coarse_0_atomic_coords(3, cache%chunk_natom), &
1002 cache%chunk_feature_indices(cache%chunk_feature_count))
1003 cache%chunk_grid_coords(:, :) = cache%grid_coords(:, row_begin:row_end)
1004 cache%chunk_grid_weights(:) = cache%grid_weights(row_begin:row_end)
1005 cache%chunk_atomic_grid_weights(:) = cache%atomic_grid_weights(row_begin:row_end)
1006 cache%chunk_atomic_grid_sizes(:) = &
1007 cache%atomic_grid_sizes(cache%chunk_atom_begin:cache%chunk_atom_end)
1008 cache%chunk_coarse_0_atomic_coords(:, :) = &
1009 cache%coarse_0_atomic_coords(:, cache%chunk_atom_begin:cache%chunk_atom_end)
! The array below has a zero first extent, so it stores no elements.
! NOTE(review): presumably only its shape (0, max_grid_size) is consumed
! downstream - confirm.
1011 max_grid_size = maxval(int(cache%chunk_atomic_grid_sizes))
1012 ALLOCATE (cache%chunk_atomic_grid_size_bound_shape(0, max_grid_size))
1013 cache%chunk_atomic_grid_size_bound_shape = 0_int_8
1014 DO irow = 1, cache%chunk_feature_count
1015 cache%chunk_feature_indices(irow) = int(irow - 1, kind=
int_8)
1018 END SUBROUTINE build_atom_chunk_layout
! Send the per-point dynamic data of the local grid points to the ranks owning
! the corresponding atom chunks and unpack what this rank receives.
! \param features receives chunk_density, chunk_grad, chunk_kin and the
!        chunk_return_* bookkeeping arrays
! \param local_dynamic packed local data, ndynamic_per_point values per point
! \param group communicator used for the alltoall exchanges
! Send side: each local row is written to the next free position of its
! destination (cursor) together with two metadata integers, the 1-based value
! local_feature_indices(local_row) + 1 and the local row number.
! Receive side: values 1-2 of each point go to chunk_density, 3-5 and 6-8 to
! chunk_grad(:, :, 1) and chunk_grad(:, :, 2), and 9-10 to chunk_kin. For every
! chunk row the receive position, source rank and source local row are kept in
! chunk_return_positions / chunk_return_ranks / chunk_return_rows.
! NOTE(review): how chunk_row is derived from the received row is outside the
! lines shown here - confirm.
1026 SUBROUTINE route_atom_chunk_dynamics(features, local_dynamic, group)
1028 REAL(kind=
dp),
DIMENSION(:),
INTENT(IN) :: local_dynamic
1032 INTEGER :: chunk_row, dest, dyn_base, irow, local_row, &
1033 meta_base, nrecv, nsend, pe, point_pos, &
1035 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: cursor, recv_meta, send_meta
1036 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: recv_dynamic, send_dynamic
1044 ALLOCATE (send_meta(2*nsend), send_dynamic(ndynamic_per_point*nsend), &
1045 recv_meta(2*nrecv), recv_dynamic(ndynamic_per_point*nrecv), &
1048 send_dynamic = 0.0_dp
! Pack: reorder the local rows into destination-grouped send buffers.
1050 DO local_row = 1, nsend
1052 point_pos = cursor(dest)
1053 cursor(dest) = cursor(dest) + 1
1054 meta_base = 2*(point_pos - 1)
1055 dyn_base = ndynamic_per_point*(point_pos - 1)
1056 src_base = ndynamic_per_point*(local_row - 1)
1057 send_meta(meta_base + 1) = int(
cached_layout%local_feature_indices(local_row) + 1_int_8)
1058 send_meta(meta_base + 2) = local_row
1059 send_dynamic(dyn_base + 1:dyn_base + ndynamic_per_point) = &
1060 local_dynamic(src_base + 1:src_base + ndynamic_per_point)
! Exchange the metadata and the dynamic payload.
1063 CALL group%alltoall(send_meta,
cached_layout%route_meta_send_counts, &
1067 CALL group%alltoall(send_dynamic,
cached_layout%route_dynamic_send_counts, &
1072 ALLOCATE (features%chunk_density(
cached_layout%chunk_feature_count, 2), &
1073 features%chunk_grad(
cached_layout%chunk_feature_count, 3, 2), &
1075 features%chunk_return_positions(
cached_layout%chunk_feature_count), &
1076 features%chunk_return_ranks(
cached_layout%chunk_feature_count), &
1077 features%chunk_return_rows(
cached_layout%chunk_feature_count))
1078 features%chunk_density = 0.0_dp
1079 features%chunk_grad = 0.0_dp
1080 features%chunk_kin = 0.0_dp
! Zero marks "not filled"; the assertions at the end check that every chunk row
! was written.
1081 features%chunk_return_positions = 0
1082 features%chunk_return_ranks = 0
1083 features%chunk_return_rows = 0
! Unpack: place every received point into its chunk row.
1087 point_pos =
cached_layout%route_point_recv_displs(pe) + irow
1088 meta_base = 2*(point_pos - 1)
1089 dyn_base = ndynamic_per_point*(point_pos - 1)
1090 row = recv_meta(meta_base + 1)
1091 local_row = recv_meta(meta_base + 2)
1093 cpassert(chunk_row >= 1 .AND. chunk_row <=
cached_layout%chunk_feature_count)
1094 features%chunk_density(chunk_row, :) = recv_dynamic(dyn_base + 1:dyn_base + 2)
1095 features%chunk_grad(chunk_row, 1, 1) = recv_dynamic(dyn_base + 3)
1096 features%chunk_grad(chunk_row, 2, 1) = recv_dynamic(dyn_base + 4)
1097 features%chunk_grad(chunk_row, 3, 1) = recv_dynamic(dyn_base + 5)
1098 features%chunk_grad(chunk_row, 1, 2) = recv_dynamic(dyn_base + 6)
1099 features%chunk_grad(chunk_row, 2, 2) = recv_dynamic(dyn_base + 7)
1100 features%chunk_grad(chunk_row, 3, 2) = recv_dynamic(dyn_base + 8)
1101 features%chunk_kin(chunk_row, :) = recv_dynamic(dyn_base + 9:dyn_base + 10)
1102 features%chunk_return_positions(chunk_row) = point_pos
1103 features%chunk_return_ranks(chunk_row) = pe
1104 features%chunk_return_rows(chunk_row) = local_row
1107 cpassert(all(features%chunk_return_positions > 0))
1108 cpassert(all(features%chunk_return_ranks > 0))
1109 cpassert(all(features%chunk_return_rows > 0))
1111 DEALLOCATE (cursor, recv_dynamic, recv_meta, send_dynamic, send_meta)
1113 END SUBROUTINE route_atom_chunk_dynamics
! Fill the chunk arrays of features by slicing its full arrays: rows
! row_begin:row_end of density, grad and kin are copied into chunk_density,
! chunk_grad and chunk_kin, which are sized by
! cached_layout%chunk_feature_count.
! NOTE(review): row_begin and row_end are set outside the lines shown here;
! presumably from the cached chunk_feature_begin / chunk_feature_count -
! confirm.
1119 SUBROUTINE extract_atom_chunk_dynamics(features)
1122 INTEGER :: row_begin, row_end
1127 ALLOCATE (features%chunk_density(
cached_layout%chunk_feature_count, 2), &
1128 features%chunk_grad(
cached_layout%chunk_feature_count, 3, 2), &
1130 features%chunk_density(:, :) = features%density(row_begin:row_end, :)
1131 features%chunk_grad(:, :, :) = features%grad(row_begin:row_end, :, :)
1132 features%chunk_kin(:, :) = features%kin(row_begin:row_end, :)
1134 END SUBROUTINE extract_atom_chunk_dynamics
! Compute a compact signature of the optional weights.
! \param weights optional weights; used only when PRESENT and ASSOCIATED
! \param has_weights .true. only when weights is PRESENT and ASSOCIATED
! \param weight_sum sum over weights%array
! \param weight_sumsq sum over the squares of weights%array
! Without usable weights has_weights is .false. and weight_sumsq is 0.
! NOTE(review): the default of weight_sum is presumably also 0, set outside the
! lines shown here - confirm.
! The sums run over the array as held by the caller; no reduction over ranks is
! performed in the visible statements.
1143 SUBROUTINE weights_signature(weights, has_weights, weight_sum, weight_sumsq)
1145 LOGICAL,
INTENT(OUT) :: has_weights
1146 REAL(kind=
dp),
INTENT(OUT) :: weight_sum, weight_sumsq
1148 has_weights = .false.
1150 weight_sumsq = 0.0_dp
1151 IF (
PRESENT(weights))
THEN
1152 IF (
ASSOCIATED(weights))
THEN
1153 has_weights = .true.
1154 weight_sum = sum(weights%array)
1155 weight_sumsq = sum(weights%array*weights%array)
1159 END SUBROUTINE weights_signature
!> \brief Resets a layout cache: clears the tensor-activity flags, frees every
!>        allocated array component and restores the scalar bookkeeping fields.
!> \param cache layout cache to reset
1165 SUBROUTINE release_layout_cache(cache)
1166 TYPE(skala_gpw_layout_cache_type),
INTENT(INOUT) :: cache
! Each of the four groups below is only entered when its *_active flag is set,
! and the flag is cleared afterwards.
! NOTE(review): the statements inside these IF blocks are not visible in this
! listing; presumably they release the corresponding Torch tensors and
! dictionaries - confirm.
1168 IF (cache%dynamic_tensors_active)
THEN
1172 cache%dynamic_tensors_active = .false.
1175 IF (cache%chunk_dynamic_tensors_active)
THEN
1179 cache%chunk_dynamic_tensors_active = .false.
1182 IF (cache%static_tensors_active)
THEN
1191 cache%static_tensors_active = .false.
1194 IF (cache%chunk_static_tensors_active)
THEN
1203 cache%chunk_static_tensors_active = .false.
! Free the array components. Every DEALLOCATE is guarded by ALLOCATED, so the
! routine can also be applied to a cache that is empty or only partly built.
1206 IF (
ALLOCATED(cache%chunk_feature_counts))
DEALLOCATE (cache%chunk_feature_counts)
1207 IF (
ALLOCATED(cache%chunk_feature_displs))
DEALLOCATE (cache%chunk_feature_displs)
1208 IF (
ALLOCATED(cache%chunk_grad_counts))
DEALLOCATE (cache%chunk_grad_counts)
1209 IF (
ALLOCATED(cache%chunk_grad_displs))
DEALLOCATE (cache%chunk_grad_displs)
1210 IF (
ALLOCATED(cache%route_dynamic_recv_counts))
DEALLOCATE (cache%route_dynamic_recv_counts)
1211 IF (
ALLOCATED(cache%route_dynamic_recv_displs))
DEALLOCATE (cache%route_dynamic_recv_displs)
1212 IF (
ALLOCATED(cache%route_dynamic_send_counts))
DEALLOCATE (cache%route_dynamic_send_counts)
1213 IF (
ALLOCATED(cache%route_dynamic_send_displs))
DEALLOCATE (cache%route_dynamic_send_displs)
1214 IF (
ALLOCATED(cache%route_grad_return_recv_counts)) &
1215 DEALLOCATE (cache%route_grad_return_recv_counts)
1216 IF (
ALLOCATED(cache%route_grad_return_recv_displs)) &
1217 DEALLOCATE (cache%route_grad_return_recv_displs)
1218 IF (
ALLOCATED(cache%route_grad_return_send_counts)) &
1219 DEALLOCATE (cache%route_grad_return_send_counts)
1220 IF (
ALLOCATED(cache%route_grad_return_send_displs)) &
1221 DEALLOCATE (cache%route_grad_return_send_displs)
1222 IF (
ALLOCATED(cache%route_local_dest))
DEALLOCATE (cache%route_local_dest)
1223 IF (
ALLOCATED(cache%route_meta_recv_counts))
DEALLOCATE (cache%route_meta_recv_counts)
1224 IF (
ALLOCATED(cache%route_meta_recv_displs))
DEALLOCATE (cache%route_meta_recv_displs)
1225 IF (
ALLOCATED(cache%route_meta_send_counts))
DEALLOCATE (cache%route_meta_send_counts)
1226 IF (
ALLOCATED(cache%route_meta_send_displs))
DEALLOCATE (cache%route_meta_send_displs)
1227 IF (
ALLOCATED(cache%route_point_recv_counts))
DEALLOCATE (cache%route_point_recv_counts)
1228 IF (
ALLOCATED(cache%route_point_recv_displs))
DEALLOCATE (cache%route_point_recv_displs)
1229 IF (
ALLOCATED(cache%route_point_send_counts))
DEALLOCATE (cache%route_point_send_counts)
1230 IF (
ALLOCATED(cache%route_point_send_displs))
DEALLOCATE (cache%route_point_send_displs)
1231 IF (
ALLOCATED(cache%route_send_local_rows))
DEALLOCATE (cache%route_send_local_rows)
1232 IF (
ALLOCATED(cache%dynamic_counts))
DEALLOCATE (cache%dynamic_counts)
1233 IF (
ALLOCATED(cache%dynamic_displs))
DEALLOCATE (cache%dynamic_displs)
1234 IF (
ALLOCATED(cache%feature_counts))
DEALLOCATE (cache%feature_counts)
1235 IF (
ALLOCATED(cache%feature_displs))
DEALLOCATE (cache%feature_displs)
1236 IF (
ALLOCATED(cache%global_to_feature))
DEALLOCATE (cache%global_to_feature)
1237 IF (
ALLOCATED(cache%feature_index))
DEALLOCATE (cache%feature_index)
1238 IF (
ALLOCATED(cache%atomic_grid_sizes))
DEALLOCATE (cache%atomic_grid_sizes)
1239 IF (
ALLOCATED(cache%chunk_atomic_grid_sizes))
DEALLOCATE (cache%chunk_atomic_grid_sizes)
1240 IF (
ALLOCATED(cache%chunk_feature_indices))
DEALLOCATE (cache%chunk_feature_indices)
1241 IF (
ALLOCATED(cache%local_feature_indices))
DEALLOCATE (cache%local_feature_indices)
1242 IF (
ALLOCATED(cache%atomic_grid_size_bound_shape)) &
1243 DEALLOCATE (cache%atomic_grid_size_bound_shape)
1244 IF (
ALLOCATED(cache%chunk_atomic_grid_size_bound_shape)) &
1245 DEALLOCATE (cache%chunk_atomic_grid_size_bound_shape)
1246 IF (
ALLOCATED(cache%atomic_grid_weights))
DEALLOCATE (cache%atomic_grid_weights)
1247 IF (
ALLOCATED(cache%chunk_atomic_grid_weights))
DEALLOCATE (cache%chunk_atomic_grid_weights)
1248 IF (
ALLOCATED(cache%chunk_grid_weights))
DEALLOCATE (cache%chunk_grid_weights)
1249 IF (
ALLOCATED(cache%grid_weights))
DEALLOCATE (cache%grid_weights)
1250 IF (
ALLOCATED(cache%atom_coords))
DEALLOCATE (cache%atom_coords)
1251 IF (
ALLOCATED(cache%chunk_coarse_0_atomic_coords)) &
1252 DEALLOCATE (cache%chunk_coarse_0_atomic_coords)
1253 IF (
ALLOCATED(cache%coarse_0_atomic_coords))
DEALLOCATE (cache%coarse_0_atomic_coords)
1254 IF (
ALLOCATED(cache%chunk_grid_coords))
DEALLOCATE (cache%chunk_grid_coords)
1255 IF (
ALLOCATED(cache%grid_coords))
DEALLOCATE (cache%grid_coords)
! Scalar bookkeeping goes back to the default initialisers declared in
! skala_gpw_layout_cache_type (begin indices 1, end/count values 0).
1257 cache%chunk_atom_begin = 1
1258 cache%chunk_atom_end = 0
1259 cache%chunk_feature_begin = 1
1260 cache%chunk_feature_count = 0
1261 cache%chunk_natom = 0
1264 cache%nflat_local = 0
! Forget the stored weight signature and cell matrix.
1270 cache%weight_sum = 0.0_dp
1271 cache%weight_sumsq = 0.0_dp
1272 cache%cell_hmat = 0.0_dp
! Mark the cache as unused. The four *_tensors_active flags were already
! cleared inside the IF blocks above when they were set; repeating the
! assignment here makes the final state unconditional.
1274 cache%active = .false.
1275 cache%has_weights = .false.
1276 cache%chunk_dynamic_tensors_active = .false.
1277 cache%chunk_static_tensors_active = .false.
1278 cache%dynamic_tensors_active = .false.
1279 cache%static_tensors_active = .false.
1281 END SUBROUTINE release_layout_cache
! Body of the routine that releases a feature bundle: flag-guarded release
! blocks, reset of the ownership/routing flags, deallocation of the backing
! arrays and reset of the counters.
! NOTE(review): the SUBROUTINE and END SUBROUTINE lines are not visible in this
! listing; presumably this is skala_gpw_feature_release(features) - confirm.
! NOTE(review): the statements inside the owns_* IF blocks are not visible;
! presumably only Torch objects owned by the bundle (not those shared with
! cached_layout) are released there - confirm.
1290 IF (features%active)
THEN
1291 IF (features%owns_dynamic_tensors)
THEN
1296 IF (features%owns_static_tensors)
THEN
1303 IF (features%owns_static_tensors .OR. features%owns_coordinate_tensor)
THEN
! Flags after release. The owns_dynamic/owns_static flags are set back to
! .true.; add_feature_tensors sets them to .false. when it wires tensors into
! the bundle.
! NOTE(review): presumably .true. is also the declared default of these two
! components - confirm in the type definition.
1307 features%active = .false.
1308 features%owns_coordinate_tensor = .false.
1309 features%owns_dynamic_tensors = .true.
1310 features%owns_static_tensors = .true.
1311 features%uses_atom_chunk_routing = .false.
1312 features%uses_atom_chunks = .false.
! Free the backing arrays; each DEALLOCATE is guarded by ALLOCATED, so partly
! built bundles are handled as well.
1315 IF (
ALLOCATED(features%chunk_density))
DEALLOCATE (features%chunk_density)
1316 IF (
ALLOCATED(features%chunk_grad))
DEALLOCATE (features%chunk_grad)
1317 IF (
ALLOCATED(features%chunk_kin))
DEALLOCATE (features%chunk_kin)
1318 IF (
ALLOCATED(features%density))
DEALLOCATE (features%density)
1319 IF (
ALLOCATED(features%grad))
DEALLOCATE (features%grad)
1320 IF (
ALLOCATED(features%kin))
DEALLOCATE (features%kin)
1321 IF (
ALLOCATED(features%chunk_grad_counts))
DEALLOCATE (features%chunk_grad_counts)
1322 IF (
ALLOCATED(features%chunk_grad_displs))
DEALLOCATE (features%chunk_grad_displs)
1323 IF (
ALLOCATED(features%chunk_return_positions))
DEALLOCATE (features%chunk_return_positions)
1324 IF (
ALLOCATED(features%chunk_return_ranks))
DEALLOCATE (features%chunk_return_ranks)
1325 IF (
ALLOCATED(features%chunk_return_rows))
DEALLOCATE (features%chunk_return_rows)
1326 IF (
ALLOCATED(features%route_grad_return_recv_counts)) &
1327 DEALLOCATE (features%route_grad_return_recv_counts)
1328 IF (
ALLOCATED(features%route_grad_return_recv_displs)) &
1329 DEALLOCATE (features%route_grad_return_recv_displs)
1330 IF (
ALLOCATED(features%route_grad_return_send_counts)) &
1331 DEALLOCATE (features%route_grad_return_send_counts)
1332 IF (
ALLOCATED(features%route_grad_return_send_displs)) &
1333 DEALLOCATE (features%route_grad_return_send_displs)
1334 IF (
ALLOCATED(features%route_point_recv_counts)) &
1335 DEALLOCATE (features%route_point_recv_counts)
1336 IF (
ALLOCATED(features%route_point_recv_displs)) &
1337 DEALLOCATE (features%route_point_recv_displs)
1338 IF (
ALLOCATED(features%route_point_send_counts)) &
1339 DEALLOCATE (features%route_point_send_counts)
1340 IF (
ALLOCATED(features%route_point_send_displs)) &
1341 DEALLOCATE (features%route_point_send_displs)
1342 IF (
ALLOCATED(features%route_send_local_rows))
DEALLOCATE (features%route_send_local_rows)
1343 IF (
ALLOCATED(features%feature_index))
DEALLOCATE (features%feature_index)
1344 IF (
ALLOCATED(features%grid_coords))
DEALLOCATE (features%grid_coords)
1345 IF (
ALLOCATED(features%grid_weights))
DEALLOCATE (features%grid_weights)
1346 IF (
ALLOCATED(features%atomic_grid_weights))
DEALLOCATE (features%atomic_grid_weights)
1347 IF (
ALLOCATED(features%atomic_grid_sizes))
DEALLOCATE (features%atomic_grid_sizes)
1348 IF (
ALLOCATED(features%coarse_0_atomic_coords))
DEALLOCATE (features%coarse_0_atomic_coords)
1349 IF (
ALLOCATED(features%atomic_grid_size_bound_shape)) &
1350 DEALLOCATE (features%atomic_grid_size_bound_shape)
! Counters back to zero. uses_atom_chunk_routing was already cleared above
! (original line 1311); the second assignment is redundant but harmless.
1351 features%chunk_feature_count = 0
1353 features%nflat_local = 0
1354 features%uses_atom_chunk_routing = .false.
!> \brief Wires Torch tensor handles into a feature bundle, taking the static
!>        tensors from cached_layout and passing requires_grad on for the
!>        dynamic ones.
!> \param features feature bundle whose *_t tensor components and owns_* flags are set
!> \param requires_grad forwarded as requires_grad= to the calls that set up the dynamic tensors
!> \param requires_coordinate_grad if .TRUE., a coordinate tensor owned by the bundle is set up
! NOTE(review): further dummy arguments (use_atom_chunks is used below) are
! declared on continuation lines that are missing from this listing, and the
! heads of several CALL statements, the ELSE lines and the END IF lines are not
! visible either. The comments below describe only the visible statements.
! On entry all three owns_* flags are cleared (original lines 1372-1374).
1365 SUBROUTINE add_feature_tensors(features, requires_grad, requires_coordinate_grad, &
1368 LOGICAL,
INTENT(IN) :: requires_grad, requires_coordinate_grad, &
1372 features%owns_static_tensors = .false.
1373 features%owns_coordinate_tensor = .false.
1374 features%owns_dynamic_tensors = .false.
! Atom-chunk path: the static tensor handles are taken from the chunk_* tensors
! of cached_layout, and the dynamic data come from the chunk arrays (e.g.
! features%chunk_density).
1375 IF (use_atom_chunks)
THEN
1379 features%grid_weights_t =
cached_layout%chunk_grid_weights_t
1380 features%atomic_grid_weights_t =
cached_layout%chunk_atomic_grid_weights_t
1381 features%atomic_grid_sizes_t =
cached_layout%chunk_atomic_grid_sizes_t
1382 features%atomic_grid_size_bound_shape_t = &
1384 features%local_feature_indices_t =
cached_layout%chunk_feature_indices_t
1387 features%chunk_density, requires_grad=requires_grad)
1391 requires_grad=requires_grad)
1395 requires_grad=requires_grad)
! Presumably the ELSE branch (the ELSE line is not visible): the handles are
! taken from the non-chunk tensors of cached_layout.
1403 features%atomic_grid_weights_t =
cached_layout%atomic_grid_weights_t
1404 features%atomic_grid_sizes_t =
cached_layout%atomic_grid_sizes_t
1405 features%atomic_grid_size_bound_shape_t =
cached_layout%atomic_grid_size_bound_shape_t
1406 features%local_feature_indices_t =
cached_layout%local_feature_indices_t
1409 requires_grad=requires_grad)
1413 requires_grad=requires_grad)
1417 requires_grad=requires_grad)
! Coordinate tensor: when a coordinate gradient is requested, atom chunks must
! not be in use (assertion), the tensor involves features%coarse_0_atomic_coords
! and the bundle is marked as owning it. Otherwise the handle is taken from
! cached_layout (chunk or non-chunk variant).
1423 IF (requires_coordinate_grad)
THEN
1424 cpassert(.NOT. use_atom_chunks)
1426 features%coarse_0_atomic_coords)
1429 features%coarse_0_atomic_coords_t)
1430 features%owns_coordinate_tensor = .true.
1432 IF (use_atom_chunks)
THEN
1433 features%coarse_0_atomic_coords_t =
cached_layout%chunk_coarse_0_atomic_coords_t
1437 features%coarse_0_atomic_coords_t =
cached_layout%coarse_0_atomic_coords_t
1443 END SUBROUTINE add_feature_tensors
!> \brief Converts a three-component grid index into a coordinate vector.
!> \param pw_grid grid description; its bounds(1, :) and dh components are read
!> \param index grid index triple
!> \return coord = sum over i of (index(i) - pw_grid%bounds(1, i)) * pw_grid%dh(:, i)
! NOTE(review): the declaration of pw_grid is not visible in this listing.
! NOTE(review): the dummy argument name index hides the intrinsic INDEX inside
! this function; harmless here because the intrinsic is not used.
1451 FUNCTION grid_coordinate(pw_grid, index)
RESULT(coord)
1453 INTEGER,
DIMENSION(3),
INTENT(IN) :: index
1454 REAL(kind=
dp),
DIMENSION(3) :: coord
1456 INTEGER,
DIMENSION(3) :: relative_index
! Offset of the index from the first row of pw_grid%bounds, so that the point
! at bounds(1, :) maps to the zero vector.
1458 relative_index = index - pw_grid%bounds(1, :)
! Linear combination of the three columns of pw_grid%dh with the integer
! offsets as coefficients.
1459 coord = real(relative_index(1), kind=
dp)*pw_grid%dh(:, 1) + &
1460 REAL(relative_index(2), kind=
dp)*pw_grid%dh(:, 2) + &
1461 REAL(relative_index(3), kind=
dp)*pw_grid%dh(:, 3)
1463 END FUNCTION grid_coordinate
!> \brief Returns owner_coord plus the periodically reduced displacement from
!>        owner_coord to grid_point.
!> \param owner_coord reference position
!> \param grid_point position whose displacement from owner_coord is reduced
!> \param cell simulation cell; orthorhombic, hmat, h_inv and perd are read
!> \return coord position obtained by adding the reduced displacement to owner_coord
! NOTE(review): the declaration of cell and the ELSE / END IF lines are not
! visible in this listing.
1472 FUNCTION nearest_image_coordinate(owner_coord, grid_point, cell)
RESULT(coord)
1473 REAL(kind=
dp),
DIMENSION(3),
INTENT(IN) :: owner_coord, grid_point
1475 REAL(kind=
dp),
DIMENSION(3) :: coord
1477 REAL(kind=
dp) :: dx, dy, dz
! Orthorhombic cells: only the diagonal elements of hmat and h_inv are used, so
! each Cartesian component is reduced independently.
1479 IF (cell%orthorhombic)
THEN
1480 dx = grid_point(1) - owner_coord(1)
1481 dy = grid_point(2) - owner_coord(2)
1482 dz = grid_point(3) - owner_coord(3)
! Subtract the nearest whole number of cell lengths, scaled by cell%perd(i).
! NOTE(review): presumably perd(i) is 1 for a periodic direction and 0
! otherwise, which would leave non-periodic components unchanged - confirm.
1483 dx = dx - cell%hmat(1, 1)*cell%perd(1)*anint(cell%h_inv(1, 1)*dx)
1484 dy = dy - cell%hmat(2, 2)*cell%perd(2)*anint(cell%h_inv(2, 2)*dy)
1485 dz = dz - cell%hmat(3, 3)*cell%perd(3)*anint(cell%h_inv(3, 3)*dz)
1486 coord = owner_coord + [dx, dy, dz]
! Presumably the ELSE branch for general cells: the displacement is obtained
! from pbc(owner_coord, grid_point, cell).
1488 coord = owner_coord +
pbc(owner_coord, grid_point, cell)
1491 END FUNCTION nearest_image_coordinate
!> \brief Scans all atoms for the one with the smallest periodically reduced
!>        squared distance to a grid point.
!> \param grid_point position to assign
!> \param atom_coords atom positions, one atom per column (indexed as atom_coords(1:3, iatom))
!> \param cell simulation cell; orthorhombic, hmat, h_inv and perd are read
!> \return owner
! NOTE(review): the declarations of cell, owner and iatom and the bodies of the
! two "r2 < best_r2" IF blocks are not visible in this listing; presumably they
! store r2 in best_r2 and iatom in owner - confirm. With a strict "<" the first
! atom found at the minimum distance would win ties.
1500 FUNCTION nearest_atom(grid_point, atom_coords, cell)
RESULT(owner)
1501 REAL(kind=
dp),
DIMENSION(3),
INTENT(IN) :: grid_point
1502 REAL(kind=
dp),
DIMENSION(:, :),
INTENT(IN) :: atom_coords
1507 REAL(kind=
dp) :: best_r2, dx, dy, dz, r2
1508 REAL(kind=
dp),
DIMENSION(3) :: rij
! Start from the largest representable value so that the first atom always
! improves on it.
1511 best_r2 = huge(1.0_dp)
! Orthorhombic cells: reduce each Cartesian component separately using only the
! diagonal elements of hmat and h_inv.
1512 IF (cell%orthorhombic)
THEN
1513 DO iatom = 1,
SIZE(atom_coords, 2)
1514 dx = grid_point(1) - atom_coords(1, iatom)
1515 dy = grid_point(2) - atom_coords(2, iatom)
1516 dz = grid_point(3) - atom_coords(3, iatom)
1517 dx = dx - cell%hmat(1, 1)*cell%perd(1)*anint(cell%h_inv(1, 1)*dx)
1518 dy = dy - cell%hmat(2, 2)*cell%perd(2)*anint(cell%h_inv(2, 2)*dy)
1519 dz = dz - cell%hmat(3, 3)*cell%perd(3)*anint(cell%h_inv(3, 3)*dz)
1520 r2 = dx*dx + dy*dy + dz*dz
1521 IF (r2 < best_r2)
THEN
! Presumably the ELSE branch for general cells: the displacement vector comes
! from pbc; the statement computing r2 from rij is not visible here.
1527 DO iatom = 1,
SIZE(atom_coords, 2)
1528 rij =
pbc(grid_point, atom_coords(:, iatom), cell)
1530 IF (r2 < best_r2)
THEN
1537 END FUNCTION nearest_atom
Handles all functions related to the CELL.
Various utilities for arrays of different kinds: output, allocation, ... maybe it is not a good...
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
Interface to the message passing library MPI.
Define the data structure for the particle information.
Build SKALA TorchScript feature dictionaries from CP2K GPW real-space grids.
subroutine, public skala_gpw_feature_build(features, rho_set, rho_r, particle_set, cell, requires_grad, weights, requires_coordinate_grad, use_atom_chunks, route_atom_chunks)
Build a flat SKALA molecular feature dictionary from a local GPW grid.
subroutine, public skala_gpw_feature_release(features)
Release Torch objects and backing arrays owned by a feature bundle.
type(skala_gpw_layout_cache_type), save cached_layout
subroutine, public torch_dict_release(dict)
Releases a Torch dictionary and all its resources.
subroutine, public torch_tensor_to_device_leaf(tensor, requires_grad)
Moves a tensor to the active Torch device and makes it an autograd leaf.
subroutine, public torch_dict_create(dict)
Creates an empty Torch dictionary.
subroutine, public torch_dict_insert(dict, key, tensor)
Inserts a Torch tensor into a Torch dictionary.
subroutine, public torch_dict_clone(source, target)
Clones a Torch dictionary.
subroutine, public torch_tensor_release(tensor)
Releases a Torch tensor and all its resources.
subroutine, public xc_rho_set_get(rho_set, can_return_null, rho, drho, norm_drho, rhoa, rhob, norm_drhoa, norm_drhob, rho_1_3, rhoa_1_3, rhob_1_3, laplace_rho, laplace_rhoa, laplace_rhob, drhoa, drhob, rho_cutoff, drho_cutoff, tau_cutoff, tau, tau_a, tau_b, local_bounds)
Returns the various attributes of rho_set.
Type defining parameters related to the simulation cell.
Represents a pointer to a contiguous 3D array.
Represents a density, with all the representations and data needed to perform a functional evaluation.