smeagol_matrix_utils.F
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Routines to convert sparse matrices between the DBCSR (distributed blocked compressed sparse row)
10!> and SIESTA (distributed compressed sparse column) formats.
11!> \author Sergey Chulkov
12!> \author Christian Ahart
13!> \author Clotilde Cucinotta
14! **************************************************************************************************
16 USE cell_types, ONLY: cell_type, &
23 USE kinds, ONLY: dp, &
24 dp_size, &
25 int_8
30#if defined(__SMEAGOL)
31 USE parallel, ONLY: getnodeorbs, &
32 globaltolocalorb, &
33 localtoglobalorb, &
34 whichnodeorb
35#endif
44 USE qs_subsys_types, ONLY: qs_subsys_get, &
45 qs_subsys_type
46 USE util, ONLY: sort
47#include "./base/base_uses.f90"
48
49 IMPLICIT NONE
50 PRIVATE
51
52 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'smeagol_matrix_utils'
53 LOGICAL, PARAMETER, PRIVATE :: debug_this_module = .false.
54
55 INTEGER, PARAMETER, PRIVATE :: neighbor_list_iatom_index = 1
56 INTEGER, PARAMETER, PRIVATE :: neighbor_list_jatom_index = 2
57 INTEGER, PARAMETER, PRIVATE :: neighbor_list_dbcsr_image_index = 3
58 INTEGER, PARAMETER, PRIVATE :: neighbor_list_siesta_image_index = 4
59 INTEGER, PARAMETER, PRIVATE :: neighbor_list_siesta_transp_image_index = 5
60 INTEGER, PARAMETER, PRIVATE :: neighbor_list_dim1 = neighbor_list_siesta_transp_image_index
61
62 PUBLIC :: siesta_distrib_csc_struct_type
63 PUBLIC :: siesta_struct_create, siesta_struct_release
64 PUBLIC :: convert_dbcsr_to_distributed_siesta, convert_distributed_siesta_to_dbcsr
65
66 PRIVATE :: get_negf_cell_ijk, index_in_canonical_enumeration, number_from_canonical_enumeration, pbc_0_1
67 PRIVATE :: get_number_of_mpi_sendrecv_requests, assign_nonzero_elements_to_requests
68
69 !> number of DBCSR matrix elements to receive from a given rank
70 INTEGER, PARAMETER, PRIVATE :: nelements_dbcsr_recv = 1
71 !> number of DBCSR matrix elements to send to a given rank
72 INTEGER, PARAMETER, PRIVATE :: nelements_dbcsr_send = 2
73 INTEGER, PARAMETER, PRIVATE :: nelements_dbcsr_dim2 = nelements_dbcsr_send
74
75 INTEGER(kind=int_8), PARAMETER, PRIVATE :: max_mpi_packet_size_bytes = 134217728 ! 128 MiB (to limit memory usage for matrix redistribution)
76 INTEGER(kind=int_8), PARAMETER, PRIVATE :: max_mpi_packet_size_dp = max_mpi_packet_size_bytes/int(dp_size, kind=int_8)
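! (with dp_size == 8 bytes, that is 134217728/8 = 16777216 double-precision numbers per MPI message)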
77
78 ! a portable way to determine the upper bound for the tag value is to call
79 ! MPI_COMM_GET_ATTR(comm, MPI_TAG_UB, max_mpi_rank, flag, ierror).
80 ! The MPI specification guarantees a value of at least 32767.
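! A sketch of that query, for illustration only ('comm', 'tag_ub', 'flag' and 'ierror' are assumed local variables):
!    INTEGER(kind=MPI_ADDRESS_KIND) :: tag_ub
!    LOGICAL :: flag
!    INTEGER :: ierror
!    CALL MPI_Comm_get_attr(comm, MPI_TAG_UB, tag_ub, flag, ierror)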
81 INTEGER, PARAMETER, PRIVATE :: max_mpi_rank = 32767
82
83! **************************************************************************************************
84!> \brief Sparsity pattern of replicated SIESTA compressed sparse column (CSC) matrices
85! **************************************************************************************************
86 TYPE siesta_distrib_csc_struct_type
87 !> gather all non-zero matrix elements on the given MPI process.
88 !> Distribute the elements across MPI processes if gather_root < 0.
89 !> The matrix elements should be located on the I/O process in case of bulk transport,
90 !> and should be distributed in case of a SMEAGOL calculation.
91 INTEGER :: gather_root = 0
92 !> Based on a full (.FALSE.) or upper-triangular (.TRUE.) DBCSR matrix.
93 !> It is used in CPASSERTs to allow access to lower-triangular matrix elements of non-symmetric DBCSR matrices.
94 !> If there are no bugs in this module, these CPASSERTs should never trigger.
95 !> Therefore the 'symmetric' variable, along with the relevant CPASSERT calls, is redundant and could be removed.
96 LOGICAL :: symmetric = .true.
97 !> number of neighbour list nodes for each MPI process (0:num_pe-1).
98 !> If do_merge == .TRUE., nodes for different cell images along the k cell vector are merged into one node.
99 INTEGER, ALLOCATABLE, DIMENSION(:) :: nnodes_per_proc
100
101 !> replicated neighbour list (1:neighbor_list_dim1, 1:SUM(nnodes_per_proc)).
102 !> Neighbour list nodes are ordered according to their MPI ranks.
103 !> Thus, the first nnodes_per_proc(0) nodes are stored on MPI rank 0,
104 !> the next nnodes_per_proc(1) nodes reside on MPI rank 1, etc.
105 !> Nodes for cell images along the transport direction are merged into one node.
106 !> The number of non-zero DBCSR matrix blocks and their DBCSR cell image indices
107 !> are stored into 'n_dbcsr_cell_images_to_merge' and 'dbcsr_cell_image_to_merge' arrays
108 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: nl_repl
109
110 !> number of DBCSR images for each local merged neighbour list node (1:nnodes_per_proc(para_env%mepos))
111 INTEGER, ALLOCATABLE, DIMENSION(:) :: n_dbcsr_cell_images_to_merge
112 !> list of DBCSR image indices to merge; (1:SUM(n_dbcsr_cell_images_to_merge))
113 INTEGER, ALLOCATABLE, DIMENSION(:) :: dbcsr_cell_image_to_merge
114
115 !> number of DBCSR non-zero matrix elements that should be received/sent from each MPI rank
116 !> (0:num_pe-1, 1:nelements_dbcsr_dim2)
117 INTEGER(kind=int_8), ALLOCATABLE, DIMENSION(:, :) :: nelements_per_proc
118
119 !> number of non-zero matrix elements local to this MPI rank.
120 INTEGER(kind=int_8) :: n_nonzero_elements = 0_int_8
121 INTEGER :: nrows = 0, ncols = 0
122 !> Number of non-zero matrix elements (columns) on each row
123 !> n_nonzero_cols(1:nrows); same as 'numh' in SMEAGOL code.
124 INTEGER, ALLOCATABLE, DIMENSION(:) :: n_nonzero_cols
125 !> offset of the first non-zero matrix element on each row.
126 !> row_offset(1:nrows); same as 'listhptr' in the SMEAGOL code.
127 !> It should be declared as INTEGER(kind=int_8), but SMEAGOL expects it to be INTEGER
128 INTEGER, ALLOCATABLE, DIMENSION(:) :: row_offset
129 !> column index of each non-zero matrix element.
130 !> col_index(1:n_nonzero_elements); same as 'listh' in SMEAGOL code
131 !> the column index of the first non-zero matrix element of row irow is col_index(row_offset(irow)+1)
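!> For illustration: the column indices of all non-zero elements of row irow are
!> col_index(row_offset(irow)+1 : row_offset(irow)+n_nonzero_cols(irow))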
132 INTEGER, ALLOCATABLE, DIMENSION(:) :: col_index
133 !> index of the non-zero matrix element in a communication buffer for each rank
134 INTEGER, ALLOCATABLE, DIMENSION(:) :: packed_index
135 !> R_atom_col - R_atom_row
136 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: xij
137 !> equivalent atomic orbitals
138 INTEGER, ALLOCATABLE, DIMENSION(:) :: indxuo
139 !> atomic index on which the orbital is centred
140 INTEGER, ALLOCATABLE, DIMENSION(:) :: iaorb
141 !> coordinates of all atoms in the supercell
142 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: xa
143 END TYPE siesta_distrib_csc_struct_type
144
145CONTAINS
146
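! A minimal usage sketch of the public API in this module (caller-side names such as
! 'matrix_s_kp' and 'matrix_s_siesta' are hypothetical; the surrounding QS objects are assumed to exist):
!    TYPE(siesta_distrib_csc_struct_type) :: siesta_struct
!    REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: matrix_s_siesta
!    CALL siesta_struct_create(siesta_struct, matrix_s_kp, subsys, cell_to_index, sab_nl, &
!                              para_env, max_ij_cell_image, do_merge=.FALSE., gather_root=-1)
!    ALLOCATE (matrix_s_siesta(siesta_struct%n_nonzero_elements))
!    CALL convert_dbcsr_to_distributed_siesta(matrix_s_siesta, matrix_s_kp, siesta_struct, para_env)
!    ! ... pass matrix_s_siesta to SMEAGOL, possibly update it ...
!    CALL convert_distributed_siesta_to_dbcsr(matrix_s_kp, matrix_s_siesta, siesta_struct, para_env)
!    CALL siesta_struct_release(siesta_struct)
!    DEALLOCATE (matrix_s_siesta)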
147! **************************************************************************************************
148!> \brief Map non-zero matrix blocks between sparse matrices in DBCSR and SIESTA formats.
149!> \param siesta_struct structure that stores metadata (sparsity pattern) of sparse SIESTA matrices
150!> \param matrix_dbcsr_kp DBCSR matrices for each cell image
151!> \param subsys QuickStep molecular system
152!> \param cell_to_index array to convert 3-D cell indices to 1-D DBCSR image indices
153!> \param sab_nl pair-wise neighbour list
154!> \param para_env MPI parallel environment
155!> \param max_ij_cell_image largest index of cell images along i and j cell vectors (e.g. (2,0) in
156!> case of 5 cell images (0,0), (1,0), (-1,0), (2,0), and (-2,0))
157!> \param do_merge merge DBCSR images along transport direction (k cell vector)
158!> \param gather_root distribute non-zero matrix elements of SIESTA matrices across all
159!> parallel processes (-1), or gather them on the given MPI rank (>= 0).
160! **************************************************************************************************
161 SUBROUTINE siesta_struct_create(siesta_struct, matrix_dbcsr_kp, subsys, cell_to_index, &
162 sab_nl, para_env, max_ij_cell_image, do_merge, gather_root)
163 TYPE(siesta_distrib_csc_struct_type), &
164 INTENT(inout) :: siesta_struct
165 TYPE(dbcsr_p_type), DIMENSION(:), INTENT(in) :: matrix_dbcsr_kp
166 TYPE(qs_subsys_type), POINTER :: subsys
167 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
168 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
169 POINTER :: sab_nl
170 TYPE(mp_para_env_type), POINTER :: para_env
171 INTEGER, DIMENSION(2), INTENT(inout) :: max_ij_cell_image
172 LOGICAL, INTENT(in) :: do_merge
173 INTEGER, INTENT(in) :: gather_root
174
175 CHARACTER(len=*), PARAMETER :: routinen = 'siesta_struct_create'
176
177 CHARACTER(len=20) :: str_nelem, str_nelem_max
178 INTEGER :: handle, iatom, icol, icol_blk, icol_local, image, image_j, image_k, irow, &
179 irow_local, natoms, ncells_siesta_total, ncols_blk, ncols_total, nrows_local, &
180 nrows_total, offset
181 INTEGER(kind=int_8) :: n_nonzero_elements_local
182 INTEGER, DIMENSION(3) :: max_ijk_cell_image, ncells_siesta
183 INTEGER, DIMENSION(:), POINTER :: col_blk_offset, col_blk_size
184 LOGICAL :: do_distribute, is_root_rank
185 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: particle_coords
186 REAL(kind=dp), DIMENSION(3) :: real_cell_shift, scaled_cell_shift
187 TYPE(cell_type), POINTER :: cell
188 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
189
190 CALL timeset(routinen, handle)
191 do_distribute = gather_root < 0
192 is_root_rank = gather_root == para_env%mepos
193
194 ! here row_blk_offset / col_blk_offset are global indices of the first row / column of a given non-zero block.
195 ! They are not offsets (index-1) but the actual indices.
196 CALL dbcsr_get_info(matrix=matrix_dbcsr_kp(1)%matrix, &
197 nfullrows_total=nrows_total, nfullcols_total=ncols_total, &
198 nblkcols_total=ncols_blk, col_blk_size=col_blk_size, col_blk_offset=col_blk_offset)
199 IF (debug_this_module) THEN
200 cpassert(nrows_total == ncols_total)
201 cpassert(gather_root < para_env%num_pe)
202 END IF
203
204 siesta_struct%gather_root = gather_root
205 CALL get_neighbor_list_set_p(neighbor_list_sets=sab_nl, symmetric=siesta_struct%symmetric)
206
207 ! apply periodic boundary conditions to atomic coordinates
208 CALL qs_subsys_get(subsys, cell=cell, particle_set=particle_set, nparticle=natoms)
209 ALLOCATE (particle_coords(3, natoms))
210 DO iatom = 1, natoms
211 CALL pbc_0_1(particle_coords(1:3, iatom), particle_set(iatom)%r(1:3), cell)
212 END DO
213
214 ! Note: if we wanted to limit the number of cell images along the transport direction (k cell vector)
215 ! via the 'BulkTransvCellSizeZ' keyword, we would need to pass the full max_ijk_cell_image(1:3) vector
216 ! to the subroutine instead of the reduced vector max_ij_cell_image(1:2)
217 max_ijk_cell_image(1:2) = max_ij_cell_image(1:2)
218
219 ! determine the actual number of cell images along the k cell vector. Unless the third element is also passed
220 ! via subroutine arguments, an extra MPI_Allreduce operation is needed each time we call
221 ! replicate_neighbour_list() / get_nnodes_local().
222 max_ijk_cell_image(3) = -1
223 IF (.NOT. do_merge) max_ijk_cell_image(3) = 1 ! a bulk-transport calculation expects exactly 3 cell images along the transport direction
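! (a largest k-image index of 1 gives 2*1+1 = 3 images: 0, +1 and -1)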
224
225 ! replicate the pair-wise neighbour list. Identical non-zero matrix blocks from cell images along the transport direction
226 ! are grouped together if do_merge == .TRUE.
227 ALLOCATE (siesta_struct%nnodes_per_proc(0:para_env%num_pe - 1))
228 CALL replicate_neighbour_list(siesta_struct%nl_repl, &
229 siesta_struct%n_dbcsr_cell_images_to_merge, &
230 siesta_struct%dbcsr_cell_image_to_merge, &
231 siesta_struct%nnodes_per_proc, &
232 max_ijk_cell_image, sab_nl, para_env, particle_coords, cell, cell_to_index, do_merge)
233 max_ij_cell_image(1:2) = max_ijk_cell_image(1:2)
234
235 ! count the number of non-zero matrix elements that need to be sent to and received from other parallel processes
236 ALLOCATE (siesta_struct%nelements_per_proc(0:para_env%num_pe - 1, nelements_dbcsr_dim2))
237 CALL count_remote_dbcsr_elements(siesta_struct%nelements_per_proc, siesta_struct%nnodes_per_proc, &
238 siesta_struct%nl_repl, matrix_dbcsr_kp, siesta_struct%symmetric, para_env, gather_root)
239
240 ! number of SIESTA non-zero matrix elements that are going to be stored on this parallel process
241 n_nonzero_elements_local = sum(siesta_struct%nelements_per_proc(:, nelements_dbcsr_recv))
242 siesta_struct%n_nonzero_elements = n_nonzero_elements_local
243
244 ! as SMEAGOL uses 32-bit integers, the number of non-zero matrix elements is limited to 2^31 - 1 per MPI rank.
245 ! Abort CP2K if we are about to exceed this limit.
246 IF (n_nonzero_elements_local > int(huge(0), kind=int_8)) THEN
247 WRITE (str_nelem, '(I0)') n_nonzero_elements_local
248 WRITE (str_nelem_max, '(I0)') huge(0)
249 CALL cp_abort(__location__, &
250 "The number of non-zero matrix elements per MPI process "//trim(str_nelem)// &
251 " cannot exceed "//trim(str_nelem_max)// &
252 ". Please increase the number of MPI processes to satisfy this SMEAGOL limitation.")
253 END IF
254
255 ! if no non-zero matrix elements are stored on this process, allocate one-element arrays to avoid a SEGFAULT
256 IF (n_nonzero_elements_local == 0) n_nonzero_elements_local = 1
257
258 ! number of SIESTA-matrix rows local to the given parallel process
259 IF (do_distribute) THEN
260#if defined(__SMEAGOL)
261 CALL getnodeorbs(nrows_total, para_env%mepos, para_env%num_pe, nrows_local)
262#else
263 CALL cp_abort(__location__, &
264 "CP2K was compiled with no SMEAGOL support.")
265#endif
266 ELSE
267 IF (is_root_rank) THEN
268 nrows_local = nrows_total
269 ELSE
270 nrows_local = 0
271 END IF
272 END IF
273
274 ! number of cell images along each cell vector. It is 2*m+1, as SIESTA images are ordered as 0, 1, -1, ..., m, -m
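! e.g. for m = 2 the SIESTA enumeration of cell images is 1 -> 0, 2 -> +1, 3 -> -1, 4 -> +2, 5 -> -2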
275 ncells_siesta(1:3) = 2*max_ijk_cell_image(1:3) + 1
276 ! in case of merged cell images along the transport direction, there will be just 1 'merged' cell image along it
277 IF (do_merge) ncells_siesta(3) = 1
278
279 ncells_siesta_total = ncells_siesta(1)*ncells_siesta(2)*ncells_siesta(3)
280
281 ! number of rows local to the given parallel process. Rows are distributed in a block-cyclic manner
282 siesta_struct%nrows = nrows_local
283
284 ! number of columns of the matrix in its dense form. SIESTA uses 1-D (rows) block-cyclic distribution.
285 ! All non-zero matrix elements on a given row are stored on the same parallel process
286 siesta_struct%ncols = nrows_total*ncells_siesta_total
287
288 ! allocate at least one array element to avoid a SEGFAULT when passing otherwise unallocated arrays to subroutines
289 IF (nrows_local == 0) nrows_local = 1
290
291 ALLOCATE (siesta_struct%n_nonzero_cols(nrows_local))
292 ALLOCATE (siesta_struct%row_offset(nrows_local))
293 ALLOCATE (siesta_struct%col_index(n_nonzero_elements_local))
294 ALLOCATE (siesta_struct%packed_index(n_nonzero_elements_local))
295
296 ! restore the actual number of local rows
297 nrows_local = siesta_struct%nrows
298
299 ! get the number of non-zero matrix elements on each local row (n_nonzero_cols),
300 ! offset of the first non-zero matrix element for each local row (row_offset),
301 ! global column indices of all local non-zero matrix elements (col_index), and
302 ! the indices of all local non-zero matrix elements in the communication buffer (packed_index)
303 CALL get_nonzero_element_indices(siesta_struct%n_nonzero_cols, siesta_struct%row_offset, &
304 siesta_struct%col_index, siesta_struct%packed_index, &
305 siesta_struct%nl_repl, matrix_dbcsr_kp, &
306 siesta_struct%symmetric, para_env, gather_root)
307
308 ! indices of equivalent atomic orbitals
309 ALLOCATE (siesta_struct%indxuo(siesta_struct%ncols))
310 DO icol = 1, ncols_total
311 siesta_struct%indxuo(icol) = icol
312 END DO
313 DO image = 2, ncells_siesta_total
314 siesta_struct%indxuo((image - 1)*ncols_total + 1:image*ncols_total) = siesta_struct%indxuo(1:ncols_total)
315 END DO
316
317 ! particle index on which the orbital is centred
318 ALLOCATE (siesta_struct%iaorb(siesta_struct%ncols))
319 DO icol_blk = 1, ncols_blk
320 ! col_blk_offset() is not an offset but the index of the first atomic orbital in the column block
321 siesta_struct%iaorb(col_blk_offset(icol_blk):col_blk_offset(icol_blk) + col_blk_size(icol_blk) - 1) = icol_blk
322 END DO
323 DO image = 2, ncells_siesta_total
324 siesta_struct%iaorb((image - 1)*ncols_total + 1:image*ncols_total) = siesta_struct%iaorb(1:ncols_total) + (image - 1)*natoms
325 END DO
326
327 ! coordinates of all particles in each cell image
328 ALLOCATE (siesta_struct%xa(3, natoms*ncells_siesta_total))
329 DO image_k = 1, ncells_siesta(3)
330 !icell_siesta(3) = image_k
331 scaled_cell_shift(3) = real(number_from_canonical_enumeration(image_k), kind=dp) ! SIESTA -> actual cell index
332 DO image_j = 1, ncells_siesta(2)
333 scaled_cell_shift(2) = real(number_from_canonical_enumeration(image_j), kind=dp)
334 DO image = 1, ncells_siesta(1)
335 scaled_cell_shift(1) = real(number_from_canonical_enumeration(image), kind=dp)
336 CALL scaled_to_real(real_cell_shift, scaled_cell_shift, cell)
337 offset = (((image_k - 1)*ncells_siesta(2) + image_j - 1)*ncells_siesta(1) + image - 1)*natoms
338 DO iatom = 1, natoms
339 siesta_struct%xa(1:3, offset + iatom) = particle_set(iatom)%r(1:3) + real_cell_shift(1:3)
340 END DO
341 END DO
342 END DO
343 END DO
344
345 ! inter-atomic distance vectors
346 ALLOCATE (siesta_struct%xij(3, n_nonzero_elements_local))
347 DO irow_local = 1, nrows_local
348 IF (do_distribute) THEN
349#if defined(__SMEAGOL)
350 CALL localtoglobalorb(irow_local, para_env%mepos, para_env%num_pe, irow)
351#else
352 CALL cp_abort(__location__, &
353 "CP2K was compiled with no SMEAGOL support.")
354#endif
355 ELSE
356 irow = irow_local
357 IF (debug_this_module) THEN
358 cpassert(is_root_rank)
359 END IF
360 END IF
361 offset = siesta_struct%row_offset(irow_local)
362 DO icol_local = offset + 1, offset + siesta_struct%n_nonzero_cols(irow_local)
363 icol = siesta_struct%col_index(icol_local)
364 siesta_struct%xij(1:3, icol_local) = siesta_struct%xa(1:3, siesta_struct%iaorb(icol)) - &
365 siesta_struct%xa(1:3, siesta_struct%iaorb(irow))
366 END DO
367 END DO
368
369 DEALLOCATE (particle_coords)
370
371 CALL timestop(handle)
372 END SUBROUTINE siesta_struct_create
373
374! **************************************************************************************************
375!> \brief Release a SIESTA matrix structure
376!> \param siesta_struct structure to release
377! **************************************************************************************************
378 SUBROUTINE siesta_struct_release(siesta_struct)
379 TYPE(siesta_distrib_csc_struct_type), &
380 INTENT(inout) :: siesta_struct
381
382 CHARACTER(len=*), PARAMETER :: routinen = 'siesta_struct_release'
383
384 INTEGER :: handle
385
386 CALL timeset(routinen, handle)
387
388 siesta_struct%gather_root = -1
389
390 IF (ALLOCATED(siesta_struct%nnodes_per_proc)) DEALLOCATE (siesta_struct%nnodes_per_proc)
391 IF (ALLOCATED(siesta_struct%nl_repl)) DEALLOCATE (siesta_struct%nl_repl)
392 IF (ALLOCATED(siesta_struct%n_dbcsr_cell_images_to_merge)) DEALLOCATE (siesta_struct%n_dbcsr_cell_images_to_merge)
393 IF (ALLOCATED(siesta_struct%dbcsr_cell_image_to_merge)) DEALLOCATE (siesta_struct%dbcsr_cell_image_to_merge)
394 IF (ALLOCATED(siesta_struct%nelements_per_proc)) DEALLOCATE (siesta_struct%nelements_per_proc)
395
396 siesta_struct%n_nonzero_elements = 0
397 siesta_struct%nrows = 0
398 siesta_struct%ncols = 0
399
400 IF (ALLOCATED(siesta_struct%n_nonzero_cols)) DEALLOCATE (siesta_struct%n_nonzero_cols)
401 IF (ALLOCATED(siesta_struct%row_offset)) DEALLOCATE (siesta_struct%row_offset)
402 IF (ALLOCATED(siesta_struct%col_index)) DEALLOCATE (siesta_struct%col_index)
403 IF (ALLOCATED(siesta_struct%packed_index)) DEALLOCATE (siesta_struct%packed_index)
404
405 IF (ALLOCATED(siesta_struct%xij)) DEALLOCATE (siesta_struct%xij)
406 IF (ALLOCATED(siesta_struct%indxuo)) DEALLOCATE (siesta_struct%indxuo)
407 IF (ALLOCATED(siesta_struct%iaorb)) DEALLOCATE (siesta_struct%iaorb)
408 IF (ALLOCATED(siesta_struct%xa)) DEALLOCATE (siesta_struct%xa)
409
410 CALL timestop(handle)
411 END SUBROUTINE siesta_struct_release
412
413! **************************************************************************************************
414!> \brief Convert matrix from DBCSR to sparse SIESTA format.
415!> \param matrix_siesta matrix in SIESTA format [out]
416!> \param matrix_dbcsr_kp DBCSR matrix [in]
417!> \param siesta_struct structure to map matrix blocks between formats
418!> \param para_env MPI parallel environment
419! **************************************************************************************************
420 SUBROUTINE convert_dbcsr_to_distributed_siesta(matrix_siesta, matrix_dbcsr_kp, siesta_struct, para_env)
421 REAL(kind=dp), DIMENSION(:), INTENT(out) :: matrix_siesta
422 TYPE(dbcsr_p_type), DIMENSION(:), INTENT(in) :: matrix_dbcsr_kp
423 TYPE(siesta_distrib_csc_struct_type), INTENT(in) :: siesta_struct
424 TYPE(mp_para_env_type), INTENT(in), POINTER :: para_env
425
426 CHARACTER(len=*), PARAMETER :: routinen = 'convert_dbcsr_to_distributed_siesta'
427
428 INTEGER :: first_col_minus_one, first_row_minus_one, handle, icol_blk, icol_local, &
429 image_dbcsr, image_ind, image_ind_offset, image_siesta, image_siesta_transp, inode, &
430 inode_proc, iproc, irequest, irow_blk, irow_local, irow_proc, mepos, n_image_ind, &
431 ncols_blk, ncols_local, nnodes_proc, node_offset, nprocs, nrequests_recv, &
432 nrequests_total, nrows_blk, nrows_local
433 INTEGER(kind=int_8) :: n_nonzero_elements_dbcsr, &
434 n_nonzero_elements_siesta, &
435 offset_recv_mepos, offset_send_mepos
436 INTEGER(kind=int_8), ALLOCATABLE, DIMENSION(:) :: n_packed_elements_per_proc, &
437 nelements_per_request, &
438 offset_per_proc, offset_per_request
439 INTEGER, ALLOCATABLE, DIMENSION(:) :: next_nonzero_element_offset, peer_rank, &
440 request_tag
441 INTEGER, DIMENSION(:), POINTER :: col_blk_offset, col_blk_size, &
442 row_blk_offset, row_blk_size
443 LOGICAL :: do_distribute, found, is_root_rank, &
444 symmetric
445 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: recv_buffer, reorder_recv_buffer, &
446 send_buffer
447 REAL(kind=dp), DIMENSION(:, :), POINTER :: sm_block, sm_block_merged
448 TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: requests
449
450 CALL timeset(routinen, handle)
451 matrix_siesta(:) = 0.0_dp
452
453 mepos = para_env%mepos
454 nprocs = para_env%num_pe
455 do_distribute = siesta_struct%gather_root < 0
456 is_root_rank = siesta_struct%gather_root == mepos
457
458 CALL dbcsr_get_info(matrix=matrix_dbcsr_kp(1)%matrix, &
459 nblkrows_total=nrows_blk, nblkcols_total=ncols_blk, &
460 row_blk_size=row_blk_size, col_blk_size=col_blk_size, &
461 row_blk_offset=row_blk_offset, col_blk_offset=col_blk_offset)
462 symmetric = siesta_struct%symmetric
463
464 ! number of locally stored SIESTA non-zero matrix elements
465 n_nonzero_elements_siesta = sum(siesta_struct%nelements_per_proc(:, nelements_dbcsr_recv))
466 ! number of locally stored DBCSR non-zero matrix elements
467 n_nonzero_elements_dbcsr = sum(siesta_struct%nelements_per_proc(:, nelements_dbcsr_send))
468
469 ! number of concurrent MPI isend / irecv operations
470 nrequests_recv = get_number_of_mpi_sendrecv_requests(mepos, siesta_struct%nelements_per_proc(:, nelements_dbcsr_recv), &
471 max_mpi_packet_size_dp)
472 nrequests_total = get_number_of_mpi_sendrecv_requests(mepos, siesta_struct%nelements_per_proc(:, nelements_dbcsr_send), &
473 max_mpi_packet_size_dp) + nrequests_recv
474
475 IF (nrequests_total > 0) THEN
476 ! allocate MPI-related arrays. request_tag is not strictly needed, as the MPI standard guarantees that
477 ! point-to-point messages with the same tag between the same pair of processes are received in the order they were sent
478 ALLOCATE (requests(nrequests_total))
479 ALLOCATE (peer_rank(nrequests_total), request_tag(nrequests_total))
480 ALLOCATE (offset_per_request(nrequests_total), nelements_per_request(nrequests_total))
481 !requests(:) = mp_request_null
482
483 ! split large messages into a number of smaller messages. This is only really needed
484 ! if we were to send more than 2^31 matrix elements in a single MPI request
485 IF (nrequests_recv > 0) THEN
486 CALL assign_nonzero_elements_to_requests(offset_per_request(1:nrequests_recv), &
487 nelements_per_request(1:nrequests_recv), &
488 peer_rank(1:nrequests_recv), &
489 request_tag(1:nrequests_recv), &
490 mepos, &
491 siesta_struct%nelements_per_proc(:, nelements_dbcsr_recv), &
492 max_mpi_packet_size_dp)
493 END IF
494 IF (nrequests_total > nrequests_recv) THEN
495 CALL assign_nonzero_elements_to_requests(offset_per_request(nrequests_recv + 1:nrequests_total), &
496 nelements_per_request(nrequests_recv + 1:nrequests_total), &
497 peer_rank(nrequests_recv + 1:nrequests_total), &
498 request_tag(nrequests_recv + 1:nrequests_total), &
499 mepos, &
500 siesta_struct%nelements_per_proc(:, nelements_dbcsr_send), &
501 max_mpi_packet_size_dp)
502 END IF
503 END IF
504
505 ! point-to-point recv/send can be replaced with alltoallv, if data to distribute per rank is < 2^31 elements (should be OK due to SMEAGOL limitation)
506 ! in principle, it is possible to overcome this limit by using derived datatypes (which would require additional wrapper functions, though).
507 !
508 ! pre-post non-blocking receive operations
509 IF (n_nonzero_elements_siesta > 0) THEN
510 ALLOCATE (recv_buffer(n_nonzero_elements_siesta))
511 END IF
512 DO irequest = 1, nrequests_recv
513 CALL para_env%irecv(recv_buffer(offset_per_request(irequest) + 1: &
514 offset_per_request(irequest) + nelements_per_request(irequest)), &
515 peer_rank(irequest), requests(irequest), request_tag(irequest))
516 END DO
517
518 ! pack local DBCSR non-zero matrix elements ordered by their target parallel process
519 ALLOCATE (offset_per_proc(0:nprocs - 1), n_packed_elements_per_proc(0:nprocs - 1))
520 offset_per_proc(0) = 0
521 DO iproc = 1, nprocs - 1
522 offset_per_proc(iproc) = offset_per_proc(iproc - 1) + siesta_struct%nelements_per_proc(iproc - 1, nelements_dbcsr_send)
523 END DO
524 n_packed_elements_per_proc(:) = 0
525
526 ! number of local neighbour-list nodes and offset of the first local neighbour-list node
527 nnodes_proc = siesta_struct%nnodes_per_proc(mepos)
528 !node_offset = SUM(siesta_struct%nnodes_per_proc(0:mepos)) - siesta_struct%nnodes_per_proc(mepos)
529 node_offset = sum(siesta_struct%nnodes_per_proc(0:mepos)) - nnodes_proc
530
531 ! if do_distribute == .FALSE., send all matrix elements to the MPI process with rank gather_root
532 ! if do_distribute == .TRUE., iproc is determined by calling WhichNodeOrb()
533 iproc = siesta_struct%gather_root
534
535 IF (n_nonzero_elements_dbcsr > 0) THEN
536 ALLOCATE (send_buffer(n_nonzero_elements_dbcsr))
537 send_buffer(:) = 0.0_dp
538
539 ! iterate over locally-stored DBCSR matrix blocks (local neighbour-list nodes).
540 ! inode_proc is the index of the neighbour-list node local to this parallel process
541 image_ind_offset = 0
542 DO inode_proc = 1, nnodes_proc
543 n_image_ind = siesta_struct%n_dbcsr_cell_images_to_merge(inode_proc)
544 IF (n_image_ind > 0) THEN
545 inode = node_offset + inode_proc
546
547 irow_blk = siesta_struct%nl_repl(neighbor_list_iatom_index, inode)
548 icol_blk = siesta_struct%nl_repl(neighbor_list_jatom_index, inode)
549 cpassert(irow_blk <= icol_blk .OR. (.NOT. symmetric))
550 image_siesta = siesta_struct%nl_repl(neighbor_list_siesta_image_index, inode)
551 image_siesta_transp = siesta_struct%nl_repl(neighbor_list_siesta_transp_image_index, inode)
552
553 nrows_local = row_blk_size(irow_blk)
554 ncols_local = col_blk_size(icol_blk)
555 first_row_minus_one = row_blk_offset(irow_blk) - 1
556 first_col_minus_one = col_blk_offset(icol_blk) - 1
557
558 ! merging cell images along transport direction
559 IF (n_image_ind == 1) THEN
560 ! the most common case. Nothing to merge, so there is no need to allocate memory for a merged block
561 image_dbcsr = siesta_struct%dbcsr_cell_image_to_merge(image_ind_offset + 1)
562 CALL dbcsr_get_block_p(matrix=matrix_dbcsr_kp(image_dbcsr)%matrix, &
563 row=irow_blk, col=icol_blk, block=sm_block_merged, found=found)
564 cpassert(found)
565 ELSE ! n_image_ind > 1
566 ALLOCATE (sm_block_merged(nrows_local, ncols_local))
567 sm_block_merged(:, :) = 0.0_dp ! zero the accumulator before merging cell images
568 DO image_ind = 1, n_image_ind
569 image_dbcsr = siesta_struct%dbcsr_cell_image_to_merge(image_ind + image_ind_offset)
570
571 CALL dbcsr_get_block_p(matrix=matrix_dbcsr_kp(image_dbcsr)%matrix, &
572 row=irow_blk, col=icol_blk, block=sm_block, found=found)
573 cpassert(found)
574 sm_block_merged(1:nrows_local, 1:ncols_local) = sm_block_merged(1:nrows_local, 1:ncols_local) + &
575 sm_block(1:nrows_local, 1:ncols_local)
576 END DO
577 END IF
578
579 ! pack matrix elements for the 'normal' SIESTA matrix block
580 IF (image_siesta > 0) THEN
581 DO irow_local = 1, nrows_local
582 IF (do_distribute) THEN
583#if defined(__SMEAGOL)
584 CALL whichnodeorb(irow_local + first_row_minus_one, nprocs, iproc)
585#else
586 CALL cp_abort(__location__, &
587 "CP2K was compiled with no SMEAGOL support.")
588#endif
589 END IF
590
591 ! CPASSERT
592 IF (debug_this_module) THEN
593 cpassert(iproc >= 0 .AND. iproc < nprocs)
594 IF (n_packed_elements_per_proc(iproc) + ncols_local > &
595 siesta_struct%nelements_per_proc(iproc, nelements_dbcsr_send)) THEN
596 CALL cp__a(__short_file__, __line__)
597 END IF
598 END IF
599
600 offset_send_mepos = offset_per_proc(iproc) + n_packed_elements_per_proc(iproc)
601 send_buffer(offset_send_mepos + 1:offset_send_mepos + ncols_local) = sm_block_merged(irow_local, 1:ncols_local)
602
603 n_packed_elements_per_proc(iproc) = n_packed_elements_per_proc(iproc) + ncols_local
604 END DO
605 END IF
606
607 ! pack matrix elements of the transposed SIESTA matrix block
608 IF (image_siesta_transp > 0) THEN
609 DO icol_local = 1, ncols_local
610 IF (do_distribute) THEN
611#if defined(__SMEAGOL)
612 CALL whichnodeorb(icol_local + first_col_minus_one, nprocs, iproc) ! iproc_orb
613#else
614 CALL cp_abort(__location__, &
615 "CP2K was compiled with no SMEAGOL support.")
616#endif
617 END IF
618
619 ! CPASSERT
620 IF (debug_this_module) THEN
621 cpassert(iproc >= 0 .AND. iproc < nprocs)
622 IF (n_packed_elements_per_proc(iproc) + nrows_local > &
623 siesta_struct%nelements_per_proc(iproc, nelements_dbcsr_send)) THEN
624 CALL cp__a(__short_file__, __line__)
625 END IF
626 END IF
627
628 offset_send_mepos = offset_per_proc(iproc) + n_packed_elements_per_proc(iproc)
629 send_buffer(offset_send_mepos + 1:offset_send_mepos + nrows_local) = sm_block_merged(1:nrows_local, icol_local)
630
631 n_packed_elements_per_proc(iproc) = n_packed_elements_per_proc(iproc) + nrows_local
632 END DO
633 END IF
634
635 IF (n_image_ind > 1) THEN
636 DEALLOCATE (sm_block_merged)
637 END IF
638 END IF
639
640 image_ind_offset = image_ind_offset + siesta_struct%n_dbcsr_cell_images_to_merge(inode_proc)
641 END DO
642
643 IF (debug_this_module) THEN
644 DO iproc = 0, nprocs - 1
645 IF (n_packed_elements_per_proc(iproc) /= siesta_struct%nelements_per_proc(iproc, nelements_dbcsr_send)) THEN
646 CALL cp__a(__short_file__, __line__)
647 END IF
648 END DO
649 END IF
650
651 ! send packed data to other parallel processes
652 DO irequest = nrequests_recv + 1, nrequests_total
653 CALL para_env%isend(send_buffer(offset_per_request(irequest) + 1: &
654 offset_per_request(irequest) + nelements_per_request(irequest)), &
655 peer_rank(irequest), requests(irequest), request_tag(irequest))
656 END DO
657
658 ! copy locally the data that stays on the same process.
659 IF (mepos > 0) THEN
660 offset_recv_mepos = sum(siesta_struct%nelements_per_proc(0:mepos - 1, nelements_dbcsr_recv))
661 ELSE
662 offset_recv_mepos = 0
663 END IF
664 offset_send_mepos = offset_per_proc(mepos)
665
666 IF (debug_this_module) THEN
667 IF (n_packed_elements_per_proc(mepos) /= siesta_struct%nelements_per_proc(mepos, nelements_dbcsr_recv)) THEN
668 CALL cp__a(__short_file__, __line__)
669 END IF
670 END IF
671
672 IF (n_packed_elements_per_proc(mepos) > 0) THEN
673 recv_buffer(offset_recv_mepos + 1:offset_recv_mepos + n_packed_elements_per_proc(mepos)) = &
674 send_buffer(offset_send_mepos + 1:offset_send_mepos + n_packed_elements_per_proc(mepos))
675 END IF
676 END IF
677
678 IF (nrequests_total > 0) THEN
679 ! wait for pending isend/irecv requests
680 CALL mp_waitall(requests)
681 DEALLOCATE (nelements_per_request, offset_per_request, peer_rank, requests, request_tag)
682 END IF
683
684 ! release send buffers
685 IF (ALLOCATED(send_buffer)) DEALLOCATE (send_buffer)
686 DEALLOCATE (offset_per_proc, n_packed_elements_per_proc)
687
688 ! non-zero matrix elements in 'recv_buffer' array are grouped by their source MPI rank, local row index, and column index (in this order).
689 ! Reorder the matrix elements ('reorder_recv_buffer') so they are grouped by their local row index, source MPI rank, and column index.
690 ! (column indices are in ascending order within each (row index, source MPI rank) block).
691 ! The array 'packed_index' maps matrix elements between this intermediate order and the SIESTA order (local row index, then column index)
692 IF (n_nonzero_elements_siesta > 0) THEN
693 ALLOCATE (reorder_recv_buffer(n_nonzero_elements_siesta))
694 ALLOCATE (next_nonzero_element_offset(siesta_struct%nrows))
695 next_nonzero_element_offset(:) = 0
696 offset_recv_mepos = 0
697
698 DO inode = 1, SIZE(siesta_struct%nl_repl, 2)
699 irow_blk = siesta_struct%nl_repl(neighbor_list_iatom_index, inode)
700 icol_blk = siesta_struct%nl_repl(neighbor_list_jatom_index, inode)
701 cpassert(irow_blk <= icol_blk .OR. (.NOT. symmetric))
702 image_siesta = siesta_struct%nl_repl(neighbor_list_siesta_image_index, inode)
703 image_siesta_transp = siesta_struct%nl_repl(neighbor_list_siesta_transp_image_index, inode)
704
705 nrows_local = row_blk_size(irow_blk)
706 ncols_local = col_blk_size(icol_blk)
707 first_row_minus_one = row_blk_offset(irow_blk) - 1
708 first_col_minus_one = col_blk_offset(icol_blk) - 1
709
710 ! normal block
711 IF (image_siesta > 0) THEN
712 DO irow_local = 1, nrows_local
713 IF (do_distribute) THEN
714#if defined(__SMEAGOL)
715 CALL globaltolocalorb(irow_local + first_row_minus_one, mepos, nprocs, irow_proc)
716#else
717 CALL cp_abort(__location__, &
718 "CP2K was compiled with no SMEAGOL support.")
719#endif
720 ELSE
721 IF (is_root_rank) THEN
722 irow_proc = irow_local + first_row_minus_one
723 ELSE
724 irow_proc = 0
725 END IF
726 END IF
727 IF (irow_proc > 0) THEN
728 offset_send_mepos = siesta_struct%row_offset(irow_proc) + next_nonzero_element_offset(irow_proc)
729 reorder_recv_buffer(offset_send_mepos + 1:offset_send_mepos + ncols_local) = &
730 recv_buffer(offset_recv_mepos + 1:offset_recv_mepos + ncols_local)
731 offset_recv_mepos = offset_recv_mepos + ncols_local
732 next_nonzero_element_offset(irow_proc) = next_nonzero_element_offset(irow_proc) + ncols_local
733 END IF
734 END DO
735 END IF
736
737 ! transposed block
738 IF (image_siesta_transp > 0) THEN
739 DO icol_local = 1, ncols_local
740 IF (do_distribute) THEN
741#if defined(__SMEAGOL)
742 CALL globaltolocalorb(icol_local + first_col_minus_one, mepos, nprocs, irow_proc)
743#else
744 CALL cp_abort(__location__, &
745 "CP2K was compiled with no SMEAGOL support.")
746#endif
747 ELSE
748 IF (is_root_rank) THEN
749 irow_proc = icol_local + first_col_minus_one
750 ELSE
751 irow_proc = 0
752 END IF
753 END IF
754 IF (irow_proc > 0) THEN
755 offset_send_mepos = siesta_struct%row_offset(irow_proc) + next_nonzero_element_offset(irow_proc)
756 reorder_recv_buffer(offset_send_mepos + 1:offset_send_mepos + nrows_local) = &
757 recv_buffer(offset_recv_mepos + 1:offset_recv_mepos + nrows_local)
758 offset_recv_mepos = offset_recv_mepos + nrows_local
759 next_nonzero_element_offset(irow_proc) = next_nonzero_element_offset(irow_proc) + nrows_local
760 END IF
761 END DO
762 END IF
763 END DO
764
765 IF (debug_this_module) THEN
766 DO irow_local = 1, siesta_struct%nrows
767 IF (siesta_struct%n_nonzero_cols(irow_local) /= next_nonzero_element_offset(irow_local)) THEN
768 CALL cp__a(__short_file__, __line__)
769 END IF
770 END DO
771 END IF
772
773 DEALLOCATE (next_nonzero_element_offset)
774 DEALLOCATE (recv_buffer)
775
776 ! Map non-zero matrix elements between the intermediate order and the SIESTA order
777 DO irow_local = 1, siesta_struct%nrows
778 offset_recv_mepos = siesta_struct%row_offset(irow_local)
779 DO icol_local = 1, siesta_struct%n_nonzero_cols(irow_local)
780 matrix_siesta(offset_recv_mepos + icol_local) = &
781 reorder_recv_buffer(offset_recv_mepos + siesta_struct%packed_index(offset_recv_mepos + icol_local))
782 END DO
783 END DO
784 DEALLOCATE (reorder_recv_buffer)
785 END IF
786
787 CALL timestop(handle)
788 END SUBROUTINE convert_dbcsr_to_distributed_siesta
789
790! **************************************************************************************************
791!> \brief Convert matrix from sparse SIESTA format to DBCSR format.
792!> \param matrix_dbcsr_kp DBCSR matrix [out]. The argument is declared as INTENT(in) because the pointers to
793!> the DBCSR matrices remain intact; the matrix elements themselves, however, are updated
794!> \param matrix_siesta matrix in SIESTA format [in]
795!> \param siesta_struct structure to map matrix blocks between formats
796!> \param para_env MPI parallel environment
797! **************************************************************************************************
798 SUBROUTINE convert_distributed_siesta_to_dbcsr(matrix_dbcsr_kp, matrix_siesta, siesta_struct, para_env)
799 TYPE(dbcsr_p_type), DIMENSION(:), INTENT(in) :: matrix_dbcsr_kp
800 REAL(kind=dp), DIMENSION(:), INTENT(in) :: matrix_siesta
801 TYPE(siesta_distrib_csc_struct_type), INTENT(in) :: siesta_struct
802 TYPE(mp_para_env_type), INTENT(in), POINTER :: para_env
803
804 CHARACTER(len=*), PARAMETER :: routinen = 'convert_distributed_siesta_to_dbcsr'
805
806 INTEGER :: first_col_minus_one, first_row_minus_one, handle, icol_blk, icol_local, &
807 image_dbcsr, image_siesta, image_siesta_transp, inode, inode_proc, iproc, irequest, &
808 irow_blk, irow_local, irow_proc, mepos, n_image_ind, ncols_blk, ncols_local, nnodes_proc, &
809 node_offset, nprocs, nrequests_recv, nrequests_total, nrows_blk, nrows_local
810 INTEGER(kind=int_8) :: n_nonzero_elements_dbcsr, &
811 n_nonzero_elements_siesta, &
812 offset_recv_mepos, offset_send_mepos
813 INTEGER(kind=int_8), ALLOCATABLE, DIMENSION(:) :: n_packed_elements_per_proc, &
814 nelements_per_request, &
815 offset_per_proc, offset_per_request
816 INTEGER, ALLOCATABLE, DIMENSION(:) :: next_nonzero_element_offset, peer_rank, &
817 request_tag
818 INTEGER, DIMENSION(:), POINTER :: col_blk_offset, col_blk_size, &
819 row_blk_offset, row_blk_size
820 LOGICAL :: do_distribute, found, is_root_rank, &
821 symmetric
822 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: recv_buffer, reorder_send_buffer, &
823 send_buffer
824 REAL(kind=dp), DIMENSION(:, :), POINTER :: sm_block
825 TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: requests
826
827 CALL timeset(routinen, handle)
828 DO image_dbcsr = 1, SIZE(matrix_dbcsr_kp)
829 CALL dbcsr_set(matrix_dbcsr_kp(image_dbcsr)%matrix, 0.0_dp)
830 END DO
831
832 mepos = para_env%mepos
833 nprocs = para_env%num_pe
834 do_distribute = siesta_struct%gather_root < 0
835 is_root_rank = siesta_struct%gather_root == mepos
836
837 CALL dbcsr_get_info(matrix=matrix_dbcsr_kp(1)%matrix, &
838 nblkrows_total=nrows_blk, nblkcols_total=ncols_blk, &
839 row_blk_size=row_blk_size, col_blk_size=col_blk_size, &
840 row_blk_offset=row_blk_offset, col_blk_offset=col_blk_offset)
841 symmetric = siesta_struct%symmetric
842
843 n_nonzero_elements_siesta = sum(siesta_struct%nelements_per_proc(:, nelements_dbcsr_recv))
844 n_nonzero_elements_dbcsr = sum(siesta_struct%nelements_per_proc(:, nelements_dbcsr_send))
845
846 nrequests_recv = get_number_of_mpi_sendrecv_requests(mepos, siesta_struct%nelements_per_proc(:, nelements_dbcsr_send), &
847 max_mpi_packet_size_dp)
848 nrequests_total = get_number_of_mpi_sendrecv_requests(mepos, siesta_struct%nelements_per_proc(:, nelements_dbcsr_recv), &
849 max_mpi_packet_size_dp) + nrequests_recv
850 IF (nrequests_total > 0) THEN
851 ALLOCATE (requests(nrequests_total))
852 ALLOCATE (peer_rank(nrequests_total), request_tag(nrequests_total))
853 ALLOCATE (offset_per_request(nrequests_total), nelements_per_request(nrequests_total))
854 !requests(:) = mp_request_null
855 IF (nrequests_recv > 0) THEN
856 CALL assign_nonzero_elements_to_requests(offset_per_request(1:nrequests_recv), &
857 nelements_per_request(1:nrequests_recv), &
858 peer_rank(1:nrequests_recv), &
859 request_tag(1:nrequests_recv), &
860 mepos, &
861 siesta_struct%nelements_per_proc(:, nelements_dbcsr_send), &
862 max_mpi_packet_size_dp)
863 END IF
864 IF (nrequests_total > nrequests_recv) THEN
865 CALL assign_nonzero_elements_to_requests(offset_per_request(nrequests_recv + 1:nrequests_total), &
866 nelements_per_request(nrequests_recv + 1:nrequests_total), &
867 peer_rank(nrequests_recv + 1:nrequests_total), &
868 request_tag(nrequests_recv + 1:nrequests_total), &
869 mepos, &
870 siesta_struct%nelements_per_proc(:, nelements_dbcsr_recv), &
871 max_mpi_packet_size_dp)
872 END IF
873 END IF
874
875 IF (n_nonzero_elements_dbcsr > 0) THEN
876 ALLOCATE (recv_buffer(n_nonzero_elements_dbcsr))
877 END IF
878 DO irequest = 1, nrequests_recv
879 CALL para_env%irecv(recv_buffer(offset_per_request(irequest) + 1: &
880 offset_per_request(irequest) + nelements_per_request(irequest)), &
881 peer_rank(irequest), requests(irequest), request_tag(irequest))
882 END DO
883
884 ALLOCATE (offset_per_proc(0:nprocs - 1), n_packed_elements_per_proc(0:nprocs - 1))
885 offset_per_proc(0) = 0
886 DO iproc = 1, nprocs - 1
887 offset_per_proc(iproc) = offset_per_proc(iproc - 1) + siesta_struct%nelements_per_proc(iproc - 1, nelements_dbcsr_send)
888 END DO
889 n_packed_elements_per_proc(:) = 0
890
891 IF (mepos > 0) THEN
892 node_offset = sum(siesta_struct%nnodes_per_proc(0:mepos - 1))
893 ELSE
894 node_offset = 0
895 END IF
896 nnodes_proc = siesta_struct%nnodes_per_proc(mepos)
897
898 IF (n_nonzero_elements_siesta > 0) THEN
899 ALLOCATE (send_buffer(n_nonzero_elements_siesta))
900
901 ALLOCATE (reorder_send_buffer(n_nonzero_elements_siesta))
902 DO irow_local = 1, siesta_struct%nrows
903 offset_send_mepos = siesta_struct%row_offset(irow_local)
904 DO icol_local = 1, siesta_struct%n_nonzero_cols(irow_local)
905 reorder_send_buffer(offset_send_mepos + siesta_struct%packed_index(offset_send_mepos + icol_local)) = &
906 matrix_siesta(offset_send_mepos + icol_local)
907 END DO
908 END DO
909
910 ALLOCATE (next_nonzero_element_offset(siesta_struct%nrows))
911 next_nonzero_element_offset(:) = 0
912 offset_send_mepos = 0
913
914 DO inode = 1, SIZE(siesta_struct%nl_repl, 2)
915 irow_blk = siesta_struct%nl_repl(neighbor_list_iatom_index, inode)
916 icol_blk = siesta_struct%nl_repl(neighbor_list_jatom_index, inode)
917 cpassert(irow_blk <= icol_blk .OR. (.NOT. symmetric))
918 image_siesta = siesta_struct%nl_repl(neighbor_list_siesta_image_index, inode)
919 image_siesta_transp = siesta_struct%nl_repl(neighbor_list_siesta_transp_image_index, inode)
920
921 nrows_local = row_blk_size(irow_blk)
922 ncols_local = col_blk_size(icol_blk)
923 first_row_minus_one = row_blk_offset(irow_blk) - 1
924 first_col_minus_one = col_blk_offset(icol_blk) - 1
925
926 IF (image_siesta > 0) THEN
927 DO irow_local = 1, nrows_local
928 IF (do_distribute) THEN
929#if defined(__SMEAGOL)
930 CALL globaltolocalorb(irow_local + first_row_minus_one, mepos, nprocs, irow_proc)
931#else
932 CALL cp_abort(__location__, &
933 "CP2K was compiled with no SMEAGOL support.")
934#endif
935 ELSE
936 IF (is_root_rank) THEN
937 irow_proc = irow_local + first_row_minus_one
938 ELSE
939 irow_proc = 0
940 END IF
941 END IF
942 IF (irow_proc > 0) THEN
943 offset_recv_mepos = siesta_struct%row_offset(irow_proc) + next_nonzero_element_offset(irow_proc)
944 send_buffer(offset_send_mepos + 1:offset_send_mepos + ncols_local) = &
945 reorder_send_buffer(offset_recv_mepos + 1:offset_recv_mepos + ncols_local)
946 offset_send_mepos = offset_send_mepos + ncols_local
947 next_nonzero_element_offset(irow_proc) = next_nonzero_element_offset(irow_proc) + ncols_local
948 END IF
949 END DO
950 END IF
951
952 ! transposed block
953 IF (image_siesta_transp > 0) THEN
954 DO icol_local = 1, ncols_local
955 IF (do_distribute) THEN
956#if defined(__SMEAGOL)
957 CALL globaltolocalorb(icol_local + first_col_minus_one, mepos, nprocs, irow_proc)
958#else
959 CALL cp_abort(__location__, &
960 "CP2K was compiled with no SMEAGOL support.")
961#endif
962 ELSE
963 IF (is_root_rank) THEN
964 irow_proc = icol_local + first_col_minus_one
965 ELSE
966 irow_proc = 0
967 END IF
968 END IF
969 IF (irow_proc > 0) THEN
970 offset_recv_mepos = siesta_struct%row_offset(irow_proc) + next_nonzero_element_offset(irow_proc)
971 send_buffer(offset_send_mepos + 1:offset_send_mepos + nrows_local) = &
972 reorder_send_buffer(offset_recv_mepos + 1:offset_recv_mepos + nrows_local)
973 offset_send_mepos = offset_send_mepos + nrows_local
974 next_nonzero_element_offset(irow_proc) = next_nonzero_element_offset(irow_proc) + nrows_local
975 END IF
976 END DO
977 END IF
978 END DO
979
980 IF (debug_this_module) THEN
981 DO irow_local = 1, siesta_struct%nrows
982 IF (siesta_struct%n_nonzero_cols(irow_local) /= next_nonzero_element_offset(irow_local)) THEN
983 CALL cp__a(__short_file__, __line__)
984 END IF
985 END DO
986 END IF
987
988 DEALLOCATE (next_nonzero_element_offset)
989 DEALLOCATE (reorder_send_buffer)
990
991 DO irequest = nrequests_recv + 1, nrequests_total
992 CALL para_env%isend(send_buffer(offset_per_request(irequest) + 1: &
993 offset_per_request(irequest) + nelements_per_request(irequest)), &
994 peer_rank(irequest), requests(irequest), request_tag(irequest))
995 END DO
996
997 ! copy locally the data that stays on the same process.
998 IF (mepos > 0) THEN
999 offset_send_mepos = sum(siesta_struct%nelements_per_proc(0:mepos - 1, nelements_dbcsr_recv))
1000 ELSE
1001 offset_send_mepos = 0
1002 END IF
1003 offset_recv_mepos = offset_per_proc(mepos)
1004
1005 IF (debug_this_module) THEN
1006 IF (siesta_struct%nelements_per_proc(mepos, nelements_dbcsr_recv) /= &
1007 siesta_struct%nelements_per_proc(mepos, nelements_dbcsr_send)) THEN
1008 CALL cp__a(__short_file__, __line__)
1009 END IF
1010 END IF
1011
1012 IF (siesta_struct%nelements_per_proc(mepos, nelements_dbcsr_send) > 0) THEN
1013 recv_buffer(offset_recv_mepos + 1:offset_recv_mepos + siesta_struct%nelements_per_proc(mepos, nelements_dbcsr_send)) = &
1014 send_buffer(offset_send_mepos + 1:offset_send_mepos + siesta_struct%nelements_per_proc(mepos, nelements_dbcsr_send))
1015 END IF
1016 END IF
1017
1018 IF (nrequests_total > 0) THEN
1019 ! wait for pending isend/irecv requests
1020 CALL mp_waitall(requests)
1021 DEALLOCATE (nelements_per_request, offset_per_request, peer_rank, requests, request_tag)
1022 END IF
1023
1024 IF (ALLOCATED(send_buffer)) DEALLOCATE (send_buffer)
1025
1026 iproc = siesta_struct%gather_root ! if do_distribute == .FALSE., collect matrix elements from MPI process with rank gather_root
1027 IF (n_nonzero_elements_dbcsr > 0) THEN
1028 DO inode_proc = 1, nnodes_proc
1029 n_image_ind = siesta_struct%n_dbcsr_cell_images_to_merge(inode_proc)
1030 IF (n_image_ind > 0) THEN
1031 inode = node_offset + inode_proc
1032
1033 irow_blk = siesta_struct%nl_repl(neighbor_list_iatom_index, inode)
1034 icol_blk = siesta_struct%nl_repl(neighbor_list_jatom_index, inode)
1035 image_dbcsr = siesta_struct%nl_repl(neighbor_list_dbcsr_image_index, inode)
1036 cpassert(irow_blk <= icol_blk .OR. (.NOT. symmetric))
1037 image_siesta = siesta_struct%nl_repl(neighbor_list_siesta_image_index, inode)
1038 image_siesta_transp = siesta_struct%nl_repl(neighbor_list_siesta_transp_image_index, inode)
1039
1040 nrows_local = row_blk_size(irow_blk)
1041 ncols_local = col_blk_size(icol_blk)
1042 first_row_minus_one = row_blk_offset(irow_blk) - 1
1043 first_col_minus_one = col_blk_offset(icol_blk) - 1
1044
1045 CALL dbcsr_get_block_p(matrix=matrix_dbcsr_kp(image_dbcsr)%matrix, &
1046 row=irow_blk, col=icol_blk, block=sm_block, found=found)
1047 cpassert(found)
1048
1049 IF (image_siesta > 0) THEN
1050 DO irow_local = 1, nrows_local
1051 IF (do_distribute) THEN
1052#if defined(__SMEAGOL)
1053 CALL whichnodeorb(irow_local + first_row_minus_one, nprocs, iproc) ! iproc_orb
1054#else
1055 CALL cp_abort(__location__, &
1056 "CP2K was compiled with no SMEAGOL support.")
1057#endif
1058 END IF
1059 ! CPASSERT
1060 IF (debug_this_module) THEN
1061 cpassert(iproc >= 0 .AND. iproc < nprocs)
1062 IF (n_packed_elements_per_proc(iproc) + ncols_local > &
1063 siesta_struct%nelements_per_proc(iproc, nelements_dbcsr_send)) THEN
1064 CALL cp__a(__short_file__, __line__)
1065 END IF
1066 END IF
1067
1068 offset_recv_mepos = offset_per_proc(iproc) + n_packed_elements_per_proc(iproc)
1069 sm_block(irow_local, 1:ncols_local) = recv_buffer(offset_recv_mepos + 1:offset_recv_mepos + ncols_local)
1070
1071 n_packed_elements_per_proc(iproc) = n_packed_elements_per_proc(iproc) + ncols_local
1072 END DO
1073 END IF
1074
1075 ! transposed block
1076 IF (image_siesta_transp > 0) THEN
1077 DO icol_local = 1, ncols_local
1078 IF (do_distribute) THEN
1079#if defined(__SMEAGOL)
1080 CALL whichnodeorb(icol_local + first_col_minus_one, nprocs, iproc) ! iproc_orb
1081#else
1082 CALL cp_abort(__location__, &
1083 "CP2K was compiled with no SMEAGOL support.")
1084#endif
1085 END IF
1086 ! CPASSERT
1087 IF (debug_this_module) THEN
1088 cpassert(iproc >= 0 .AND. iproc < nprocs)
1089 IF (n_packed_elements_per_proc(iproc) + nrows_local > &
1090 siesta_struct%nelements_per_proc(iproc, nelements_dbcsr_send)) THEN
1091 CALL cp__a(__short_file__, __line__)
1092 END IF
1093 END IF
1094
1095 offset_recv_mepos = offset_per_proc(iproc) + n_packed_elements_per_proc(iproc)
1096 sm_block(1:nrows_local, icol_local) = recv_buffer(offset_recv_mepos + 1:offset_recv_mepos + nrows_local)
1097
1098 n_packed_elements_per_proc(iproc) = n_packed_elements_per_proc(iproc) + nrows_local
1099 END DO
1100 END IF
1101 END IF
1102 END DO
1103
1104 IF (debug_this_module) THEN
1105 DO iproc = 0, nprocs - 1
1106 IF (n_packed_elements_per_proc(iproc) /= siesta_struct%nelements_per_proc(iproc, nelements_dbcsr_send)) THEN
1107 CALL cp__a(__short_file__, __line__)
1108 END IF
1109 END DO
1110 END IF
1111
1112 DEALLOCATE (recv_buffer)
1113 END IF
1114
1115 DEALLOCATE (offset_per_proc, n_packed_elements_per_proc)
1116
1117 CALL timestop(handle)
1118 END SUBROUTINE convert_distributed_siesta_to_dbcsr
1119
1120 ! *** PRIVATE SUBROUTINES ***
1121
1122! **************************************************************************************************
1123!> \brief Computes number of neighbour-list nodes on the current parallel process.
1124!> \param nnodes_local number of nodes [out]
1125!> \param max_ijk_cell_image_local largest index of cell images along i, j and k cell vectors
1126!> on this parallel process [out]
1127!> \param max_ijk_cell_image largest index of cell images along i, j and k cell vectors [inout]
1128!> \param sab_nl pair-wise neighbour list [in]
1129!> \param para_env MPI parallel environment [in]
1130!> \param particle_coords list of atomic coordinates subject to periodic boundary conditions [in]
1131!> \param cell simulation unit cell [in]
1132! **************************************************************************************************
1133 SUBROUTINE get_nnodes_local(nnodes_local, max_ijk_cell_image_local, max_ijk_cell_image, sab_nl, para_env, particle_coords, cell)
1134 INTEGER, INTENT(out) :: nnodes_local
1135 INTEGER, DIMENSION(3), INTENT(out) :: max_ijk_cell_image_local
1136 INTEGER, DIMENSION(3), INTENT(inout) :: max_ijk_cell_image
1137 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
1138 INTENT(in), POINTER :: sab_nl
1139 TYPE(mp_para_env_type), INTENT(in), POINTER :: para_env
1140 REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
1141 INTENT(in) :: particle_coords
1142 TYPE(cell_type), INTENT(in), POINTER :: cell
1143
1144 CHARACTER(len=*), PARAMETER :: routinen = 'get_nnodes_local'
1145
1146 INTEGER :: handle, iatom, icoord, jatom
1147 INTEGER, DIMENSION(3) :: cell_ijk, max_ijk_cell_image_tmp
1148 LOGICAL :: update_ncells
1149 REAL(kind=dp), DIMENSION(3) :: r_ij
1150 TYPE(neighbor_list_iterator_p_type), &
1151 DIMENSION(:), POINTER :: nl_iterator
1152
1153 CALL timeset(routinen, handle)
1154
1155 update_ncells = .false.
1156 DO icoord = 1, 3 ! x, y, z
1157 IF (max_ijk_cell_image(icoord) >= 0) THEN
1158 max_ijk_cell_image_tmp(icoord) = max_ijk_cell_image(icoord)
1159 ELSE
1160 max_ijk_cell_image_tmp(icoord) = huge(max_ijk_cell_image_tmp(icoord))
1161 update_ncells = .true.
1162 END IF
1163 END DO
1164
1165 nnodes_local = 0
1166 max_ijk_cell_image_local(:) = 0
1167
1168 CALL neighbor_list_iterator_create(nl_iterator, sab_nl)
1169 DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
1170 CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, r=r_ij)
1171 CALL get_negf_cell_ijk(cell_ijk, r_ij, r_i=particle_coords(1:3, iatom), r_j=particle_coords(1:3, jatom), cell=cell)
1172 cell_ijk(1:3) = abs(cell_ijk(1:3))
1173
1174 IF (cell_ijk(1) <= max_ijk_cell_image_tmp(1) .AND. cell_ijk(2) <= max_ijk_cell_image_tmp(2) .AND. &
1175 cell_ijk(3) <= max_ijk_cell_image_tmp(3)) THEN
1176 nnodes_local = nnodes_local + 1
1177 max_ijk_cell_image_local(1:3) = max(max_ijk_cell_image_local(1:3), cell_ijk(1:3))
1178 END IF
1179 END DO
1180 CALL neighbor_list_iterator_release(nl_iterator)
1181
1182 IF (update_ncells) THEN
1183 max_ijk_cell_image_tmp(1:3) = max_ijk_cell_image_local(1:3)
1184 CALL para_env%max(max_ijk_cell_image_tmp)
1185 DO icoord = 1, 3
1186 IF (max_ijk_cell_image(icoord) < 0) THEN
1187 max_ijk_cell_image(icoord) = max_ijk_cell_image_tmp(icoord)
1188 END IF
1189 END DO
1190 END IF
1191
1192 CALL timestop(handle)
1193 END SUBROUTINE get_nnodes_local
1194
1195! **************************************************************************************************
1196!> \brief Construct the list of neighbour-list nodes on the current parallel process.
1197!> \param nl_local non-merged local neighbour-list nodes [out]
1198!> \param max_ijk_cell_image_local largest index of cell images along i, j and k cell vectors
1199!> on this parallel process [in]
1200!> \param max_ijk_cell_image largest index of cell images along i, j and k cell vectors [in]
1201!> \param sab_nl pair-wise neighbour list [in]
1202!> \param particle_coords list of atomic coordinates subject to periodic boundary conditions [in]
1203!> \param cell simulation unit cell [in]
1204!> \param cell_to_index array to convert 3-D cell indices to 1-D DBCSR image indices
1205!> \param do_merge merge cell images along transport direction [in]
1206!> \param node_merged_indices node-related indices. Nodes with identical indices will be merged [out]
1207!> \param k_cells list of cell image indices along the transport direction. Nodes to
1208!> be merged have the same 'node_merged_indices' but different 'k_cells' indices [out]
1209! **************************************************************************************************
1210 SUBROUTINE get_nl_nodes_local(nl_local, max_ijk_cell_image_local, max_ijk_cell_image, sab_nl, particle_coords, &
1211 cell, cell_to_index, do_merge, node_merged_indices, k_cells)
1212 INTEGER, DIMENSION(:, :), INTENT(out) :: nl_local
1213 INTEGER, DIMENSION(3), INTENT(in) :: max_ijk_cell_image_local, &
1214 max_ijk_cell_image
1215 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
1216 INTENT(in), POINTER :: sab_nl
1217 REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
1218 INTENT(in) :: particle_coords
1219 TYPE(cell_type), INTENT(in), POINTER :: cell
1220 INTEGER, DIMENSION(:, :, :), INTENT(in), POINTER :: cell_to_index
1221 LOGICAL, INTENT(in) :: do_merge
1222 INTEGER(kind=int_8), DIMENSION(:), INTENT(out) :: node_merged_indices
1223 INTEGER, DIMENSION(:), INTENT(out) :: k_cells
1224
1225 CHARACTER(len=*), PARAMETER :: routinen = 'get_nl_nodes_local'
1226
1227 INTEGER :: handle, iatom, icol_blk, image, inode, &
1228 irow_blk, jatom, natoms
1229 INTEGER(kind=int_8), DIMENSION(2) :: ncells_siesta_local
1230 INTEGER, DIMENSION(2) :: ncells_siesta
1231 INTEGER, DIMENSION(3) :: cell_ijk_abs, cell_ijk_dbcsr, &
1232 cell_ijk_siesta
1233 LOGICAL :: do_symmetric
1234 REAL(kind=dp), DIMENSION(3) :: r_ij
1235 TYPE(neighbor_list_iterator_p_type), &
1236 DIMENSION(:), POINTER :: nl_iterator
1237
1238 CALL timeset(routinen, handle)
1239 ! natoms is only used to compute a merged 1-D index for each DBCSR block
1240 natoms = SIZE(particle_coords, 2)
1241 CALL get_neighbor_list_set_p(neighbor_list_sets=sab_nl, symmetric=do_symmetric)
1242 ncells_siesta(1:2) = 2*max_ijk_cell_image(1:2) + 1
1243
1244 ncells_siesta_local(1:2) = int(2*max_ijk_cell_image_local(1:2) + 1, kind=int_8)
1245
1246 inode = 0
1247 CALL neighbor_list_iterator_create(nl_iterator, sab_nl)
1248 DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
1249 CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, cell=cell_ijk_dbcsr, r=r_ij)
1250 CALL get_negf_cell_ijk(cell_ijk_abs, r_ij, r_i=particle_coords(1:3, iatom), r_j=particle_coords(1:3, jatom), cell=cell)
1251
1252 IF (abs(cell_ijk_abs(1)) <= max_ijk_cell_image(1) .AND. abs(cell_ijk_abs(2)) <= max_ijk_cell_image(2) .AND. &
1253 abs(cell_ijk_abs(3)) <= max_ijk_cell_image(3)) THEN
1254
1255 inode = inode + 1
1256
1257 image = get_index_by_cell(cell_ijk_dbcsr, cell_to_index)
1258 cpassert(image > 0)
1259 nl_local(neighbor_list_dbcsr_image_index, inode) = image
1260
1261 IF (do_symmetric .AND. iatom > jatom) THEN
1262 irow_blk = jatom
1263 icol_blk = iatom
1264 cell_ijk_abs(1:3) = -cell_ijk_abs(1:3)
1265 ELSE
1266 irow_blk = iatom
1267 icol_blk = jatom
1268 END IF
1269
1270 nl_local(neighbor_list_iatom_index, inode) = irow_blk
1271 nl_local(neighbor_list_jatom_index, inode) = icol_blk
1272
1273 cell_ijk_siesta(1:3) = index_in_canonical_enumeration(cell_ijk_abs(1:3)) ! absolute -> SIESTA
1274
1275 IF (do_merge) THEN
1276 node_merged_indices(inode) = (((cell_ijk_siesta(2) - 1)*ncells_siesta_local(1) + cell_ijk_siesta(1) - 1)* &
1277 int(natoms, kind=int_8) + icol_blk - 1)*int(natoms, kind=int_8) + int(irow_blk - 1, kind=int_8)
1278 image = cell_ijk_siesta(1) + ncells_siesta(1)*(cell_ijk_siesta(2) - 1)
1279 ELSE
1280 node_merged_indices(inode) = ((((cell_ijk_siesta(3) - 1)*ncells_siesta_local(2) + &
1281 cell_ijk_siesta(2) - 1)*ncells_siesta_local(1) + cell_ijk_siesta(1) - 1)* &
1282 int(natoms, kind=int_8) + icol_blk - 1)*int(natoms, kind=int_8) + int(irow_blk - 1, kind=int_8)
1283 image = cell_ijk_siesta(1) + ncells_siesta(1)*(cell_ijk_siesta(2) - 1 + ncells_siesta(2)*(cell_ijk_siesta(3) - 1))
1284 END IF
1285 k_cells(inode) = cell_ijk_siesta(3)
1286 nl_local(neighbor_list_siesta_image_index, inode) = image
1287
1288 IF (do_symmetric .AND. irow_blk /= icol_blk) THEN
1289 cell_ijk_abs(1:3) = -cell_ijk_abs(1:3)
1290 cell_ijk_siesta(1:3) = index_in_canonical_enumeration(cell_ijk_abs(1:3)) ! absolute -> SIESTA
1291 IF (do_merge) cell_ijk_siesta(3) = 1
1292 nl_local(neighbor_list_siesta_transp_image_index, inode) = &
1293 cell_ijk_siesta(1) + ncells_siesta(1)*(cell_ijk_siesta(2) - 1 + ncells_siesta(2)*(cell_ijk_siesta(3) - 1))
1294 ELSE
1295 nl_local(neighbor_list_siesta_transp_image_index, inode) = 0
1296 END IF
1297 END IF
1298 END DO
1299 CALL neighbor_list_iterator_release(nl_iterator)
1300
1301 IF (debug_this_module) THEN
1302 cpassert(SIZE(nl_local, 2) == inode)
1303 END IF
1304
1305 CALL timestop(handle)
1306 END SUBROUTINE get_nl_nodes_local
1307
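! Worked example of the SIESTA image index composition above (illustration only).
! Assume max_ijk_cell_image = (/1, 1, 1/), so that ncells_siesta(1:2) = (/3, 3/),
! and a node with cell_ijk_abs = (/0, 1, -1/):
!   cell_ijk_siesta = index_in_canonical_enumeration(cell_ijk_abs) = (/1, 2, 3/)
!   do_merge = .FALSE. : image = 1 + 3*(2 - 1 + 3*(3 - 1)) = 22
!   do_merge = .TRUE.  : image = 1 + 3*(2 - 1) = 4, while k_cells(inode) = 3 keeps
!                        track of the cell image index along the transport direction.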
1308! **************************************************************************************************
1309!> \brief Replicate (and optionally merge) pair-wise neighbour list.
1310!> \param repl_nl replicated neighbour list. It needs to be deallocated elsewhere [allocated]
1311!> \param n_dbcsr_cell_images_to_merge number of merged blocks per neighbour-list node [allocated]
1312!> \param dbcsr_cell_image_to_merge list of DBCSR image indices to merge [allocated]
1313!> \param nnodes_per_proc number of merged nodes on each parallel process [out]
1314!> \param max_ijk_cell_image largest index of cell images along i, j and k cell vectors [inout]
1315!> \param sab_nl pair-wise neighbour list [in]
1316!> \param para_env MPI parallel environment [in]
1317!> \param particle_coords list of atomic coordinates subject to periodic boundary conditions [in]
1318!> \param cell simulation unit cell [in]
1319!> \param cell_to_index array to convert 3-D cell indices to 1-D DBCSR image indices [in]
1320!> \param do_merge merge cell images along transport direction [in]
1321! **************************************************************************************************
1322 SUBROUTINE replicate_neighbour_list(repl_nl, n_dbcsr_cell_images_to_merge, dbcsr_cell_image_to_merge, &
1323 nnodes_per_proc, max_ijk_cell_image, sab_nl, para_env, particle_coords, &
1324 cell, cell_to_index, do_merge)
1325 INTEGER, ALLOCATABLE, DIMENSION(:, :), &
1326 INTENT(inout) :: repl_nl
1327 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(inout) :: n_dbcsr_cell_images_to_merge, &
1328 dbcsr_cell_image_to_merge
1329 INTEGER, DIMENSION(0:), INTENT(out) :: nnodes_per_proc
1330 INTEGER, DIMENSION(3), INTENT(inout) :: max_ijk_cell_image
1331 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
1332 INTENT(in), POINTER :: sab_nl
1333 TYPE(mp_para_env_type), INTENT(in), POINTER :: para_env
1334 REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
1335 INTENT(in) :: particle_coords
1336 TYPE(cell_type), INTENT(in), POINTER :: cell
1337 INTEGER, DIMENSION(:, :, :), INTENT(in), POINTER :: cell_to_index
1338 LOGICAL, INTENT(in) :: do_merge
1339
1340 CHARACTER(len=*), PARAMETER :: routinen = 'replicate_neighbour_list'
1341
1342 INTEGER :: handle, inode, iproc, kcell_closest, &
1343 nnodes_local, nnodes_merged, &
1344 nnodes_repl, offset_inode
1345 INTEGER(kind=int_8), ALLOCATABLE, DIMENSION(:) :: node_merged_indices
1346 INTEGER, ALLOCATABLE, DIMENSION(:) :: inodes_orig, k_cells
1347 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: nl_local
1348 INTEGER, DIMENSION(3) :: max_ijk_cell_image_local
1349
1350 CALL timeset(routinen, handle)
1351 cpassert(.NOT. ALLOCATED(repl_nl))
1352 cpassert(.NOT. ALLOCATED(n_dbcsr_cell_images_to_merge))
1353 cpassert(.NOT. ALLOCATED(dbcsr_cell_image_to_merge))
1354
1355 CALL get_nnodes_local(nnodes_local, max_ijk_cell_image_local, max_ijk_cell_image, sab_nl, para_env, particle_coords, cell)
1356
1357 nnodes_per_proc(:) = 0
1358
1359 IF (nnodes_local > 0) THEN
1360 ALLOCATE (nl_local(neighbor_list_dim1, nnodes_local))
1361 ALLOCATE (node_merged_indices(nnodes_local))
1362 ALLOCATE (k_cells(nnodes_local))
1363 CALL get_nl_nodes_local(nl_local, max_ijk_cell_image_local, max_ijk_cell_image, sab_nl, particle_coords, cell, &
1364 cell_to_index, do_merge, node_merged_indices, k_cells)
1365
1366 ALLOCATE (inodes_orig(nnodes_local))
1367 CALL sort(node_merged_indices, nnodes_local, inodes_orig)
1368
1369 nnodes_merged = 1
1370 DO inode = 2, nnodes_local
1371 IF (node_merged_indices(inode) > node_merged_indices(inode - 1)) nnodes_merged = nnodes_merged + 1
1372 END DO
1373 ELSE
1374 nnodes_merged = 0
1375 END IF
1376
1377 nnodes_per_proc(para_env%mepos) = nnodes_merged
1378 CALL para_env%sum(nnodes_per_proc)
1379
1380 nnodes_repl = sum(nnodes_per_proc(:))
1381 ALLOCATE (repl_nl(neighbor_list_dim1, nnodes_repl))
1382
1383 IF (nnodes_local > 0) THEN
1384 IF (para_env%mepos > 0) THEN
1385 offset_inode = sum(nnodes_per_proc(0:para_env%mepos - 1))
1386 ELSE
1387 offset_inode = 0
1388 END IF
1389
1390 ALLOCATE (n_dbcsr_cell_images_to_merge(nnodes_merged))
1391 ALLOCATE (dbcsr_cell_image_to_merge(nnodes_local))
1392 n_dbcsr_cell_images_to_merge(:) = 0
1393
1394 nnodes_merged = 1 !offset_inode + 1
1395 repl_nl(:, offset_inode + 1) = nl_local(:, inodes_orig(1))
1396 n_dbcsr_cell_images_to_merge(1) = 1
1397 dbcsr_cell_image_to_merge(1) = nl_local(neighbor_list_dbcsr_image_index, inodes_orig(1))
1398 kcell_closest = k_cells(inodes_orig(1))
1399 DO inode = 2, nnodes_local
1400 IF (node_merged_indices(inode) > node_merged_indices(inode - 1)) THEN
1401 nnodes_merged = nnodes_merged + 1
1402 repl_nl(:, offset_inode + nnodes_merged) = nl_local(:, inodes_orig(inode))
1403 !n_dbcsr_cell_images_to_merge(nnodes_merged) = 1
1404 kcell_closest = k_cells(inodes_orig(inode))
1405 ELSE
1406 IF (abs(k_cells(inodes_orig(inode))) < abs(kcell_closest) .OR. &
1407 (abs(k_cells(inodes_orig(inode))) == abs(kcell_closest) .AND. kcell_closest < 0)) THEN
1408 repl_nl(:, offset_inode + nnodes_merged) = nl_local(:, inodes_orig(inode))
1409 kcell_closest = k_cells(inodes_orig(inode))
1410 END IF
1411 END IF
1412 dbcsr_cell_image_to_merge(inode) = nl_local(neighbor_list_dbcsr_image_index, inodes_orig(inode))
1413 n_dbcsr_cell_images_to_merge(nnodes_merged) = n_dbcsr_cell_images_to_merge(nnodes_merged) + 1
1414 END DO
1415
1416 IF (debug_this_module) THEN
1417 cpassert(sum(n_dbcsr_cell_images_to_merge) == nnodes_local)
1418 END IF
1419
1420 DEALLOCATE (inodes_orig)
1421 DEALLOCATE (node_merged_indices, k_cells)
1422 DEALLOCATE (nl_local)
1423 END IF
1424
1425 IF (para_env%num_pe > 1) THEN
1426 offset_inode = 0
1427 DO iproc = 0, para_env%num_pe - 1
1428 IF (nnodes_per_proc(iproc) > 0) THEN
1429 CALL para_env%bcast(repl_nl(:, offset_inode + 1:offset_inode + nnodes_per_proc(iproc)), iproc)
1430 offset_inode = offset_inode + nnodes_per_proc(iproc)
1431 END IF
1432 END DO
1433 END IF
1434
1435 CALL timestop(handle)
1436 END SUBROUTINE replicate_neighbour_list
1437
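! Usage sketch (hypothetical caller; the variable names and surrounding declarations
! are illustrative assumptions, not taken from the original code):
!   INTEGER, ALLOCATABLE, DIMENSION(:, :) :: repl_nl
!   INTEGER, ALLOCATABLE, DIMENSION(:)    :: n_images_to_merge, images_to_merge, nnodes_per_proc
!   ALLOCATE (nnodes_per_proc(0:para_env%num_pe - 1))
!   CALL replicate_neighbour_list(repl_nl, n_images_to_merge, images_to_merge, &
!                                 nnodes_per_proc, max_ijk_cell_image, sab_nl, para_env, &
!                                 particle_coords, cell, cell_to_index, do_merge=.TRUE.)
!   ! ... use the replicated list, then deallocate the arrays where allocated ...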
1438! **************************************************************************************************
1439!> \brief Count number of DBCSR matrix elements that should be received from (*,nelements_dbcsr_recv)
1440!> and sent to (*,nelements_dbcsr_send) each parallel process.
1441!> \param nelements_per_proc number of non-zero matrix elements for each MPI process
1442!> \param nnodes_per_proc number of non-zero DBCSR matrix blocks (neighbour-list nodes) on each parallel process
1443!> \param nl_repl replicated neighbour list
1444!> \param matrix_dbcsr_kp DBCSR matrix
1445!> \param symmetric whether the DBCSR matrix is a symmetric one
1446!> \param para_env parallel environment
1447!> \param gather_root if >=0, gather all non-zero matrix elements on the MPI process with
1448!> gather_root rank (useful for bulk transport calculation).
1449!> If <0, distribute non-zero matrix elements across all MPI processes
1450! **************************************************************************************************
1451 SUBROUTINE count_remote_dbcsr_elements(nelements_per_proc, nnodes_per_proc, nl_repl, matrix_dbcsr_kp, &
1452 symmetric, para_env, gather_root)
1453 INTEGER(kind=int_8), DIMENSION(0:, :), INTENT(out) :: nelements_per_proc
1454 INTEGER, DIMENSION(0:), INTENT(in) :: nnodes_per_proc
1455 INTEGER, DIMENSION(:, :), INTENT(in) :: nl_repl
1456 TYPE(dbcsr_p_type), DIMENSION(:), INTENT(in) :: matrix_dbcsr_kp
1457 LOGICAL, INTENT(in) :: symmetric
1458 TYPE(mp_para_env_type), INTENT(in), POINTER :: para_env
1459 INTEGER, INTENT(in) :: gather_root
1460
1461 CHARACTER(len=*), PARAMETER :: routinen = 'count_remote_dbcsr_elements'
1462
1463 INTEGER :: first_row_minus_one, handle, icol_blk, image, image_transp, inode, inode_proc, &
1464 iproc, iproc_orb, irow_blk, irow_local, mepos, ncols_blk, ncols_local, nnodes_proc, &
1465 nprocs, nrows_blk, nrows_local, offset_inode
1466 INTEGER, DIMENSION(:), POINTER :: col_blk_offset, col_blk_size, &
1467 row_blk_offset, row_blk_size
1468 LOGICAL :: do_distribute
1469
1470 CALL timeset(routinen, handle)
1471 nelements_per_proc(:, :) = 0
1472 mepos = para_env%mepos
1473 nprocs = para_env%num_pe
1474 do_distribute = gather_root < 0
1475 IF (debug_this_module) THEN
1476 cpassert(SIZE(nnodes_per_proc) == nprocs)
1477 END IF
1478
1479 CALL dbcsr_get_info(matrix=matrix_dbcsr_kp(1)%matrix, &
1480 nblkrows_total=nrows_blk, nblkcols_total=ncols_blk, &
1481 row_blk_size=row_blk_size, col_blk_size=col_blk_size, &
1482 row_blk_offset=row_blk_offset, col_blk_offset=col_blk_offset)
1483
1484 offset_inode = 0
1485 iproc_orb = gather_root ! if do_distribute == .FALSE., send all matrix elements to MPI process with rank gather_root
1486 DO iproc = lbound(nnodes_per_proc, 1), ubound(nnodes_per_proc, 1)
1487 nnodes_proc = nnodes_per_proc(iproc)
1488 DO inode_proc = 1, nnodes_proc
1489 inode = inode_proc + offset_inode
1490
1491 irow_blk = nl_repl(neighbor_list_iatom_index, inode)
1492 icol_blk = nl_repl(neighbor_list_jatom_index, inode)
1493 cpassert(irow_blk <= icol_blk .OR. (.NOT. symmetric))
1494 image = nl_repl(neighbor_list_siesta_image_index, inode)
1495 image_transp = nl_repl(neighbor_list_siesta_transp_image_index, inode)
1496
1497 IF (image > 0) THEN
1498 nrows_local = row_blk_size(irow_blk)
1499 first_row_minus_one = row_blk_offset(irow_blk) - 1
1500 ncols_local = col_blk_size(icol_blk)
1501 DO irow_local = 1, nrows_local
1502 IF (do_distribute) THEN
1503#if defined(__SMEAGOL)
1504 CALL whichnodeorb(irow_local + first_row_minus_one, nprocs, iproc_orb)
1505#else
1506 CALL cp_abort(__location__, &
1507 "CP2K was compiled with no SMEAGOL support.")
1508#endif
1509 END IF
1510 IF (iproc_orb == mepos) THEN
1511 nelements_per_proc(iproc, nelements_dbcsr_recv) = nelements_per_proc(iproc, nelements_dbcsr_recv) + &
1512 ncols_local
1513 END IF
1514
1515 IF (iproc == mepos) THEN
1516 nelements_per_proc(iproc_orb, nelements_dbcsr_send) = nelements_per_proc(iproc_orb, nelements_dbcsr_send) + &
1517 ncols_local
1518 END IF
1519 END DO
1520 END IF
1521
1522 ! transposed block
1523 IF (image_transp > 0) THEN
1524 nrows_local = col_blk_size(icol_blk)
1525 first_row_minus_one = col_blk_offset(icol_blk) - 1
1526 ncols_local = row_blk_size(irow_blk)
1527 DO irow_local = 1, nrows_local
1528 IF (do_distribute) THEN
1529#if defined(__SMEAGOL)
1530 CALL whichnodeorb(irow_local + first_row_minus_one, nprocs, iproc_orb)
1531#else
1532 CALL cp_abort(__location__, &
1533 "CP2K was compiled with no SMEAGOL support.")
1534#endif
1535 END IF
1536 IF (iproc_orb == mepos) THEN
1537 nelements_per_proc(iproc, nelements_dbcsr_recv) = nelements_per_proc(iproc, nelements_dbcsr_recv) + &
1538 ncols_local
1539 END IF
1540
1541 IF (iproc == mepos) THEN
1542 nelements_per_proc(iproc_orb, nelements_dbcsr_send) = nelements_per_proc(iproc_orb, nelements_dbcsr_send) + &
1543 ncols_local
1544 END IF
1545 END DO
1546 END IF
1547 END DO
1548 offset_inode = offset_inode + nnodes_proc
1549 END DO
1550 CALL timestop(handle)
1551 END SUBROUTINE count_remote_dbcsr_elements
1552
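! Worked example (hypothetical distribution, illustration only). Consider a DBCSR block
! (irow_blk = 2, icol_blk = 5) originating from parallel process 3 (iproc = 3), with
! row_blk_size(2) = 4 and col_blk_size(5) = 3, and assume whichnodeorb() maps the first
! two rows of the block to rank 0 and the last two to rank 1. Then ranks 0 and 1 each add
! 2*3 = 6 to nelements_per_proc(3, nelements_dbcsr_recv), while rank 3 adds 6 to both
! nelements_per_proc(0, nelements_dbcsr_send) and nelements_per_proc(1, nelements_dbcsr_send).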
1553! **************************************************************************************************
1554!> \brief Construct list of non-zero matrix elements' indices in SIESTA format.
1555!> \param n_nonzero_cols number of non-zero matrix elements on each matrix row local to the current
1556!> MPI process
1557!> \param row_offset offset of the first non-zero matrix element for each locally-stored row
1558!> \param col_index sorted list of column indices of non-zero matrix elements
1559!> \param packed_index original (pre-sort) positions of the sorted column indices
1560!> \param nl_repl replicated neighbour list
1561!> \param matrix_dbcsr_kp DBCSR matrix
1562!> \param symmetric whether the DBCSR matrix is a symmetric one
1563!> \param para_env parallel environment
1564!> \param gather_root if >=0, gather all non-zero matrix elements on the MPI process with
1565!> gather_root rank (useful for bulk transport calculation).
1566!> If <0, distribute non-zero matrix elements across all MPI processes
1567! **************************************************************************************************
1568 SUBROUTINE get_nonzero_element_indices(n_nonzero_cols, row_offset, col_index, packed_index, &
1569 nl_repl, matrix_dbcsr_kp, symmetric, para_env, gather_root)
1570 INTEGER, DIMENSION(:), INTENT(out) :: n_nonzero_cols, row_offset, col_index, &
1571 packed_index
1572 INTEGER, DIMENSION(:, :), INTENT(in) :: nl_repl
1573 TYPE(dbcsr_p_type), DIMENSION(:), INTENT(in) :: matrix_dbcsr_kp
1574 LOGICAL, INTENT(in) :: symmetric
1575 TYPE(mp_para_env_type), INTENT(in), POINTER :: para_env
1576 INTEGER, INTENT(in) :: gather_root
1577
1578 CHARACTER(len=*), PARAMETER :: routinen = 'get_nonzero_element_indices'
1579
1580 INTEGER :: first_col_minus_one, first_row_minus_one, handle, icol_blk, icol_local, &
1581 icol_offset, image, image_transp, inode, irow_blk, irow_local, irow_proc, mepos, &
1582 ncols_blk, ncols_local, ncols_total, nnodes, nprocs, nrows_blk, nrows_local, nrows_total
1583 INTEGER, DIMENSION(:), POINTER :: col_blk_offset, col_blk_size, &
1584 row_blk_offset, row_blk_size
1585 LOGICAL :: do_distribute, is_root_rank
1586
1587 CALL timeset(routinen, handle)
1588 n_nonzero_cols(:) = 0
1589 mepos = para_env%mepos
1590 nprocs = para_env%num_pe
1591 do_distribute = gather_root < 0
1592 is_root_rank = gather_root == mepos
1593
1594 CALL dbcsr_get_info(matrix=matrix_dbcsr_kp(1)%matrix, &
1595 nblkrows_total=nrows_blk, nblkcols_total=ncols_blk, &
1596 nfullrows_total=nrows_total, nfullcols_total=ncols_total, &
1597 row_blk_size=row_blk_size, col_blk_size=col_blk_size, &
1598 row_blk_offset=row_blk_offset, col_blk_offset=col_blk_offset)
1599
1600 nnodes = SIZE(nl_repl, 2)
1601 DO inode = 1, nnodes
1602 irow_blk = nl_repl(neighbor_list_iatom_index, inode)
1603 icol_blk = nl_repl(neighbor_list_jatom_index, inode)
1604 cpassert(irow_blk <= icol_blk .OR. (.NOT. symmetric))
1605 image = nl_repl(neighbor_list_siesta_image_index, inode)
1606 image_transp = nl_repl(neighbor_list_siesta_transp_image_index, inode)
1607
1608 IF (image > 0) THEN
1609 nrows_local = row_blk_size(irow_blk)
1610 first_row_minus_one = row_blk_offset(irow_blk) - 1
1611 ncols_local = col_blk_size(icol_blk)
1612 DO irow_local = 1, nrows_local
1613 IF (do_distribute) THEN
1614#if defined(__SMEAGOL)
1615 CALL globaltolocalorb(irow_local + first_row_minus_one, mepos, nprocs, irow_proc)
1616#else
1617 CALL cp_abort(__location__, &
1618 "CP2K was compiled with no SMEAGOL support.")
1619#endif
1620 ELSE
1621 IF (is_root_rank) THEN
1622 irow_proc = irow_local + first_row_minus_one
1623 ELSE
1624 irow_proc = 0
1625 END IF
1626 END IF
1627 IF (irow_proc > 0) THEN
1628 n_nonzero_cols(irow_proc) = n_nonzero_cols(irow_proc) + ncols_local
1629 END IF
1630 END DO
1631 END IF
1632
1633 ! transposed block
1634 IF (image_transp > 0) THEN
1635 nrows_local = col_blk_size(icol_blk)
1636 first_row_minus_one = col_blk_offset(icol_blk) - 1
1637 ncols_local = row_blk_size(irow_blk)
1638 DO irow_local = 1, nrows_local
1639 IF (do_distribute) THEN
1640#if defined(__SMEAGOL)
1641 CALL globaltolocalorb(irow_local + first_row_minus_one, mepos, nprocs, irow_proc)
1642#else
1643 CALL cp_abort(__location__, &
1644 "CP2K was compiled with no SMEAGOL support.")
1645#endif
1646 ELSE
1647 IF (is_root_rank) THEN
1648 irow_proc = irow_local + first_row_minus_one
1649 ELSE
1650 irow_proc = 0
1651 END IF
1652 END IF
1653 IF (irow_proc > 0) THEN
1654 n_nonzero_cols(irow_proc) = n_nonzero_cols(irow_proc) + ncols_local
1655 END IF
1656 END DO
1657 END IF
1658 END DO
1659
1660 row_offset(1) = 0
1661 DO irow_local = 1, SIZE(n_nonzero_cols) - 1
1662 row_offset(irow_local + 1) = row_offset(irow_local) + n_nonzero_cols(irow_local)
1663 END DO
1664
1665 n_nonzero_cols(:) = 0
1666 col_index(:) = 0
1667 DO inode = 1, nnodes
1668 irow_blk = nl_repl(neighbor_list_iatom_index, inode)
1669 icol_blk = nl_repl(neighbor_list_jatom_index, inode)
1670 cpassert(irow_blk <= icol_blk .OR. (.NOT. symmetric))
1671 image = nl_repl(neighbor_list_siesta_image_index, inode)
1672 image_transp = nl_repl(neighbor_list_siesta_transp_image_index, inode)
1673
1674 IF (image > 0) THEN
1675 nrows_local = row_blk_size(irow_blk)
1676 first_row_minus_one = row_blk_offset(irow_blk) - 1
1677 ncols_local = col_blk_size(icol_blk)
1678 first_col_minus_one = col_blk_offset(icol_blk) + (image - 1)*ncols_total - 1
1679 DO irow_local = 1, nrows_local
1680 IF (do_distribute) THEN
1681#if defined(__SMEAGOL)
1682 CALL globaltolocalorb(irow_local + first_row_minus_one, mepos, nprocs, irow_proc)
1683#else
1684 CALL cp_abort(__location__, &
1685 "CP2K was compiled with no SMEAGOL support.")
1686#endif
1687 ELSE
1688 IF (is_root_rank) THEN
1689 irow_proc = irow_local + first_row_minus_one
1690 ELSE
1691 irow_proc = 0
1692 END IF
1693 END IF
1694 IF (irow_proc > 0) THEN
1695 icol_offset = row_offset(irow_proc) + n_nonzero_cols(irow_proc)
1696 DO icol_local = 1, ncols_local
1697 col_index(icol_offset + icol_local) = first_col_minus_one + icol_local
1698 END DO
1699 n_nonzero_cols(irow_proc) = n_nonzero_cols(irow_proc) + ncols_local
1700 END IF
1701 END DO
1702 END IF
1703
1704 ! transposed block
1705 IF (image_transp > 0) THEN
1706 nrows_local = col_blk_size(icol_blk)
1707 first_row_minus_one = col_blk_offset(icol_blk) - 1
1708 ncols_local = row_blk_size(irow_blk)
1709 first_col_minus_one = row_blk_offset(irow_blk) + (image_transp - 1)*nrows_total - 1
1710 DO irow_local = 1, nrows_local
1711 IF (do_distribute) THEN
1712#if defined(__SMEAGOL)
1713 CALL globaltolocalorb(irow_local + first_row_minus_one, mepos, nprocs, irow_proc)
1714#else
1715 CALL cp_abort(__location__, &
1716 "CP2K was compiled with no SMEAGOL support.")
1717#endif
1718 ELSE
1719 IF (is_root_rank) THEN
1720 irow_proc = irow_local + first_row_minus_one
1721 ELSE
1722 irow_proc = 0
1723 END IF
1724 END IF
1725 IF (irow_proc > 0) THEN
1726 icol_offset = row_offset(irow_proc) + n_nonzero_cols(irow_proc)
1727 DO icol_local = 1, ncols_local
1728 col_index(icol_offset + icol_local) = first_col_minus_one + icol_local
1729 END DO
1730 n_nonzero_cols(irow_proc) = n_nonzero_cols(irow_proc) + ncols_local
1731 END IF
1732 END DO
1733 END IF
1734 END DO
1735
1736 IF (SIZE(n_nonzero_cols) > 0) THEN
1737 DO irow_local = 1, SIZE(n_nonzero_cols)
1738 CALL sort(col_index(row_offset(irow_local) + 1:row_offset(irow_local) + n_nonzero_cols(irow_local)), &
1739 n_nonzero_cols(irow_local), &
1740 packed_index(row_offset(irow_local) + 1:row_offset(irow_local) + n_nonzero_cols(irow_local)))
1741 END DO
1742 END IF
1743
1744 CALL timestop(handle)
1745 END SUBROUTINE get_nonzero_element_indices
1746
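! Illustration of the output arrays (hypothetical numbers). For a local row irow whose
! non-zero supercell columns are encountered in the order 7, 3, 5:
!   n_nonzero_cols(irow) = 3
!   col_index(row_offset(irow)+1 : row_offset(irow)+3)    = (/3, 5, 7/)  (sorted)
!   packed_index(row_offset(irow)+1 : row_offset(irow)+3) = (/2, 3, 1/)  (pre-sort positions)
! so that matrix elements packed in traversal order can be mapped onto the sorted
! SIESTA column order.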
1747! **************************************************************************************************
1748!> \brief Get absolute i, j, and k indices of cell image for the given DBCSR matrix block.
1749!> Effective cell image recorded in the neighbour-list (where the matrix block is actually
1750!> stored) depends on atomic coordinates and can be significantly different.
1751!> \param cell_ijk array with 3 indices along the cell's vectors
1752!> \param r_ij actual interatomic vector (R_j - r_i), where R_j are the coordinates
1753!> of the j-th atom in the supercell
1754!> \param r_i coordinates of the i-th atom in the primary unit cell
1755!> \param r_j coordinates of the j-th atom in the primary unit cell
1756!> \param cell unit cell
1757! **************************************************************************************************
1758 SUBROUTINE get_negf_cell_ijk(cell_ijk, r_ij, r_i, r_j, cell)
1759 INTEGER, DIMENSION(3), INTENT(out) :: cell_ijk
1760 REAL(kind=dp), DIMENSION(3), INTENT(in) :: r_ij, r_i, r_j
1761 TYPE(cell_type), INTENT(in), POINTER :: cell
1762
1763 REAL(kind=dp), DIMENSION(3) :: coords_scaled, r
1764
1765 r(:) = r_ij(:) + r_i(:) - r_j(:)
1766 CALL real_to_scaled(coords_scaled, r, cell)
1767 cell_ijk(:) = nint(coords_scaled(:))
1768 END SUBROUTINE get_negf_cell_ijk
1769
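! Worked example (illustration only). For an orthorhombic cell with hmat = diag(10, 10, 10),
! r_i = (/1, 1, 1/), r_j = (/9, 1, 1/) and an iterator vector r_ij = (/-2, 0, 0/):
!   r = r_ij + r_i - r_j = (/-10, 0, 0/)  ->  coords_scaled = (/-1, 0, 0/)
!   cell_ijk = (/-1, 0, 0/), i.e. the interacting image of atom j lies one lattice
!   vector below the primary unit cell along the first cell vector.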
1770! **************************************************************************************************
1771!> \brief Return the index of an integer number in the sequence 0, 1, -1, ..., n, -n, ...
1772!> (canonical enumeration of integers, oeis.org/A001057).
1773!> \param inum integer number [in]
1774!> \return index of 'inum' in A001057
1775!> \note Cell images in SMEAGOL / SIESTA are ordered according to A001057. Therefore this
1776!> function converts the absolute index of a cell replica along some (x/y/z) dimension
1777!> into the corresponding SIESTA index.
1778! **************************************************************************************************
1779 ELEMENTAL FUNCTION index_in_canonical_enumeration(inum) RESULT(ind)
1780 INTEGER, INTENT(in) :: inum
1781 INTEGER :: ind
1782
1783 INTEGER :: inum_abs, is_non_positive
1784
1785 inum_abs = abs(inum)
1786 !IF (inum <= 0) THEN; is_non_positive = 1; ELSE; is_non_positive = 0; END IF
1787 is_non_positive = merge(1, 0, inum <= 0)
1788
1789 ! inum = 0 -> inum_abs = 0, is_non_positive = 1 -> ind = 1
1790 ! inum = 1 -> inum_abs = 1, is_non_positive = 0 -> ind = 2
1791 ! inum = -1 -> inum_abs = 1, is_non_positive = 1 -> ind = 3
1792 ind = 2*inum_abs + is_non_positive
1793 END FUNCTION index_in_canonical_enumeration
1794
1795! **************************************************************************************************
1796!> \brief Return an integer number according to its index in the sequence 0, 1, -1, ..., n, -n, ...
1797!> (canonical enumeration of integers, oeis.org/A001057)
1798!> \param ind index in A001057 starting from 1
1799!> \return integer number according to its position 'ind' in A001057
1800!> \note Cell images in SMEAGOL / SIESTA are ordered according to A001057. Therefore this
1801!> function converts SIESTA's index of a cell replica along some (x/y/z) dimension
1802!> into the corresponding absolute index.
1803! **************************************************************************************************
1804 ELEMENTAL FUNCTION number_from_canonical_enumeration(ind) RESULT(inum)
1805 INTEGER, INTENT(in) :: ind
1806 INTEGER :: inum
1807
1808 ! ind < 1 is invalid
1809 ! ind = 1 -> SIGN(0, -1) = 0
1810 ! ind = 2 -> SIGN(1, 0) = 1
1811 ! ind = 3 -> SIGN(1, -1) = -1
1812 inum = sign(ind/2, -mod(ind, 2))
1813 END FUNCTION number_from_canonical_enumeration
1814
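! Mapping between integers and their positions in A001057 (for reference):
!   inum :  0   1  -1   2  -2   3  -3 ...
!   ind  :  1   2   3   4   5   6   7 ...
! The two functions are mutually inverse:
!   number_from_canonical_enumeration(index_in_canonical_enumeration(n)) == n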
1815! **************************************************************************************************
1816!> \brief Apply periodic boundary conditions defined by a simulation cell to a position
1817!> vector r. Similar to pbc1 from cell_types.F but wraps the scaled coordinates into
1818!> the range [0, 1) instead of [-0.5, 0.5) before converting them back to Cartesian coordinates
1819!> \param r_pbc position vector subject to the periodic boundary conditions [out]
1820!> \param r initial position vector [in]
1821!> \param cell simulation unit cell [in]
1822! **************************************************************************************************
1823 PURE SUBROUTINE pbc_0_1(r_pbc, r, cell)
1824 REAL(kind=dp), DIMENSION(3), INTENT(out) :: r_pbc
1825 REAL(kind=dp), DIMENSION(3), INTENT(in) :: r
1826 TYPE(cell_type), INTENT(in), POINTER :: cell
1827
1828 REAL(kind=dp), DIMENSION(3) :: s
1829
1830 IF (cell%orthorhombic) THEN
1831 r_pbc(1) = r(1) - cell%hmat(1, 1)*cell%perd(1)*real(floor(cell%h_inv(1, 1)*r(1)), dp)
1832 r_pbc(2) = r(2) - cell%hmat(2, 2)*cell%perd(2)*real(floor(cell%h_inv(2, 2)*r(2)), dp)
1833 r_pbc(3) = r(3) - cell%hmat(3, 3)*cell%perd(3)*real(floor(cell%h_inv(3, 3)*r(3)), dp)
1834 ELSE
1835 s(1) = cell%h_inv(1, 1)*r(1) + cell%h_inv(1, 2)*r(2) + cell%h_inv(1, 3)*r(3)
1836 s(2) = cell%h_inv(2, 1)*r(1) + cell%h_inv(2, 2)*r(2) + cell%h_inv(2, 3)*r(3)
1837 s(3) = cell%h_inv(3, 1)*r(1) + cell%h_inv(3, 2)*r(2) + cell%h_inv(3, 3)*r(3)
1838 s(1) = s(1) - cell%perd(1)*real(floor(s(1)), dp)
1839 s(2) = s(2) - cell%perd(2)*real(floor(s(2)), dp)
1840 s(3) = s(3) - cell%perd(3)*real(floor(s(3)), dp)
1841 r_pbc(1) = cell%hmat(1, 1)*s(1) + cell%hmat(1, 2)*s(2) + cell%hmat(1, 3)*s(3)
1842 r_pbc(2) = cell%hmat(2, 1)*s(1) + cell%hmat(2, 2)*s(2) + cell%hmat(2, 3)*s(3)
1843 r_pbc(3) = cell%hmat(3, 1)*s(1) + cell%hmat(3, 2)*s(2) + cell%hmat(3, 3)*s(3)
1844 END IF
1845 END SUBROUTINE pbc_0_1
1846
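! Worked example (illustration only). For a fully periodic orthorhombic cell with
! hmat = diag(10, 10, 10) and r = (/-2.5, 11.0, 3.0/):
!   r_pbc = (/7.5, 1.0, 3.0/)   (scaled coordinates wrapped into [0, 1))
! whereas the [-0.5, 0.5) convention would leave the first component at -2.5.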
1847! **************************************************************************************************
1848!> \brief Computes the number of send requests from this MPI process to all the other processes.
1849!> Alternatively, computes the number of recv requests that the given process
1850!> expects from all the other processes.
1851!> \param mepos MPI rank of the given process
1852!> \param nelements_per_proc number of elements to send / receive
1853!> \param max_nelements_per_packet maximum number of elements per single MPI request
1854!> \return number of MPI requests
1855! **************************************************************************************************
1856 PURE FUNCTION get_number_of_mpi_sendrecv_requests(mepos, nelements_per_proc, max_nelements_per_packet) RESULT(nrequests)
1857 INTEGER, INTENT(in) :: mepos
1858 INTEGER(kind=int_8), DIMENSION(0:), INTENT(in) :: nelements_per_proc
1859 INTEGER(kind=int_8), INTENT(in) :: max_nelements_per_packet
1860 INTEGER :: nrequests
1861
1862 INTEGER :: iproc
1863
1864 nrequests = 0
1865 DO iproc = lbound(nelements_per_proc, 1), ubound(nelements_per_proc, 1)
1866 ! there is no need to send data to the same MPI process
1867 IF (iproc /= mepos) THEN
1868 nrequests = nrequests + int(nelements_per_proc(iproc)/max_nelements_per_packet)
1869 IF (mod(nelements_per_proc(iproc), max_nelements_per_packet) > 0) &
1870 nrequests = nrequests + 1
1871 END IF
1872 END DO
1873 END FUNCTION get_number_of_mpi_sendrecv_requests
1874
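! Worked example (illustration only). With max_mpi_packet_size_dp = 134217728/8 = 16777216
! matrix elements per packet, a peer process holding 40000000 elements requires
!   40000000 / 16777216 = 2 full packets plus a non-empty remainder  ->  3 requests.
! The process' own elements (iproc == mepos) do not generate any requests.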
1875! **************************************************************************************************
1876!> \brief Map non-zero matrix elements onto MPI requests.
1877!> \param element_offset offset (index-1) of the first element [out]
1878!> \param nelements_per_request number of elements for each request [out]
1879!> \param peer_rank rank of the peer MPI process for each request
1880!> \param tag MPI tag
1881!> \param mepos MPI rank of a given MPI process
1882!> \param nelements_per_proc number of elements to be sent / received by the current MPI process
1883!> \param max_nelements_per_packet maximum number of elements per single MPI request
1884! **************************************************************************************************
1885 SUBROUTINE assign_nonzero_elements_to_requests(element_offset, nelements_per_request, peer_rank, tag, &
1886 mepos, nelements_per_proc, max_nelements_per_packet)
1887 INTEGER(kind=int_8), DIMENSION(:), INTENT(out) :: element_offset, nelements_per_request
1888 INTEGER, DIMENSION(:), INTENT(out) :: peer_rank, tag
1889 INTEGER, INTENT(in) :: mepos
1890 INTEGER(kind=int_8), DIMENSION(0:), INTENT(in) :: nelements_per_proc
1891 INTEGER(kind=int_8), INTENT(in) :: max_nelements_per_packet
1892
1893 INTEGER :: iproc, irequest, nrequests, &
1894 request_offset
1895 INTEGER(kind=int_8) :: element_offset_tmp, nelements
1896
1897 request_offset = 0
1898 element_offset_tmp = 0
1899 DO iproc = lbound(nelements_per_proc, 1), ubound(nelements_per_proc, 1)
1900 IF (iproc /= mepos) THEN
1901 nrequests = int(nelements_per_proc(iproc)/max_nelements_per_packet)
1902 IF (mod(nelements_per_proc(iproc), max_nelements_per_packet) > 0) nrequests = nrequests + 1
1903 cpassert(nrequests <= max_mpi_rank + 1)
1904 IF (nrequests > 0) THEN
1905 nelements = nelements_per_proc(iproc)/nrequests
1906 IF (nelements_per_proc(iproc) - nelements*nrequests > 0) nelements = nelements + 1
1907 cpassert(nelements <= max_nelements_per_packet)
1908
1909 DO irequest = 1, nrequests
1910 element_offset(request_offset + irequest) = (irequest - 1)*nelements + element_offset_tmp
1911 IF (irequest < nrequests) THEN
1912 nelements_per_request(request_offset + irequest) = nelements
1913 ELSE
1914 nelements_per_request(request_offset + irequest) = nelements_per_proc(iproc) - nelements*(nrequests - 1)
1915 END IF
1916 peer_rank(request_offset + irequest) = iproc
1917 tag(request_offset + irequest) = irequest - 1
1918 END DO
1919 END IF
1920 request_offset = request_offset + nrequests
1921 END IF
1922 element_offset_tmp = element_offset_tmp + nelements_per_proc(iproc)
1923 END DO
1924
1925 IF (debug_this_module) THEN
1926 cpassert(SIZE(element_offset) == request_offset)
1927 cpassert(SIZE(nelements_per_request) == request_offset)
1928 cpassert(SIZE(peer_rank) == request_offset)
1929 cpassert(SIZE(tag) == request_offset)
1930 END IF
1931 END SUBROUTINE assign_nonzero_elements_to_requests
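! Worked example (illustration only), continuing the case above: 40000000 elements for one
! peer process are split into nrequests = 3, with nelements = 40000000/3 + 1 = 13333334,
! giving per-request sizes (/13333334, 13333334, 13333332/), element offsets (relative to
! the start of that peer's data) of (/0, 13333334, 26666668/), and tags (/0, 1, 2/).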
1932
1933END MODULE smeagol_matrix_utils