31 USE parallel,
ONLY: getnodeorbs, &
47#include "./base/base_uses.f90"
52 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'smeagol_matrix_utils'
53 LOGICAL,
PARAMETER,
PRIVATE :: debug_this_module = .false.
55 INTEGER,
PARAMETER,
PRIVATE :: neighbor_list_iatom_index = 1
56 INTEGER,
PARAMETER,
PRIVATE :: neighbor_list_jatom_index = 2
57 INTEGER,
PARAMETER,
PRIVATE :: neighbor_list_dbcsr_image_index = 3
58 INTEGER,
PARAMETER,
PRIVATE :: neighbor_list_siesta_image_index = 4
59 INTEGER,
PARAMETER,
PRIVATE :: neighbor_list_siesta_transp_image_index = 5
60 INTEGER,
PARAMETER,
PRIVATE :: neighbor_list_dim1 = neighbor_list_siesta_transp_image_index
66 PRIVATE :: get_negf_cell_ijk, index_in_canonical_enumeration, number_from_canonical_enumeration, pbc_0_1
67 PRIVATE :: get_number_of_mpi_sendrecv_requests, assign_nonzero_elements_to_requests
70 INTEGER,
PARAMETER,
PRIVATE :: nelements_dbcsr_recv = 1
72 INTEGER,
PARAMETER,
PRIVATE :: nelements_dbcsr_send = 2
73 INTEGER,
PARAMETER,
PRIVATE :: nelements_dbcsr_dim2 = nelements_dbcsr_send
75 INTEGER(kind=int_8),
PARAMETER,
PRIVATE :: max_mpi_packet_size_bytes = 134217728
76 INTEGER(kind=int_8),
PARAMETER,
PRIVATE :: max_mpi_packet_size_dp = max_mpi_packet_size_bytes/int(
dp_size, kind=
int_8)
81 INTEGER,
PARAMETER,
PRIVATE :: max_mpi_rank = 32767
91 INTEGER :: gather_root = 0
96 LOGICAL :: symmetric = .true.
99 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: nnodes_per_proc
108 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: nl_repl
111 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: n_dbcsr_cell_images_to_merge
113 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: dbcsr_cell_image_to_merge
117 INTEGER(kind=int_8),
ALLOCATABLE,
DIMENSION(:, :) :: nelements_per_proc
120 INTEGER(kind=int_8) :: n_nonzero_elements = 0_int_8
121 INTEGER :: nrows = 0, ncols = 0
124 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: n_nonzero_cols
128 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: row_offset
132 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: col_index
134 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: packed_index
136 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: xij
138 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: indxuo
140 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: iaorb
142 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: xa
162 sab_nl, para_env, max_ij_cell_image, do_merge, gather_root)
164 INTENT(inout) :: siesta_struct
165 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(in) :: matrix_dbcsr_kp
167 INTEGER,
DIMENSION(:, :, :),
POINTER :: cell_to_index
171 INTEGER,
DIMENSION(2),
INTENT(inout) :: max_ij_cell_image
172 LOGICAL,
INTENT(in) :: do_merge
173 INTEGER,
INTENT(in) :: gather_root
175 CHARACTER(len=*),
PARAMETER :: routinen =
'siesta_struct_create'
177 CHARACTER(len=20) :: str_nelem, str_nelem_max
178 INTEGER :: handle, iatom, icol, icol_blk, icol_local, image, image_j, image_k, irow, &
179 irow_local, natoms, ncells_siesta_total, ncols_blk, ncols_total, nrows_local, &
181 INTEGER(kind=int_8) :: n_nonzero_elements_local
182 INTEGER,
DIMENSION(3) :: max_ijk_cell_image, ncells_siesta
183 INTEGER,
DIMENSION(:),
POINTER :: col_blk_offset, col_blk_size
184 LOGICAL :: do_distribute, is_root_rank
185 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: particle_coords
186 REAL(kind=
dp),
DIMENSION(3) :: real_cell_shift, scaled_cell_shift
190 CALL timeset(routinen, handle)
191 do_distribute = gather_root < 0
192 is_root_rank = gather_root == para_env%mepos
197 nfullrows_total=nrows_total, nfullcols_total=ncols_total, &
198 nblkcols_total=ncols_blk, col_blk_size=col_blk_size, col_blk_offset=col_blk_offset)
199 IF (debug_this_module)
THEN
200 cpassert(nrows_total == ncols_total)
201 cpassert(gather_root < para_env%num_pe)
204 siesta_struct%gather_root = gather_root
208 CALL qs_subsys_get(subsys, cell=cell, particle_set=particle_set, nparticle=natoms)
209 ALLOCATE (particle_coords(3, natoms))
211 CALL pbc_0_1(particle_coords(1:3, iatom), particle_set(iatom)%r(1:3), cell)
217 max_ijk_cell_image(1:2) = max_ij_cell_image(1:2)
222 max_ijk_cell_image(3) = -1
223 IF (.NOT. do_merge) max_ijk_cell_image(3) = 1
227 ALLOCATE (siesta_struct%nnodes_per_proc(0:para_env%num_pe - 1))
228 CALL replicate_neighbour_list(siesta_struct%nl_repl, &
229 siesta_struct%n_dbcsr_cell_images_to_merge, &
230 siesta_struct%dbcsr_cell_image_to_merge, &
231 siesta_struct%nnodes_per_proc, &
232 max_ijk_cell_image, sab_nl, para_env, particle_coords, cell, cell_to_index, do_merge)
233 max_ij_cell_image(1:2) = max_ijk_cell_image(1:2)
236 ALLOCATE (siesta_struct%nelements_per_proc(0:para_env%num_pe - 1, nelements_dbcsr_dim2))
237 CALL count_remote_dbcsr_elements(siesta_struct%nelements_per_proc, siesta_struct%nnodes_per_proc, &
238 siesta_struct%nl_repl, matrix_dbcsr_kp, siesta_struct%symmetric, para_env, gather_root)
241 n_nonzero_elements_local = sum(siesta_struct%nelements_per_proc(:, nelements_dbcsr_recv))
242 siesta_struct%n_nonzero_elements = n_nonzero_elements_local
246 IF (n_nonzero_elements_local > int(huge(0), kind=
int_8))
THEN
247 WRITE (str_nelem,
'(I0)') n_nonzero_elements_local
248 WRITE (str_nelem_max,
'(I0)') huge(0)
249 CALL cp_abort(__location__, &
250 "The number of non-zero matrix elements per MPI process "//trim(str_nelem)// &
251 " cannot exceed "//trim(str_nelem_max)// &
252 ". Please increase the number of MPI processes to satisfy this SMEAGOL limitation.")
256 IF (n_nonzero_elements_local == 0) n_nonzero_elements_local = 1
259 IF (do_distribute)
THEN
260#if defined(__SMEAGOL)
261 CALL getnodeorbs(nrows_total, para_env%mepos, para_env%num_pe, nrows_local)
263 CALL cp_abort(__location__, &
264 "CP2K was compiled with no SMEAGOL support.")
267 IF (is_root_rank)
THEN
268 nrows_local = nrows_total
275 ncells_siesta(1:3) = 2*max_ijk_cell_image(1:3) + 1
277 IF (do_merge) ncells_siesta(3) = 1
279 ncells_siesta_total = ncells_siesta(1)*ncells_siesta(2)*ncells_siesta(3)
282 siesta_struct%nrows = nrows_local
286 siesta_struct%ncols = nrows_total*ncells_siesta_total
289 IF (nrows_local == 0) nrows_local = 1
291 ALLOCATE (siesta_struct%n_nonzero_cols(nrows_local))
292 ALLOCATE (siesta_struct%row_offset(nrows_local))
293 ALLOCATE (siesta_struct%col_index(n_nonzero_elements_local))
294 ALLOCATE (siesta_struct%packed_index(n_nonzero_elements_local))
297 nrows_local = siesta_struct%nrows
303 CALL get_nonzero_element_indices(siesta_struct%n_nonzero_cols, siesta_struct%row_offset, &
304 siesta_struct%col_index, siesta_struct%packed_index, &
305 siesta_struct%nl_repl, matrix_dbcsr_kp, &
306 siesta_struct%symmetric, para_env, gather_root)
309 ALLOCATE (siesta_struct%indxuo(siesta_struct%ncols))
310 DO icol = 1, ncols_total
311 siesta_struct%indxuo(icol) = icol
313 DO image = 2, ncells_siesta_total
314 siesta_struct%indxuo((image - 1)*ncols_total + 1:image*ncols_total) = siesta_struct%indxuo(1:ncols_total)
318 ALLOCATE (siesta_struct%iaorb(siesta_struct%ncols))
319 DO icol_blk = 1, ncols_blk
321 siesta_struct%iaorb(col_blk_offset(icol_blk):col_blk_offset(icol_blk) + col_blk_size(icol_blk) - 1) = icol_blk
323 DO image = 2, ncells_siesta_total
324 siesta_struct%iaorb((image - 1)*ncols_total + 1:image*ncols_total) = siesta_struct%iaorb(1:ncols_total) + (image - 1)*natoms
328 ALLOCATE (siesta_struct%xa(3, natoms*ncells_siesta_total))
329 DO image_k = 1, ncells_siesta(3)
331 scaled_cell_shift(3) = real(number_from_canonical_enumeration(image_k), kind=
dp)
332 DO image_j = 1, ncells_siesta(2)
333 scaled_cell_shift(2) = real(number_from_canonical_enumeration(image_j), kind=
dp)
334 DO image = 1, ncells_siesta(1)
335 scaled_cell_shift(1) = real(number_from_canonical_enumeration(image), kind=
dp)
337 offset = (((image_k - 1)*ncells_siesta(2) + image_j - 1)*ncells_siesta(1) + image - 1)*natoms
339 siesta_struct%xa(1:3, offset + iatom) = particle_set(iatom)%r(1:3) + real_cell_shift(1:3)
346 ALLOCATE (siesta_struct%xij(3, n_nonzero_elements_local))
347 DO irow_local = 1, nrows_local
348 IF (do_distribute)
THEN
349#if defined(__SMEAGOL)
350 CALL localtoglobalorb(irow_local, para_env%mepos, para_env%num_pe, irow)
352 CALL cp_abort(__location__, &
353 "CP2K was compiled with no SMEAGOL support.")
357 IF (debug_this_module)
THEN
358 cpassert(is_root_rank)
361 offset = siesta_struct%row_offset(irow_local)
362 DO icol_local = offset + 1, offset + siesta_struct%n_nonzero_cols(irow_local)
363 icol = siesta_struct%col_index(icol_local)
364 siesta_struct%xij(1:3, icol_local) = siesta_struct%xa(1:3, siesta_struct%iaorb(icol)) - &
365 siesta_struct%xa(1:3, siesta_struct%iaorb(irow))
369 DEALLOCATE (particle_coords)
371 CALL timestop(handle)
380 INTENT(inout) :: siesta_struct
382 CHARACTER(len=*),
PARAMETER :: routinen =
'siesta_struct_release'
386 CALL timeset(routinen, handle)
388 siesta_struct%gather_root = -1
390 IF (
ALLOCATED(siesta_struct%nnodes_per_proc))
DEALLOCATE (siesta_struct%nnodes_per_proc)
391 IF (
ALLOCATED(siesta_struct%nl_repl))
DEALLOCATE (siesta_struct%nl_repl)
392 IF (
ALLOCATED(siesta_struct%n_dbcsr_cell_images_to_merge))
DEALLOCATE (siesta_struct%n_dbcsr_cell_images_to_merge)
393 IF (
ALLOCATED(siesta_struct%dbcsr_cell_image_to_merge))
DEALLOCATE (siesta_struct%dbcsr_cell_image_to_merge)
394 IF (
ALLOCATED(siesta_struct%nelements_per_proc))
DEALLOCATE (siesta_struct%nelements_per_proc)
396 siesta_struct%n_nonzero_elements = 0
397 siesta_struct%nrows = 0
398 siesta_struct%ncols = 0
400 IF (
ALLOCATED(siesta_struct%n_nonzero_cols))
DEALLOCATE (siesta_struct%n_nonzero_cols)
401 IF (
ALLOCATED(siesta_struct%row_offset))
DEALLOCATE (siesta_struct%row_offset)
402 IF (
ALLOCATED(siesta_struct%col_index))
DEALLOCATE (siesta_struct%col_index)
403 IF (
ALLOCATED(siesta_struct%packed_index))
DEALLOCATE (siesta_struct%packed_index)
405 IF (
ALLOCATED(siesta_struct%xij))
DEALLOCATE (siesta_struct%xij)
406 IF (
ALLOCATED(siesta_struct%indxuo))
DEALLOCATE (siesta_struct%indxuo)
407 IF (
ALLOCATED(siesta_struct%iaorb))
DEALLOCATE (siesta_struct%iaorb)
408 IF (
ALLOCATED(siesta_struct%xa))
DEALLOCATE (siesta_struct%xa)
410 CALL timestop(handle)
421 REAL(kind=
dp),
DIMENSION(:),
INTENT(out) :: matrix_siesta
422 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(in) :: matrix_dbcsr_kp
426 CHARACTER(len=*),
PARAMETER :: routinen =
'convert_dbcsr_to_distributed_siesta'
428 INTEGER :: first_col_minus_one, first_row_minus_one, handle, icol_blk, icol_local, &
429 image_dbcsr, image_ind, image_ind_offset, image_siesta, image_siesta_transp, inode, &
430 inode_proc, iproc, irequest, irow_blk, irow_local, irow_proc, mepos, n_image_ind, &
431 ncols_blk, ncols_local, nnodes_proc, node_offset, nprocs, nrequests_recv, &
432 nrequests_total, nrows_blk, nrows_local
433 INTEGER(kind=int_8) :: n_nonzero_elements_dbcsr, &
434 n_nonzero_elements_siesta, &
435 offset_recv_mepos, offset_send_mepos
436 INTEGER(kind=int_8),
ALLOCATABLE,
DIMENSION(:) :: n_packed_elements_per_proc, &
437 nelements_per_request, &
438 offset_per_proc, offset_per_request
439 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: next_nonzero_element_offset, peer_rank, &
441 INTEGER,
DIMENSION(:),
POINTER :: col_blk_offset, col_blk_size, &
442 row_blk_offset, row_blk_size
443 LOGICAL :: do_distribute, found, is_root_rank, &
445 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: recv_buffer, reorder_recv_buffer, &
447 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: sm_block, sm_block_merged
450 CALL timeset(routinen, handle)
451 matrix_siesta(:) = 0.0_dp
453 mepos = para_env%mepos
454 nprocs = para_env%num_pe
455 do_distribute = siesta_struct%gather_root < 0
456 is_root_rank = siesta_struct%gather_root == mepos
459 nblkrows_total=nrows_blk, nblkcols_total=ncols_blk, &
460 row_blk_size=row_blk_size, col_blk_size=col_blk_size, &
461 row_blk_offset=row_blk_offset, col_blk_offset=col_blk_offset)
462 symmetric = siesta_struct%symmetric
465 n_nonzero_elements_siesta = sum(siesta_struct%nelements_per_proc(:, nelements_dbcsr_recv))
467 n_nonzero_elements_dbcsr = sum(siesta_struct%nelements_per_proc(:, nelements_dbcsr_send))
470 nrequests_recv = get_number_of_mpi_sendrecv_requests(mepos, siesta_struct%nelements_per_proc(:, nelements_dbcsr_recv), &
471 max_mpi_packet_size_dp)
472 nrequests_total = get_number_of_mpi_sendrecv_requests(mepos, siesta_struct%nelements_per_proc(:, nelements_dbcsr_send), &
473 max_mpi_packet_size_dp) + nrequests_recv
475 IF (nrequests_total > 0)
THEN
478 ALLOCATE (requests(nrequests_total))
479 ALLOCATE (peer_rank(nrequests_total), request_tag(nrequests_total))
480 ALLOCATE (offset_per_request(nrequests_total), nelements_per_request(nrequests_total))
485 IF (nrequests_recv > 0)
THEN
486 CALL assign_nonzero_elements_to_requests(offset_per_request(1:nrequests_recv), &
487 nelements_per_request(1:nrequests_recv), &
488 peer_rank(1:nrequests_recv), &
489 request_tag(1:nrequests_recv), &
491 siesta_struct%nelements_per_proc(:, nelements_dbcsr_recv), &
492 max_mpi_packet_size_dp)
494 IF (nrequests_total > nrequests_recv)
THEN
495 CALL assign_nonzero_elements_to_requests(offset_per_request(nrequests_recv + 1:nrequests_total), &
496 nelements_per_request(nrequests_recv + 1:nrequests_total), &
497 peer_rank(nrequests_recv + 1:nrequests_total), &
498 request_tag(nrequests_recv + 1:nrequests_total), &
500 siesta_struct%nelements_per_proc(:, nelements_dbcsr_send), &
501 max_mpi_packet_size_dp)
509 IF (n_nonzero_elements_siesta > 0)
THEN
510 ALLOCATE (recv_buffer(n_nonzero_elements_siesta))
512 DO irequest = 1, nrequests_recv
513 CALL para_env%irecv(recv_buffer(offset_per_request(irequest) + 1: &
514 offset_per_request(irequest) + nelements_per_request(irequest)), &
515 peer_rank(irequest), requests(irequest), request_tag(irequest))
519 ALLOCATE (offset_per_proc(0:nprocs - 1), n_packed_elements_per_proc(0:nprocs - 1))
520 offset_per_proc(0) = 0
521 DO iproc = 1, nprocs - 1
522 offset_per_proc(iproc) = offset_per_proc(iproc - 1) + siesta_struct%nelements_per_proc(iproc - 1, nelements_dbcsr_send)
524 n_packed_elements_per_proc(:) = 0
527 nnodes_proc = siesta_struct%nnodes_per_proc(mepos)
529 node_offset = sum(siesta_struct%nnodes_per_proc(0:mepos)) - nnodes_proc
533 iproc = siesta_struct%gather_root
535 IF (n_nonzero_elements_dbcsr > 0)
THEN
536 ALLOCATE (send_buffer(n_nonzero_elements_dbcsr))
537 send_buffer(:) = 0.0_dp
542 DO inode_proc = 1, nnodes_proc
543 n_image_ind = siesta_struct%n_dbcsr_cell_images_to_merge(inode_proc)
544 IF (n_image_ind > 0)
THEN
545 inode = node_offset + inode_proc
547 irow_blk = siesta_struct%nl_repl(neighbor_list_iatom_index, inode)
548 icol_blk = siesta_struct%nl_repl(neighbor_list_jatom_index, inode)
549 cpassert(irow_blk <= icol_blk .OR. (.NOT. symmetric))
550 image_siesta = siesta_struct%nl_repl(neighbor_list_siesta_image_index, inode)
551 image_siesta_transp = siesta_struct%nl_repl(neighbor_list_siesta_transp_image_index, inode)
553 nrows_local = row_blk_size(irow_blk)
554 ncols_local = col_blk_size(icol_blk)
555 first_row_minus_one = row_blk_offset(irow_blk) - 1
556 first_col_minus_one = col_blk_offset(icol_blk) - 1
559 IF (n_image_ind == 1)
THEN
561 image_dbcsr = siesta_struct%dbcsr_cell_image_to_merge(image_ind_offset + 1)
563 row=irow_blk, col=icol_blk, block=sm_block_merged, found=found)
566 ALLOCATE (sm_block_merged(nrows_local, ncols_local))
568 DO image_ind = 1, n_image_ind
569 image_dbcsr = siesta_struct%dbcsr_cell_image_to_merge(image_ind + image_ind_offset)
572 row=irow_blk, col=icol_blk, block=sm_block, found=found)
574 sm_block_merged(1:nrows_local, 1:ncols_local) = sm_block_merged(1:nrows_local, 1:ncols_local) + &
575 sm_block(1:nrows_local, 1:ncols_local)
580 IF (image_siesta > 0)
THEN
581 DO irow_local = 1, nrows_local
582 IF (do_distribute)
THEN
583#if defined(__SMEAGOL)
584 CALL whichnodeorb(irow_local + first_row_minus_one, nprocs, iproc)
586 CALL cp_abort(__location__, &
587 "CP2K was compiled with no SMEAGOL support.")
592 IF (debug_this_module)
THEN
593 cpassert(iproc >= 0 .AND. iproc < nprocs)
594 IF (n_packed_elements_per_proc(iproc) + ncols_local > &
595 siesta_struct%nelements_per_proc(iproc, nelements_dbcsr_send))
THEN
596 CALL cp__a(__short_file__, __line__)
600 offset_send_mepos = offset_per_proc(iproc) + n_packed_elements_per_proc(iproc)
601 send_buffer(offset_send_mepos + 1:offset_send_mepos + ncols_local) = sm_block_merged(irow_local, 1:ncols_local)
603 n_packed_elements_per_proc(iproc) = n_packed_elements_per_proc(iproc) + ncols_local
608 IF (image_siesta_transp > 0)
THEN
609 DO icol_local = 1, ncols_local
610 IF (do_distribute)
THEN
611#if defined(__SMEAGOL)
612 CALL whichnodeorb(icol_local + first_col_minus_one, nprocs, iproc)
614 CALL cp_abort(__location__, &
615 "CP2K was compiled with no SMEAGOL support.")
620 IF (debug_this_module)
THEN
621 cpassert(iproc >= 0 .AND. iproc < nprocs)
622 IF (n_packed_elements_per_proc(iproc) + nrows_local > &
623 siesta_struct%nelements_per_proc(iproc, nelements_dbcsr_send))
THEN
624 CALL cp__a(__short_file__, __line__)
628 offset_send_mepos = offset_per_proc(iproc) + n_packed_elements_per_proc(iproc)
629 send_buffer(offset_send_mepos + 1:offset_send_mepos + nrows_local) = sm_block_merged(1:nrows_local, icol_local)
631 n_packed_elements_per_proc(iproc) = n_packed_elements_per_proc(iproc) + nrows_local
635 IF (n_image_ind > 1)
THEN
636 DEALLOCATE (sm_block_merged)
640 image_ind_offset = image_ind_offset + siesta_struct%n_dbcsr_cell_images_to_merge(inode_proc)
643 IF (debug_this_module)
THEN
644 DO iproc = 0, nprocs - 1
645 IF (n_packed_elements_per_proc(iproc) /= siesta_struct%nelements_per_proc(iproc, nelements_dbcsr_send))
THEN
646 CALL cp__a(__short_file__, __line__)
652 DO irequest = nrequests_recv + 1, nrequests_total
653 CALL para_env%isend(send_buffer(offset_per_request(irequest) + 1: &
654 offset_per_request(irequest) + nelements_per_request(irequest)), &
655 peer_rank(irequest), requests(irequest), request_tag(irequest))
660 offset_recv_mepos = sum(siesta_struct%nelements_per_proc(0:mepos - 1, nelements_dbcsr_recv))
662 offset_recv_mepos = 0
664 offset_send_mepos = offset_per_proc(mepos)
666 IF (debug_this_module)
THEN
667 IF (n_packed_elements_per_proc(mepos) /= siesta_struct%nelements_per_proc(mepos, nelements_dbcsr_recv))
THEN
668 CALL cp__a(__short_file__, __line__)
672 IF (n_packed_elements_per_proc(mepos) > 0)
THEN
673 recv_buffer(offset_recv_mepos + 1:offset_recv_mepos + n_packed_elements_per_proc(mepos)) = &
674 send_buffer(offset_send_mepos + 1:offset_send_mepos + n_packed_elements_per_proc(mepos))
678 IF (nrequests_total > 0)
THEN
681 DEALLOCATE (nelements_per_request, offset_per_request, peer_rank, requests, request_tag)
685 IF (
ALLOCATED(send_buffer))
DEALLOCATE (send_buffer)
686 DEALLOCATE (offset_per_proc, n_packed_elements_per_proc)
692 IF (n_nonzero_elements_siesta > 0)
THEN
693 ALLOCATE (reorder_recv_buffer(n_nonzero_elements_siesta))
694 ALLOCATE (next_nonzero_element_offset(siesta_struct%nrows))
695 next_nonzero_element_offset(:) = 0
696 offset_recv_mepos = 0
698 DO inode = 1,
SIZE(siesta_struct%nl_repl, 2)
699 irow_blk = siesta_struct%nl_repl(neighbor_list_iatom_index, inode)
700 icol_blk = siesta_struct%nl_repl(neighbor_list_jatom_index, inode)
701 cpassert(irow_blk <= icol_blk .OR. (.NOT. symmetric))
702 image_siesta = siesta_struct%nl_repl(neighbor_list_siesta_image_index, inode)
703 image_siesta_transp = siesta_struct%nl_repl(neighbor_list_siesta_transp_image_index, inode)
705 nrows_local = row_blk_size(irow_blk)
706 ncols_local = col_blk_size(icol_blk)
707 first_row_minus_one = row_blk_offset(irow_blk) - 1
708 first_col_minus_one = col_blk_offset(icol_blk) - 1
711 IF (image_siesta > 0)
THEN
712 DO irow_local = 1, nrows_local
713 IF (do_distribute)
THEN
714#if defined(__SMEAGOL)
715 CALL globaltolocalorb(irow_local + first_row_minus_one, mepos, nprocs, irow_proc)
717 CALL cp_abort(__location__, &
718 "CP2K was compiled with no SMEAGOL support.")
721 IF (is_root_rank)
THEN
722 irow_proc = irow_local + first_row_minus_one
727 IF (irow_proc > 0)
THEN
728 offset_send_mepos = siesta_struct%row_offset(irow_proc) + next_nonzero_element_offset(irow_proc)
729 reorder_recv_buffer(offset_send_mepos + 1:offset_send_mepos + ncols_local) = &
730 recv_buffer(offset_recv_mepos + 1:offset_recv_mepos + ncols_local)
731 offset_recv_mepos = offset_recv_mepos + ncols_local
732 next_nonzero_element_offset(irow_proc) = next_nonzero_element_offset(irow_proc) + ncols_local
738 IF (image_siesta_transp > 0)
THEN
739 DO icol_local = 1, ncols_local
740 IF (do_distribute)
THEN
741#if defined(__SMEAGOL)
742 CALL globaltolocalorb(icol_local + first_col_minus_one, mepos, nprocs, irow_proc)
744 CALL cp_abort(__location__, &
745 "CP2K was compiled with no SMEAGOL support.")
748 IF (is_root_rank)
THEN
749 irow_proc = icol_local + first_col_minus_one
754 IF (irow_proc > 0)
THEN
755 offset_send_mepos = siesta_struct%row_offset(irow_proc) + next_nonzero_element_offset(irow_proc)
756 reorder_recv_buffer(offset_send_mepos + 1:offset_send_mepos + nrows_local) = &
757 recv_buffer(offset_recv_mepos + 1:offset_recv_mepos + nrows_local)
758 offset_recv_mepos = offset_recv_mepos + nrows_local
759 next_nonzero_element_offset(irow_proc) = next_nonzero_element_offset(irow_proc) + nrows_local
765 IF (debug_this_module)
THEN
766 DO irow_local = 1, siesta_struct%nrows
767 IF (siesta_struct%n_nonzero_cols(irow_local) /= next_nonzero_element_offset(irow_local))
THEN
768 CALL cp__a(__short_file__, __line__)
773 DEALLOCATE (next_nonzero_element_offset)
774 DEALLOCATE (recv_buffer)
777 DO irow_local = 1, siesta_struct%nrows
778 offset_recv_mepos = siesta_struct%row_offset(irow_local)
779 DO icol_local = 1, siesta_struct%n_nonzero_cols(irow_local)
780 matrix_siesta(offset_recv_mepos + icol_local) = &
781 reorder_recv_buffer(offset_recv_mepos + siesta_struct%packed_index(offset_recv_mepos + icol_local))
784 DEALLOCATE (reorder_recv_buffer)
787 CALL timestop(handle)
799 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(in) :: matrix_dbcsr_kp
800 REAL(kind=
dp),
DIMENSION(:),
INTENT(in) :: matrix_siesta
804 CHARACTER(len=*),
PARAMETER :: routinen =
'convert_distributed_siesta_to_dbcsr'
806 INTEGER :: first_col_minus_one, first_row_minus_one, handle, icol_blk, icol_local, &
807 image_dbcsr, image_siesta, image_siesta_transp, inode, inode_proc, iproc, irequest, &
808 irow_blk, irow_local, irow_proc, mepos, n_image_ind, ncols_blk, ncols_local, nnodes_proc, &
809 node_offset, nprocs, nrequests_recv, nrequests_total, nrows_blk, nrows_local
810 INTEGER(kind=int_8) :: n_nonzero_elements_dbcsr, &
811 n_nonzero_elements_siesta, &
812 offset_recv_mepos, offset_send_mepos
813 INTEGER(kind=int_8),
ALLOCATABLE,
DIMENSION(:) :: n_packed_elements_per_proc, &
814 nelements_per_request, &
815 offset_per_proc, offset_per_request
816 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: next_nonzero_element_offset, peer_rank, &
818 INTEGER,
DIMENSION(:),
POINTER :: col_blk_offset, col_blk_size, &
819 row_blk_offset, row_blk_size
820 LOGICAL :: do_distribute, found, is_root_rank, &
822 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: recv_buffer, reorder_send_buffer, &
824 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: sm_block
827 CALL timeset(routinen, handle)
828 DO image_dbcsr = 1,
SIZE(matrix_dbcsr_kp)
829 CALL dbcsr_set(matrix_dbcsr_kp(image_dbcsr)%matrix, 0.0_dp)
832 mepos = para_env%mepos
833 nprocs = para_env%num_pe
834 do_distribute = siesta_struct%gather_root < 0
835 is_root_rank = siesta_struct%gather_root == mepos
838 nblkrows_total=nrows_blk, nblkcols_total=ncols_blk, &
839 row_blk_size=row_blk_size, col_blk_size=col_blk_size, &
840 row_blk_offset=row_blk_offset, col_blk_offset=col_blk_offset)
841 symmetric = siesta_struct%symmetric
843 n_nonzero_elements_siesta = sum(siesta_struct%nelements_per_proc(:, nelements_dbcsr_recv))
844 n_nonzero_elements_dbcsr = sum(siesta_struct%nelements_per_proc(:, nelements_dbcsr_send))
846 nrequests_recv = get_number_of_mpi_sendrecv_requests(mepos, siesta_struct%nelements_per_proc(:, nelements_dbcsr_send), &
847 max_mpi_packet_size_dp)
848 nrequests_total = get_number_of_mpi_sendrecv_requests(mepos, siesta_struct%nelements_per_proc(:, nelements_dbcsr_recv), &
849 max_mpi_packet_size_dp) + nrequests_recv
850 IF (nrequests_total > 0)
THEN
851 ALLOCATE (requests(nrequests_total))
852 ALLOCATE (peer_rank(nrequests_total), request_tag(nrequests_total))
853 ALLOCATE (offset_per_request(nrequests_total), nelements_per_request(nrequests_total))
855 IF (nrequests_recv > 0)
THEN
856 CALL assign_nonzero_elements_to_requests(offset_per_request(1:nrequests_recv), &
857 nelements_per_request(1:nrequests_recv), &
858 peer_rank(1:nrequests_recv), &
859 request_tag(1:nrequests_recv), &
861 siesta_struct%nelements_per_proc(:, nelements_dbcsr_send), &
862 max_mpi_packet_size_dp)
864 IF (nrequests_total > nrequests_recv)
THEN
865 CALL assign_nonzero_elements_to_requests(offset_per_request(nrequests_recv + 1:nrequests_total), &
866 nelements_per_request(nrequests_recv + 1:nrequests_total), &
867 peer_rank(nrequests_recv + 1:nrequests_total), &
868 request_tag(nrequests_recv + 1:nrequests_total), &
870 siesta_struct%nelements_per_proc(:, nelements_dbcsr_recv), &
871 max_mpi_packet_size_dp)
875 IF (n_nonzero_elements_dbcsr > 0)
THEN
876 ALLOCATE (recv_buffer(n_nonzero_elements_dbcsr))
878 DO irequest = 1, nrequests_recv
879 CALL para_env%irecv(recv_buffer(offset_per_request(irequest) + 1: &
880 offset_per_request(irequest) + nelements_per_request(irequest)), &
881 peer_rank(irequest), requests(irequest), request_tag(irequest))
884 ALLOCATE (offset_per_proc(0:nprocs - 1), n_packed_elements_per_proc(0:nprocs - 1))
885 offset_per_proc(0) = 0
886 DO iproc = 1, nprocs - 1
887 offset_per_proc(iproc) = offset_per_proc(iproc - 1) + siesta_struct%nelements_per_proc(iproc - 1, nelements_dbcsr_send)
889 n_packed_elements_per_proc(:) = 0
892 node_offset = sum(siesta_struct%nnodes_per_proc(0:mepos - 1))
896 nnodes_proc = siesta_struct%nnodes_per_proc(mepos)
898 IF (n_nonzero_elements_siesta > 0)
THEN
899 ALLOCATE (send_buffer(n_nonzero_elements_siesta))
901 ALLOCATE (reorder_send_buffer(n_nonzero_elements_siesta))
902 DO irow_local = 1, siesta_struct%nrows
903 offset_send_mepos = siesta_struct%row_offset(irow_local)
904 DO icol_local = 1, siesta_struct%n_nonzero_cols(irow_local)
905 reorder_send_buffer(offset_send_mepos + siesta_struct%packed_index(offset_send_mepos + icol_local)) = &
906 matrix_siesta(offset_send_mepos + icol_local)
910 ALLOCATE (next_nonzero_element_offset(siesta_struct%nrows))
911 next_nonzero_element_offset(:) = 0
912 offset_send_mepos = 0
914 DO inode = 1,
SIZE(siesta_struct%nl_repl, 2)
915 irow_blk = siesta_struct%nl_repl(neighbor_list_iatom_index, inode)
916 icol_blk = siesta_struct%nl_repl(neighbor_list_jatom_index, inode)
917 cpassert(irow_blk <= icol_blk .OR. (.NOT. symmetric))
918 image_siesta = siesta_struct%nl_repl(neighbor_list_siesta_image_index, inode)
919 image_siesta_transp = siesta_struct%nl_repl(neighbor_list_siesta_transp_image_index, inode)
921 nrows_local = row_blk_size(irow_blk)
922 ncols_local = col_blk_size(icol_blk)
923 first_row_minus_one = row_blk_offset(irow_blk) - 1
924 first_col_minus_one = col_blk_offset(icol_blk) - 1
926 IF (image_siesta > 0)
THEN
927 DO irow_local = 1, nrows_local
928 IF (do_distribute)
THEN
929#if defined(__SMEAGOL)
930 CALL globaltolocalorb(irow_local + first_row_minus_one, mepos, nprocs, irow_proc)
932 CALL cp_abort(__location__, &
933 "CP2K was compiled with no SMEAGOL support.")
936 IF (is_root_rank)
THEN
937 irow_proc = irow_local + first_row_minus_one
942 IF (irow_proc > 0)
THEN
943 offset_recv_mepos = siesta_struct%row_offset(irow_proc) + next_nonzero_element_offset(irow_proc)
944 send_buffer(offset_send_mepos + 1:offset_send_mepos + ncols_local) = &
945 reorder_send_buffer(offset_recv_mepos + 1:offset_recv_mepos + ncols_local)
946 offset_send_mepos = offset_send_mepos + ncols_local
947 next_nonzero_element_offset(irow_proc) = next_nonzero_element_offset(irow_proc) + ncols_local
953 IF (image_siesta_transp > 0)
THEN
954 DO icol_local = 1, ncols_local
955 IF (do_distribute)
THEN
956#if defined(__SMEAGOL)
957 CALL globaltolocalorb(icol_local + first_col_minus_one, mepos, nprocs, irow_proc)
959 CALL cp_abort(__location__, &
960 "CP2K was compiled with no SMEAGOL support.")
963 IF (is_root_rank)
THEN
964 irow_proc = icol_local + first_col_minus_one
969 IF (irow_proc > 0)
THEN
970 offset_recv_mepos = siesta_struct%row_offset(irow_proc) + next_nonzero_element_offset(irow_proc)
971 send_buffer(offset_send_mepos + 1:offset_send_mepos + nrows_local) = &
972 reorder_send_buffer(offset_recv_mepos + 1:offset_recv_mepos + nrows_local)
973 offset_send_mepos = offset_send_mepos + nrows_local
974 next_nonzero_element_offset(irow_proc) = next_nonzero_element_offset(irow_proc) + nrows_local
980 IF (debug_this_module)
THEN
981 DO irow_local = 1, siesta_struct%nrows
982 IF (siesta_struct%n_nonzero_cols(irow_local) /= next_nonzero_element_offset(irow_local))
THEN
983 CALL cp__a(__short_file__, __line__)
988 DEALLOCATE (next_nonzero_element_offset)
989 DEALLOCATE (reorder_send_buffer)
991 DO irequest = nrequests_recv + 1, nrequests_total
992 CALL para_env%isend(send_buffer(offset_per_request(irequest) + 1: &
993 offset_per_request(irequest) + nelements_per_request(irequest)), &
994 peer_rank(irequest), requests(irequest), request_tag(irequest))
999 offset_send_mepos = sum(siesta_struct%nelements_per_proc(0:mepos - 1, nelements_dbcsr_recv))
1001 offset_send_mepos = 0
1003 offset_recv_mepos = offset_per_proc(mepos)
1005 IF (debug_this_module)
THEN
1006 IF (siesta_struct%nelements_per_proc(mepos, nelements_dbcsr_recv) /= &
1007 siesta_struct%nelements_per_proc(mepos, nelements_dbcsr_send))
THEN
1008 CALL cp__a(__short_file__, __line__)
1012 IF (siesta_struct%nelements_per_proc(mepos, nelements_dbcsr_send) > 0)
THEN
1013 recv_buffer(offset_recv_mepos + 1:offset_recv_mepos + siesta_struct%nelements_per_proc(mepos, nelements_dbcsr_send)) = &
1014 send_buffer(offset_send_mepos + 1:offset_send_mepos + siesta_struct%nelements_per_proc(mepos, nelements_dbcsr_send))
1018 IF (nrequests_total > 0)
THEN
1021 DEALLOCATE (nelements_per_request, offset_per_request, peer_rank, requests, request_tag)
1024 IF (
ALLOCATED(send_buffer))
DEALLOCATE (send_buffer)
1026 iproc = siesta_struct%gather_root
1027 IF (n_nonzero_elements_dbcsr > 0)
THEN
1028 DO inode_proc = 1, nnodes_proc
1029 n_image_ind = siesta_struct%n_dbcsr_cell_images_to_merge(inode_proc)
1030 IF (n_image_ind > 0)
THEN
1031 inode = node_offset + inode_proc
1033 irow_blk = siesta_struct%nl_repl(neighbor_list_iatom_index, inode)
1034 icol_blk = siesta_struct%nl_repl(neighbor_list_jatom_index, inode)
1035 image_dbcsr = siesta_struct%nl_repl(neighbor_list_dbcsr_image_index, inode)
1036 cpassert(irow_blk <= icol_blk .OR. (.NOT. symmetric))
1037 image_siesta = siesta_struct%nl_repl(neighbor_list_siesta_image_index, inode)
1038 image_siesta_transp = siesta_struct%nl_repl(neighbor_list_siesta_transp_image_index, inode)
1040 nrows_local = row_blk_size(irow_blk)
1041 ncols_local = col_blk_size(icol_blk)
1042 first_row_minus_one = row_blk_offset(irow_blk) - 1
1043 first_col_minus_one = col_blk_offset(icol_blk) - 1
1046 row=irow_blk, col=icol_blk, block=sm_block, found=found)
1049 IF (image_siesta > 0)
THEN
1050 DO irow_local = 1, nrows_local
1051 IF (do_distribute)
THEN
1052#if defined(__SMEAGOL)
1053 CALL whichnodeorb(irow_local + first_row_minus_one, nprocs, iproc)
1055 CALL cp_abort(__location__, &
1056 "CP2K was compiled with no SMEAGOL support.")
1060 IF (debug_this_module)
THEN
1061 cpassert(iproc >= 0 .AND. iproc < nprocs)
1062 IF (n_packed_elements_per_proc(iproc) + ncols_local > &
1063 siesta_struct%nelements_per_proc(iproc, nelements_dbcsr_send))
THEN
1064 CALL cp__a(__short_file__, __line__)
1068 offset_recv_mepos = offset_per_proc(iproc) + n_packed_elements_per_proc(iproc)
1069 sm_block(irow_local, 1:ncols_local) = recv_buffer(offset_recv_mepos + 1:offset_recv_mepos + ncols_local)
1071 n_packed_elements_per_proc(iproc) = n_packed_elements_per_proc(iproc) + ncols_local
1076 IF (image_siesta_transp > 0)
THEN
1077 DO icol_local = 1, ncols_local
1078 IF (do_distribute)
THEN
1079#if defined(__SMEAGOL)
1080 CALL whichnodeorb(icol_local + first_col_minus_one, nprocs, iproc)
1082 CALL cp_abort(__location__, &
1083 "CP2K was compiled with no SMEAGOL support.")
1087 IF (debug_this_module)
THEN
1088 cpassert(iproc >= 0 .AND. iproc < nprocs)
1089 IF (n_packed_elements_per_proc(iproc) + nrows_local > &
1090 siesta_struct%nelements_per_proc(iproc, nelements_dbcsr_send))
THEN
1091 CALL cp__a(__short_file__, __line__)
1095 offset_recv_mepos = offset_per_proc(iproc) + n_packed_elements_per_proc(iproc)
1096 sm_block(1:nrows_local, icol_local) = recv_buffer(offset_recv_mepos + 1:offset_recv_mepos + nrows_local)
1098 n_packed_elements_per_proc(iproc) = n_packed_elements_per_proc(iproc) + nrows_local
1104 IF (debug_this_module)
THEN
1105 DO iproc = 0, nprocs - 1
1106 IF (n_packed_elements_per_proc(iproc) /= siesta_struct%nelements_per_proc(iproc, nelements_dbcsr_send))
THEN
1107 CALL cp__a(__short_file__, __line__)
1112 DEALLOCATE (recv_buffer)
1115 DEALLOCATE (offset_per_proc, n_packed_elements_per_proc)
1117 CALL timestop(handle)
1133 SUBROUTINE get_nnodes_local(nnodes_local, max_ijk_cell_image_local, max_ijk_cell_image, sab_nl, para_env, particle_coords, cell)
1134 INTEGER,
INTENT(out) :: nnodes_local
1135 INTEGER,
DIMENSION(3),
INTENT(out) :: max_ijk_cell_image_local
1136 INTEGER,
DIMENSION(3),
INTENT(inout) :: max_ijk_cell_image
1138 INTENT(in),
POINTER :: sab_nl
1140 REAL(kind=
dp),
CONTIGUOUS,
DIMENSION(:, :), &
1141 INTENT(in) :: particle_coords
1142 TYPE(
cell_type),
INTENT(in),
POINTER :: cell
1144 CHARACTER(len=*),
PARAMETER :: routinen =
'get_nnodes_local'
1146 INTEGER :: handle, iatom, icoord, jatom
1147 INTEGER,
DIMENSION(3) :: cell_ijk, max_ijk_cell_image_tmp
1148 LOGICAL :: update_ncells
1149 REAL(kind=
dp),
DIMENSION(3) :: r_ij
1151 DIMENSION(:),
POINTER :: nl_iterator
1153 CALL timeset(routinen, handle)
1155 update_ncells = .false.
1157 IF (max_ijk_cell_image(icoord) >= 0)
THEN
1158 max_ijk_cell_image_tmp(icoord) = max_ijk_cell_image(icoord)
1160 max_ijk_cell_image_tmp(icoord) = huge(max_ijk_cell_image_tmp(icoord))
1161 update_ncells = .true.
1166 max_ijk_cell_image_local(:) = 0
1171 CALL get_negf_cell_ijk(cell_ijk, r_ij, r_i=particle_coords(1:3, iatom), r_j=particle_coords(1:3, jatom), cell=cell)
1172 cell_ijk(1:3) = abs(cell_ijk(1:3))
1174 IF (cell_ijk(1) <= max_ijk_cell_image_tmp(1) .AND. cell_ijk(2) <= max_ijk_cell_image_tmp(2) .AND. &
1175 cell_ijk(3) <= max_ijk_cell_image_tmp(3))
THEN
1176 nnodes_local = nnodes_local + 1
1177 max_ijk_cell_image_local(1:3) = max(max_ijk_cell_image_local(1:3), cell_ijk(1:3))
1182 IF (update_ncells)
THEN
1183 max_ijk_cell_image_tmp(1:3) = max_ijk_cell_image_local(1:3)
1184 CALL para_env%max(max_ijk_cell_image_tmp)
1186 IF (max_ijk_cell_image(icoord) < 0)
THEN
1187 max_ijk_cell_image(icoord) = max_ijk_cell_image_tmp(icoord)
1192 CALL timestop(handle)
1193 END SUBROUTINE get_nnodes_local
1210 SUBROUTINE get_nl_nodes_local(nl_local, max_ijk_cell_image_local, max_ijk_cell_image, sab_nl, particle_coords, &
1211 cell, cell_to_index, do_merge, node_merged_indices, k_cells)
1212 INTEGER,
DIMENSION(:, :),
INTENT(out) :: nl_local
1213 INTEGER,
DIMENSION(3),
INTENT(in) :: max_ijk_cell_image_local, &
1216 INTENT(in),
POINTER :: sab_nl
1217 REAL(kind=
dp),
CONTIGUOUS,
DIMENSION(:, :), &
1218 INTENT(in) :: particle_coords
1219 TYPE(
cell_type),
INTENT(in),
POINTER :: cell
1220 INTEGER,
DIMENSION(:, :, :),
INTENT(in),
POINTER :: cell_to_index
1221 LOGICAL,
INTENT(in) :: do_merge
1222 INTEGER(kind=int_8),
DIMENSION(:),
INTENT(out) :: node_merged_indices
1223 INTEGER,
DIMENSION(:),
INTENT(out) :: k_cells
1225 CHARACTER(len=*),
PARAMETER :: routinen =
'get_nl_nodes_local'
1227 INTEGER :: handle, iatom, icol_blk, image, inode, &
1228 irow_blk, jatom, natoms
1229 INTEGER(kind=8),
DIMENSION(2) :: ncells_siesta_local
1230 INTEGER,
DIMENSION(2) :: ncells_siesta
1231 INTEGER,
DIMENSION(3) :: cell_ijk_abs, cell_ijk_dbcsr, &
1233 LOGICAL :: do_symmetric
1234 REAL(kind=
dp),
DIMENSION(3) :: r_ij
1236 DIMENSION(:),
POINTER :: nl_iterator
1238 CALL timeset(routinen, handle)
1240 natoms =
SIZE(particle_coords, 2)
1242 ncells_siesta(1:2) = 2*max_ijk_cell_image(1:2) + 1
1244 ncells_siesta_local(1:2) = int(2*max_ijk_cell_image_local(1:2) + 1, kind=
int_8)
1249 CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, cell=cell_ijk_dbcsr, r=r_ij)
1250 CALL get_negf_cell_ijk(cell_ijk_abs, r_ij, r_i=particle_coords(1:3, iatom), r_j=particle_coords(1:3, jatom), cell=cell)
1252 IF (abs(cell_ijk_abs(1)) <= max_ijk_cell_image(1) .AND. abs(cell_ijk_abs(2)) <= max_ijk_cell_image(2) .AND. &
1253 abs(cell_ijk_abs(3)) <= max_ijk_cell_image(3))
THEN
1259 nl_local(neighbor_list_dbcsr_image_index, inode) = image
1261 IF (do_symmetric .AND. iatom > jatom)
THEN
1264 cell_ijk_abs(1:3) = -cell_ijk_abs(1:3)
1270 nl_local(neighbor_list_iatom_index, inode) = irow_blk
1271 nl_local(neighbor_list_jatom_index, inode) = icol_blk
1273 cell_ijk_siesta(1:3) = index_in_canonical_enumeration(cell_ijk_abs(1:3))
1276 node_merged_indices(inode) = (((cell_ijk_siesta(2) - 1)*ncells_siesta_local(1) + cell_ijk_siesta(1) - 1)* &
1277 int(natoms, kind=
int_8) + icol_blk - 1)*int(natoms, kind=
int_8) + int(irow_blk - 1, kind=
int_8)
1278 image = cell_ijk_siesta(1) + ncells_siesta(1)*(cell_ijk_siesta(2) - 1)
1280 node_merged_indices(inode) = ((((cell_ijk_siesta(3) - 1)*ncells_siesta_local(2) + &
1281 cell_ijk_siesta(2) - 1)*ncells_siesta_local(1) + cell_ijk_siesta(1) - 1)* &
1282 int(natoms, kind=
int_8) + icol_blk - 1)*int(natoms, kind=
int_8) + int(irow_blk - 1, kind=
int_8)
1283 image = cell_ijk_siesta(1) + ncells_siesta(1)*(cell_ijk_siesta(2) - 1 + ncells_siesta(2)*(cell_ijk_siesta(3) - 1))
1285 k_cells(inode) = cell_ijk_siesta(3)
1286 nl_local(neighbor_list_siesta_image_index, inode) = image
1288 IF (do_symmetric .AND. irow_blk /= icol_blk)
THEN
1289 cell_ijk_abs(1:3) = -cell_ijk_abs(1:3)
1290 cell_ijk_siesta(1:3) = index_in_canonical_enumeration(cell_ijk_abs(1:3))
1291 IF (do_merge) cell_ijk_siesta(3) = 1
1292 nl_local(neighbor_list_siesta_transp_image_index, inode) = &
1293 cell_ijk_siesta(1) + ncells_siesta(1)*(cell_ijk_siesta(2) - 1 + ncells_siesta(2)*(cell_ijk_siesta(3) - 1))
1295 nl_local(neighbor_list_siesta_transp_image_index, inode) = 0
1301 IF (debug_this_module)
THEN
1302 cpassert(
SIZE(nl_local, 2) == inode)
1305 CALL timestop(handle)
1306 END SUBROUTINE get_nl_nodes_local
1322 SUBROUTINE replicate_neighbour_list(repl_nl, n_dbcsr_cell_images_to_merge, dbcsr_cell_image_to_merge, &
1323 nnodes_per_proc, max_ijk_cell_image, sab_nl, para_env, particle_coords, &
1324 cell, cell_to_index, do_merge)
1325 INTEGER,
ALLOCATABLE,
DIMENSION(:, :), &
1326 INTENT(inout) :: repl_nl
1327 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(inout) :: n_dbcsr_cell_images_to_merge, &
1328 dbcsr_cell_image_to_merge
1329 INTEGER,
DIMENSION(0:),
INTENT(out) :: nnodes_per_proc
1330 INTEGER,
DIMENSION(3),
INTENT(inout) :: max_ijk_cell_image
1332 INTENT(in),
POINTER :: sab_nl
1334 REAL(kind=
dp),
CONTIGUOUS,
DIMENSION(:, :), &
1335 INTENT(in) :: particle_coords
1336 TYPE(
cell_type),
INTENT(in),
POINTER :: cell
1337 INTEGER,
DIMENSION(:, :, :),
INTENT(in),
POINTER :: cell_to_index
1338 LOGICAL,
INTENT(in) :: do_merge
1340 CHARACTER(len=*),
PARAMETER :: routinen =
'replicate_neighbour_list'
1342 INTEGER :: handle, inode, iproc, kcell_closest, &
1343 nnodes_local, nnodes_merged, &
1344 nnodes_repl, offset_inode
1345 INTEGER(kind=int_8),
ALLOCATABLE,
DIMENSION(:) :: node_merged_indices
1346 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: inodes_orig, k_cells
1347 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: nl_local
1348 INTEGER,
DIMENSION(3) :: max_ijk_cell_image_local
1350 CALL timeset(routinen, handle)
1351 cpassert(.NOT.
ALLOCATED(repl_nl))
1352 cpassert(.NOT.
ALLOCATED(n_dbcsr_cell_images_to_merge))
1353 cpassert(.NOT.
ALLOCATED(dbcsr_cell_image_to_merge))
1355 CALL get_nnodes_local(nnodes_local, max_ijk_cell_image_local, max_ijk_cell_image, sab_nl, para_env, particle_coords, cell)
1357 nnodes_per_proc(:) = 0
1359 IF (nnodes_local > 0)
THEN
1360 ALLOCATE (nl_local(neighbor_list_dim1, nnodes_local))
1361 ALLOCATE (node_merged_indices(nnodes_local))
1362 ALLOCATE (k_cells(nnodes_local))
1363 CALL get_nl_nodes_local(nl_local, max_ijk_cell_image_local, max_ijk_cell_image, sab_nl, particle_coords, cell, &
1364 cell_to_index, do_merge, node_merged_indices, k_cells)
1366 ALLOCATE (inodes_orig(nnodes_local))
1367 CALL sort(node_merged_indices, nnodes_local, inodes_orig)
1370 DO inode = 2, nnodes_local
1371 IF (node_merged_indices(inode) > node_merged_indices(inode - 1)) nnodes_merged = nnodes_merged + 1
1377 nnodes_per_proc(para_env%mepos) = nnodes_merged
1378 CALL para_env%sum(nnodes_per_proc)
1380 nnodes_repl = sum(nnodes_per_proc(:))
1381 ALLOCATE (repl_nl(neighbor_list_dim1, nnodes_repl))
1383 IF (nnodes_local > 0)
THEN
1384 IF (para_env%mepos > 0)
THEN
1385 offset_inode = sum(nnodes_per_proc(0:para_env%mepos - 1))
1390 ALLOCATE (n_dbcsr_cell_images_to_merge(nnodes_merged))
1391 ALLOCATE (dbcsr_cell_image_to_merge(nnodes_local))
1392 n_dbcsr_cell_images_to_merge(:) = 0
1395 repl_nl(:, offset_inode + 1) = nl_local(:, inodes_orig(1))
1396 n_dbcsr_cell_images_to_merge(1) = 1
1397 dbcsr_cell_image_to_merge(1) = nl_local(neighbor_list_dbcsr_image_index, inodes_orig(1))
1398 kcell_closest = k_cells(inodes_orig(1))
1399 DO inode = 2, nnodes_local
1400 IF (node_merged_indices(inode) > node_merged_indices(inode - 1))
THEN
1401 nnodes_merged = nnodes_merged + 1
1402 repl_nl(:, offset_inode + nnodes_merged) = nl_local(:, inodes_orig(inode))
1404 kcell_closest = k_cells(inodes_orig(inode))
1406 IF (abs(k_cells(inodes_orig(inode))) < abs(kcell_closest) .OR. &
1407 (abs(k_cells(inodes_orig(inode))) == abs(kcell_closest) .AND. kcell_closest < 0))
THEN
1408 repl_nl(:, offset_inode + nnodes_merged) = nl_local(:, inodes_orig(inode))
1409 kcell_closest = k_cells(inodes_orig(inode))
1412 dbcsr_cell_image_to_merge(inode) = nl_local(neighbor_list_dbcsr_image_index, inodes_orig(inode))
1413 n_dbcsr_cell_images_to_merge(nnodes_merged) = n_dbcsr_cell_images_to_merge(nnodes_merged) + 1
1416 IF (debug_this_module)
THEN
1417 cpassert(sum(n_dbcsr_cell_images_to_merge) == nnodes_local)
1420 DEALLOCATE (inodes_orig)
1421 DEALLOCATE (node_merged_indices, k_cells)
1422 DEALLOCATE (nl_local)
1425 IF (para_env%num_pe > 1)
THEN
1427 DO iproc = 0, para_env%num_pe - 1
1428 IF (nnodes_per_proc(iproc) > 0)
THEN
1429 CALL para_env%bcast(repl_nl(:, offset_inode + 1:offset_inode + nnodes_per_proc(iproc)), iproc)
1430 offset_inode = offset_inode + nnodes_per_proc(iproc)
1435 CALL timestop(handle)
1436 END SUBROUTINE replicate_neighbour_list
1451 SUBROUTINE count_remote_dbcsr_elements(nelements_per_proc, nnodes_per_proc, nl_repl, matrix_dbcsr_kp, &
1452 symmetric, para_env, gather_root)
1453 INTEGER(kind=int_8),
DIMENSION(0:, :),
INTENT(out) :: nelements_per_proc
1454 INTEGER,
DIMENSION(0:),
INTENT(in) :: nnodes_per_proc
1455 INTEGER,
DIMENSION(:, :),
INTENT(in) :: nl_repl
1456 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(in) :: matrix_dbcsr_kp
1457 LOGICAL,
INTENT(in) :: symmetric
1459 INTEGER,
INTENT(in) :: gather_root
1461 CHARACTER(len=*),
PARAMETER :: routinen =
'count_remote_dbcsr_elements'
1463 INTEGER :: first_row_minus_one, handle, icol_blk, image, image_transp, inode, inode_proc, &
1464 iproc, iproc_orb, irow_blk, irow_local, mepos, ncols_blk, ncols_local, nnodes_proc, &
1465 nprocs, nrows_blk, nrows_local, offset_inode
1466 INTEGER,
DIMENSION(:),
POINTER :: col_blk_offset, col_blk_size, &
1467 row_blk_offset, row_blk_size
1468 LOGICAL :: do_distribute
1470 CALL timeset(routinen, handle)
1471 nelements_per_proc(:, :) = 0
1472 mepos = para_env%mepos
1473 nprocs = para_env%num_pe
1474 do_distribute = gather_root < 0
1475 IF (debug_this_module)
THEN
1476 cpassert(
SIZE(nnodes_per_proc) == nprocs)
1480 nblkrows_total=nrows_blk, nblkcols_total=ncols_blk, &
1481 row_blk_size=row_blk_size, col_blk_size=col_blk_size, &
1482 row_blk_offset=row_blk_offset, col_blk_offset=col_blk_offset)
1485 iproc_orb = gather_root
1486 DO iproc = lbound(nnodes_per_proc, 1), ubound(nnodes_per_proc, 1)
1487 nnodes_proc = nnodes_per_proc(iproc)
1488 DO inode_proc = 1, nnodes_proc
1489 inode = inode_proc + offset_inode
1491 irow_blk = nl_repl(neighbor_list_iatom_index, inode)
1492 icol_blk = nl_repl(neighbor_list_jatom_index, inode)
1493 cpassert(irow_blk <= icol_blk .OR. (.NOT. symmetric))
1494 image = nl_repl(neighbor_list_siesta_image_index, inode)
1495 image_transp = nl_repl(neighbor_list_siesta_transp_image_index, inode)
1498 nrows_local = row_blk_size(irow_blk)
1499 first_row_minus_one = row_blk_offset(irow_blk) - 1
1500 ncols_local = col_blk_size(icol_blk)
1501 DO irow_local = 1, nrows_local
1502 IF (do_distribute)
THEN
1503#if defined(__SMEAGOL)
1504 CALL whichnodeorb(irow_local + first_row_minus_one, nprocs, iproc_orb)
1506 CALL cp_abort(__location__, &
1507 "CP2K was compiled with no SMEAGOL support.")
1510 IF (iproc_orb == mepos)
THEN
1511 nelements_per_proc(iproc, nelements_dbcsr_recv) = nelements_per_proc(iproc, nelements_dbcsr_recv) + &
1515 IF (iproc == mepos)
THEN
1516 nelements_per_proc(iproc_orb, nelements_dbcsr_send) = nelements_per_proc(iproc_orb, nelements_dbcsr_send) + &
1523 IF (image_transp > 0)
THEN
1524 nrows_local = col_blk_size(icol_blk)
1525 first_row_minus_one = col_blk_offset(icol_blk) - 1
1526 ncols_local = row_blk_size(irow_blk)
1527 DO irow_local = 1, nrows_local
1528 IF (do_distribute)
THEN
1529#if defined(__SMEAGOL)
1530 CALL whichnodeorb(irow_local + first_row_minus_one, nprocs, iproc_orb)
1532 CALL cp_abort(__location__, &
1533 "CP2K was compiled with no SMEAGOL support.")
1536 IF (iproc_orb == mepos)
THEN
1537 nelements_per_proc(iproc, nelements_dbcsr_recv) = nelements_per_proc(iproc, nelements_dbcsr_recv) + &
1541 IF (iproc == mepos)
THEN
1542 nelements_per_proc(iproc_orb, nelements_dbcsr_send) = nelements_per_proc(iproc_orb, nelements_dbcsr_send) + &
1548 offset_inode = offset_inode + nnodes_proc
1550 CALL timestop(handle)
1551 END SUBROUTINE count_remote_dbcsr_elements
1568 SUBROUTINE get_nonzero_element_indices(n_nonzero_cols, row_offset, col_index, packed_index, &
1569 nl_repl, matrix_dbcsr_kp, symmetric, para_env, gather_root)
1570 INTEGER,
DIMENSION(:),
INTENT(out) :: n_nonzero_cols, row_offset, col_index, &
1572 INTEGER,
DIMENSION(:, :),
INTENT(in) :: nl_repl
1573 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(in) :: matrix_dbcsr_kp
1574 LOGICAL,
INTENT(in) :: symmetric
1576 INTEGER,
INTENT(in) :: gather_root
1578 CHARACTER(len=*),
PARAMETER :: routinen =
'get_nonzero_element_indices'
1580 INTEGER :: first_col_minus_one, first_row_minus_one, handle, icol_blk, icol_local, &
1581 icol_offset, image, image_transp, inode, irow_blk, irow_local, irow_proc, mepos, &
1582 ncols_blk, ncols_local, ncols_total, nnodes, nprocs, nrows_blk, nrows_local, nrows_total
1583 INTEGER,
DIMENSION(:),
POINTER :: col_blk_offset, col_blk_size, &
1584 row_blk_offset, row_blk_size
1585 LOGICAL :: do_distribute, is_root_rank
1587 CALL timeset(routinen, handle)
1588 n_nonzero_cols(:) = 0
1589 mepos = para_env%mepos
1590 nprocs = para_env%num_pe
1591 do_distribute = gather_root < 0
1592 is_root_rank = gather_root == mepos
1595 nblkrows_total=nrows_blk, nblkcols_total=ncols_blk, &
1596 nfullrows_total=nrows_total, nfullcols_total=ncols_total, &
1597 row_blk_size=row_blk_size, col_blk_size=col_blk_size, &
1598 row_blk_offset=row_blk_offset, col_blk_offset=col_blk_offset)
1600 nnodes =
SIZE(nl_repl, 2)
1601 DO inode = 1, nnodes
1602 irow_blk = nl_repl(neighbor_list_iatom_index, inode)
1603 icol_blk = nl_repl(neighbor_list_jatom_index, inode)
1604 cpassert(irow_blk <= icol_blk .OR. (.NOT. symmetric))
1605 image = nl_repl(neighbor_list_siesta_image_index, inode)
1606 image_transp = nl_repl(neighbor_list_siesta_transp_image_index, inode)
1609 nrows_local = row_blk_size(irow_blk)
1610 first_row_minus_one = row_blk_offset(irow_blk) - 1
1611 ncols_local = col_blk_size(icol_blk)
1612 DO irow_local = 1, nrows_local
1613 IF (do_distribute)
THEN
1614#if defined(__SMEAGOL)
1615 CALL globaltolocalorb(irow_local + first_row_minus_one, mepos, nprocs, irow_proc)
1617 CALL cp_abort(__location__, &
1618 "CP2K was compiled with no SMEAGOL support.")
1621 IF (is_root_rank)
THEN
1622 irow_proc = irow_local + first_row_minus_one
1627 IF (irow_proc > 0)
THEN
1628 n_nonzero_cols(irow_proc) = n_nonzero_cols(irow_proc) + ncols_local
1634 IF (image_transp > 0)
THEN
1635 nrows_local = col_blk_size(icol_blk)
1636 first_row_minus_one = col_blk_offset(icol_blk) - 1
1637 ncols_local = row_blk_size(irow_blk)
1638 DO irow_local = 1, nrows_local
1639 IF (do_distribute)
THEN
1640#if defined(__SMEAGOL)
1641 CALL globaltolocalorb(irow_local + first_row_minus_one, mepos, nprocs, irow_proc)
1643 CALL cp_abort(__location__, &
1644 "CP2K was compiled with no SMEAGOL support.")
1647 IF (is_root_rank)
THEN
1648 irow_proc = irow_local + first_row_minus_one
1653 IF (irow_proc > 0)
THEN
1654 n_nonzero_cols(irow_proc) = n_nonzero_cols(irow_proc) + ncols_local
1661 DO irow_local = 1,
SIZE(n_nonzero_cols) - 1
1662 row_offset(irow_local + 1) = row_offset(irow_local) + n_nonzero_cols(irow_local)
1665 n_nonzero_cols(:) = 0
1667 DO inode = 1, nnodes
1668 irow_blk = nl_repl(neighbor_list_iatom_index, inode)
1669 icol_blk = nl_repl(neighbor_list_jatom_index, inode)
1670 cpassert(irow_blk <= icol_blk .OR. (.NOT. symmetric))
1671 image = nl_repl(neighbor_list_siesta_image_index, inode)
1672 image_transp = nl_repl(neighbor_list_siesta_transp_image_index, inode)
1675 nrows_local = row_blk_size(irow_blk)
1676 first_row_minus_one = row_blk_offset(irow_blk) - 1
1677 ncols_local = col_blk_size(icol_blk)
1678 first_col_minus_one = col_blk_offset(icol_blk) + (image - 1)*ncols_total - 1
1679 DO irow_local = 1, nrows_local
1680 IF (do_distribute)
THEN
1681#if defined(__SMEAGOL)
1682 CALL globaltolocalorb(irow_local + first_row_minus_one, mepos, nprocs, irow_proc)
1684 CALL cp_abort(__location__, &
1685 "CP2K was compiled with no SMEAGOL support.")
1688 IF (is_root_rank)
THEN
1689 irow_proc = irow_local + first_row_minus_one
1694 IF (irow_proc > 0)
THEN
1695 icol_offset = row_offset(irow_proc) + n_nonzero_cols(irow_proc)
1696 DO icol_local = 1, ncols_local
1697 col_index(icol_offset + icol_local) = first_col_minus_one + icol_local
1699 n_nonzero_cols(irow_proc) = n_nonzero_cols(irow_proc) + ncols_local
1705 IF (image_transp > 0)
THEN
1706 nrows_local = col_blk_size(icol_blk)
1707 first_row_minus_one = col_blk_offset(icol_blk) - 1
1708 ncols_local = row_blk_size(irow_blk)
1709 first_col_minus_one = row_blk_offset(irow_blk) + (image_transp - 1)*nrows_total - 1
1710 DO irow_local = 1, nrows_local
1711 IF (do_distribute)
THEN
1712#if defined(__SMEAGOL)
1713 CALL globaltolocalorb(irow_local + first_row_minus_one, mepos, nprocs, irow_proc)
1715 CALL cp_abort(__location__, &
1716 "CP2K was compiled with no SMEAGOL support.")
1719 IF (is_root_rank)
THEN
1720 irow_proc = irow_local + first_row_minus_one
1725 IF (irow_proc > 0)
THEN
1726 icol_offset = row_offset(irow_proc) + n_nonzero_cols(irow_proc)
1727 DO icol_local = 1, ncols_local
1728 col_index(icol_offset + icol_local) = first_col_minus_one + icol_local
1730 n_nonzero_cols(irow_proc) = n_nonzero_cols(irow_proc) + ncols_local
1736 IF (
SIZE(n_nonzero_cols) > 0)
THEN
1737 DO irow_local = 1,
SIZE(n_nonzero_cols)
1738 CALL sort(col_index(row_offset(irow_local) + 1:row_offset(irow_local) + n_nonzero_cols(irow_local)), &
1739 n_nonzero_cols(irow_local), &
1740 packed_index(row_offset(irow_local) + 1:row_offset(irow_local) + n_nonzero_cols(irow_local)))
1744 CALL timestop(handle)
1745 END SUBROUTINE get_nonzero_element_indices
1758 SUBROUTINE get_negf_cell_ijk(cell_ijk, r_ij, r_i, r_j, cell)
1759 INTEGER,
DIMENSION(3),
INTENT(out) :: cell_ijk
1760 REAL(kind=
dp),
DIMENSION(3),
INTENT(in) :: r_ij, r_i, r_j
1761 TYPE(
cell_type),
INTENT(in),
POINTER :: cell
1763 REAL(kind=
dp),
DIMENSION(3) :: coords_scaled, r
1765 r(:) = r_ij(:) + r_i(:) - r_j(:)
1767 cell_ijk(:) = nint(coords_scaled(:))
1768 END SUBROUTINE get_negf_cell_ijk
1779 ELEMENTAL FUNCTION index_in_canonical_enumeration(inum)
RESULT(ind)
1780 INTEGER,
INTENT(in) :: inum
1783 INTEGER :: inum_abs, is_non_positive
1785 inum_abs = abs(inum)
1787 is_non_positive = merge(1, 0, inum <= 0)
1792 ind = 2*inum_abs + is_non_positive
1793 END FUNCTION index_in_canonical_enumeration
1804 ELEMENTAL FUNCTION number_from_canonical_enumeration(ind)
RESULT(inum)
1805 INTEGER,
INTENT(in) :: ind
1812 inum = sign(ind/2, -mod(ind, 2))
1813 END FUNCTION number_from_canonical_enumeration
1823 PURE SUBROUTINE pbc_0_1(r_pbc, r, cell)
1824 REAL(kind=
dp),
DIMENSION(3),
INTENT(out) :: r_pbc
1825 REAL(kind=
dp),
DIMENSION(3),
INTENT(in) :: r
1826 TYPE(
cell_type),
INTENT(in),
POINTER :: cell
1828 REAL(kind=
dp),
DIMENSION(3) :: s
1830 IF (cell%orthorhombic)
THEN
1831 r_pbc(1) = r(1) - cell%hmat(1, 1)*cell%perd(1)*real(floor(cell%h_inv(1, 1)*r(1)),
dp)
1832 r_pbc(2) = r(2) - cell%hmat(2, 2)*cell%perd(2)*real(floor(cell%h_inv(2, 2)*r(2)),
dp)
1833 r_pbc(3) = r(3) - cell%hmat(3, 3)*cell%perd(3)*real(floor(cell%h_inv(3, 3)*r(3)),
dp)
1835 s(1) = cell%h_inv(1, 1)*r(1) + cell%h_inv(1, 2)*r(2) + cell%h_inv(1, 3)*r(3)
1836 s(2) = cell%h_inv(2, 1)*r(1) + cell%h_inv(2, 2)*r(2) + cell%h_inv(2, 3)*r(3)
1837 s(3) = cell%h_inv(3, 1)*r(1) + cell%h_inv(3, 2)*r(2) + cell%h_inv(3, 3)*r(3)
1838 s(1) = s(1) - cell%perd(1)*real(floor(s(1)),
dp)
1839 s(2) = s(2) - cell%perd(2)*real(floor(s(2)),
dp)
1840 s(3) = s(3) - cell%perd(3)*real(floor(s(3)),
dp)
1841 r_pbc(1) = cell%hmat(1, 1)*s(1) + cell%hmat(1, 2)*s(2) + cell%hmat(1, 3)*s(3)
1842 r_pbc(2) = cell%hmat(2, 1)*s(1) + cell%hmat(2, 2)*s(2) + cell%hmat(2, 3)*s(3)
1843 r_pbc(3) = cell%hmat(3, 1)*s(1) + cell%hmat(3, 2)*s(2) + cell%hmat(3, 3)*s(3)
1845 END SUBROUTINE pbc_0_1
1856 PURE FUNCTION get_number_of_mpi_sendrecv_requests(mepos, nelements_per_proc, max_nelements_per_packet)
RESULT(nrequests)
1857 INTEGER,
INTENT(in) :: mepos
1858 INTEGER(kind=int_8),
DIMENSION(0:),
INTENT(in) :: nelements_per_proc
1859 INTEGER(kind=int_8),
INTENT(in) :: max_nelements_per_packet
1860 INTEGER :: nrequests
1865 DO iproc = lbound(nelements_per_proc, 1), ubound(nelements_per_proc, 1)
1867 IF (iproc /= mepos)
THEN
1868 nrequests = nrequests + int(nelements_per_proc(iproc)/max_nelements_per_packet)
1869 IF (mod(nelements_per_proc(iproc), max_nelements_per_packet) > 0) &
1870 nrequests = nrequests + 1
1873 END FUNCTION get_number_of_mpi_sendrecv_requests
1885 SUBROUTINE assign_nonzero_elements_to_requests(element_offset, nelements_per_request, peer_rank, tag, &
1886 mepos, nelements_per_proc, max_nelements_per_packet)
1887 INTEGER(kind=int_8),
DIMENSION(:),
INTENT(out) :: element_offset, nelements_per_request
1888 INTEGER,
DIMENSION(:),
INTENT(out) :: peer_rank, tag
1889 INTEGER,
INTENT(in) :: mepos
1890 INTEGER(kind=int_8),
DIMENSION(0:),
INTENT(in) :: nelements_per_proc
1891 INTEGER(kind=int_8),
INTENT(in) :: max_nelements_per_packet
1893 INTEGER :: iproc, irequest, nrequests, &
1895 INTEGER(kind=int_8) :: element_offset_tmp, nelements
1898 element_offset_tmp = 0
1899 DO iproc = lbound(nelements_per_proc, 1), ubound(nelements_per_proc, 1)
1900 IF (iproc /= mepos)
THEN
1901 nrequests = int(nelements_per_proc(iproc)/max_nelements_per_packet)
1902 IF (mod(nelements_per_proc(iproc), max_nelements_per_packet) > 0) nrequests = nrequests + 1
1903 cpassert(nrequests <= max_mpi_rank + 1)
1904 IF (nrequests > 0)
THEN
1905 nelements = nelements_per_proc(iproc)/nrequests
1906 IF (nelements_per_proc(iproc) - nelements*nrequests > 0) nelements = nelements + 1
1907 cpassert(nelements <= max_nelements_per_packet)
1909 DO irequest = 1, nrequests
1910 element_offset(request_offset + irequest) = (irequest - 1)*nelements + element_offset_tmp
1911 IF (irequest < nrequests)
THEN
1912 nelements_per_request(request_offset + irequest) = nelements
1914 nelements_per_request(request_offset + irequest) = nelements_per_proc(iproc) - nelements*(nrequests - 1)
1916 peer_rank(request_offset + irequest) = iproc
1917 tag(request_offset + irequest) = irequest - 1
1920 request_offset = request_offset + nrequests
1922 element_offset_tmp = element_offset_tmp + nelements_per_proc(iproc)
1925 IF (debug_this_module)
THEN
1926 cpassert(
SIZE(element_offset) == request_offset)
1927 cpassert(
SIZE(nelements_per_request) == request_offset)
1928 cpassert(
SIZE(peer_rank) == request_offset)
1929 cpassert(
SIZE(tag) == request_offset)
1931 END SUBROUTINE assign_nonzero_elements_to_requests
Handles all functions related to the CELL.
subroutine, public scaled_to_real(r, s, cell)
Transform scaled cell coordinates real coordinates. r=h*s.
subroutine, public real_to_scaled(s, r, cell)
Transform real to scaled cell coordinates. s=h_inv*r.
subroutine, public dbcsr_get_block_p(matrix, row, col, block, found, row_size, col_size)
...
subroutine, public dbcsr_get_info(matrix, nblkrows_total, nblkcols_total, nfullrows_total, nfullcols_total, nblkrows_local, nblkcols_local, nfullrows_local, nfullcols_local, my_prow, my_pcol, local_rows, local_cols, proc_row_dist, proc_col_dist, row_blk_size, col_blk_size, row_blk_offset, col_blk_offset, distribution, name, matrix_type, group)
...
subroutine, public dbcsr_set(matrix, alpha)
...
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp_size
integer, parameter, public dp
Interface to the message passing library MPI.
Helper routines to manipulate with matrices.
pure integer function, public get_index_by_cell(cell, cell_to_index)
Helper routine to obtain index of a DBCSR matrix image by its unit cell replica. Can be used with any...
Define the data structure for the particle information.
Define the neighbor list data types and the corresponding functionality.
subroutine, public neighbor_list_iterator_create(iterator_set, nl, search, nthread)
Neighbor list iterator functions.
subroutine, public neighbor_list_iterator_release(iterator_set)
...
subroutine, public get_neighbor_list_set_p(neighbor_list_sets, nlist, symmetric)
Return the components of the first neighbor list set.
integer function, public neighbor_list_iterate(iterator_set, mepos)
...
subroutine, public get_iterator_info(iterator_set, mepos, ikind, jkind, nkind, ilist, nlist, inode, nnode, iatom, jatom, r, cell)
...
types that represent a quickstep subsys
subroutine, public qs_subsys_get(subsys, atomic_kinds, atomic_kind_set, particles, particle_set, local_particles, molecules, molecule_set, molecule_kinds, molecule_kind_set, local_molecules, para_env, colvar_p, shell_particles, core_particles, gci, multipoles, natom, nparticle, ncore, nshell, nkind, atprop, virial, results, cell, cell_ref, use_ref_cell, energy, force, qs_kind_set, cp_subsys, nelectron_total, nelectron_spin)
...
Routines to convert sparse matrices between DBCSR (distributed-blocks compressed sparse rows) and SIE...
subroutine, public siesta_struct_release(siesta_struct)
Release a SIESTA matrix structure.
subroutine, public siesta_struct_create(siesta_struct, matrix_dbcsr_kp, subsys, cell_to_index, sab_nl, para_env, max_ij_cell_image, do_merge, gather_root)
Map non-zero matrix blocks between sparse matrices in DBCSR and SIESTA formats.
subroutine, public convert_distributed_siesta_to_dbcsr(matrix_dbcsr_kp, matrix_siesta, siesta_struct, para_env)
Convert matrix from DBCSR to sparse SIESTA format.
subroutine, public convert_dbcsr_to_distributed_siesta(matrix_siesta, matrix_dbcsr_kp, siesta_struct, para_env)
Convert matrix from DBCSR to sparse SIESTA format.
All kind of helpful little routines.
Type defining parameters related to the simulation cell.
stores all the informations relevant to an mpi environment
Sparsity pattern of replicated SIESTA compressed sparse column (CSC) matrices.