31 USE dbcsr_api,
ONLY: dbcsr_p_type,&
33 dbcsr_type_no_symmetry
45 USE mp2_types,
ONLY: integ_mat_buffer_type
47 #include "./base/base_uses.f90"
54 INTEGER,
DIMENSION(:, :),
ALLOCATABLE :: map
57 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'rpa_communication'
80 homo, virtual, mo_coeff_o, ngroup, my_group_L_start, my_group_L_end, &
82 TYPE(cp_fm_type),
INTENT(INOUT) :: fm_mat_gamma_3
83 TYPE(dbcsr_p_type),
DIMENSION(:),
POINTER :: dbcsr_gamma_3
84 TYPE(mp_para_env_type),
INTENT(IN) :: para_env_rpa
85 TYPE(mp_para_env_type),
INTENT(IN),
POINTER :: para_env_sub
86 INTEGER,
INTENT(IN) :: homo, virtual
87 TYPE(dbcsr_type),
POINTER :: mo_coeff_o
88 INTEGER,
INTENT(IN) :: ngroup, my_group_l_start, &
89 my_group_l_end, my_group_l_size
91 CHARACTER(LEN=*),
PARAMETER :: routinen =
'gamma_fm_to_dbcsr'
93 INTEGER :: dimen_ia, dummy_proc, handle, i_global, i_local, iaia, iib, iii, itmp(2), &
94 j_global, j_local, jjb, jjj, kkb, my_ia_end, my_ia_size, my_ia_start, mypcol, myprow, &
95 ncol_block, ncol_local, npcol, nprow, nrow_block, nrow_local, number_of_rec, &
96 number_of_send, proc_receive, proc_send, proc_shift, rec_counter, rec_iaia_end, &
97 rec_iaia_size, rec_iaia_start, rec_pcol, rec_prow, ref_send_pcol, ref_send_prow, &
98 send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
99 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: iii_vet, map_rec_size, map_send_size
100 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: grid_2_mepos, grid_ref_2_send_pos, &
101 group_grid_2_mepos, indices_map_my, &
102 mepos_2_grid, mepos_2_grid_group
103 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
104 REAL(kind=
dp) :: part_ia
105 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: gamma_2d
106 TYPE(cp_blacs_env_type),
POINTER :: blacs_env
107 TYPE(cp_fm_struct_type),
POINTER :: fm_struct
108 TYPE(cp_fm_type) :: fm_ia
109 TYPE(group_dist_d1_type) :: gd_ia
110 TYPE(index_map),
ALLOCATABLE,
DIMENSION(:) :: indices_rec
111 TYPE(integ_mat_buffer_type),
ALLOCATABLE, &
112 DIMENSION(:) :: buffer_rec, buffer_send
113 TYPE(mp_request_type),
ALLOCATABLE,
DIMENSION(:) :: req_send
115 CALL timeset(routinen, handle)
117 dimen_ia = virtual*homo
120 CALL create_group_dist(gd_ia, para_env_sub%num_pe, dimen_ia)
121 CALL get_group_dist(gd_ia, para_env_sub%mepos, my_ia_start, my_ia_end, my_ia_size)
126 group_grid_2_mepos, mepos_2_grid_group)
129 CALL fm2array(gamma_2d, my_ia_size, my_ia_start, my_ia_end, &
130 my_group_l_size, my_group_l_start, my_group_l_end, &
131 group_grid_2_mepos, mepos_2_grid_group, &
132 para_env_sub%num_pe, ngroup, &
142 ncol_global=virtual, para_env=para_env_sub)
152 nrow_local=nrow_local, &
153 ncol_local=ncol_local, &
154 row_indices=row_indices, &
155 col_indices=col_indices, &
156 nrow_block=nrow_block, &
157 ncol_block=ncol_block)
158 myprow = fm_ia%matrix_struct%context%mepos(1)
159 mypcol = fm_ia%matrix_struct%context%mepos(2)
160 nprow = fm_ia%matrix_struct%context%num_pe(1)
161 npcol = fm_ia%matrix_struct%context%num_pe(2)
164 ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
166 ALLOCATE (mepos_2_grid(2, 0:para_env_sub%num_pe - 1))
168 grid_2_mepos(myprow, mypcol) = para_env_sub%mepos
170 CALL para_env_sub%sum(grid_2_mepos)
171 CALL para_env_sub%allgather([myprow, mypcol], mepos_2_grid)
174 ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1))
177 DO iaia = my_ia_start, my_ia_end
178 i_global = (iaia - 1)/virtual + 1
179 j_global = mod(iaia - 1, virtual) + 1
180 send_prow =
cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
181 fm_ia%matrix_struct%first_p_pos(1), nprow)
182 send_pcol =
cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
183 fm_ia%matrix_struct%first_p_pos(2), npcol)
184 proc_send = grid_2_mepos(send_prow, send_pcol)
185 map_send_size(proc_send) = map_send_size(proc_send) + 1
189 ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1))
191 part_ia = real(dimen_ia, kind=
dp)/real(para_env_sub%num_pe, kind=
dp)
193 DO iib = 1, nrow_local
194 i_global = row_indices(iib)
195 DO jjb = 1, ncol_local
196 j_global = col_indices(jjb)
197 iaia = (i_global - 1)*virtual + j_global
198 proc_receive = int(real(iaia - 1, kind=
dp)/part_ia)
199 proc_receive = max(0, proc_receive)
200 proc_receive = min(proc_receive, para_env_sub%num_pe - 1)
202 itmp =
get_limit(dimen_ia, para_env_sub%num_pe, proc_receive)
203 IF (iaia >= itmp(1) .AND. iaia <= itmp(2))
EXIT
204 IF (iaia < itmp(1)) proc_receive = proc_receive - 1
205 IF (iaia > itmp(2)) proc_receive = proc_receive + 1
207 map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
213 DO proc_shift = 1, para_env_sub%num_pe - 1
214 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
215 IF (map_send_size(proc_send) > 0)
THEN
216 number_of_send = number_of_send + 1
220 ALLOCATE (buffer_send(number_of_send))
222 ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
223 grid_ref_2_send_pos = 0
226 DO proc_shift = 1, para_env_sub%num_pe - 1
227 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
228 size_send_buffer = map_send_size(proc_send)
229 IF (map_send_size(proc_send) > 0)
THEN
230 send_counter = send_counter + 1
232 ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
233 buffer_send(send_counter)%proc = proc_send
236 ref_send_prow = mepos_2_grid(1, proc_send)
237 ref_send_pcol = mepos_2_grid(2, proc_send)
239 grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
245 DO proc_shift = 1, para_env_sub%num_pe - 1
246 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
247 IF (map_rec_size(proc_receive) > 0)
THEN
248 number_of_rec = number_of_rec + 1
254 ALLOCATE (buffer_rec(number_of_rec))
255 ALLOCATE (indices_rec(number_of_rec))
258 DO proc_shift = 1, para_env_sub%num_pe - 1
259 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
260 size_rec_buffer = map_rec_size(proc_receive)
261 IF (map_rec_size(proc_receive) > 0)
THEN
262 rec_counter = rec_counter + 1
264 ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
265 buffer_rec(rec_counter)%proc = proc_receive
267 ALLOCATE (indices_rec(rec_counter)%map(2, size_rec_buffer))
268 indices_rec(rec_counter)%map = 0
269 CALL get_group_dist(gd_ia, proc_receive, rec_iaia_start, rec_iaia_end, rec_iaia_size)
271 DO iaia = rec_iaia_start, rec_iaia_end
272 i_global = (iaia - 1)/virtual + 1
273 j_global = mod(iaia - 1, virtual) + 1
275 fm_ia%matrix_struct%first_p_pos(1), nprow)
277 fm_ia%matrix_struct%first_p_pos(2), npcol)
278 IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) cycle
281 fm_ia%matrix_struct%first_p_pos(1), nprow)
283 fm_ia%matrix_struct%first_p_pos(2), npcol)
284 indices_rec(rec_counter)%map(1, iii) = i_local
285 indices_rec(rec_counter)%map(2, iii) = j_local
291 IF (map_rec_size(para_env_sub%mepos) > 0)
THEN
292 size_rec_buffer = map_rec_size(para_env_sub%mepos)
293 ALLOCATE (indices_map_my(2, size_rec_buffer))
296 DO iaia = my_ia_start, my_ia_end
297 i_global = (iaia - 1)/virtual + 1
298 j_global = mod(iaia - 1, virtual) + 1
300 fm_ia%matrix_struct%first_p_pos(1), nprow)
302 fm_ia%matrix_struct%first_p_pos(2), npcol)
303 IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) cycle
306 fm_ia%matrix_struct%first_p_pos(1), nprow)
308 fm_ia%matrix_struct%first_p_pos(2), npcol)
309 indices_map_my(1, iii) = i_local
310 indices_map_my(2, iii) = j_local
315 NULLIFY (dbcsr_gamma_3)
321 ALLOCATE (iii_vet(number_of_send))
323 ALLOCATE (req_send(number_of_send))
328 DO kkb = 1, my_group_l_size
332 DO proc_shift = 1, para_env_sub%num_pe - 1
333 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
334 IF (map_rec_size(proc_receive) > 0)
THEN
335 rec_counter = rec_counter + 1
336 buffer_rec(rec_counter)%msg = 0.0_dp
337 CALL para_env_sub%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
338 buffer_rec(rec_counter)%msg_req)
342 DO send_counter = 1, number_of_send
343 buffer_send(send_counter)%msg = 0.0_dp
347 DO iaia = my_ia_start, my_ia_end
348 i_global = (iaia - 1)/virtual + 1
349 j_global = mod(iaia - 1, virtual) + 1
350 send_prow =
cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
351 fm_ia%matrix_struct%first_p_pos(1), nprow)
352 send_pcol =
cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
353 fm_ia%matrix_struct%first_p_pos(2), npcol)
354 proc_send = grid_2_mepos(send_prow, send_pcol)
356 IF (grid_2_mepos(send_prow, send_pcol) == para_env_sub%mepos)
THEN
359 i_local = indices_map_my(1, jjj)
360 j_local = indices_map_my(2, jjj)
361 fm_ia%local_data(i_local, j_local) = &
362 gamma_2d(iaia - my_ia_start + 1, kkb)
365 send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
366 iii_vet(send_counter) = iii_vet(send_counter) + 1
367 iii = iii_vet(send_counter)
368 buffer_send(send_counter)%msg(iii) = &
369 gamma_2d(iaia - my_ia_start + 1, kkb)
374 DO proc_shift = 1, para_env_sub%num_pe - 1
375 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
376 IF (map_send_size(proc_send) > 0)
THEN
377 send_counter = send_counter + 1
378 CALL para_env_sub%isend(buffer_send(send_counter)%msg, proc_send, &
379 buffer_send(send_counter)%msg_req)
380 req_send(send_counter) = buffer_send(send_counter)%msg_req
386 DO proc_shift = 1, para_env_sub%num_pe - 1
387 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
388 size_rec_buffer = map_rec_size(proc_receive)
389 IF (map_rec_size(proc_receive) > 0)
THEN
390 rec_counter = rec_counter + 1
392 CALL buffer_rec(rec_counter)%msg_req%wait()
393 DO iii = 1, size_rec_buffer
394 i_local = indices_rec(rec_counter)%map(1, iii)
395 j_local = indices_rec(rec_counter)%map(2, iii)
396 fm_ia%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
402 CALL mp_waitall(req_send(:))
405 ALLOCATE (dbcsr_gamma_3(kkb)%matrix)
407 template=mo_coeff_o, &
408 m=homo, n=virtual, sym=dbcsr_type_no_symmetry)
409 CALL copy_fm_to_dbcsr(fm_ia, dbcsr_gamma_3(kkb)%matrix, keep_sparsity=.false.)
415 DEALLOCATE (gamma_2d)
417 DEALLOCATE (req_send)
418 IF (map_rec_size(para_env_sub%mepos) > 0)
THEN
419 DEALLOCATE (indices_map_my)
421 DO rec_counter = 1, number_of_rec
422 DEALLOCATE (indices_rec(rec_counter)%map)
423 DEALLOCATE (buffer_rec(rec_counter)%msg)
425 DEALLOCATE (indices_rec)
426 DEALLOCATE (buffer_rec)
427 DO send_counter = 1, number_of_send
428 DEALLOCATE (buffer_send(send_counter)%msg)
430 DEALLOCATE (buffer_send)
431 DEALLOCATE (map_send_size)
432 DEALLOCATE (map_rec_size)
433 DEALLOCATE (grid_2_mepos)
434 DEALLOCATE (mepos_2_grid)
435 CALL release_group_dist(gd_ia)
438 CALL cp_fm_release(fm_ia)
440 CALL timestop(handle)
455 SUBROUTINE communicate_buffer(para_env, num_entries_rec, num_entries_send, buffer_rec, buffer_send, &
456 req_array, do_indx, do_msg)
458 TYPE(mp_para_env_type),
INTENT(IN) :: para_env
459 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(IN) :: num_entries_rec, num_entries_send
460 TYPE(integ_mat_buffer_type),
ALLOCATABLE, &
461 DIMENSION(:),
INTENT(INOUT) :: buffer_rec, buffer_send
462 TYPE(mp_request_type),
DIMENSION(:, :),
POINTER :: req_array
463 LOGICAL,
INTENT(IN),
OPTIONAL :: do_indx, do_msg
465 CHARACTER(LEN=*),
PARAMETER :: routinen =
'communicate_buffer'
467 INTEGER :: handle, imepos, rec_counter, send_counter
468 LOGICAL :: my_do_indx, my_do_msg
470 CALL timeset(routinen, handle)
473 IF (
PRESENT(do_indx)) my_do_indx = do_indx
475 IF (
PRESENT(do_msg)) my_do_msg = do_msg
477 IF (para_env%num_pe > 1)
THEN
482 DO imepos = 0, para_env%num_pe - 1
483 IF (num_entries_rec(imepos) > 0)
THEN
484 rec_counter = rec_counter + 1
486 CALL para_env%irecv(buffer_rec(imepos)%indx, imepos, req_array(rec_counter, 3), tag=4)
489 CALL para_env%irecv(buffer_rec(imepos)%msg, imepos, req_array(rec_counter, 4), tag=7)
494 DO imepos = 0, para_env%num_pe - 1
495 IF (num_entries_send(imepos) > 0)
THEN
496 send_counter = send_counter + 1
498 CALL para_env%isend(buffer_send(imepos)%indx, imepos, req_array(send_counter, 1), tag=4)
501 CALL para_env%isend(buffer_send(imepos)%msg, imepos, req_array(send_counter, 2), tag=7)
507 CALL mp_waitall(req_array(1:send_counter, 1))
508 CALL mp_waitall(req_array(1:rec_counter, 3))
512 CALL mp_waitall(req_array(1:send_counter, 2))
513 CALL mp_waitall(req_array(1:rec_counter, 4))
518 buffer_rec(0)%indx(:, :) = buffer_send(0)%indx
519 buffer_rec(0)%msg(:) = buffer_send(0)%msg
523 CALL timestop(handle)
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
methods related to the blacs parallel environment
subroutine, public cp_blacs_env_release(blacs_env)
releases the given blacs_env
subroutine, public cp_blacs_env_create(blacs_env, para_env, blacs_grid_layout, blacs_repeatable, row_major, grid_2d)
allocates and initializes a type that represent a blacs context
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_m_by_n_from_template(matrix, template, m, n, sym, data_type)
Utility function to create an arbitrary shaped dbcsr matrix with the same processor grid as the templ...
subroutine, public copy_fm_to_dbcsr(fm, matrix, keep_sparsity)
Copy a BLACS matrix to a dbcsr matrix.
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
integer function, public cp_fm_indxg2l(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS)
wrapper to scalapack function INDXG2L that computes the local index of a distributed matrix entry poi...
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
integer function, public cp_fm_indxg2p(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS)
wrapper to scalapack function INDXG2P that computes the process coordinate which possesses the entry ...
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
Types to describe group distributions.
Defines the basic variable types.
integer, parameter, public dp
Interface to the message passing library MPI.
type(mp_request_type), parameter, public mp_request_null
Routines for calculating RI-MP2 gradients.
subroutine, public prepare_redistribution(para_env, para_env_sub, ngroup, group_grid_2_mepos, mepos_2_grid_group, pos_info)
prepare array for redistribution
subroutine, public fm2array(mat2D, my_rows, my_start_row, my_end_row, my_cols, my_start_col, my_end_col, group_grid_2_mepos, mepos_2_grid_group, ngroup_row, ngroup_col, fm_mat)
redistribute fm to local part of array
Types needed for MP2 calculations.
Auxiliary routines necessary to redistribute an fm_matrix from a given blacs_env to another.
subroutine, public gamma_fm_to_dbcsr(fm_mat_Gamma_3, dbcsr_Gamma_3, para_env_RPA, para_env_sub, homo, virtual, mo_coeff_o, ngroup, my_group_L_start, my_group_L_end, my_group_L_size)
Redistribute RPA-AXK Gamma_3 density matrices: from fm to dbcsr.
subroutine, public communicate_buffer(para_env, num_entries_rec, num_entries_send, buffer_rec, buffer_send, req_array, do_indx, do_msg)
...
All kind of helpful little routines.
pure integer function, dimension(2), public get_limit(m, n, me)
divide m entries into n parts, return size of part me