28 USE dbcsr_api,
ONLY: dbcsr_p_type,&
30 dbcsr_type_no_symmetry
41 USE mp2_types,
ONLY: integ_mat_buffer_type,&
45 #include "./base/base_uses.f90"
51 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'mp2_ri_grad_util'
56 INTEGER,
DIMENSION(:, :),
ALLOCATABLE :: map
81 SUBROUTINE complete_gamma(mp2_env, B_ia_Q, dimen_RI, homo, virtual, para_env, para_env_sub, ngroup, &
82 my_group_L_size, my_group_L_start, my_group_L_end, &
83 my_B_size, my_B_virtual_start, gd_array, gd_B_virtual, kspin)
85 TYPE(mp2_type) :: mp2_env
86 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
87 INTENT(INOUT) :: b_ia_q
88 INTEGER,
INTENT(IN) :: dimen_ri, homo, virtual
89 TYPE(mp_para_env_type),
INTENT(IN),
POINTER :: para_env, para_env_sub
90 INTEGER,
INTENT(IN) :: ngroup, my_group_l_size, &
91 my_group_l_start, my_group_l_end, &
92 my_b_size, my_b_virtual_start
93 TYPE(group_dist_d1_type),
INTENT(IN) :: gd_array, gd_b_virtual
94 INTEGER,
INTENT(IN) :: kspin
96 CHARACTER(LEN=*),
PARAMETER :: routinen =
'complete_gamma'
98 INTEGER :: dimen_ia, handle, i, ispin, kkb, my_ia_end, my_ia_size, my_ia_start, my_p_end, &
99 my_p_size, my_p_start, nspins, pos_group, pos_sub
100 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: pos_info
101 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: group_grid_2_mepos, mepos_2_grid_group
102 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: bib_c_2d, gamma_2d, gamma_pq
103 TYPE(cp_blacs_env_type),
POINTER :: blacs_env
104 TYPE(cp_fm_struct_type),
POINTER :: fm_struct_ia, fm_struct_ri
105 TYPE(cp_fm_type) :: fm_gamma, fm_gamma_pq, fm_gamma_pq_2, fm_gamma_pq_temp, &
106 fm_gamma_pq_temp_2, fm_ia_p, fm_y, operator_half, pq_half
107 TYPE(group_dist_d1_type) :: gd_array_new, gd_ia, gd_ia_new, gd_p, &
110 CALL timeset(routinen, handle)
115 dimen_ia = homo*virtual
116 CALL create_group_dist(gd_ia, para_env_sub%num_pe, dimen_ia)
117 CALL get_group_dist(gd_ia, para_env_sub%mepos, my_ia_start, my_ia_end, my_ia_size)
119 CALL mat_3d_to_2d(b_ia_q, bib_c_2d, homo, my_b_size, virtual, my_b_virtual_start, &
120 my_ia_start, my_ia_end, my_ia_size, my_group_l_size, para_env_sub, gd_b_virtual)
122 CALL mat_3d_to_2d_gamma(mp2_env%ri_grad%Gamma_P_ia(kspin)%array, gamma_2d, homo, my_b_size, virtual, my_b_virtual_start, &
123 my_ia_start, my_ia_end, my_ia_size, my_group_l_size, para_env_sub, gd_b_virtual)
126 CALL create_group_dist(gd_ia_new, para_env%num_pe)
127 CALL create_group_dist(gd_array_new, para_env%num_pe)
128 CALL create_group_dist(gd_p, para_env_sub%num_pe, dimen_ri)
129 CALL create_group_dist(gd_p_new, para_env%num_pe)
131 CALL get_group_dist(gd_p, para_env_sub%mepos, my_p_start, my_p_end, my_p_size)
134 group_grid_2_mepos, mepos_2_grid_group, pos_info=pos_info)
136 DO i = 0, para_env%num_pe - 1
138 pos_group = i/para_env_sub%num_pe
140 pos_sub = pos_info(i)
142 CALL get_group_dist(gd_ia, pos_sub, gd_ia_new, i)
143 CALL get_group_dist(gd_array, pos_group, gd_array_new, i)
144 CALL get_group_dist(gd_p, pos_sub, gd_p_new, i)
151 NULLIFY (fm_struct_ia)
153 ncol_global=dimen_ri, para_env=para_env)
156 CALL array2fm(gamma_2d, fm_struct_ia, my_ia_start, my_ia_end, &
157 my_group_l_start, my_group_l_end, &
158 gd_ia_new, gd_array_new, &
159 group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
162 CALL array2fm(bib_c_2d, fm_struct_ia, my_ia_start, my_ia_end, &
163 my_group_l_start, my_group_l_end, &
164 gd_ia_new, gd_array_new, &
165 group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
168 NULLIFY (fm_struct_ri)
170 ncol_global=dimen_ri, para_env=para_env)
173 CALL array2fm(mp2_env%ri_grad%PQ_half, fm_struct_ri, my_p_start, my_p_end, &
174 my_group_l_start, my_group_l_end, &
175 gd_p_new, gd_array_new, &
176 group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
177 pq_half, do_release_mat=.false.)
180 CALL array2fm(mp2_env%ri_grad%operator_half, fm_struct_ri, my_p_start, my_p_end, &
181 my_group_l_start, my_group_l_end, &
182 gd_p_new, gd_array_new, &
183 group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
184 operator_half, do_release_mat=.false.)
187 CALL release_group_dist(gd_p_new)
188 CALL release_group_dist(gd_ia_new)
189 CALL release_group_dist(gd_array_new)
193 CALL cp_fm_create(fm_gamma, fm_struct_ia, name=
"fm_Gamma")
196 CALL parallel_gemm(transa=
"N", transb=
"T", m=dimen_ia, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
197 matrix_a=fm_y, matrix_b=pq_half, beta=0.0_dp, &
200 CALL cp_fm_release(fm_y)
204 CALL cp_fm_create(fm_gamma_pq_temp, fm_struct_ri, name=
"fm_Gamma_PQ_temp")
205 CALL parallel_gemm(transa=
"T", transb=
"N", m=dimen_ri, n=dimen_ri, k=dimen_ia, alpha=1.0_dp, &
206 matrix_a=fm_gamma, matrix_b=fm_ia_p, beta=0.0_dp, &
207 matrix_c=fm_gamma_pq_temp)
208 CALL cp_fm_release(fm_ia_p)
210 CALL cp_fm_create(fm_gamma_pq, fm_struct_ri, name=
"fm_Gamma_PQ")
213 CALL parallel_gemm(transa=
"N", transb=
"T", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
214 matrix_a=fm_gamma_pq_temp, matrix_b=pq_half, beta=0.0_dp, &
215 matrix_c=fm_gamma_pq)
216 CALL cp_fm_release(fm_gamma_pq_temp)
217 CALL cp_fm_release(pq_half)
221 CALL cp_fm_create(fm_gamma_pq_temp, fm_struct_ri, name=
"fm_Gamma_PQ_temp")
222 CALL parallel_gemm(transa=
"T", transb=
"N", m=dimen_ri, n=dimen_ri, k=dimen_ia, alpha=1.0_dp, &
223 matrix_a=fm_y, matrix_b=fm_ia_p, beta=0.0_dp, &
224 matrix_c=fm_gamma_pq_temp)
225 CALL cp_fm_release(fm_ia_p)
227 CALL cp_fm_create(fm_gamma, fm_struct_ia, name=
"fm_Gamma")
230 CALL parallel_gemm(transa=
"N", transb=
"T", m=dimen_ia, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
231 matrix_a=fm_y, matrix_b=pq_half, beta=0.0_dp, &
233 CALL cp_fm_release(fm_y)
235 CALL cp_fm_create(fm_gamma_pq_temp_2, fm_struct_ri, name=
"fm_Gamma_PQ_temp_2")
236 CALL parallel_gemm(transa=
"N", transb=
"T", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
237 matrix_a=fm_gamma_pq_temp, matrix_b=operator_half, beta=0.0_dp, &
238 matrix_c=fm_gamma_pq_temp_2)
240 CALL cp_fm_create(fm_gamma_pq_2, fm_struct_ri, name=
"fm_Gamma_PQ_2")
241 CALL parallel_gemm(transa=
"N", transb=
"N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
242 matrix_a=pq_half, matrix_b=fm_gamma_pq_temp_2, beta=0.0_dp, &
243 matrix_c=fm_gamma_pq_temp)
244 CALL cp_fm_to_fm(fm_gamma_pq_temp, fm_gamma_pq_2)
245 CALL cp_fm_geadd(1.0_dp,
"T", fm_gamma_pq_temp, 1.0_dp, fm_gamma_pq_2)
246 CALL cp_fm_release(fm_gamma_pq_temp)
247 CALL cp_fm_release(pq_half)
251 CALL parallel_gemm(transa=
"N", transb=
"N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=-1.0_dp, &
252 matrix_a=operator_half, matrix_b=fm_gamma_pq_temp_2, beta=0.0_dp, &
253 matrix_c=fm_gamma_pq)
254 CALL cp_fm_release(operator_half)
255 CALL cp_fm_release(fm_gamma_pq_temp_2)
260 CALL fm2array(gamma_2d, my_ia_size, my_ia_start, my_ia_end, &
261 my_group_l_size, my_group_l_start, my_group_l_end, &
262 group_grid_2_mepos, mepos_2_grid_group, &
263 para_env_sub%num_pe, ngroup, &
266 ALLOCATE (gamma_pq(my_p_size, my_group_l_size))
267 CALL fm2array(gamma_pq, my_p_size, my_p_start, my_p_end, &
268 my_group_l_size, my_group_l_start, my_group_l_end, &
269 group_grid_2_mepos, mepos_2_grid_group, &
270 para_env_sub%num_pe, ngroup, &
272 IF (.NOT.
ALLOCATED(mp2_env%ri_grad%Gamma_PQ))
THEN
273 CALL move_alloc(gamma_pq, mp2_env%ri_grad%Gamma_PQ)
275 mp2_env%ri_grad%Gamma_PQ(:, :) = mp2_env%ri_grad%Gamma_PQ + gamma_pq
276 DEALLOCATE (gamma_pq)
280 ALLOCATE (gamma_pq(my_p_size, my_group_l_size))
281 CALL fm2array(gamma_pq, my_p_size, my_p_start, my_p_end, &
282 my_group_l_size, my_group_l_start, my_group_l_end, &
283 group_grid_2_mepos, mepos_2_grid_group, &
284 para_env_sub%num_pe, ngroup, &
286 IF (.NOT.
ALLOCATED(mp2_env%ri_grad%Gamma_PQ_2))
THEN
287 CALL move_alloc(gamma_pq, mp2_env%ri_grad%Gamma_PQ_2)
289 mp2_env%ri_grad%Gamma_PQ_2(:, :) = mp2_env%ri_grad%Gamma_PQ_2 + gamma_pq
290 DEALLOCATE (gamma_pq)
295 IF (.NOT.
ALLOCATED(mp2_env%ri_grad%G_P_ia))
THEN
296 nspins =
SIZE(mp2_env%ri_grad%mo_coeff_o)
297 ALLOCATE (mp2_env%ri_grad%G_P_ia(my_group_l_size, nspins))
299 DO kkb = 1, my_group_l_size
300 NULLIFY (mp2_env%ri_grad%G_P_ia(kkb, ispin)%matrix)
307 my_ia_start, my_ia_end, my_group_l_size, gd_ia, &
308 mp2_env%ri_grad%G_P_ia(:, kspin), mp2_env%ri_grad%mo_coeff_o(1)%matrix)
310 DEALLOCATE (pos_info)
311 DEALLOCATE (group_grid_2_mepos, mepos_2_grid_group)
312 CALL release_group_dist(gd_ia)
313 CALL release_group_dist(gd_p)
318 CALL timestop(handle)
337 SUBROUTINE mat_3d_to_2d(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
338 my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
339 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
340 INTENT(INOUT) :: b_ia_q
341 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :), &
342 INTENT(OUT) :: bib_c_2d
343 INTEGER,
INTENT(IN) :: homo, my_b_size, virtual, &
344 my_b_virtual_start, my_ia_start, &
345 my_ia_end, my_ia_size, my_group_l_size
346 TYPE(mp_para_env_type),
INTENT(IN) :: para_env_sub
347 TYPE(group_dist_d1_type),
INTENT(IN) :: gd_b_virtual
349 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mat_3d_to_2d'
351 INTEGER :: handle, ia_global, iib, jjb, proc_receive, proc_send, proc_shift, rec_b_size, &
352 rec_b_virtual_end, rec_b_virtual_start
353 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: bib_c_rec
355 CALL timeset(routinen, handle)
357 ALLOCATE (bib_c_2d(my_ia_size, my_group_l_size))
361 DO jjb = 1, my_b_size
362 ia_global = (iib - 1)*virtual + my_b_virtual_start + jjb - 1
363 IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end)
THEN
364 bib_c_2d(ia_global - my_ia_start + 1, :) = b_ia_q(iib, jjb, 1:my_group_l_size)
369 DO proc_shift = 1, para_env_sub%num_pe - 1
370 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
371 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
373 CALL get_group_dist(gd_b_virtual, proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
375 ALLOCATE (bib_c_rec(homo, rec_b_size, my_group_l_size))
378 CALL para_env_sub%sendrecv(b_ia_q, proc_send, &
379 bib_c_rec, proc_receive)
382 DO jjb = 1, rec_b_size
383 ia_global = (iib - 1)*virtual + rec_b_virtual_start + jjb - 1
384 IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end)
THEN
385 bib_c_2d(ia_global - my_ia_start + 1, :) = bib_c_rec(iib, jjb, 1:my_group_l_size)
390 DEALLOCATE (bib_c_rec)
394 CALL timestop(handle)
395 END SUBROUTINE mat_3d_to_2d
412 SUBROUTINE mat_3d_to_2d_gamma(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
413 my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
414 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
415 INTENT(INOUT) :: b_ia_q
416 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :), &
417 INTENT(OUT) :: bib_c_2d
418 INTEGER,
INTENT(IN) :: homo, my_b_size, virtual, &
419 my_b_virtual_start, my_ia_start, &
420 my_ia_end, my_ia_size, my_group_l_size
421 TYPE(mp_para_env_type),
INTENT(IN) :: para_env_sub
422 TYPE(group_dist_d1_type),
INTENT(IN) :: gd_b_virtual
424 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mat_3d_to_2d_gamma'
426 INTEGER :: handle, ia_global, iib, jjb, proc_receive, proc_send, proc_shift, rec_b_size, &
427 rec_b_virtual_end, rec_b_virtual_start
428 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: bib_c_rec
430 CALL timeset(routinen, handle)
432 ALLOCATE (bib_c_2d(my_ia_size, my_group_l_size))
436 DO jjb = 1, my_b_size
437 ia_global = (iib - 1)*virtual + my_b_virtual_start + jjb - 1
438 IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end)
THEN
439 bib_c_2d(ia_global - my_ia_start + 1, :) = b_ia_q(jjb, iib, 1:my_group_l_size)
444 DO proc_shift = 1, para_env_sub%num_pe - 1
445 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
446 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
448 CALL get_group_dist(gd_b_virtual, proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
450 ALLOCATE (bib_c_rec(rec_b_size, homo, my_group_l_size))
453 CALL para_env_sub%sendrecv(b_ia_q, proc_send, &
454 bib_c_rec, proc_receive)
457 DO jjb = 1, rec_b_size
458 ia_global = (iib - 1)*virtual + rec_b_virtual_start + jjb - 1
459 IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end)
THEN
460 bib_c_2d(ia_global - my_ia_start + 1, :) = bib_c_rec(jjb, iib, 1:my_group_l_size)
465 DEALLOCATE (bib_c_rec)
469 CALL timestop(handle)
470 END SUBROUTINE mat_3d_to_2d_gamma
490 SUBROUTINE array2fm(mat2D, fm_struct, my_start_row, my_end_row, &
491 my_start_col, my_end_col, &
493 group_grid_2_mepos, ngroup_row, ngroup_col, &
494 fm_mat, integ_group_size, color_group, do_release_mat)
496 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :), &
497 INTENT(INOUT) :: mat2d
498 TYPE(cp_fm_struct_type),
POINTER :: fm_struct
499 INTEGER,
INTENT(IN) :: my_start_row, my_end_row, my_start_col, &
501 TYPE(group_dist_d1_type),
INTENT(IN) :: gd_row, gd_col
502 INTEGER,
ALLOCATABLE,
DIMENSION(:, :),
INTENT(IN) :: group_grid_2_mepos
503 INTEGER,
INTENT(IN) :: ngroup_row, ngroup_col
504 TYPE(cp_fm_type),
INTENT(OUT) :: fm_mat
505 INTEGER,
INTENT(IN),
OPTIONAL :: integ_group_size, color_group
506 LOGICAL,
INTENT(IN),
OPTIONAL :: do_release_mat
508 CHARACTER(LEN=*),
PARAMETER :: routinen =
'array2fm'
510 INTEGER :: dummy_proc, end_col_block, end_row_block, handle, handle2, i_global, i_local, &
511 i_sub, iib, iii, itmp(2), j_global, j_local, j_sub, jjb, my_num_col_blocks, &
512 my_num_row_blocks, mypcol, myprow, ncol_block, ncol_local, npcol, nprow, nrow_block, &
513 nrow_local, num_cols, num_rec_cols, num_rows, number_of_rec, number_of_send, &
514 proc_receive, proc_send, proc_shift, rec_col_end, rec_col_size, rec_col_start, &
515 rec_counter, rec_pcol, rec_prow, rec_row_end, rec_row_size, rec_row_start, ref_send_pcol, &
516 ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
517 INTEGER :: start_col_block, start_row_block
518 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: iii_vet, index_col_rec, map_rec_size, &
520 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: blocks_ranges_col, blocks_ranges_row, &
521 grid_2_mepos, grid_ref_2_send_pos, &
523 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
524 LOGICAL :: convert_pos, my_do_release_mat
525 REAL(kind=
dp) :: part_col, part_row
526 TYPE(integ_mat_buffer_type),
ALLOCATABLE, &
527 DIMENSION(:) :: buffer_rec, buffer_send
528 TYPE(mp_para_env_type),
POINTER :: para_env
529 TYPE(mp_request_type),
ALLOCATABLE,
DIMENSION(:) :: req_send
531 CALL timeset(routinen, handle)
533 my_do_release_mat = .true.
534 IF (
PRESENT(do_release_mat)) my_do_release_mat = do_release_mat
536 CALL cp_fm_struct_get(fm_struct, para_env=para_env, nrow_global=num_rows, ncol_global=num_cols)
545 nrow_local=nrow_local, &
546 ncol_local=ncol_local, &
547 row_indices=row_indices, &
548 col_indices=col_indices, &
549 nrow_block=nrow_block, &
550 ncol_block=ncol_block)
551 myprow = fm_mat%matrix_struct%context%mepos(1)
552 mypcol = fm_mat%matrix_struct%context%mepos(2)
553 nprow = fm_mat%matrix_struct%context%num_pe(1)
554 npcol = fm_mat%matrix_struct%context%num_pe(2)
559 CALL timeset(routinen//
"_info", handle2)
560 ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
562 ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))
563 grid_2_mepos(myprow, mypcol) = para_env%mepos
565 CALL para_env%sum(grid_2_mepos)
566 CALL para_env%allgather([myprow, mypcol], mepos_2_grid)
569 ALLOCATE (map_send_size(0:para_env%num_pe - 1))
572 DO jjb = my_start_col, my_end_col
574 fm_mat%matrix_struct%first_p_pos(2), npcol)
575 DO iib = my_start_row, my_end_row
577 fm_mat%matrix_struct%first_p_pos(1), nprow)
578 proc_send = grid_2_mepos(send_prow, send_pcol)
579 map_send_size(proc_send) = map_send_size(proc_send) + 1
584 ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
586 part_row = real(num_rows, kind=
dp)/real(ngroup_row, kind=
dp)
587 part_col = real(num_cols, kind=
dp)/real(ngroup_col, kind=
dp)
590 convert_pos = .false.
591 IF (
PRESENT(integ_group_size) .AND.
PRESENT(color_group)) convert_pos = .true.
593 DO jjb = 1, nrow_local
594 j_global = row_indices(jjb)
597 rec_prow = int(real(j_global - 1, kind=
dp)/part_row)
598 rec_prow = max(0, rec_prow)
599 rec_prow = min(rec_prow, ngroup_row - 1)
601 itmp =
get_limit(num_rows, ngroup_row, rec_prow)
602 IF (j_global >= itmp(1) .AND. j_global <= itmp(2))
EXIT
603 IF (j_global < itmp(1)) rec_prow = rec_prow - 1
604 IF (j_global > itmp(2)) rec_prow = rec_prow + 1
607 IF (convert_pos)
THEN
609 IF ((rec_prow/integ_group_size) .NE. color_group) cycle
611 rec_prow = mod(rec_prow, integ_group_size)
614 DO iib = 1, ncol_local
615 i_global = col_indices(iib)
617 rec_pcol = int(real(i_global - 1, kind=
dp)/part_col)
618 rec_pcol = max(0, rec_pcol)
619 rec_pcol = min(rec_pcol, ngroup_col - 1)
621 itmp =
get_limit(num_cols, ngroup_col, rec_pcol)
622 IF (i_global >= itmp(1) .AND. i_global <= itmp(2))
EXIT
623 IF (i_global < itmp(1)) rec_pcol = rec_pcol - 1
624 IF (i_global > itmp(2)) rec_pcol = rec_pcol + 1
627 proc_receive = group_grid_2_mepos(rec_prow, rec_pcol)
629 map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
635 IF (map_rec_size(para_env%mepos) > 0)
THEN
636 DO jjb = 1, ncol_local
637 j_global = col_indices(jjb)
638 IF (j_global >= my_start_col .AND. j_global <= my_end_col)
THEN
639 DO iib = 1, nrow_local
640 i_global = row_indices(iib)
641 IF (i_global >= my_start_row .AND. i_global <= my_end_row)
THEN
642 fm_mat%local_data(iib, jjb) = mat2d(i_global - my_start_row + 1, j_global - my_start_col + 1)
648 CALL timestop(handle2)
651 CALL timeset(routinen//
"_buffer_send", handle2)
654 DO proc_shift = 1, para_env%num_pe - 1
655 proc_send =
modulo(para_env%mepos + proc_shift, para_env%num_pe)
656 IF (map_send_size(proc_send) > 0)
THEN
657 number_of_send = number_of_send + 1
661 ALLOCATE (buffer_send(number_of_send))
665 ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
666 grid_ref_2_send_pos = 0
670 DO proc_shift = 1, para_env%num_pe - 1
671 proc_send =
modulo(para_env%mepos + proc_shift, para_env%num_pe)
672 size_send_buffer = map_send_size(proc_send)
673 IF (map_send_size(proc_send) > 0)
THEN
674 send_counter = send_counter + 1
676 ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
677 buffer_send(send_counter)%msg = 0.0_dp
678 buffer_send(send_counter)%proc = proc_send
681 ref_send_prow = mepos_2_grid(1, proc_send)
682 ref_send_pcol = mepos_2_grid(2, proc_send)
684 grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
691 ALLOCATE (iii_vet(number_of_send))
693 DO iib = my_start_row, my_end_row
695 fm_mat%matrix_struct%first_p_pos(1), nprow)
696 DO jjb = my_start_col, my_end_col
698 fm_mat%matrix_struct%first_p_pos(2), npcol)
700 IF (grid_2_mepos(send_prow, send_pcol) == para_env%mepos) cycle
702 send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
703 iii_vet(send_counter) = iii_vet(send_counter) + 1
704 iii = iii_vet(send_counter)
705 buffer_send(send_counter)%msg(iii) = mat2d(iib - my_start_row + 1, jjb - my_start_col + 1)
710 DEALLOCATE (grid_ref_2_send_pos)
711 IF (my_do_release_mat)
DEALLOCATE (mat2d)
712 CALL timestop(handle2)
717 CALL timeset(routinen//
"_isendrecv", handle2)
720 DO proc_shift = 1, para_env%num_pe - 1
721 proc_receive =
modulo(para_env%mepos - proc_shift, para_env%num_pe)
722 IF (map_rec_size(proc_receive) > 0)
THEN
723 number_of_rec = number_of_rec + 1
727 ALLOCATE (buffer_rec(number_of_rec))
730 DO proc_shift = 1, para_env%num_pe - 1
731 proc_receive =
modulo(para_env%mepos - proc_shift, para_env%num_pe)
732 size_rec_buffer = map_rec_size(proc_receive)
733 IF (map_rec_size(proc_receive) > 0)
THEN
734 rec_counter = rec_counter + 1
736 ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
737 buffer_rec(rec_counter)%msg = 0.0_dp
738 buffer_rec(rec_counter)%proc = proc_receive
740 CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
741 buffer_rec(rec_counter)%msg_req)
746 ALLOCATE (req_send(number_of_send))
748 DO proc_shift = 1, para_env%num_pe - 1
749 proc_send =
modulo(para_env%mepos + proc_shift, para_env%num_pe)
750 IF (map_send_size(proc_send) > 0)
THEN
751 send_counter = send_counter + 1
752 CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
753 buffer_send(send_counter)%msg_req)
754 req_send(send_counter) = buffer_send(send_counter)%msg_req
757 CALL timestop(handle2)
761 CALL timeset(routinen//
"_fill", handle2)
765 my_num_row_blocks = 1
766 DO iib = 1, nrow_local - 1
767 IF (abs(row_indices(iib + 1) - row_indices(iib)) == 1) cycle
768 my_num_row_blocks = my_num_row_blocks + 1
770 ALLOCATE (blocks_ranges_row(2, my_num_row_blocks))
771 blocks_ranges_row = 0
772 blocks_ranges_row(1, 1) = row_indices(1)
774 DO iib = 1, nrow_local - 1
775 IF (abs(row_indices(iib + 1) - row_indices(iib)) == 1) cycle
777 blocks_ranges_row(2, iii - 1) = row_indices(iib)
778 blocks_ranges_row(1, iii) = row_indices(iib + 1)
780 blocks_ranges_row(2, my_num_row_blocks) = row_indices(max(nrow_local, 1))
782 my_num_col_blocks = 1
783 DO jjb = 1, ncol_local - 1
784 IF (abs(col_indices(jjb + 1) - col_indices(jjb)) == 1) cycle
785 my_num_col_blocks = my_num_col_blocks + 1
787 ALLOCATE (blocks_ranges_col(2, my_num_col_blocks))
788 blocks_ranges_col = 0
789 blocks_ranges_col(1, 1) = col_indices(1)
791 DO jjb = 1, ncol_local - 1
792 IF (abs(col_indices(jjb + 1) - col_indices(jjb)) == 1) cycle
794 blocks_ranges_col(2, iii - 1) = col_indices(jjb)
795 blocks_ranges_col(1, iii) = col_indices(jjb + 1)
797 blocks_ranges_col(2, my_num_col_blocks) = col_indices(max(ncol_local, 1))
800 DO proc_shift = 1, para_env%num_pe - 1
801 proc_receive =
modulo(para_env%mepos - proc_shift, para_env%num_pe)
802 size_rec_buffer = map_rec_size(proc_receive)
804 IF (map_rec_size(proc_receive) > 0)
THEN
805 rec_counter = rec_counter + 1
807 CALL get_group_dist(gd_col, proc_receive, rec_col_start, rec_col_end, rec_col_size)
811 DO jjb = 1, my_num_col_blocks
812 start_col_block = max(blocks_ranges_col(1, jjb), rec_col_start)
813 end_col_block = min(blocks_ranges_col(2, jjb), rec_col_end)
814 DO j_sub = start_col_block, end_col_block
815 num_rec_cols = num_rec_cols + 1
818 ALLOCATE (index_col_rec(num_rec_cols))
821 DO jjb = 1, my_num_col_blocks
822 start_col_block = max(blocks_ranges_col(1, jjb), rec_col_start)
823 end_col_block = min(blocks_ranges_col(2, jjb), rec_col_end)
824 DO j_sub = start_col_block, end_col_block
827 fm_mat%matrix_struct%first_p_pos(2), npcol)
828 index_col_rec(iii) = j_local
832 CALL get_group_dist(gd_row, proc_receive, rec_row_start, rec_row_end, rec_row_size)
835 CALL buffer_rec(rec_counter)%msg_req%wait()
839 DO iib = 1, my_num_row_blocks
840 start_row_block = max(blocks_ranges_row(1, iib), rec_row_start)
841 end_row_block = min(blocks_ranges_row(2, iib), rec_row_end)
842 DO i_sub = start_row_block, end_row_block
844 fm_mat%matrix_struct%first_p_pos(1), nprow)
845 DO jjb = 1, num_rec_cols
847 j_local = index_col_rec(jjb)
848 fm_mat%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
853 DEALLOCATE (buffer_rec(rec_counter)%msg)
854 DEALLOCATE (index_col_rec)
857 DEALLOCATE (buffer_rec)
859 DEALLOCATE (blocks_ranges_row)
860 DEALLOCATE (blocks_ranges_col)
862 CALL timestop(handle2)
865 CALL timeset(routinen//
"_waitall", handle2)
866 CALL mp_waitall(req_send(:))
867 DO send_counter = 1, number_of_send
868 DEALLOCATE (buffer_send(send_counter)%msg)
870 DEALLOCATE (buffer_send)
871 CALL timestop(handle2)
873 DEALLOCATE (map_send_size)
874 DEALLOCATE (map_rec_size)
875 DEALLOCATE (grid_2_mepos)
876 DEALLOCATE (mepos_2_grid)
878 CALL timestop(handle)
897 SUBROUTINE fm2array(mat2D, my_rows, my_start_row, my_end_row, &
898 my_cols, my_start_col, my_end_col, &
899 group_grid_2_mepos, mepos_2_grid_group, &
900 ngroup_row, ngroup_col, &
903 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :), &
905 INTEGER,
INTENT(IN) :: my_rows, my_start_row, my_end_row, &
906 my_cols, my_start_col, my_end_col
907 INTEGER,
ALLOCATABLE,
DIMENSION(:, :),
INTENT(IN) :: group_grid_2_mepos, mepos_2_grid_group
908 INTEGER,
INTENT(IN) :: ngroup_row, ngroup_col
909 TYPE(cp_fm_type),
INTENT(INOUT) :: fm_mat
911 CHARACTER(LEN=*),
PARAMETER :: routinen =
'fm2array'
913 INTEGER :: dummy_proc, handle, handle2, i_global, iib, iii, itmp(2), j_global, jjb, mypcol, &
914 myprow, ncol_block, ncol_local, npcol, nprow, nrow_block, nrow_local, num_cols, &
915 num_rec_rows, num_rows, number_of_rec, number_of_send, proc_receive, proc_send, &
916 proc_shift, rec_col_size, rec_counter, rec_pcol, rec_prow, rec_row_size, ref_send_pcol, &
917 ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
918 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: iii_vet, index_row_rec, map_rec_size, &
920 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: grid_2_mepos, grid_ref_2_send_pos, &
922 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
923 REAL(kind=
dp) :: part_col, part_row
924 TYPE(integ_mat_buffer_type),
ALLOCATABLE, &
925 DIMENSION(:) :: buffer_rec, buffer_send
926 TYPE(mp_para_env_type),
POINTER :: para_env
927 TYPE(mp_request_type),
ALLOCATABLE,
DIMENSION(:) :: req_send
929 CALL timeset(routinen, handle)
932 ALLOCATE (mat2d(my_rows, my_cols))
938 nrow_local=nrow_local, &
939 ncol_local=ncol_local, &
940 row_indices=row_indices, &
941 col_indices=col_indices, &
942 nrow_block=nrow_block, &
943 ncol_block=ncol_block, &
944 nrow_global=num_rows, &
945 ncol_global=num_cols, &
947 myprow = fm_mat%matrix_struct%context%mepos(1)
948 mypcol = fm_mat%matrix_struct%context%mepos(2)
949 nprow = fm_mat%matrix_struct%context%num_pe(1)
950 npcol = fm_mat%matrix_struct%context%num_pe(2)
955 CALL timeset(routinen//
"_info", handle2)
956 ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
958 ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))
961 grid_2_mepos(myprow, mypcol) = para_env%mepos
964 CALL para_env%sum(grid_2_mepos)
965 CALL para_env%allgather([myprow, mypcol], mepos_2_grid)
968 ALLOCATE (map_send_size(0:para_env%num_pe - 1))
970 part_row = real(num_rows, kind=
dp)/real(ngroup_row, kind=
dp)
971 part_col = real(num_cols, kind=
dp)/real(ngroup_col, kind=
dp)
973 DO jjb = 1, ncol_local
974 j_global = col_indices(jjb)
977 send_pcol = int(real(j_global - 1, kind=
dp)/part_col)
978 send_pcol = max(0, send_pcol)
979 send_pcol = min(send_pcol, ngroup_col - 1)
981 itmp =
get_limit(num_cols, ngroup_col, send_pcol)
982 IF (j_global >= itmp(1) .AND. j_global <= itmp(2))
EXIT
983 IF (j_global < itmp(1)) send_pcol = send_pcol - 1
984 IF (j_global > itmp(2)) send_pcol = send_pcol + 1
987 DO iib = 1, nrow_local
988 i_global = row_indices(iib)
990 send_prow = int(real(i_global - 1, kind=
dp)/part_row)
991 send_prow = max(0, send_prow)
992 send_prow = min(send_prow, ngroup_row - 1)
994 itmp =
get_limit(num_rows, ngroup_row, send_prow)
995 IF (i_global >= itmp(1) .AND. i_global <= itmp(2))
EXIT
996 IF (i_global < itmp(1)) send_prow = send_prow - 1
997 IF (i_global > itmp(2)) send_prow = send_prow + 1
1000 proc_send = group_grid_2_mepos(send_prow, send_pcol)
1002 map_send_size(proc_send) = map_send_size(proc_send) + 1
1008 ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
1011 DO jjb = my_start_col, my_end_col
1013 fm_mat%matrix_struct%first_p_pos(2), npcol)
1014 DO iib = my_start_row, my_end_row
1016 fm_mat%matrix_struct%first_p_pos(1), nprow)
1017 proc_receive = grid_2_mepos(rec_prow, rec_pcol)
1018 map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
1023 IF (map_rec_size(para_env%mepos) > 0)
THEN
1024 DO jjb = 1, ncol_local
1025 j_global = col_indices(jjb)
1026 IF (j_global >= my_start_col .AND. j_global <= my_end_col)
THEN
1027 DO iib = 1, nrow_local
1028 i_global = row_indices(iib)
1029 IF (i_global >= my_start_row .AND. i_global <= my_end_row)
THEN
1030 mat2d(i_global - my_start_row + 1, j_global - my_start_col + 1) = fm_mat%local_data(iib, jjb)
1036 CALL timestop(handle2)
1039 CALL timeset(routinen//
"_buffer_send", handle2)
1042 DO proc_shift = 1, para_env%num_pe - 1
1043 proc_send =
modulo(para_env%mepos + proc_shift, para_env%num_pe)
1044 IF (map_send_size(proc_send) > 0)
THEN
1045 number_of_send = number_of_send + 1
1049 ALLOCATE (buffer_send(number_of_send))
1054 ALLOCATE (grid_ref_2_send_pos(0:ngroup_row - 1, 0:ngroup_col - 1))
1055 grid_ref_2_send_pos = 0
1060 DO proc_shift = 1, para_env%num_pe - 1
1061 proc_send =
modulo(para_env%mepos + proc_shift, para_env%num_pe)
1062 size_send_buffer = map_send_size(proc_send)
1063 IF (map_send_size(proc_send) > 0)
THEN
1064 send_counter = send_counter + 1
1066 ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
1067 buffer_send(send_counter)%msg = 0.0_dp
1068 buffer_send(send_counter)%proc = proc_send
1071 ref_send_prow = mepos_2_grid_group(1, proc_send)
1072 ref_send_pcol = mepos_2_grid_group(2, proc_send)
1074 grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
1081 ALLOCATE (iii_vet(number_of_send))
1083 DO jjb = 1, ncol_local
1084 j_global = col_indices(jjb)
1087 send_pcol = int(real(j_global - 1, kind=
dp)/part_col)
1088 send_pcol = max(0, send_pcol)
1089 send_pcol = min(send_pcol, ngroup_col - 1)
1091 itmp =
get_limit(num_cols, ngroup_col, send_pcol)
1092 IF (j_global >= itmp(1) .AND. j_global <= itmp(2))
EXIT
1093 IF (j_global < itmp(1)) send_pcol = send_pcol - 1
1094 IF (j_global > itmp(2)) send_pcol = send_pcol + 1
1097 DO iib = 1, nrow_local
1098 i_global = row_indices(iib)
1100 send_prow = int(real(i_global - 1, kind=
dp)/part_row)
1101 send_prow = max(0, send_prow)
1102 send_prow = min(send_prow, ngroup_row - 1)
1104 itmp =
get_limit(num_rows, ngroup_row, send_prow)
1105 IF (i_global >= itmp(1) .AND. i_global <= itmp(2))
EXIT
1106 IF (i_global < itmp(1)) send_prow = send_prow - 1
1107 IF (i_global > itmp(2)) send_prow = send_prow + 1
1110 IF (group_grid_2_mepos(send_prow, send_pcol) == para_env%mepos) cycle
1112 send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
1113 iii_vet(send_counter) = iii_vet(send_counter) + 1
1114 iii = iii_vet(send_counter)
1115 buffer_send(send_counter)%msg(iii) = fm_mat%local_data(iib, jjb)
1119 DEALLOCATE (iii_vet)
1120 DEALLOCATE (grid_ref_2_send_pos)
1121 CALL timestop(handle2)
1126 CALL timeset(routinen//
"_isendrecv", handle2)
1129 DO proc_shift = 1, para_env%num_pe - 1
1130 proc_receive =
modulo(para_env%mepos - proc_shift, para_env%num_pe)
1131 IF (map_rec_size(proc_receive) > 0)
THEN
1132 number_of_rec = number_of_rec + 1
1136 ALLOCATE (buffer_rec(number_of_rec))
1139 DO proc_shift = 1, para_env%num_pe - 1
1140 proc_receive =
modulo(para_env%mepos - proc_shift, para_env%num_pe)
1141 size_rec_buffer = map_rec_size(proc_receive)
1142 IF (map_rec_size(proc_receive) > 0)
THEN
1143 rec_counter = rec_counter + 1
1145 ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
1146 buffer_rec(rec_counter)%msg = 0.0_dp
1147 buffer_rec(rec_counter)%proc = proc_receive
1149 CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
1150 buffer_rec(rec_counter)%msg_req)
1155 ALLOCATE (req_send(number_of_send))
1157 DO proc_shift = 1, para_env%num_pe - 1
1158 proc_send =
modulo(para_env%mepos + proc_shift, para_env%num_pe)
1159 IF (map_send_size(proc_send) > 0)
THEN
1160 send_counter = send_counter + 1
1161 CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
1162 buffer_send(send_counter)%msg_req)
1163 req_send(send_counter) = buffer_send(send_counter)%msg_req
1166 CALL timestop(handle2)
1170 CALL timeset(routinen//
"_fill", handle2)
1172 nrow_local=nrow_local, &
1173 ncol_local=ncol_local)
1174 ALLOCATE (sizes(2, 0:para_env%num_pe - 1))
1175 CALL para_env%allgather([nrow_local, ncol_local], sizes)
1176 iib = maxval(sizes(1, :))
1177 ALLOCATE (index_row_rec(iib))
1180 DO proc_shift = 1, para_env%num_pe - 1
1181 proc_receive =
modulo(para_env%mepos - proc_shift, para_env%num_pe)
1182 size_rec_buffer = map_rec_size(proc_receive)
1184 IF (map_rec_size(proc_receive) > 0)
THEN
1185 rec_counter = rec_counter + 1
1187 rec_col_size = sizes(2, proc_receive)
1188 rec_row_size = sizes(1, proc_receive)
1192 DO iib = 1, rec_row_size
1193 i_global =
cp_fm_indxl2g(iib, nrow_block, mepos_2_grid(1, proc_receive), &
1194 fm_mat%matrix_struct%first_p_pos(1), nprow)
1195 IF (i_global >= my_start_row .AND. i_global <= my_end_row)
THEN
1196 num_rec_rows = num_rec_rows + 1
1197 index_row_rec(num_rec_rows) = i_global
1201 CALL buffer_rec(rec_counter)%msg_req%wait()
1204 DO jjb = 1, rec_col_size
1205 j_global =
cp_fm_indxl2g(jjb, ncol_block, mepos_2_grid(2, proc_receive), &
1206 fm_mat%matrix_struct%first_p_pos(2), npcol)
1207 IF (j_global >= my_start_col .AND. j_global <= my_end_col)
THEN
1208 DO iib = 1, num_rec_rows
1209 i_global = index_row_rec(iib)
1211 mat2d(i_global - my_start_row + 1, j_global - my_start_col + 1) = buffer_rec(rec_counter)%msg(iii)
1216 DEALLOCATE (buffer_rec(rec_counter)%msg)
1219 DEALLOCATE (buffer_rec)
1220 DEALLOCATE (index_row_rec)
1221 CALL cp_fm_release(fm_mat)
1222 CALL timestop(handle2)
1225 CALL timeset(routinen//
"_waitall", handle2)
1226 CALL mp_waitall(req_send(:))
1227 DO send_counter = 1, number_of_send
1228 DEALLOCATE (buffer_send(send_counter)%msg)
1230 DEALLOCATE (buffer_send)
1231 CALL timestop(handle2)
1233 CALL timestop(handle)
1252 my_ia_start, my_ia_end, my_group_L_size, &
1253 gd_ia, dbcsr_Gamma, mo_coeff_o)
1254 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: gamma_2d
1255 INTEGER,
INTENT(IN) :: homo, virtual, dimen_ia
1256 TYPE(mp_para_env_type),
INTENT(IN),
POINTER :: para_env_sub
1257 INTEGER,
INTENT(IN) :: my_ia_start, my_ia_end, my_group_l_size
1258 TYPE(group_dist_d1_type),
INTENT(IN) :: gd_ia
1259 TYPE(dbcsr_p_type),
DIMENSION(:),
INTENT(INOUT) :: dbcsr_gamma
1260 TYPE(dbcsr_type),
INTENT(INOUT) :: mo_coeff_o
1262 CHARACTER(LEN=*),
PARAMETER :: routinen =
'create_dbcsr_gamma'
1264 INTEGER :: dummy_proc, handle, i_global, i_local, iaia, iib, iii, itmp(2), j_global, &
1265 j_local, jjb, jjj, kkb, mypcol, myprow, ncol_block, ncol_local, npcol, nprow, nrow_block, &
1266 nrow_local, number_of_rec, number_of_send, proc_receive, proc_send, proc_shift, &
1267 rec_counter, rec_iaia_end, rec_iaia_size, rec_iaia_start, rec_pcol, rec_prow, &
1268 ref_send_pcol, ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, &
1270 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: iii_vet, map_rec_size, map_send_size
1271 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: grid_2_mepos, grid_ref_2_send_pos, &
1272 indeces_map_my, mepos_2_grid
1273 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1274 REAL(kind=
dp) :: part_ia
1275 TYPE(cp_blacs_env_type),
POINTER :: blacs_env
1276 TYPE(cp_fm_struct_type),
POINTER :: fm_struct
1277 TYPE(cp_fm_type) :: fm_ia
1278 TYPE(index_map),
ALLOCATABLE,
DIMENSION(:) :: indeces_rec
1279 TYPE(integ_mat_buffer_type),
ALLOCATABLE, &
1280 DIMENSION(:) :: buffer_rec, buffer_send
1281 TYPE(mp_request_type),
ALLOCATABLE,
DIMENSION(:) :: req_send
1283 CALL timeset(routinen, handle)
1292 ncol_global=virtual, para_env=para_env_sub)
1301 nrow_local=nrow_local, &
1302 ncol_local=ncol_local, &
1303 row_indices=row_indices, &
1304 col_indices=col_indices, &
1305 nrow_block=nrow_block, &
1306 ncol_block=ncol_block)
1307 myprow = fm_ia%matrix_struct%context%mepos(1)
1308 mypcol = fm_ia%matrix_struct%context%mepos(2)
1309 nprow = fm_ia%matrix_struct%context%num_pe(1)
1310 npcol = fm_ia%matrix_struct%context%num_pe(2)
1313 ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
1315 ALLOCATE (mepos_2_grid(2, 0:para_env_sub%num_pe - 1))
1317 grid_2_mepos(myprow, mypcol) = para_env_sub%mepos
1319 CALL para_env_sub%sum(grid_2_mepos)
1320 CALL para_env_sub%allgather([myprow, mypcol], mepos_2_grid)
1323 ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1))
1326 DO iaia = my_ia_start, my_ia_end
1327 i_global = (iaia - 1)/virtual + 1
1328 j_global = mod(iaia - 1, virtual) + 1
1329 send_prow =
cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
1330 fm_ia%matrix_struct%first_p_pos(1), nprow)
1331 send_pcol =
cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
1332 fm_ia%matrix_struct%first_p_pos(2), npcol)
1333 proc_send = grid_2_mepos(send_prow, send_pcol)
1334 map_send_size(proc_send) = map_send_size(proc_send) + 1
1338 ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1))
1340 part_ia = real(dimen_ia, kind=
dp)/real(para_env_sub%num_pe, kind=
dp)
1342 DO iib = 1, nrow_local
1343 i_global = row_indices(iib)
1344 DO jjb = 1, ncol_local
1345 j_global = col_indices(jjb)
1346 iaia = (i_global - 1)*virtual + j_global
1347 proc_receive = int(real(iaia - 1, kind=
dp)/part_ia)
1348 proc_receive = max(0, proc_receive)
1349 proc_receive = min(proc_receive, para_env_sub%num_pe - 1)
1351 itmp =
get_limit(dimen_ia, para_env_sub%num_pe, proc_receive)
1352 IF (iaia >= itmp(1) .AND. iaia <= itmp(2))
EXIT
1353 IF (iaia < itmp(1)) proc_receive = proc_receive - 1
1354 IF (iaia > itmp(2)) proc_receive = proc_receive + 1
1356 map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
1362 DO proc_shift = 1, para_env_sub%num_pe - 1
1363 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1364 IF (map_send_size(proc_send) > 0)
THEN
1365 number_of_send = number_of_send + 1
1369 ALLOCATE (buffer_send(number_of_send))
1371 ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
1372 grid_ref_2_send_pos = 0
1375 DO proc_shift = 1, para_env_sub%num_pe - 1
1376 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1377 size_send_buffer = map_send_size(proc_send)
1378 IF (map_send_size(proc_send) > 0)
THEN
1379 send_counter = send_counter + 1
1381 ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
1382 buffer_send(send_counter)%proc = proc_send
1385 ref_send_prow = mepos_2_grid(1, proc_send)
1386 ref_send_pcol = mepos_2_grid(2, proc_send)
1388 grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
1394 DO proc_shift = 1, para_env_sub%num_pe - 1
1395 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1396 IF (map_rec_size(proc_receive) > 0)
THEN
1397 number_of_rec = number_of_rec + 1
1402 ALLOCATE (buffer_rec(number_of_rec))
1403 ALLOCATE (indeces_rec(number_of_rec))
1406 DO proc_shift = 1, para_env_sub%num_pe - 1
1407 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1408 size_rec_buffer = map_rec_size(proc_receive)
1409 IF (map_rec_size(proc_receive) > 0)
THEN
1410 rec_counter = rec_counter + 1
1412 ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
1413 buffer_rec(rec_counter)%proc = proc_receive
1415 ALLOCATE (indeces_rec(rec_counter)%map(2, size_rec_buffer))
1416 indeces_rec(rec_counter)%map = 0
1417 CALL get_group_dist(gd_ia, proc_receive, rec_iaia_start, rec_iaia_end, rec_iaia_size)
1419 DO iaia = rec_iaia_start, rec_iaia_end
1420 i_global = (iaia - 1)/virtual + 1
1421 j_global = mod(iaia - 1, virtual) + 1
1422 rec_prow =
cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
1423 fm_ia%matrix_struct%first_p_pos(1), nprow)
1424 rec_pcol =
cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
1425 fm_ia%matrix_struct%first_p_pos(2), npcol)
1426 IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) cycle
1429 fm_ia%matrix_struct%first_p_pos(1), nprow)
1431 fm_ia%matrix_struct%first_p_pos(2), npcol)
1432 indeces_rec(rec_counter)%map(1, iii) = i_local
1433 indeces_rec(rec_counter)%map(2, iii) = j_local
1438 IF (map_rec_size(para_env_sub%mepos) > 0)
THEN
1439 size_rec_buffer = map_rec_size(para_env_sub%mepos)
1440 ALLOCATE (indeces_map_my(2, size_rec_buffer))
1443 DO iaia = my_ia_start, my_ia_end
1444 i_global = (iaia - 1)/virtual + 1
1445 j_global = mod(iaia - 1, virtual) + 1
1446 rec_prow =
cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
1447 fm_ia%matrix_struct%first_p_pos(1), nprow)
1448 rec_pcol =
cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
1449 fm_ia%matrix_struct%first_p_pos(2), npcol)
1450 IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) cycle
1453 fm_ia%matrix_struct%first_p_pos(1), nprow)
1455 fm_ia%matrix_struct%first_p_pos(2), npcol)
1456 indeces_map_my(1, iii) = i_local
1457 indeces_map_my(2, iii) = j_local
1462 ALLOCATE (iii_vet(number_of_send))
1464 ALLOCATE (req_send(number_of_send))
1467 DO kkb = 1, my_group_l_size
1471 DO proc_shift = 1, para_env_sub%num_pe - 1
1472 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1473 IF (map_rec_size(proc_receive) > 0)
THEN
1474 rec_counter = rec_counter + 1
1475 buffer_rec(rec_counter)%msg = 0.0_dp
1476 CALL para_env_sub%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
1477 buffer_rec(rec_counter)%msg_req)
1481 DO send_counter = 1, number_of_send
1482 buffer_send(send_counter)%msg = 0.0_dp
1486 DO iaia = my_ia_start, my_ia_end
1487 i_global = (iaia - 1)/virtual + 1
1488 j_global = mod(iaia - 1, virtual) + 1
1489 send_prow =
cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
1490 fm_ia%matrix_struct%first_p_pos(1), nprow)
1491 send_pcol =
cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
1492 fm_ia%matrix_struct%first_p_pos(2), npcol)
1493 proc_send = grid_2_mepos(send_prow, send_pcol)
1495 IF (grid_2_mepos(send_prow, send_pcol) == para_env_sub%mepos)
THEN
1498 i_local = indeces_map_my(1, jjj)
1499 j_local = indeces_map_my(2, jjj)
1500 fm_ia%local_data(i_local, j_local) = gamma_2d(iaia - my_ia_start + 1, kkb)
1502 send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
1503 iii_vet(send_counter) = iii_vet(send_counter) + 1
1504 iii = iii_vet(send_counter)
1505 buffer_send(send_counter)%msg(iii) = gamma_2d(iaia - my_ia_start + 1, kkb)
1510 DO proc_shift = 1, para_env_sub%num_pe - 1
1511 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1512 IF (map_send_size(proc_send) > 0)
THEN
1513 send_counter = send_counter + 1
1514 CALL para_env_sub%isend(buffer_send(send_counter)%msg, proc_send, &
1515 buffer_send(send_counter)%msg_req)
1516 req_send(send_counter) = buffer_send(send_counter)%msg_req
1522 DO proc_shift = 1, para_env_sub%num_pe - 1
1523 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1524 size_rec_buffer = map_rec_size(proc_receive)
1525 IF (map_rec_size(proc_receive) > 0)
THEN
1526 rec_counter = rec_counter + 1
1528 CALL buffer_rec(rec_counter)%msg_req%wait()
1529 DO iii = 1, size_rec_buffer
1530 i_local = indeces_rec(rec_counter)%map(1, iii)
1531 j_local = indeces_rec(rec_counter)%map(2, iii)
1532 fm_ia%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
1538 CALL mp_waitall(req_send(:))
1541 ALLOCATE (dbcsr_gamma(kkb)%matrix)
1543 template=mo_coeff_o, &
1544 m=homo, n=virtual, sym=dbcsr_type_no_symmetry)
1545 CALL copy_fm_to_dbcsr(fm_ia, dbcsr_gamma(kkb)%matrix, keep_sparsity=.false.)
1550 DEALLOCATE (gamma_2d)
1551 DEALLOCATE (iii_vet)
1552 DEALLOCATE (req_send)
1553 IF (map_rec_size(para_env_sub%mepos) > 0)
THEN
1554 DEALLOCATE (indeces_map_my)
1556 DO rec_counter = 1, number_of_rec
1557 DEALLOCATE (indeces_rec(rec_counter)%map)
1558 DEALLOCATE (buffer_rec(rec_counter)%msg)
1560 DEALLOCATE (indeces_rec)
1561 DEALLOCATE (buffer_rec)
1562 DO send_counter = 1, number_of_send
1563 DEALLOCATE (buffer_send(send_counter)%msg)
1565 DEALLOCATE (buffer_send)
1566 DEALLOCATE (map_send_size)
1567 DEALLOCATE (map_rec_size)
1568 DEALLOCATE (grid_2_mepos)
1569 DEALLOCATE (mepos_2_grid)
1572 CALL cp_fm_release(fm_ia)
1574 CALL timestop(handle)
1588 group_grid_2_mepos, mepos_2_grid_group, &
1590 TYPE(mp_para_env_type),
INTENT(IN) :: para_env, para_env_sub
1591 INTEGER,
INTENT(IN) :: ngroup
1592 INTEGER,
ALLOCATABLE,
DIMENSION(:, :),
INTENT(OUT) :: group_grid_2_mepos, mepos_2_grid_group
1593 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(OUT), &
1594 OPTIONAL :: pos_info
1596 INTEGER :: i, pos_group, pos_sub
1597 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: my_pos_info
1599 ALLOCATE (my_pos_info(0:para_env%num_pe - 1))
1600 CALL para_env%allgather(para_env_sub%mepos, my_pos_info)
1602 ALLOCATE (group_grid_2_mepos(0:para_env_sub%num_pe - 1, 0:ngroup - 1))
1603 group_grid_2_mepos = 0
1604 ALLOCATE (mepos_2_grid_group(2, 0:para_env%num_pe - 1))
1605 mepos_2_grid_group = 0
1607 DO i = 0, para_env%num_pe - 1
1609 pos_group = i/para_env_sub%num_pe
1611 pos_sub = my_pos_info(i)
1613 group_grid_2_mepos(pos_sub, pos_group) = i
1615 mepos_2_grid_group(1, i) = pos_sub
1616 mepos_2_grid_group(2, i) = pos_group
1619 IF (
PRESENT(pos_info))
CALL move_alloc(my_pos_info, pos_info)
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
methods related to the blacs parallel environment
subroutine, public cp_blacs_env_release(blacs_env)
releases the given blacs_env
subroutine, public cp_blacs_env_create(blacs_env, para_env, blacs_grid_layout, blacs_repeatable, row_major, grid_2d)
allocates and initializes a type that represent a blacs context
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_m_by_n_from_template(matrix, template, m, n, sym, data_type)
Utility function to create an arbitrary shaped dbcsr matrix with the same processor grid as the templ...
subroutine, public copy_fm_to_dbcsr(fm, matrix, keep_sparsity)
Copy a BLACS matrix to a dbcsr matrix.
basic linear algebra operations for full matrices
subroutine, public cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
interface to BLACS geadd: matrix_b = beta*matrix_b + alpha*opt(matrix_a) where opt(matrix_a) can be e...
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
integer function, public cp_fm_indxl2g(INDXLOC, NB, IPROC, ISRCPROC, NPROCS)
wrapper to scalapack function INDXL2G that computes the global index of a distributed matrix entry po...
integer function, public cp_fm_indxg2l(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS)
wrapper to scalapack function INDXG2L that computes the local index of a distributed matrix entry poi...
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
integer function, public cp_fm_indxg2p(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS)
wrapper to scalapack function INDXG2P that computes the process coordinate which possesses the entry ...
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
Types to describe group distributions.
Defines the basic variable types.
integer, parameter, public dp
2- and 3-center electron repulsion integral routines based on libint2 Currently available operators: ...
pure logical function, public compare_potential_types(potential1, potential2)
Helper function to compare libint_potential_types.
Interface to the message passing library MPI.
type(mp_request_type), parameter, public mp_request_null
Routines for calculating RI-MP2 gradients.
subroutine, public complete_gamma(mp2_env, B_ia_Q, dimen_RI, homo, virtual, para_env, para_env_sub, ngroup, my_group_L_size, my_group_L_start, my_group_L_end, my_B_size, my_B_virtual_start, gd_array, gd_B_virtual, kspin)
complete the calculation of the Gamma matrices
subroutine, public prepare_redistribution(para_env, para_env_sub, ngroup, group_grid_2_mepos, mepos_2_grid_group, pos_info)
prepare array for redistribution
subroutine, public array2fm(mat2D, fm_struct, my_start_row, my_end_row, my_start_col, my_end_col, gd_row, gd_col, group_grid_2_mepos, ngroup_row, ngroup_col, fm_mat, integ_group_size, color_group, do_release_mat)
redistribute local part of array to fm
subroutine, public create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, my_ia_start, my_ia_end, my_group_L_size, gd_ia, dbcsr_Gamma, mo_coeff_o)
redistribute 2D representation of 3d tensor to a set of dbcsr matrices
subroutine, public fm2array(mat2D, my_rows, my_start_row, my_end_row, my_cols, my_start_col, my_end_col, group_grid_2_mepos, mepos_2_grid_group, ngroup_row, ngroup_col, fm_mat)
redistribute fm to local part of array
Types needed for MP2 calculations.
basic linear algebra operations for full matrixes
All kind of helpful little routines.
pure integer function, dimension(2), public get_limit(m, n, me)
divide m entries into n parts, return size of part me