mp2_ri_grad_util.F
!--------------------------------------------------------------------------------------------------!
!   CP2K: A general program to perform molecular dynamics simulations                              !
!   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
!                                                                                                  !
!   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
!--------------------------------------------------------------------------------------------------!

! **************************************************************************************************
!> \brief Routines for calculating RI-MP2 gradients
!> \par History
!>      10.2013 created [Mauro Del Ben]
! **************************************************************************************************
MODULE mp2_ri_grad_util
   USE cp_blacs_env,        ONLY: cp_blacs_env_create, cp_blacs_env_release, cp_blacs_env_type
   USE cp_dbcsr_api,        ONLY: dbcsr_p_type, dbcsr_type, dbcsr_type_no_symmetry
   USE cp_dbcsr_operations, ONLY: copy_fm_to_dbcsr, cp_dbcsr_m_by_n_from_template
   USE cp_fm_basic_linalg,  ONLY: cp_fm_geadd
   USE cp_fm_struct,        ONLY: cp_fm_struct_create, cp_fm_struct_get, cp_fm_struct_release, cp_fm_struct_type
   USE cp_fm_types,         ONLY: cp_fm_create, cp_fm_get_info, cp_fm_release, cp_fm_set_all, cp_fm_to_fm, cp_fm_type
   USE group_dist_types,    ONLY: create_group_dist, get_group_dist, group_dist_d1_type, release_group_dist
   USE kinds,               ONLY: dp
   USE libint_2c_3c,        ONLY: compare_potential_types
   USE message_passing,     ONLY: mp_para_env_type, mp_request_null, mp_request_type, mp_waitall
   USE mp2_types,           ONLY: integ_mat_buffer_type, mp2_type
   USE parallel_gemm_api,   ONLY: parallel_gemm
   USE util,                ONLY: get_limit
#include "./base/base_uses.f90"

   IMPLICIT NONE

   PRIVATE

   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_grad_util'

   PUBLIC :: complete_gamma, array2fm, fm2array, create_dbcsr_gamma, prepare_redistribution

   TYPE index_map
      INTEGER, DIMENSION(:, :), ALLOCATABLE :: map
   END TYPE

CONTAINS

! **************************************************************************************************
!> \brief complete the calculation of the Gamma matrices
!> \param mp2_env ...
!> \param B_ia_Q ...
!> \param dimen_RI ...
!> \param homo ...
!> \param virtual ...
!> \param para_env ...
!> \param para_env_sub ...
!> \param ngroup ...
!> \param my_group_L_size ...
!> \param my_group_L_start ...
!> \param my_group_L_end ...
!> \param my_B_size ...
!> \param my_B_virtual_start ...
!> \param gd_array ...
!> \param gd_B_virtual ...
!> \param kspin ...
!> \author Mauro Del Ben
! **************************************************************************************************
   SUBROUTINE complete_gamma(mp2_env, B_ia_Q, dimen_RI, homo, virtual, para_env, para_env_sub, ngroup, &
                             my_group_L_size, my_group_L_start, my_group_L_end, &
                             my_B_size, my_B_virtual_start, gd_array, gd_B_virtual, kspin)

      TYPE(mp2_type) :: mp2_env
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
         INTENT(INOUT) :: b_ia_q
      INTEGER, INTENT(IN) :: dimen_ri, homo, virtual
      TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env, para_env_sub
      INTEGER, INTENT(IN) :: ngroup, my_group_l_size, &
                             my_group_l_start, my_group_l_end, &
                             my_b_size, my_b_virtual_start
      TYPE(group_dist_d1_type), INTENT(IN) :: gd_array, gd_b_virtual
      INTEGER, INTENT(IN) :: kspin

      CHARACTER(LEN=*), PARAMETER :: routinen = 'complete_gamma'

      INTEGER :: dimen_ia, handle, i, ispin, kkb, my_ia_end, my_ia_size, my_ia_start, my_p_end, &
                 my_p_size, my_p_start, nspins, pos_group, pos_sub
      INTEGER, ALLOCATABLE, DIMENSION(:) :: pos_info
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: group_grid_2_mepos, mepos_2_grid_group
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: bib_c_2d, gamma_2d, gamma_pq
      TYPE(cp_blacs_env_type), POINTER :: blacs_env
      TYPE(cp_fm_struct_type), POINTER :: fm_struct_ia, fm_struct_ri
      TYPE(cp_fm_type) :: fm_gamma, fm_gamma_pq, fm_gamma_pq_2, fm_gamma_pq_temp, &
                          fm_gamma_pq_temp_2, fm_ia_p, fm_y, operator_half, pq_half
      TYPE(group_dist_d1_type) :: gd_array_new, gd_ia, gd_ia_new, gd_p, &
                                  gd_p_new

      CALL timeset(routinen, handle)

      ! reshape the data into a 2D array,
      ! reordering the local data in such a way as to help the next stage of matrix creation:
      ! the data inside the group are now arranged as an ia x K matrix
      dimen_ia = homo*virtual
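      ! note: the composite index ia enumerates occupied-virtual pairs as
      ! ia = (i-1)*virtual + a, with i = 1..homo and a = 1..virtual, so the
      ! dimen_ia rows of the ia x K matrix are distributed over the subgroup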
      CALL create_group_dist(gd_ia, para_env_sub%num_pe, dimen_ia)
      CALL get_group_dist(gd_ia, para_env_sub%mepos, my_ia_start, my_ia_end, my_ia_size)

      CALL mat_3d_to_2d(b_ia_q, bib_c_2d, homo, my_b_size, virtual, my_b_virtual_start, &
                        my_ia_start, my_ia_end, my_ia_size, my_group_l_size, para_env_sub, gd_b_virtual)

      CALL mat_3d_to_2d_gamma(mp2_env%ri_grad%Gamma_P_ia(kspin)%array, gamma_2d, homo, my_b_size, virtual, &
                              my_b_virtual_start, my_ia_start, my_ia_end, my_ia_size, my_group_l_size, &
                              para_env_sub, gd_b_virtual)

      ! create the processor map and size arrays
      CALL create_group_dist(gd_ia_new, para_env%num_pe)
      CALL create_group_dist(gd_array_new, para_env%num_pe)
      CALL create_group_dist(gd_p, para_env_sub%num_pe, dimen_ri)
      CALL create_group_dist(gd_p_new, para_env%num_pe)

      CALL get_group_dist(gd_p, para_env_sub%mepos, my_p_start, my_p_end, my_p_size)

      CALL prepare_redistribution(para_env, para_env_sub, ngroup, &
                                  group_grid_2_mepos, mepos_2_grid_group, pos_info=pos_info)

      DO i = 0, para_env%num_pe - 1
         ! calculate the position of the group
         pos_group = i/para_env_sub%num_pe
         ! calculate the position in the subgroup
         pos_sub = pos_info(i)
         ! 1 -> rows, 2 -> cols
         CALL get_group_dist(gd_ia, pos_sub, gd_ia_new, i)
         CALL get_group_dist(gd_array, pos_group, gd_array_new, i)
         CALL get_group_dist(gd_p, pos_sub, gd_p_new, i)
      END DO
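      ! note on the rank decomposition above: with e.g. para_env_sub%num_pe = 4,
      ! global rank 9 belongs to group 9/4 = 2, while pos_info(9) gives its
      ! rank within that subgroup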

      ! create the blacs env
      NULLIFY (blacs_env)
      CALL cp_blacs_env_create(blacs_env=blacs_env, para_env=para_env)

      NULLIFY (fm_struct_ia)
      CALL cp_fm_struct_create(fm_struct_ia, context=blacs_env, nrow_global=dimen_ia, &
                               ncol_global=dimen_ri, para_env=para_env)

      ! create the fm matrix Gamma
      CALL array2fm(gamma_2d, fm_struct_ia, my_ia_start, my_ia_end, &
                    my_group_l_start, my_group_l_end, &
                    gd_ia_new, gd_array_new, &
                    group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
                    fm_y)
      ! create the fm matrix B_ia_P
      CALL array2fm(bib_c_2d, fm_struct_ia, my_ia_start, my_ia_end, &
                    my_group_l_start, my_group_l_end, &
                    gd_ia_new, gd_array_new, &
                    group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
                    fm_ia_p)

      NULLIFY (fm_struct_ri)
      CALL cp_fm_struct_create(fm_struct_ri, context=blacs_env, nrow_global=dimen_ri, &
                               ncol_global=dimen_ri, para_env=para_env)

      ! since we will need (P|Q)^(-1/2) in the future, make a copy
      CALL array2fm(mp2_env%ri_grad%PQ_half, fm_struct_ri, my_p_start, my_p_end, &
                    my_group_l_start, my_group_l_end, &
                    gd_p_new, gd_array_new, &
                    group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
                    pq_half, do_release_mat=.false.)

      IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
         CALL array2fm(mp2_env%ri_grad%operator_half, fm_struct_ri, my_p_start, my_p_end, &
                       my_group_l_start, my_group_l_end, &
                       gd_p_new, gd_array_new, &
                       group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
                       operator_half, do_release_mat=.false.)
      END IF

      CALL release_group_dist(gd_p_new)
      CALL release_group_dist(gd_ia_new)
      CALL release_group_dist(gd_array_new)

      ! complete the gamma matrix Gamma_ia^P = sum_Q (Y_ia^Q * (Q|P)^(-1/2))
      IF (compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
         CALL cp_fm_create(fm_gamma, fm_struct_ia, name="fm_Gamma")
         CALL cp_fm_struct_release(fm_struct_ia)
         ! perform the matrix multiplication
         CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
                            matrix_a=fm_y, matrix_b=pq_half, beta=0.0_dp, &
                            matrix_c=fm_gamma)
         ! release the Y matrix
         CALL cp_fm_release(fm_y)

         ! complete gamma small (fm_Gamma_PQ)
         ! create temp matrix
         CALL cp_fm_create(fm_gamma_pq_temp, fm_struct_ri, name="fm_Gamma_PQ_temp")
         CALL parallel_gemm(transa="T", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ia, alpha=1.0_dp, &
                            matrix_a=fm_gamma, matrix_b=fm_ia_p, beta=0.0_dp, &
                            matrix_c=fm_gamma_pq_temp)
         CALL cp_fm_release(fm_ia_p)
         ! create the fm_Gamma_PQ matrix
         CALL cp_fm_create(fm_gamma_pq, fm_struct_ri, name="fm_Gamma_PQ")
         CALL cp_fm_struct_release(fm_struct_ri)
         ! perform the matrix multiplication
         CALL parallel_gemm(transa="N", transb="T", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
                            matrix_a=fm_gamma_pq_temp, matrix_b=pq_half, beta=0.0_dp, &
                            matrix_c=fm_gamma_pq)
         CALL cp_fm_release(fm_gamma_pq_temp)
         CALL cp_fm_release(pq_half)

      ELSE
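         ! RI metric and interaction potential differ: besides
         ! Gamma = Y*(P|Q)^(-1/2), two two-center matrices are assembled below
         ! from T = Y^T*B_ia_P and T2 = T*operator_half^T, namely
         ! fm_gamma_pq_2 = pq_half*T2 + (pq_half*T2)^T and
         ! fm_gamma_pq = -operator_half*T2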
         ! create temp matrix
         CALL cp_fm_create(fm_gamma_pq_temp, fm_struct_ri, name="fm_Gamma_PQ_temp")
         CALL parallel_gemm(transa="T", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ia, alpha=1.0_dp, &
                            matrix_a=fm_y, matrix_b=fm_ia_p, beta=0.0_dp, &
                            matrix_c=fm_gamma_pq_temp)
         CALL cp_fm_release(fm_ia_p)

         CALL cp_fm_create(fm_gamma, fm_struct_ia, name="fm_Gamma")
         CALL cp_fm_struct_release(fm_struct_ia)
         ! perform the matrix multiplication
         CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
                            matrix_a=fm_y, matrix_b=pq_half, beta=0.0_dp, &
                            matrix_c=fm_gamma)
         CALL cp_fm_release(fm_y)

         CALL cp_fm_create(fm_gamma_pq_temp_2, fm_struct_ri, name="fm_Gamma_PQ_temp_2")
         CALL parallel_gemm(transa="N", transb="T", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
                            matrix_a=fm_gamma_pq_temp, matrix_b=operator_half, beta=0.0_dp, &
                            matrix_c=fm_gamma_pq_temp_2)

         CALL cp_fm_create(fm_gamma_pq_2, fm_struct_ri, name="fm_Gamma_PQ_2")
         CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
                            matrix_a=pq_half, matrix_b=fm_gamma_pq_temp_2, beta=0.0_dp, &
                            matrix_c=fm_gamma_pq_temp)
         CALL cp_fm_to_fm(fm_gamma_pq_temp, fm_gamma_pq_2)
         CALL cp_fm_geadd(1.0_dp, "T", fm_gamma_pq_temp, 1.0_dp, fm_gamma_pq_2)
         CALL cp_fm_release(fm_gamma_pq_temp)
         CALL cp_fm_release(pq_half)

         CALL cp_fm_create(fm_gamma_pq, fm_struct_ri)
         CALL cp_fm_struct_release(fm_struct_ri)
         CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=-1.0_dp, &
                            matrix_a=operator_half, matrix_b=fm_gamma_pq_temp_2, beta=0.0_dp, &
                            matrix_c=fm_gamma_pq)
         CALL cp_fm_release(operator_half)
         CALL cp_fm_release(fm_gamma_pq_temp_2)
      END IF

      ! now redistribute the data; in this case we go back
      ! to the original way the integrals were distributed
      CALL fm2array(gamma_2d, my_ia_size, my_ia_start, my_ia_end, &
                    my_group_l_size, my_group_l_start, my_group_l_end, &
                    group_grid_2_mepos, mepos_2_grid_group, &
                    para_env_sub%num_pe, ngroup, &
                    fm_gamma)

      ALLOCATE (gamma_pq(my_p_size, my_group_l_size))
      CALL fm2array(gamma_pq, my_p_size, my_p_start, my_p_end, &
                    my_group_l_size, my_group_l_start, my_group_l_end, &
                    group_grid_2_mepos, mepos_2_grid_group, &
                    para_env_sub%num_pe, ngroup, &
                    fm_gamma_pq)
      IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_PQ)) THEN
         CALL move_alloc(gamma_pq, mp2_env%ri_grad%Gamma_PQ)
      ELSE
         mp2_env%ri_grad%Gamma_PQ(:, :) = mp2_env%ri_grad%Gamma_PQ + gamma_pq
         DEALLOCATE (gamma_pq)
      END IF

      IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
         ALLOCATE (gamma_pq(my_p_size, my_group_l_size))
         CALL fm2array(gamma_pq, my_p_size, my_p_start, my_p_end, &
                       my_group_l_size, my_group_l_start, my_group_l_end, &
                       group_grid_2_mepos, mepos_2_grid_group, &
                       para_env_sub%num_pe, ngroup, &
                       fm_gamma_pq_2)
         IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_PQ_2)) THEN
            CALL move_alloc(gamma_pq, mp2_env%ri_grad%Gamma_PQ_2)
         ELSE
            mp2_env%ri_grad%Gamma_PQ_2(:, :) = mp2_env%ri_grad%Gamma_PQ_2 + gamma_pq
            DEALLOCATE (gamma_pq)
         END IF
      END IF

      ! allocate G_P_ia (DBCSR)
      IF (.NOT. ALLOCATED(mp2_env%ri_grad%G_P_ia)) THEN
         nspins = SIZE(mp2_env%ri_grad%mo_coeff_o)
         ALLOCATE (mp2_env%ri_grad%G_P_ia(my_group_l_size, nspins))
         DO ispin = 1, nspins
            DO kkb = 1, my_group_l_size
               NULLIFY (mp2_env%ri_grad%G_P_ia(kkb, ispin)%matrix)
            END DO
         END DO
      END IF

      ! create the Gamma_ia_P in DBCSR style
      CALL create_dbcsr_gamma(gamma_2d, homo, virtual, dimen_ia, para_env_sub, &
                              my_ia_start, my_ia_end, my_group_l_size, gd_ia, &
                              mp2_env%ri_grad%G_P_ia(:, kspin), mp2_env%ri_grad%mo_coeff_o(1)%matrix)

      DEALLOCATE (pos_info)
      DEALLOCATE (group_grid_2_mepos, mepos_2_grid_group)
      CALL release_group_dist(gd_ia)
      CALL release_group_dist(gd_p)

      ! release the blacs_env
      CALL cp_blacs_env_release(blacs_env)

      CALL timestop(handle)

   END SUBROUTINE complete_gamma

! **************************************************************************************************
!> \brief Redistribute a 3d matrix to a 2d matrix
!> \param B_ia_Q ...
!> \param BIb_C_2D ...
!> \param homo ...
!> \param my_B_size ...
!> \param virtual ...
!> \param my_B_virtual_start ...
!> \param my_ia_start ...
!> \param my_ia_end ...
!> \param my_ia_size ...
!> \param my_group_L_size ...
!> \param para_env_sub ...
!> \param gd_B_virtual ...
! **************************************************************************************************
   SUBROUTINE mat_3d_to_2d(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
                           my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
         INTENT(INOUT) :: b_ia_q
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
         INTENT(OUT) :: bib_c_2d
      INTEGER, INTENT(IN) :: homo, my_b_size, virtual, &
                             my_b_virtual_start, my_ia_start, &
                             my_ia_end, my_ia_size, my_group_l_size
      TYPE(mp_para_env_type), INTENT(IN) :: para_env_sub
      TYPE(group_dist_d1_type), INTENT(IN) :: gd_b_virtual

      CHARACTER(LEN=*), PARAMETER :: routinen = 'mat_3d_to_2d'

      INTEGER :: handle, ia_global, iib, jjb, proc_receive, proc_send, proc_shift, rec_b_size, &
                 rec_b_virtual_end, rec_b_virtual_start
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: bib_c_rec

      CALL timeset(routinen, handle)

      ALLOCATE (bib_c_2d(my_ia_size, my_group_l_size))
      bib_c_2d = 0.0_dp

      DO iib = 1, homo
         DO jjb = 1, my_b_size
            ia_global = (iib - 1)*virtual + my_b_virtual_start + jjb - 1
            IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
               bib_c_2d(ia_global - my_ia_start + 1, :) = b_ia_q(iib, jjb, 1:my_group_l_size)
            END IF
         END DO
      END DO

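      ! exchange the remaining blocks in a ring: at shift s every rank sends its
      ! local block to rank mepos+s and receives the block of rank mepos-s
      ! (modulo num_pe), then scatters the received rows as above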
      DO proc_shift = 1, para_env_sub%num_pe - 1
         proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
         proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)

         CALL get_group_dist(gd_b_virtual, proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)

         ALLOCATE (bib_c_rec(homo, rec_b_size, my_group_l_size))
         bib_c_rec = 0.0_dp

         CALL para_env_sub%sendrecv(b_ia_q, proc_send, &
                                    bib_c_rec, proc_receive)

         DO iib = 1, homo
            DO jjb = 1, rec_b_size
               ia_global = (iib - 1)*virtual + rec_b_virtual_start + jjb - 1
               IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
                  bib_c_2d(ia_global - my_ia_start + 1, :) = bib_c_rec(iib, jjb, 1:my_group_l_size)
               END IF
            END DO
         END DO

         DEALLOCATE (bib_c_rec)
      END DO
      DEALLOCATE (b_ia_q)

      CALL timestop(handle)
   END SUBROUTINE mat_3d_to_2d

! **************************************************************************************************
!> \brief Redistribute a 3d matrix to a 2d matrix, specialized for Gamma_P_ia
!>        to account for a different order of indices
!> \param B_ia_Q ...
!> \param BIb_C_2D ...
!> \param homo ...
!> \param my_B_size ...
!> \param virtual ...
!> \param my_B_virtual_start ...
!> \param my_ia_start ...
!> \param my_ia_end ...
!> \param my_ia_size ...
!> \param my_group_L_size ...
!> \param para_env_sub ...
!> \param gd_B_virtual ...
! **************************************************************************************************
   SUBROUTINE mat_3d_to_2d_gamma(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
                                 my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
         INTENT(INOUT) :: b_ia_q
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
         INTENT(OUT) :: bib_c_2d
      INTEGER, INTENT(IN) :: homo, my_b_size, virtual, &
                             my_b_virtual_start, my_ia_start, &
                             my_ia_end, my_ia_size, my_group_l_size
      TYPE(mp_para_env_type), INTENT(IN) :: para_env_sub
      TYPE(group_dist_d1_type), INTENT(IN) :: gd_b_virtual

      CHARACTER(LEN=*), PARAMETER :: routinen = 'mat_3d_to_2d_gamma'

      INTEGER :: handle, ia_global, iib, jjb, proc_receive, proc_send, proc_shift, rec_b_size, &
                 rec_b_virtual_end, rec_b_virtual_start
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: bib_c_rec

      CALL timeset(routinen, handle)

      ALLOCATE (bib_c_2d(my_ia_size, my_group_l_size))
      bib_c_2d = 0.0_dp
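      ! same redistribution as in mat_3d_to_2d, but the incoming 3D array is
      ! stored as (virtual, occupied, L), i.e. with the virtual index first,
      ! hence the transposed access b_ia_q(jjb, iib, :) below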

      DO iib = 1, homo
         DO jjb = 1, my_b_size
            ia_global = (iib - 1)*virtual + my_b_virtual_start + jjb - 1
            IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
               bib_c_2d(ia_global - my_ia_start + 1, :) = b_ia_q(jjb, iib, 1:my_group_l_size)
            END IF
         END DO
      END DO

      DO proc_shift = 1, para_env_sub%num_pe - 1
         proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
         proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)

         CALL get_group_dist(gd_b_virtual, proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)

         ALLOCATE (bib_c_rec(rec_b_size, homo, my_group_l_size))
         bib_c_rec = 0.0_dp

         CALL para_env_sub%sendrecv(b_ia_q, proc_send, &
                                    bib_c_rec, proc_receive)

         DO iib = 1, homo
            DO jjb = 1, rec_b_size
               ia_global = (iib - 1)*virtual + rec_b_virtual_start + jjb - 1
               IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
                  bib_c_2d(ia_global - my_ia_start + 1, :) = bib_c_rec(jjb, iib, 1:my_group_l_size)
               END IF
            END DO
         END DO

         DEALLOCATE (bib_c_rec)
      END DO
      DEALLOCATE (b_ia_q)

      CALL timestop(handle)
   END SUBROUTINE mat_3d_to_2d_gamma

! **************************************************************************************************
!> \brief redistribute local part of array to fm
!> \param mat2D ...
!> \param fm_struct ...
!> \param my_start_row ...
!> \param my_end_row ...
!> \param my_start_col ...
!> \param my_end_col ...
!> \param gd_row ...
!> \param gd_col ...
!> \param group_grid_2_mepos ...
!> \param ngroup_row ...
!> \param ngroup_col ...
!> \param fm_mat ...
!> \param integ_group_size ...
!> \param color_group ...
!> \param do_release_mat whether to release the array (default: yes)
! **************************************************************************************************
   SUBROUTINE array2fm(mat2D, fm_struct, my_start_row, my_end_row, &
                       my_start_col, my_end_col, &
                       gd_row, gd_col, &
                       group_grid_2_mepos, ngroup_row, ngroup_col, &
                       fm_mat, integ_group_size, color_group, do_release_mat)

      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
         INTENT(INOUT) :: mat2d
      TYPE(cp_fm_struct_type), POINTER :: fm_struct
      INTEGER, INTENT(IN) :: my_start_row, my_end_row, my_start_col, &
                             my_end_col
      TYPE(group_dist_d1_type), INTENT(IN) :: gd_row, gd_col
      INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN) :: group_grid_2_mepos
      INTEGER, INTENT(IN) :: ngroup_row, ngroup_col
      TYPE(cp_fm_type), INTENT(OUT) :: fm_mat
      INTEGER, INTENT(IN), OPTIONAL :: integ_group_size, color_group
      LOGICAL, INTENT(IN), OPTIONAL :: do_release_mat

      CHARACTER(LEN=*), PARAMETER :: routinen = 'array2fm'

      INTEGER :: dummy_proc, end_col_block, end_row_block, handle, handle2, i_global, i_local, &
                 i_sub, iib, iii, itmp(2), j_global, j_local, j_sub, jjb, my_num_col_blocks, &
                 my_num_row_blocks, mypcol, myprow, ncol_local, npcol, nprow, nrow_local, num_cols, &
                 num_rec_cols, num_rows, number_of_rec, number_of_send, proc_receive, proc_send, &
                 proc_shift, rec_col_end, rec_col_size, rec_col_start, rec_counter, rec_pcol, rec_prow, &
                 rec_row_end, rec_row_size, rec_row_start, ref_send_pcol, ref_send_prow, send_counter, &
                 send_pcol, send_prow, size_rec_buffer, size_send_buffer, start_col_block, start_row_block
      INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, index_col_rec, map_rec_size, &
                                            map_send_size
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blocks_ranges_col, blocks_ranges_row, &
                                               grid_2_mepos, grid_ref_2_send_pos, &
                                               mepos_2_grid
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
      LOGICAL :: convert_pos, my_do_release_mat
      REAL(kind=dp) :: part_col, part_row
      TYPE(integ_mat_buffer_type), ALLOCATABLE, &
         DIMENSION(:) :: buffer_rec, buffer_send
      TYPE(mp_para_env_type), POINTER :: para_env
      TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send

      CALL timeset(routinen, handle)

      my_do_release_mat = .true.
      IF (PRESENT(do_release_mat)) my_do_release_mat = do_release_mat

      CALL cp_fm_struct_get(fm_struct, para_env=para_env, nrow_global=num_rows, ncol_global=num_cols)

      ! create the full matrix (num_rows x num_cols)
      CALL cp_fm_create(fm_mat, fm_struct, name="fm_mat")
      CALL cp_fm_set_all(matrix=fm_mat, alpha=0.0_dp)

      ! start the filling procedure
      CALL cp_fm_get_info(matrix=fm_mat, &
                          nrow_local=nrow_local, &
                          ncol_local=ncol_local, &
                          row_indices=row_indices, &
                          col_indices=col_indices)
      myprow = fm_mat%matrix_struct%context%mepos(1)
      mypcol = fm_mat%matrix_struct%context%mepos(2)
      nprow = fm_mat%matrix_struct%context%num_pe(1)
      npcol = fm_mat%matrix_struct%context%num_pe(2)

      ! start the communication part
      ! 0) create arrays containing the process positions and supporting info
      CALL timeset(routinen//"_info", handle2)
      ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
      grid_2_mepos = 0
      ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))
      grid_2_mepos(myprow, mypcol) = para_env%mepos
      ! sum up the info
      CALL para_env%sum(grid_2_mepos)
      CALL para_env%allgather([myprow, mypcol], mepos_2_grid)

      ! 1) loop over my local data and define a map of the processes to send data to
      ALLOCATE (map_send_size(0:para_env%num_pe - 1))
      map_send_size = 0
      dummy_proc = 0
      DO jjb = my_start_col, my_end_col
         send_pcol = fm_mat%matrix_struct%g2p_col(jjb)
         DO iib = my_start_row, my_end_row
            send_prow = fm_mat%matrix_struct%g2p_row(iib)
            proc_send = grid_2_mepos(send_prow, send_pcol)
            map_send_size(proc_send) = map_send_size(proc_send) + 1
         END DO
      END DO

      ! 2) loop over my local data of fm_mat and define a map of the processes data is received from
      ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
      map_rec_size = 0
      part_row = real(num_rows, kind=dp)/real(ngroup_row, kind=dp)
      part_col = real(num_cols, kind=dp)/real(ngroup_col, kind=dp)

      ! In some cases we have to convert global positions to positions in a subgroup (RPA)
      convert_pos = .false.
      IF (PRESENT(integ_group_size) .AND. PRESENT(color_group)) convert_pos = .true.

      DO jjb = 1, nrow_local
         j_global = row_indices(jjb)
         ! check the group holding this element
         ! dirty way, if someone has a better idea ...
         rec_prow = int(real(j_global - 1, kind=dp)/part_row)
         rec_prow = max(0, rec_prow)
         rec_prow = min(rec_prow, ngroup_row - 1)
         DO
            itmp = get_limit(num_rows, ngroup_row, rec_prow)
            IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
            IF (j_global < itmp(1)) rec_prow = rec_prow - 1
            IF (j_global > itmp(2)) rec_prow = rec_prow + 1
         END DO
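         ! the initial guess above assumes equally sized ranges; get_limit
         ! returns the exact range owned by rec_prow, and the walk moves up or
         ! down until the range brackets j_global (it always terminates, since
         ! the ranges partition 1..num_rows)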

         IF (convert_pos) THEN
            ! cycle if the group is not in the same RPA group
            IF ((rec_prow/integ_group_size) .NE. color_group) cycle
            ! convert the global position to the position within the RPA group
            rec_prow = mod(rec_prow, integ_group_size)
         END IF

         DO iib = 1, ncol_local
            i_global = col_indices(iib)
            ! check the process in the group holding this element
            rec_pcol = int(real(i_global - 1, kind=dp)/part_col)
            rec_pcol = max(0, rec_pcol)
            rec_pcol = min(rec_pcol, ngroup_col - 1)
            DO
               itmp = get_limit(num_cols, ngroup_col, rec_pcol)
               IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
               IF (i_global < itmp(1)) rec_pcol = rec_pcol - 1
               IF (i_global > itmp(2)) rec_pcol = rec_pcol + 1
            END DO

            proc_receive = group_grid_2_mepos(rec_prow, rec_pcol)

            map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1

         END DO ! i_global
      END DO ! j_global

      ! 3) check if local data has to be stored directly in the new fm_mat
      IF (map_rec_size(para_env%mepos) > 0) THEN
         DO jjb = 1, ncol_local
            j_global = col_indices(jjb)
            IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
               DO iib = 1, nrow_local
                  i_global = row_indices(iib)
                  IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
                     fm_mat%local_data(iib, jjb) = mat2d(i_global - my_start_row + 1, j_global - my_start_col + 1)
                  END IF
               END DO
            END IF
         END DO
      END IF
      CALL timestop(handle2)

      ! 4) reorder the data into the send buffers
      CALL timeset(routinen//"_buffer_send", handle2)
      ! count the number of messages to send
      number_of_send = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_send = modulo(para_env%mepos + proc_shift, para_env%num_pe)
         IF (map_send_size(proc_send) > 0) THEN
            number_of_send = number_of_send + 1
         END IF
      END DO
      ! allocate the structure that will hold the messages to be sent
      ALLOCATE (buffer_send(number_of_send))
      ! grid_ref_2_send_pos is a map that, given a pair
      ! (ref_send_prow, ref_send_pcol), returns the position in
      ! buffer_send associated with that process
      ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
      grid_ref_2_send_pos = 0
      ! finalize the allocation of buffer_send with the actual size
      ! of each message (actual size is size_send_buffer)
      send_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_send = modulo(para_env%mepos + proc_shift, para_env%num_pe)
         size_send_buffer = map_send_size(proc_send)
         IF (map_send_size(proc_send) > 0) THEN
            send_counter = send_counter + 1
            ! allocate the send buffer (msg)
            ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
            buffer_send(send_counter)%msg = 0.0_dp
            buffer_send(send_counter)%proc = proc_send
            ! look up prow and pcol of the process that will
            ! receive this message
            ref_send_prow = mepos_2_grid(1, proc_send)
            ref_send_pcol = mepos_2_grid(2, proc_send)
            ! store the position of the message for this process
            grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
         END IF
      END DO

      ! loop over the locally held data and fill buffer_send;
      ! for doing that we need an array that keeps track of the
      ! sequentially increasing index within each message
      ALLOCATE (iii_vet(number_of_send))
      iii_vet = 0
      DO iib = my_start_row, my_end_row
         send_prow = fm_mat%matrix_struct%g2p_row(iib)
         DO jjb = my_start_col, my_end_col
            send_pcol = fm_mat%matrix_struct%g2p_col(jjb)
            ! we don't need to send to ourselves
            IF (grid_2_mepos(send_prow, send_pcol) == para_env%mepos) cycle

            send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
            iii_vet(send_counter) = iii_vet(send_counter) + 1
            iii = iii_vet(send_counter)
            buffer_send(send_counter)%msg(iii) = mat2d(iib - my_start_row + 1, jjb - my_start_col + 1)
         END DO
      END DO

      DEALLOCATE (iii_vet)
      DEALLOCATE (grid_ref_2_send_pos)
      IF (my_do_release_mat) DEALLOCATE (mat2d)
      CALL timestop(handle2)

      ! 5) similarly to what was done for buffer_send,
      ! create the receive buffers, post the receives with irecv,
      ! and send the messages non-blockingly
      CALL timeset(routinen//"_isendrecv", handle2)
      ! count the number of messages to be received
      number_of_rec = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_receive = modulo(para_env%mepos - proc_shift, para_env%num_pe)
         IF (map_rec_size(proc_receive) > 0) THEN
            number_of_rec = number_of_rec + 1
         END IF
      END DO

      ALLOCATE (buffer_rec(number_of_rec))

      rec_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_receive = modulo(para_env%mepos - proc_shift, para_env%num_pe)
         size_rec_buffer = map_rec_size(proc_receive)
         IF (map_rec_size(proc_receive) > 0) THEN
            rec_counter = rec_counter + 1
            ! prepare the buffer for receive
            ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
            buffer_rec(rec_counter)%msg = 0.0_dp
            buffer_rec(rec_counter)%proc = proc_receive
            ! post the message to be received
            CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
                                buffer_rec(rec_counter)%msg_req)
         END IF
      END DO

      ! send the messages
      ALLOCATE (req_send(number_of_send))
      send_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_send = modulo(para_env%mepos + proc_shift, para_env%num_pe)
         IF (map_send_size(proc_send) > 0) THEN
            send_counter = send_counter + 1
            CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
                                buffer_send(send_counter)%msg_req)
            req_send(send_counter) = buffer_send(send_counter)%msg_req
         END IF
      END DO
      CALL timestop(handle2)

      ! 6) fill the fm_mat matrix with the messages received from the
      ! other processes
      CALL timeset(routinen//"_fill", handle2)
      ! In order to perform this step efficiently we first have to know the
      ! ranges of the blocks over which a given process holds its local data.
      ! Start with the rows ...
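      ! a new block starts wherever two consecutive local row indices are not
      ! globally adjacent, e.g. row_indices = (1,2,3,10,11) yields the two
      ! blocks (1:3) and (10:11)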
      my_num_row_blocks = 1
      DO iib = 1, nrow_local - 1
         IF (abs(row_indices(iib + 1) - row_indices(iib)) == 1) cycle
         my_num_row_blocks = my_num_row_blocks + 1
      END DO
      ALLOCATE (blocks_ranges_row(2, my_num_row_blocks))
      blocks_ranges_row = 0
      blocks_ranges_row(1, 1) = row_indices(1)
      iii = 1
      DO iib = 1, nrow_local - 1
         IF (abs(row_indices(iib + 1) - row_indices(iib)) == 1) cycle
         iii = iii + 1
         blocks_ranges_row(2, iii - 1) = row_indices(iib)
         blocks_ranges_row(1, iii) = row_indices(iib + 1)
      END DO
      blocks_ranges_row(2, my_num_row_blocks) = row_indices(max(nrow_local, 1))
      ! ... and columns
      my_num_col_blocks = 1
      DO jjb = 1, ncol_local - 1
         IF (abs(col_indices(jjb + 1) - col_indices(jjb)) == 1) cycle
         my_num_col_blocks = my_num_col_blocks + 1
      END DO
      ALLOCATE (blocks_ranges_col(2, my_num_col_blocks))
      blocks_ranges_col = 0
      blocks_ranges_col(1, 1) = col_indices(1)
      iii = 1
      DO jjb = 1, ncol_local - 1
         IF (abs(col_indices(jjb + 1) - col_indices(jjb)) == 1) cycle
         iii = iii + 1
         blocks_ranges_col(2, iii - 1) = col_indices(jjb)
         blocks_ranges_col(1, iii) = col_indices(jjb + 1)
      END DO
      blocks_ranges_col(2, my_num_col_blocks) = col_indices(max(ncol_local, 1))

      rec_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_receive = modulo(para_env%mepos - proc_shift, para_env%num_pe)
         size_rec_buffer = map_rec_size(proc_receive)

         IF (map_rec_size(proc_receive) > 0) THEN
            rec_counter = rec_counter + 1

            CALL get_group_dist(gd_col, proc_receive, rec_col_start, rec_col_end, rec_col_size)

            ! precompute the number of received columns and the corresponding local indices
            num_rec_cols = 0
            DO jjb = 1, my_num_col_blocks
               start_col_block = max(blocks_ranges_col(1, jjb), rec_col_start)
               end_col_block = min(blocks_ranges_col(2, jjb), rec_col_end)
               DO j_sub = start_col_block, end_col_block
                  num_rec_cols = num_rec_cols + 1
               END DO
            END DO
            ALLOCATE (index_col_rec(num_rec_cols))
            index_col_rec = 0
            iii = 0
            DO jjb = 1, my_num_col_blocks
               start_col_block = max(blocks_ranges_col(1, jjb), rec_col_start)
               end_col_block = min(blocks_ranges_col(2, jjb), rec_col_end)
               DO j_sub = start_col_block, end_col_block
                  iii = iii + 1
                  j_local = fm_mat%matrix_struct%g2l_col(j_sub)
                  index_col_rec(iii) = j_local
               END DO
            END DO

            CALL get_group_dist(gd_row, proc_receive, rec_row_start, rec_row_end, rec_row_size)

            ! wait for the message
            CALL buffer_rec(rec_counter)%msg_req%wait()

            ! fill the local data in fm_mat
            iii = 0
            DO iib = 1, my_num_row_blocks
               start_row_block = max(blocks_ranges_row(1, iib), rec_row_start)
               end_row_block = min(blocks_ranges_row(2, iib), rec_row_end)
               DO i_sub = start_row_block, end_row_block
                  i_local = fm_mat%matrix_struct%g2l_row(i_sub)
                  DO jjb = 1, num_rec_cols
                     iii = iii + 1
                     j_local = index_col_rec(jjb)
                     fm_mat%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
                  END DO
               END DO
            END DO

            DEALLOCATE (buffer_rec(rec_counter)%msg)
            DEALLOCATE (index_col_rec)
         END IF
      END DO
      DEALLOCATE (buffer_rec)

      DEALLOCATE (blocks_ranges_row)
      DEALLOCATE (blocks_ranges_col)

      CALL timestop(handle2)

      ! 7) finally wait for all messages to be sent
      CALL timeset(routinen//"_waitall", handle2)
      CALL mp_waitall(req_send(:))
      DO send_counter = 1, number_of_send
         DEALLOCATE (buffer_send(send_counter)%msg)
      END DO
      DEALLOCATE (buffer_send)
      CALL timestop(handle2)

      DEALLOCATE (map_send_size)
      DEALLOCATE (map_rec_size)
      DEALLOCATE (grid_2_mepos)
      DEALLOCATE (mepos_2_grid)

      CALL timestop(handle)

   END SUBROUTINE array2fm

! **************************************************************************************************
!> \brief redistribute fm to local part of array
!> \param mat2D ...
!> \param my_rows ...
!> \param my_start_row ...
!> \param my_end_row ...
!> \param my_cols ...
!> \param my_start_col ...
!> \param my_end_col ...
!> \param group_grid_2_mepos ...
!> \param mepos_2_grid_group ...
!> \param ngroup_row ...
!> \param ngroup_col ...
!> \param fm_mat ...
! **************************************************************************************************
   SUBROUTINE fm2array(mat2D, my_rows, my_start_row, my_end_row, &
                       my_cols, my_start_col, my_end_col, &
                       group_grid_2_mepos, mepos_2_grid_group, &
                       ngroup_row, ngroup_col, &
                       fm_mat)

      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
         INTENT(OUT) :: mat2d
      INTEGER, INTENT(IN) :: my_rows, my_start_row, my_end_row, &
                             my_cols, my_start_col, my_end_col
      INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN) :: group_grid_2_mepos, mepos_2_grid_group
      INTEGER, INTENT(IN) :: ngroup_row, ngroup_col
      TYPE(cp_fm_type), INTENT(INOUT) :: fm_mat

      CHARACTER(LEN=*), PARAMETER :: routinen = 'fm2array'

      INTEGER :: dummy_proc, handle, handle2, i_global, iib, iii, itmp(2), j_global, jjb, mypcol, &
                 myprow, ncol_local, npcol, nprow, nrow_local, num_cols, num_rec_rows, num_rows, &
                 number_of_rec, number_of_send, proc_receive, proc_send, proc_shift, rec_col_size, &
                 rec_counter, rec_pcol, rec_prow, rec_row_size, ref_send_pcol, ref_send_prow, &
                 send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
      INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, index_row_rec, map_rec_size, &
                                            map_send_size
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, grid_ref_2_send_pos, &
                                               mepos_2_grid, sizes
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
      REAL(kind=dp) :: part_col, part_row
      TYPE(integ_mat_buffer_type), ALLOCATABLE, &
         DIMENSION(:) :: buffer_rec, buffer_send
      TYPE(mp_para_env_type), POINTER :: para_env
      TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send

      CALL timeset(routinen, handle)

      ! allocate the array
      ALLOCATE (mat2d(my_rows, my_cols))
      mat2d = 0.0_dp
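
      ! fm2array performs the inverse redistribution of array2fm: the
      ! block-cyclically distributed fm_mat is scattered back into rectangular
      ! local chunks (my_rows x my_cols per rank); fm_mat is released at the end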

      ! start the procedure:
      ! get information from the full matrix
      CALL cp_fm_get_info(matrix=fm_mat, &
                          nrow_local=nrow_local, &
                          ncol_local=ncol_local, &
                          row_indices=row_indices, &
                          col_indices=col_indices, &
                          nrow_global=num_rows, &
                          ncol_global=num_cols, &
                          para_env=para_env)
      myprow = fm_mat%matrix_struct%context%mepos(1)
      mypcol = fm_mat%matrix_struct%context%mepos(2)
      nprow = fm_mat%matrix_struct%context%num_pe(1)
      npcol = fm_mat%matrix_struct%context%num_pe(2)

      ! start the communication part
      ! 0) create arrays containing the process positions and supporting info
      CALL timeset(routinen//"_info", handle2)
      ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
      grid_2_mepos = 0
      ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))

      ! fill the info array
      grid_2_mepos(myprow, mypcol) = para_env%mepos

      ! sum up the info
      CALL para_env%sum(grid_2_mepos)
      CALL para_env%allgather([myprow, mypcol], mepos_2_grid)

      ! 1) loop over my local data and define a map of the processes to send data to
      ALLOCATE (map_send_size(0:para_env%num_pe - 1))
      map_send_size = 0
      part_row = real(num_rows, kind=dp)/real(ngroup_row, kind=dp)
      part_col = real(num_cols, kind=dp)/real(ngroup_col, kind=dp)

      DO jjb = 1, ncol_local
         j_global = col_indices(jjb)
         ! check the group holding this element
         ! dirty way, if someone has a better idea ...
         send_pcol = int(real(j_global - 1, kind=dp)/part_col)
         send_pcol = max(0, send_pcol)
         send_pcol = min(send_pcol, ngroup_col - 1)
         DO
            itmp = get_limit(num_cols, ngroup_col, send_pcol)
            IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
            IF (j_global < itmp(1)) send_pcol = send_pcol - 1
            IF (j_global > itmp(2)) send_pcol = send_pcol + 1
         END DO

         DO iib = 1, nrow_local
            i_global = row_indices(iib)
            ! check the process in the group holding this element
            send_prow = int(real(i_global - 1, kind=dp)/part_row)
            send_prow = max(0, send_prow)
            send_prow = min(send_prow, ngroup_row - 1)
            DO
               itmp = get_limit(num_rows, ngroup_row, send_prow)
               IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
               IF (i_global < itmp(1)) send_prow = send_prow - 1
               IF (i_global > itmp(2)) send_prow = send_prow + 1
            END DO

            proc_send = group_grid_2_mepos(send_prow, send_pcol)

            map_send_size(proc_send) = map_send_size(proc_send) + 1

         END DO ! i_global
      END DO ! j_global

      ! 2) loop over my local part of the array and define a map of the processes data is received from
      ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
      map_rec_size = 0
      dummy_proc = 0
      DO jjb = my_start_col, my_end_col
         rec_pcol = fm_mat%matrix_struct%g2p_col(jjb)
         DO iib = my_start_row, my_end_row
            rec_prow = fm_mat%matrix_struct%g2p_row(iib)
            proc_receive = grid_2_mepos(rec_prow, rec_pcol)
            map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
         END DO
      END DO

      ! 3) check if the local data in fm_mat has to be stored directly in the new array
      IF (map_rec_size(para_env%mepos) > 0) THEN
         DO jjb = 1, ncol_local
            j_global = col_indices(jjb)
            IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
               DO iib = 1, nrow_local
                  i_global = row_indices(iib)
                  IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
                     mat2d(i_global - my_start_row + 1, j_global - my_start_col + 1) = fm_mat%local_data(iib, jjb)
                  END IF
               END DO
            END IF
         END DO
      END IF
      CALL timestop(handle2)

      ! 4) reorder the data into the send buffers
      CALL timeset(routinen//"_buffer_send", handle2)
      ! count the number of messages to send
      number_of_send = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_send = modulo(para_env%mepos + proc_shift, para_env%num_pe)
         IF (map_send_size(proc_send) > 0) THEN
            number_of_send = number_of_send + 1
         END IF
      END DO
      ! allocate the structure that will hold the messages to be sent
      ALLOCATE (buffer_send(number_of_send))
      ! grid_ref_2_send_pos is a map that, given a pair
      ! (ref_send_prow, ref_send_pcol), returns the position in
      ! buffer_send associated with that process

      ALLOCATE (grid_ref_2_send_pos(0:ngroup_row - 1, 0:ngroup_col - 1))
      grid_ref_2_send_pos = 0

      ! finalize the allocation of buffer_send with the actual size
      ! of each message (actual size is size_send_buffer)
      send_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_send = modulo(para_env%mepos + proc_shift, para_env%num_pe)
         size_send_buffer = map_send_size(proc_send)
         IF (map_send_size(proc_send) > 0) THEN
            send_counter = send_counter + 1
            ! allocate the send buffer (msg)
            ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
            buffer_send(send_counter)%msg = 0.0_dp
            buffer_send(send_counter)%proc = proc_send
            ! look up prow and pcol of the process that will
            ! receive this message
            ref_send_prow = mepos_2_grid_group(1, proc_send)
            ref_send_pcol = mepos_2_grid_group(2, proc_send)
            ! store the position of the message for this process
            grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
         END IF
      END DO

      ! loop over the locally held data and fill buffer_send;
      ! for doing that we need an array that keeps track of the
      ! sequentially increasing index within each message
      ALLOCATE (iii_vet(number_of_send))
      iii_vet = 0
      DO jjb = 1, ncol_local
         j_global = col_indices(jjb)
         ! check the group holding this element
         ! dirty way, if someone has a better idea ...
         send_pcol = int(real(j_global - 1, kind=dp)/part_col)
         send_pcol = max(0, send_pcol)
         send_pcol = min(send_pcol, ngroup_col - 1)
         DO
            itmp = get_limit(num_cols, ngroup_col, send_pcol)
            IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
            IF (j_global < itmp(1)) send_pcol = send_pcol - 1
            IF (j_global > itmp(2)) send_pcol = send_pcol + 1
         END DO

         DO iib = 1, nrow_local
            i_global = row_indices(iib)
            ! check the process in the group holding this element
            send_prow = int(real(i_global - 1, kind=dp)/part_row)
            send_prow = max(0, send_prow)
            send_prow = min(send_prow, ngroup_row - 1)
            DO
               itmp = get_limit(num_rows, ngroup_row, send_prow)
               IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
               IF (i_global < itmp(1)) send_prow = send_prow - 1
               IF (i_global > itmp(2)) send_prow = send_prow + 1
            END DO
            ! we don't need to send to ourselves
            IF (group_grid_2_mepos(send_prow, send_pcol) == para_env%mepos) cycle

            send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
            iii_vet(send_counter) = iii_vet(send_counter) + 1
            iii = iii_vet(send_counter)
            buffer_send(send_counter)%msg(iii) = fm_mat%local_data(iib, jjb)
         END DO
      END DO

      DEALLOCATE (iii_vet)
      DEALLOCATE (grid_ref_2_send_pos)
      CALL timestop(handle2)

      ! 5) similarly to what was done for buffer_send,
      ! create the receive buffers, post the receives with irecv,
      ! and send the messages non-blockingly
      CALL timeset(routinen//"_isendrecv", handle2)
      ! count the number of messages to be received
      number_of_rec = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_receive = modulo(para_env%mepos - proc_shift, para_env%num_pe)
         IF (map_rec_size(proc_receive) > 0) THEN
            number_of_rec = number_of_rec + 1
         END IF
      END DO

      ALLOCATE (buffer_rec(number_of_rec))

      rec_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_receive = modulo(para_env%mepos - proc_shift, para_env%num_pe)
         size_rec_buffer = map_rec_size(proc_receive)
         IF (map_rec_size(proc_receive) > 0) THEN
            rec_counter = rec_counter + 1
            ! prepare the buffer for receive
            ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
            buffer_rec(rec_counter)%msg = 0.0_dp
            buffer_rec(rec_counter)%proc = proc_receive
            ! post the message to be received
            CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
                                buffer_rec(rec_counter)%msg_req)
         END IF
      END DO

      ! send the messages
      ALLOCATE (req_send(number_of_send))
      send_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_send = modulo(para_env%mepos + proc_shift, para_env%num_pe)
         IF (map_send_size(proc_send) > 0) THEN
            send_counter = send_counter + 1
            CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
                                buffer_send(send_counter)%msg_req)
            req_send(send_counter) = buffer_send(send_counter)%msg_req
         END IF
      END DO
      CALL timestop(handle2)

      ! 6) fill the local array with the messages received from the
      ! other processes
      CALL timeset(routinen//"_fill", handle2)
      CALL cp_fm_get_info(matrix=fm_mat, &
                          nrow_local=nrow_local, &
                          ncol_local=ncol_local)
      ALLOCATE (sizes(2, 0:para_env%num_pe - 1))
      CALL para_env%allgather([nrow_local, ncol_local], sizes)
      iib = maxval(sizes(1, :))
      ALLOCATE (index_row_rec(iib))
      index_row_rec = 0
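      ! for each sender, reconstruct via l2g_row/l2g_col which global rows and
      ! columns its local data correspond to, keeping only those in my range;
      ! the data arrive ordered column by column in the sender's local layout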
      rec_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_receive = modulo(para_env%mepos - proc_shift, para_env%num_pe)
         size_rec_buffer = map_rec_size(proc_receive)

         IF (map_rec_size(proc_receive) > 0) THEN
            rec_counter = rec_counter + 1

            rec_col_size = sizes(2, proc_receive)
            rec_row_size = sizes(1, proc_receive)

            ! precompute the indices of the rows
            num_rec_rows = 0
            DO iib = 1, rec_row_size
               i_global = fm_mat%matrix_struct%l2g_row(iib, mepos_2_grid(1, proc_receive))
               IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
                  num_rec_rows = num_rec_rows + 1
                  index_row_rec(num_rec_rows) = i_global
               END IF
            END DO

            CALL buffer_rec(rec_counter)%msg_req%wait()

            iii = 0
            DO jjb = 1, rec_col_size
               j_global = fm_mat%matrix_struct%l2g_col(jjb, mepos_2_grid(2, proc_receive))
               IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
                  DO iib = 1, num_rec_rows
                     i_global = index_row_rec(iib)
                     iii = iii + 1
                     mat2d(i_global - my_start_row + 1, j_global - my_start_col + 1) = buffer_rec(rec_counter)%msg(iii)
                  END DO
               END IF
            END DO

            DEALLOCATE (buffer_rec(rec_counter)%msg)
         END IF
      END DO
      DEALLOCATE (buffer_rec)
      DEALLOCATE (index_row_rec)
      CALL cp_fm_release(fm_mat)
      CALL timestop(handle2)

      ! 7) finally wait for all messages to be sent
      CALL timeset(routinen//"_waitall", handle2)
      CALL mp_waitall(req_send(:))
      DO send_counter = 1, number_of_send
         DEALLOCATE (buffer_send(send_counter)%msg)
      END DO
      DEALLOCATE (buffer_send)
      CALL timestop(handle2)

      CALL timestop(handle)

   END SUBROUTINE fm2array

! **************************************************************************************************
!> \brief redistribute 2D representation of 3d tensor to a set of dbcsr matrices
!> \param Gamma_2D ...
!> \param homo ...
!> \param virtual ...
!> \param dimen_ia ...
!> \param para_env_sub ...
!> \param my_ia_start ...
!> \param my_ia_end ...
!> \param my_group_L_size ...
!> \param gd_ia ...
!> \param dbcsr_Gamma ...
!> \param mo_coeff_o ...
! **************************************************************************************************
   SUBROUTINE create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, &
                                 my_ia_start, my_ia_end, my_group_L_size, &
                                 gd_ia, dbcsr_Gamma, mo_coeff_o)
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: gamma_2d
      INTEGER, INTENT(IN) :: homo, virtual, dimen_ia
      TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env_sub
      INTEGER, INTENT(IN) :: my_ia_start, my_ia_end, my_group_l_size
      TYPE(group_dist_d1_type), INTENT(IN) :: gd_ia
      TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT) :: dbcsr_gamma
      TYPE(dbcsr_type), INTENT(INOUT) :: mo_coeff_o

      CHARACTER(LEN=*), PARAMETER :: routinen = 'create_dbcsr_gamma'

      INTEGER :: dummy_proc, handle, i_global, i_local, iaia, iib, iii, itmp(2), j_global, &
                 j_local, jjb, jjj, kkb, mypcol, myprow, ncol_local, npcol, nprow, nrow_local, &
                 number_of_rec, number_of_send, proc_receive, proc_send, proc_shift, rec_counter, &
                 rec_iaia_end, rec_iaia_size, rec_iaia_start, rec_pcol, rec_prow, ref_send_pcol, &
                 ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
      INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, map_rec_size, map_send_size
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, grid_ref_2_send_pos, &
                                               indeces_map_my, mepos_2_grid
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
      REAL(kind=dp) :: part_ia
      TYPE(cp_blacs_env_type), POINTER :: blacs_env
      TYPE(cp_fm_struct_type), POINTER :: fm_struct
      TYPE(cp_fm_type) :: fm_ia
      TYPE(index_map), ALLOCATABLE, DIMENSION(:) :: indeces_rec
      TYPE(integ_mat_buffer_type), ALLOCATABLE, &
         DIMENSION(:) :: buffer_rec, buffer_send
      TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send

      CALL timeset(routinen, handle)

      ! create the sub blacs env
      NULLIFY (blacs_env)
      CALL cp_blacs_env_create(blacs_env=blacs_env, para_env=para_env_sub)

      ! create the fm_ia buffer matrix
      NULLIFY (fm_struct)
      CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=homo, &
                               ncol_global=virtual, para_env=para_env_sub)
      CALL cp_fm_create(fm_ia, fm_struct, name="fm_ia")
      ! release the structure
      CALL cp_fm_struct_release(fm_struct)
      ! release the blacs_env
      CALL cp_blacs_env_release(blacs_env)

      ! get array information
      CALL cp_fm_get_info(matrix=fm_ia, &
                          nrow_local=nrow_local, &
                          ncol_local=ncol_local, &
                          row_indices=row_indices, &
                          col_indices=col_indices)
      myprow = fm_ia%matrix_struct%context%mepos(1)
      mypcol = fm_ia%matrix_struct%context%mepos(2)
      nprow = fm_ia%matrix_struct%context%num_pe(1)
      npcol = fm_ia%matrix_struct%context%num_pe(2)

      ! 0) create arrays containing the process positions and supporting info
      ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
      grid_2_mepos = 0
      ALLOCATE (mepos_2_grid(2, 0:para_env_sub%num_pe - 1))
      ! fill the info array
      grid_2_mepos(myprow, mypcol) = para_env_sub%mepos
      ! sum up the info
      CALL para_env_sub%sum(grid_2_mepos)
      CALL para_env_sub%allgather([myprow, mypcol], mepos_2_grid)

      ! loop over the local index range and define the sending map
      ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1))
      map_send_size = 0
      dummy_proc = 0
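      ! decompose the composite index: for ia = (i-1)*virtual + a the occupied
      ! index is i = (ia-1)/virtual + 1 and the virtual index is
      ! a = mod(ia-1, virtual) + 1, giving the (row, col) position in the
      ! homo x virtual matrix fm_ia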
      DO iaia = my_ia_start, my_ia_end
         i_global = (iaia - 1)/virtual + 1
         j_global = mod(iaia - 1, virtual) + 1
         send_prow = fm_ia%matrix_struct%g2p_row(i_global)
         send_pcol = fm_ia%matrix_struct%g2p_col(j_global)
         proc_send = grid_2_mepos(send_prow, send_pcol)
         map_send_size(proc_send) = map_send_size(proc_send) + 1
      END DO

      ! loop over the local data of fm_ia and define the receiving map
      ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1))
      map_rec_size = 0
      part_ia = real(dimen_ia, kind=dp)/real(para_env_sub%num_pe, kind=dp)

      DO iib = 1, nrow_local
         i_global = row_indices(iib)
         DO jjb = 1, ncol_local
            j_global = col_indices(jjb)
            iaia = (i_global - 1)*virtual + j_global
            proc_receive = int(real(iaia - 1, kind=dp)/part_ia)
            proc_receive = max(0, proc_receive)
            proc_receive = min(proc_receive, para_env_sub%num_pe - 1)
            DO
               itmp = get_limit(dimen_ia, para_env_sub%num_pe, proc_receive)
               IF (iaia >= itmp(1) .AND. iaia <= itmp(2)) EXIT
               IF (iaia < itmp(1)) proc_receive = proc_receive - 1
               IF (iaia > itmp(2)) proc_receive = proc_receive + 1
            END DO
            map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
         END DO
      END DO

      ! allocate the buffer for sending data
      number_of_send = 0
      DO proc_shift = 1, para_env_sub%num_pe - 1
         proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
         IF (map_send_size(proc_send) > 0) THEN
            number_of_send = number_of_send + 1
         END IF
      END DO
      ! allocate the structure that will hold the messages to be sent
      ALLOCATE (buffer_send(number_of_send))
      ! and the map from the grid of processes to the message position
      ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
      grid_ref_2_send_pos = 0
      ! finally allocate each message
      send_counter = 0
      DO proc_shift = 1, para_env_sub%num_pe - 1
         proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
         size_send_buffer = map_send_size(proc_send)
         IF (map_send_size(proc_send) > 0) THEN
            send_counter = send_counter + 1
            ! allocate the send buffer (msg)
            ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
            buffer_send(send_counter)%proc = proc_send
            ! look up prow and pcol of the process that will
            ! receive this message
            ref_send_prow = mepos_2_grid(1, proc_send)
            ref_send_pcol = mepos_2_grid(2, proc_send)
            ! store the position of the message for this process
            grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
         END IF
      END DO

      ! allocate the buffer for receiving data
      number_of_rec = 0
      DO proc_shift = 1, para_env_sub%num_pe - 1
         proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
         IF (map_rec_size(proc_receive) > 0) THEN
            number_of_rec = number_of_rec + 1
         END IF
      END DO
      ! allocate the structure that will hold the messages to be received
      ! and the corresponding indices
      ALLOCATE (buffer_rec(number_of_rec))
      ALLOCATE (indeces_rec(number_of_rec))
      ! finally allocate each message and fill the index arrays
      rec_counter = 0
      DO proc_shift = 1, para_env_sub%num_pe - 1
         proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
         size_rec_buffer = map_rec_size(proc_receive)
         IF (map_rec_size(proc_receive) > 0) THEN
            rec_counter = rec_counter + 1
            ! prepare the buffer for receive
            ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
            buffer_rec(rec_counter)%proc = proc_receive
            ! create the index array
            ALLOCATE (indeces_rec(rec_counter)%map(2, size_rec_buffer))
            indeces_rec(rec_counter)%map = 0
            CALL get_group_dist(gd_ia, proc_receive, rec_iaia_start, rec_iaia_end, rec_iaia_size)
            iii = 0
            DO iaia = rec_iaia_start, rec_iaia_end
               i_global = (iaia - 1)/virtual + 1
               j_global = mod(iaia - 1, virtual) + 1
               rec_prow = fm_ia%matrix_struct%g2p_row(i_global)
               rec_pcol = fm_ia%matrix_struct%g2p_col(j_global)
               IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) cycle
               iii = iii + 1
               i_local = fm_ia%matrix_struct%g2l_row(i_global)
               j_local = fm_ia%matrix_struct%g2l_col(j_global)
               indeces_rec(rec_counter)%map(1, iii) = i_local
               indeces_rec(rec_counter)%map(2, iii) = j_local
            END DO
         END IF
      END DO
      ! and create the index map for my local data
      IF (map_rec_size(para_env_sub%mepos) > 0) THEN
         size_rec_buffer = map_rec_size(para_env_sub%mepos)
         ALLOCATE (indeces_map_my(2, size_rec_buffer))
         indeces_map_my = 0
         iii = 0
         DO iaia = my_ia_start, my_ia_end
            i_global = (iaia - 1)/virtual + 1
            j_global = mod(iaia - 1, virtual) + 1
            rec_prow = fm_ia%matrix_struct%g2p_row(i_global)
            rec_pcol = fm_ia%matrix_struct%g2p_col(j_global)
            IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) cycle
            iii = iii + 1
            i_local = fm_ia%matrix_struct%g2l_row(i_global)
            j_local = fm_ia%matrix_struct%g2l_col(j_global)
            indeces_map_my(1, iii) = i_local
            indeces_map_my(2, iii) = j_local
         END DO
      END IF

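      ! the index maps above are precomputed once, so that inside the loop over
      ! the auxiliary basis functions below the received elements can be
      ! scattered into fm_ia%local_data without repeating the global-to-local
      ! searches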
1436 ! auxiliary vector of indeces for the send buffer
1437 ALLOCATE (iii_vet(number_of_send))
1438 ! vector for the send requests
1439 ALLOCATE (req_send(number_of_send))
1440 ! loop over auxiliary basis function and redistribute into a fm
1441 ! and then compy the fm into a dbcsr matrix
1442 DO kkb = 1, my_group_l_size
1443 ! zero the matries of the buffers and post the messages to be received
1444 CALL cp_fm_set_all(matrix=fm_ia, alpha=0.0_dp)
1445 rec_counter = 0
1446 DO proc_shift = 1, para_env_sub%num_pe - 1
1447 proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1448 IF (map_rec_size(proc_receive) > 0) THEN
1449 rec_counter = rec_counter + 1
1450 buffer_rec(rec_counter)%msg = 0.0_dp
1451 CALL para_env_sub%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
1452 buffer_rec(rec_counter)%msg_req)
1453 END IF
1454 END DO
1455 ! fill the sending buffer and send the messages
1456 DO send_counter = 1, number_of_send
1457 buffer_send(send_counter)%msg = 0.0_dp
1458 END DO
1459 iii_vet = 0
1460 jjj = 0
1461 DO iaia = my_ia_start, my_ia_end
1462 i_global = (iaia - 1)/virtual + 1
1463 j_global = mod(iaia - 1, virtual) + 1
1464 send_prow = fm_ia%matrix_struct%g2p_row(i_global)
1465 send_pcol = fm_ia%matrix_struct%g2p_col(j_global)
1466 proc_send = grid_2_mepos(send_prow, send_pcol)
1467 ! we don't need to send to ourselves
1468 IF (proc_send == para_env_sub%mepos) THEN
1469 ! fill fm_ia directly with local data
1470 jjj = jjj + 1
1471 i_local = indeces_map_my(1, jjj)
1472 j_local = indeces_map_my(2, jjj)
1473 fm_ia%local_data(i_local, j_local) = gamma_2d(iaia - my_ia_start + 1, kkb)
1474 ELSE
1475 send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
1476 iii_vet(send_counter) = iii_vet(send_counter) + 1
1477 iii = iii_vet(send_counter)
1478 buffer_send(send_counter)%msg(iii) = gamma_2d(iaia - my_ia_start + 1, kkb)
1479 END IF
1480 END DO
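         ! jjj advances through indeces_map_my in lockstep with the iaia loop
         ! above; this positional pairing is valid because the map was built by
         ! traversing exactly the same iaia range in the same order.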
1481 req_send = mp_request_null
1482 send_counter = 0
1483 DO proc_shift = 1, para_env_sub%num_pe - 1
1484 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1485 IF (map_send_size(proc_send) > 0) THEN
1486 send_counter = send_counter + 1
1487 CALL para_env_sub%isend(buffer_send(send_counter)%msg, proc_send, &
1488 buffer_send(send_counter)%msg_req)
1489 req_send(send_counter) = buffer_send(send_counter)%msg_req
1490 END IF
1491 END DO
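         ! the sends are non-blocking; the requests collected in req_send are
         ! completed by mp_waitall below, once the incoming messages for this
         ! kkb slice have been unpacked into fm_ia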
1492
1493 ! receive the messages and fill fm_ia
1494 rec_counter = 0
1495 DO proc_shift = 1, para_env_sub%num_pe - 1
1496 proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1497 size_rec_buffer = map_rec_size(proc_receive)
1498 IF (map_rec_size(proc_receive) > 0) THEN
1499 rec_counter = rec_counter + 1
1500 ! wait for the message
1501 CALL buffer_rec(rec_counter)%msg_req%wait()
1502 DO iii = 1, size_rec_buffer
1503 i_local = indeces_rec(rec_counter)%map(1, iii)
1504 j_local = indeces_rec(rec_counter)%map(2, iii)
1505 fm_ia%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
1506 END DO
1507 END IF
1508 END DO
1509
1510 ! wait for all outstanding send requests, so the send buffers can be reused
1511 CALL mp_waitall(req_send(:))
1512
1513 ! now create the DBCSR matrix and copy fm_ia into it
1514 ALLOCATE (dbcsr_gamma(kkb)%matrix)
1515 CALL cp_dbcsr_m_by_n_from_template(dbcsr_gamma(kkb)%matrix, &
1516 template=mo_coeff_o, &
1517 m=homo, n=virtual, sym=dbcsr_type_no_symmetry)
1518 CALL copy_fm_to_dbcsr(fm_ia, dbcsr_gamma(kkb)%matrix, keep_sparsity=.false.)
1519
1520 END DO
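      ! at this point dbcsr_gamma(1:my_group_L_size) holds, for each auxiliary
      ! basis function of this group, the Gamma slice as a homo x virtual
      ! DBCSR matrix on the same processor grid as mo_coeff_o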
1521
1522 ! deallocate work arrays and communication buffers
1523 DEALLOCATE (gamma_2d)
1524 DEALLOCATE (iii_vet)
1525 DEALLOCATE (req_send)
1526 IF (map_rec_size(para_env_sub%mepos) > 0) THEN
1527 DEALLOCATE (indeces_map_my)
1528 END IF
1529 DO rec_counter = 1, number_of_rec
1530 DEALLOCATE (indeces_rec(rec_counter)%map)
1531 DEALLOCATE (buffer_rec(rec_counter)%msg)
1532 END DO
1533 DEALLOCATE (indeces_rec)
1534 DEALLOCATE (buffer_rec)
1535 DO send_counter = 1, number_of_send
1536 DEALLOCATE (buffer_send(send_counter)%msg)
1537 END DO
1538 DEALLOCATE (buffer_send)
1539 DEALLOCATE (map_send_size)
1540 DEALLOCATE (map_rec_size)
1541 DEALLOCATE (grid_2_mepos)
1542 DEALLOCATE (mepos_2_grid)
1543
1544 ! release buffer matrix
1545 CALL cp_fm_release(fm_ia)
1546
1547 CALL timestop(handle)
1548
1549 END SUBROUTINE create_dbcsr_gamma
1550
1551! **************************************************************************************************
1552!> \brief prepare the index maps needed to redistribute data between the global communicator and the subgroups
1553!> \param para_env the global parallel environment
1554!> \param para_env_sub the parallel environment of this subgroup
1555!> \param ngroup number of subgroups
1556!> \param group_grid_2_mepos map from (position in subgroup, group index) to global rank
1557!> \param mepos_2_grid_group inverse map from global rank to (position in subgroup, group index)
1558!> \param pos_info optional, for each global rank its position within its own subgroup
1559! **************************************************************************************************
1560 SUBROUTINE prepare_redistribution(para_env, para_env_sub, ngroup, &
1561 group_grid_2_mepos, mepos_2_grid_group, &
1562 pos_info)
1563 TYPE(mp_para_env_type), INTENT(IN) :: para_env, para_env_sub
1564 INTEGER, INTENT(IN) :: ngroup
1565 INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(OUT) :: group_grid_2_mepos, mepos_2_grid_group
1566 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT), &
1567 OPTIONAL :: pos_info
1568
1569 INTEGER :: i, pos_group, pos_sub
1570 INTEGER, ALLOCATABLE, DIMENSION(:) :: my_pos_info
1571
1572 ALLOCATE (my_pos_info(0:para_env%num_pe - 1))
1573 CALL para_env%allgather(para_env_sub%mepos, my_pos_info)
1574
1575 ALLOCATE (group_grid_2_mepos(0:para_env_sub%num_pe - 1, 0:ngroup - 1))
1576 group_grid_2_mepos = 0
1577 ALLOCATE (mepos_2_grid_group(2, 0:para_env%num_pe - 1))
1578 mepos_2_grid_group = 0
1579
1580 DO i = 0, para_env%num_pe - 1
1581 ! calculate position of the group
1582 pos_group = i/para_env_sub%num_pe
1583 ! calculate position in the subgroup
1584 pos_sub = my_pos_info(i)
1585 ! fill the map from the group grid to the global rank
1586 group_grid_2_mepos(pos_sub, pos_group) = i
1587 ! and the opposite, from the global pos to the grid pos
1588 mepos_2_grid_group(1, i) = pos_sub
1589 mepos_2_grid_group(2, i) = pos_group
1590 END DO
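      ! worked example (illustrative values only): with para_env_sub%num_pe = 4,
      ! global rank i = 6 belongs to group 6/4 = 1; if my_pos_info(6) = 2, then
      ! group_grid_2_mepos(2, 1) = 6 and mepos_2_grid_group(:, 6) = (/2, 1/)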
1591
1592 IF (PRESENT(pos_info)) CALL move_alloc(my_pos_info, pos_info)
1593
1594 END SUBROUTINE prepare_redistribution
1595
1596END MODULE mp2_ri_grad_util