mp2_ri_grad_util.F
!--------------------------------------------------------------------------------------------------!
!   CP2K: A general program to perform molecular dynamics simulations                              !
!   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
!                                                                                                  !
!   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
!--------------------------------------------------------------------------------------------------!

! **************************************************************************************************
!> \brief Routines for calculating RI-MP2 gradients
!> \par History
!>      10.2013 created [Mauro Del Ben]
! **************************************************************************************************
MODULE mp2_ri_grad_util
   ! NOTE: the cp_fm_types ONLY list below was reconstructed from the symbols referenced in this file
   USE cp_fm_types, ONLY: cp_fm_create, &
                          cp_fm_get_info, &
                          cp_fm_indxg2l, &
                          cp_fm_indxg2p, &
                          cp_fm_indxl2g, &
                          cp_fm_release, &
                          cp_fm_set_all, &
                          cp_fm_to_fm, &
                          cp_fm_type
   USE dbcsr_api, ONLY: dbcsr_p_type, &
                        dbcsr_type, &
                        dbcsr_type_no_symmetry
   USE kinds, ONLY: dp
   USE util, ONLY: get_limit
#include "./base/base_uses.f90"

   IMPLICIT NONE

   PRIVATE

   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_grad_util'

   ! NOTE: PUBLIC list reconstructed from the routines defined and referenced in this file
   PUBLIC :: complete_gamma, array2fm, fm2array, create_dbcsr_gamma, prepare_redistribution

   TYPE index_map
      INTEGER, DIMENSION(:, :), ALLOCATABLE :: map
   END TYPE

CONTAINS

! **************************************************************************************************
!> \brief complete the calculation of the Gamma matrices
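!>        i.e. contract the intermediate Y_ia^Q with the RI operator matrices:
!>        Gamma_ia^P = sum_Q Y_ia^Q * (Q|P)^(-1/2), plus the two-index quantities
!>        Gamma_PQ (and Gamma_PQ_2 for a non-identity RI metric); see the inline
!>        comments below for the exact contractions performed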
!> \param mp2_env ...
!> \param B_ia_Q ...
!> \param dimen_RI ...
!> \param homo ...
!> \param virtual ...
!> \param para_env ...
!> \param para_env_sub ...
!> \param ngroup ...
!> \param my_group_L_size ...
!> \param my_group_L_start ...
!> \param my_group_L_end ...
!> \param my_B_size ...
!> \param my_B_virtual_start ...
!> \param gd_array ...
!> \param gd_B_virtual ...
!> \param kspin ...
!> \author Mauro Del Ben
! **************************************************************************************************
   SUBROUTINE complete_gamma(mp2_env, B_ia_Q, dimen_RI, homo, virtual, para_env, para_env_sub, ngroup, &
                             my_group_L_size, my_group_L_start, my_group_L_end, &
                             my_B_size, my_B_virtual_start, gd_array, gd_B_virtual, kspin)

      TYPE(mp2_type) :: mp2_env
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
         INTENT(INOUT) :: b_ia_q
      INTEGER, INTENT(IN) :: dimen_ri, homo, virtual
      TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env, para_env_sub
      INTEGER, INTENT(IN) :: ngroup, my_group_l_size, &
                             my_group_l_start, my_group_l_end, &
                             my_b_size, my_b_virtual_start
      TYPE(group_dist_d1_type), INTENT(IN) :: gd_array, gd_b_virtual
      INTEGER, INTENT(IN) :: kspin

      CHARACTER(LEN=*), PARAMETER :: routineN = 'complete_gamma'

      INTEGER :: dimen_ia, handle, i, ispin, kkb, my_ia_end, my_ia_size, my_ia_start, my_p_end, &
                 my_p_size, my_p_start, nspins, pos_group, pos_sub
      INTEGER, ALLOCATABLE, DIMENSION(:) :: pos_info
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: group_grid_2_mepos, mepos_2_grid_group
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: bib_c_2d, gamma_2d, gamma_pq
      TYPE(cp_blacs_env_type), POINTER :: blacs_env
      TYPE(cp_fm_struct_type), POINTER :: fm_struct_ia, fm_struct_ri
      TYPE(cp_fm_type) :: fm_gamma, fm_gamma_pq, fm_gamma_pq_2, fm_gamma_pq_temp, &
                          fm_gamma_pq_temp_2, fm_ia_p, fm_y, operator_half, pq_half
      TYPE(group_dist_d1_type) :: gd_array_new, gd_ia, gd_ia_new, gd_p, &
                                  gd_p_new

      CALL timeset(routineN, handle)

      ! reshape the data into a 2D array
      ! reorder the local data in such a way as to help the next stage of matrix creation
      ! now the data inside the group are divided into an ia x K matrix
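      ! (the compound index ia = (i - 1)*virtual + a runs over all occupied-virtual
      !  pairs, matching the mapping used in mat_3d_to_2d below)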
      dimen_ia = homo*virtual
      CALL create_group_dist(gd_ia, para_env_sub%num_pe, dimen_ia)
      CALL get_group_dist(gd_ia, para_env_sub%mepos, my_ia_start, my_ia_end, my_ia_size)

      CALL mat_3d_to_2d(b_ia_q, bib_c_2d, homo, my_b_size, virtual, my_b_virtual_start, &
                        my_ia_start, my_ia_end, my_ia_size, my_group_l_size, para_env_sub, gd_b_virtual)

      CALL mat_3d_to_2d_gamma(mp2_env%ri_grad%Gamma_P_ia(kspin)%array, gamma_2d, homo, my_b_size, virtual, my_b_virtual_start, &
                              my_ia_start, my_ia_end, my_ia_size, my_group_l_size, para_env_sub, gd_b_virtual)

      ! create the processor map and size arrays
      CALL create_group_dist(gd_ia_new, para_env%num_pe)
      CALL create_group_dist(gd_array_new, para_env%num_pe)
      CALL create_group_dist(gd_p, para_env_sub%num_pe, dimen_ri)
      CALL create_group_dist(gd_p_new, para_env%num_pe)

      CALL get_group_dist(gd_p, para_env_sub%mepos, my_p_start, my_p_end, my_p_size)

      CALL prepare_redistribution(para_env, para_env_sub, ngroup, &
                                  group_grid_2_mepos, mepos_2_grid_group, pos_info=pos_info)

      DO i = 0, para_env%num_pe - 1
         ! calculate position of the group
         pos_group = i/para_env_sub%num_pe
         ! calculate position in the subgroup
         pos_sub = pos_info(i)
         ! 1 -> rows, 2 -> cols
         CALL get_group_dist(gd_ia, pos_sub, gd_ia_new, i)
         CALL get_group_dist(gd_array, pos_group, gd_array_new, i)
         CALL get_group_dist(gd_p, pos_sub, gd_p_new, i)
      END DO

      ! create the blacs env
      NULLIFY (blacs_env)
      CALL cp_blacs_env_create(blacs_env=blacs_env, para_env=para_env)

      NULLIFY (fm_struct_ia)
      CALL cp_fm_struct_create(fm_struct_ia, context=blacs_env, nrow_global=dimen_ia, &
                               ncol_global=dimen_ri, para_env=para_env)

      ! create the fm matrix Gamma
      CALL array2fm(gamma_2d, fm_struct_ia, my_ia_start, my_ia_end, &
                    my_group_l_start, my_group_l_end, &
                    gd_ia_new, gd_array_new, &
                    group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
                    fm_y)
      ! create the fm matrix B_ia_P
      CALL array2fm(bib_c_2d, fm_struct_ia, my_ia_start, my_ia_end, &
                    my_group_l_start, my_group_l_end, &
                    gd_ia_new, gd_array_new, &
                    group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
                    fm_ia_p)

      NULLIFY (fm_struct_ri)
      CALL cp_fm_struct_create(fm_struct_ri, context=blacs_env, nrow_global=dimen_ri, &
                               ncol_global=dimen_ri, para_env=para_env)

      ! since we will need (P|Q)^(-1/2) in the future, make a copy
      CALL array2fm(mp2_env%ri_grad%PQ_half, fm_struct_ri, my_p_start, my_p_end, &
                    my_group_l_start, my_group_l_end, &
                    gd_p_new, gd_array_new, &
                    group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
                    pq_half, do_release_mat=.FALSE.)

      IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
         CALL array2fm(mp2_env%ri_grad%operator_half, fm_struct_ri, my_p_start, my_p_end, &
                       my_group_l_start, my_group_l_end, &
                       gd_p_new, gd_array_new, &
                       group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
                       operator_half, do_release_mat=.FALSE.)
      END IF

      CALL release_group_dist(gd_p_new)
      CALL release_group_dist(gd_ia_new)
      CALL release_group_dist(gd_array_new)

      ! complete the gamma matrix Gamma_ia^P = sum_Q (Y_ia^Q * (Q|P)^(-1/2))
      IF (compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
         CALL cp_fm_create(fm_gamma, fm_struct_ia, name="fm_Gamma")
         CALL cp_fm_struct_release(fm_struct_ia)
         ! perform the matrix multiplication
         CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
                            matrix_a=fm_y, matrix_b=pq_half, beta=0.0_dp, &
                            matrix_c=fm_gamma)
         ! release the Y matrix
         CALL cp_fm_release(fm_y)

         ! complete gamma small (fm_Gamma_PQ)
         ! create temp matrix
         CALL cp_fm_create(fm_gamma_pq_temp, fm_struct_ri, name="fm_Gamma_PQ_temp")
         CALL parallel_gemm(transa="T", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ia, alpha=1.0_dp, &
                            matrix_a=fm_gamma, matrix_b=fm_ia_p, beta=0.0_dp, &
                            matrix_c=fm_gamma_pq_temp)
         CALL cp_fm_release(fm_ia_p)
         ! create fm_Gamma_PQ matrix
         CALL cp_fm_create(fm_gamma_pq, fm_struct_ri, name="fm_Gamma_PQ")
         CALL cp_fm_struct_release(fm_struct_ri)
         ! perform matrix multiplication
         CALL parallel_gemm(transa="N", transb="T", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
                            matrix_a=fm_gamma_pq_temp, matrix_b=pq_half, beta=0.0_dp, &
                            matrix_c=fm_gamma_pq)
         CALL cp_fm_release(fm_gamma_pq_temp)
         CALL cp_fm_release(pq_half)

      ELSE
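         ! the RI metric differs from the interaction potential: the completion now
         ! involves the metric operator as well, producing both Gamma_PQ and Gamma_PQ_2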
         ! create temp matrix
         CALL cp_fm_create(fm_gamma_pq_temp, fm_struct_ri, name="fm_Gamma_PQ_temp")
         CALL parallel_gemm(transa="T", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ia, alpha=1.0_dp, &
                            matrix_a=fm_y, matrix_b=fm_ia_p, beta=0.0_dp, &
                            matrix_c=fm_gamma_pq_temp)
         CALL cp_fm_release(fm_ia_p)

         CALL cp_fm_create(fm_gamma, fm_struct_ia, name="fm_Gamma")
         CALL cp_fm_struct_release(fm_struct_ia)
         ! perform the matrix multiplication
         CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
                            matrix_a=fm_y, matrix_b=pq_half, beta=0.0_dp, &
                            matrix_c=fm_gamma)
         CALL cp_fm_release(fm_y)

         CALL cp_fm_create(fm_gamma_pq_temp_2, fm_struct_ri, name="fm_Gamma_PQ_temp_2")
         CALL parallel_gemm(transa="N", transb="T", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
                            matrix_a=fm_gamma_pq_temp, matrix_b=operator_half, beta=0.0_dp, &
                            matrix_c=fm_gamma_pq_temp_2)

         CALL cp_fm_create(fm_gamma_pq_2, fm_struct_ri, name="fm_Gamma_PQ_2")
         CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
                            matrix_a=pq_half, matrix_b=fm_gamma_pq_temp_2, beta=0.0_dp, &
                            matrix_c=fm_gamma_pq_temp)
         CALL cp_fm_to_fm(fm_gamma_pq_temp, fm_gamma_pq_2)
         CALL cp_fm_geadd(1.0_dp, "T", fm_gamma_pq_temp, 1.0_dp, fm_gamma_pq_2)
         CALL cp_fm_release(fm_gamma_pq_temp)
         CALL cp_fm_release(pq_half)

         CALL cp_fm_create(fm_gamma_pq, fm_struct_ri)
         CALL cp_fm_struct_release(fm_struct_ri)
         CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=-1.0_dp, &
                            matrix_a=operator_half, matrix_b=fm_gamma_pq_temp_2, beta=0.0_dp, &
                            matrix_c=fm_gamma_pq)
         CALL cp_fm_release(operator_half)
         CALL cp_fm_release(fm_gamma_pq_temp_2)
      END IF

      ! now redistribute the data, in this case we go back
      ! to the original way the integrals were distributed
      CALL fm2array(gamma_2d, my_ia_size, my_ia_start, my_ia_end, &
                    my_group_l_size, my_group_l_start, my_group_l_end, &
                    group_grid_2_mepos, mepos_2_grid_group, &
                    para_env_sub%num_pe, ngroup, &
                    fm_gamma)

      ALLOCATE (gamma_pq(my_p_size, my_group_l_size))
      CALL fm2array(gamma_pq, my_p_size, my_p_start, my_p_end, &
                    my_group_l_size, my_group_l_start, my_group_l_end, &
                    group_grid_2_mepos, mepos_2_grid_group, &
                    para_env_sub%num_pe, ngroup, &
                    fm_gamma_pq)
      IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_PQ)) THEN
         CALL move_alloc(gamma_pq, mp2_env%ri_grad%Gamma_PQ)
      ELSE
         mp2_env%ri_grad%Gamma_PQ(:, :) = mp2_env%ri_grad%Gamma_PQ + gamma_pq
         DEALLOCATE (gamma_pq)
      END IF

      IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
         ALLOCATE (gamma_pq(my_p_size, my_group_l_size))
         CALL fm2array(gamma_pq, my_p_size, my_p_start, my_p_end, &
                       my_group_l_size, my_group_l_start, my_group_l_end, &
                       group_grid_2_mepos, mepos_2_grid_group, &
                       para_env_sub%num_pe, ngroup, &
                       fm_gamma_pq_2)
         IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_PQ_2)) THEN
            CALL move_alloc(gamma_pq, mp2_env%ri_grad%Gamma_PQ_2)
         ELSE
            mp2_env%ri_grad%Gamma_PQ_2(:, :) = mp2_env%ri_grad%Gamma_PQ_2 + gamma_pq
            DEALLOCATE (gamma_pq)
         END IF
      END IF

      ! allocate G_P_ia (DBCSR)
      IF (.NOT. ALLOCATED(mp2_env%ri_grad%G_P_ia)) THEN
         nspins = SIZE(mp2_env%ri_grad%mo_coeff_o)
         ALLOCATE (mp2_env%ri_grad%G_P_ia(my_group_l_size, nspins))
         DO ispin = 1, nspins
            DO kkb = 1, my_group_l_size
               NULLIFY (mp2_env%ri_grad%G_P_ia(kkb, ispin)%matrix)
            END DO
         END DO
      END IF

      ! create the Gamma_ia_P in DBCSR style
      CALL create_dbcsr_gamma(gamma_2d, homo, virtual, dimen_ia, para_env_sub, &
                              my_ia_start, my_ia_end, my_group_l_size, gd_ia, &
                              mp2_env%ri_grad%G_P_ia(:, kspin), mp2_env%ri_grad%mo_coeff_o(1)%matrix)

      DEALLOCATE (pos_info)
      DEALLOCATE (group_grid_2_mepos, mepos_2_grid_group)
      CALL release_group_dist(gd_ia)
      CALL release_group_dist(gd_p)

      ! release blacs_env
      CALL cp_blacs_env_release(blacs_env)

      CALL timestop(handle)

   END SUBROUTINE complete_gamma

! **************************************************************************************************
!> \brief Redistribute a 3d matrix to a 2d matrix
!> \param B_ia_Q ...
!> \param BIb_C_2D ...
!> \param homo ...
!> \param my_B_size ...
!> \param virtual ...
!> \param my_B_virtual_start ...
!> \param my_ia_start ...
!> \param my_ia_end ...
!> \param my_ia_size ...
!> \param my_group_L_size ...
!> \param para_env_sub ...
!> \param gd_B_virtual ...
! **************************************************************************************************
   SUBROUTINE mat_3d_to_2d(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
                           my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
         INTENT(INOUT) :: b_ia_q
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
         INTENT(OUT) :: bib_c_2d
      INTEGER, INTENT(IN) :: homo, my_b_size, virtual, &
                             my_b_virtual_start, my_ia_start, &
                             my_ia_end, my_ia_size, my_group_l_size
      TYPE(mp_para_env_type), INTENT(IN) :: para_env_sub
      TYPE(group_dist_d1_type), INTENT(IN) :: gd_b_virtual

      CHARACTER(LEN=*), PARAMETER :: routineN = 'mat_3d_to_2d'

      INTEGER :: handle, ia_global, iib, jjb, proc_receive, proc_send, proc_shift, rec_b_size, &
                 rec_b_virtual_end, rec_b_virtual_start
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: bib_c_rec

      CALL timeset(routineN, handle)

      ALLOCATE (bib_c_2d(my_ia_size, my_group_l_size))
      bib_c_2d = 0.0_dp

      DO iib = 1, homo
         DO jjb = 1, my_b_size
            ia_global = (iib - 1)*virtual + my_b_virtual_start + jjb - 1
            IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
               bib_c_2d(ia_global - my_ia_start + 1, :) = b_ia_q(iib, jjb, 1:my_group_l_size)
            END IF
         END DO
      END DO

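      ! exchange the remaining blocks with the other ranks of the subgroup in a
      ! round-robin fashion: at shift s, send to mepos + s and receive from mepos - s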
      DO proc_shift = 1, para_env_sub%num_pe - 1
         proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
         proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)

         CALL get_group_dist(gd_b_virtual, proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)

         ALLOCATE (bib_c_rec(homo, rec_b_size, my_group_l_size))
         bib_c_rec = 0.0_dp

         CALL para_env_sub%sendrecv(b_ia_q, proc_send, &
                                    bib_c_rec, proc_receive)

         DO iib = 1, homo
            DO jjb = 1, rec_b_size
               ia_global = (iib - 1)*virtual + rec_b_virtual_start + jjb - 1
               IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
                  bib_c_2d(ia_global - my_ia_start + 1, :) = bib_c_rec(iib, jjb, 1:my_group_l_size)
               END IF
            END DO
         END DO

         DEALLOCATE (bib_c_rec)
      END DO
      DEALLOCATE (b_ia_q)

      CALL timestop(handle)
   END SUBROUTINE mat_3d_to_2d

! **************************************************************************************************
!> \brief Redistribute a 3d matrix to a 2d matrix, specialized for Gamma_P_ia
!>        to account for a different order of indices
!> \param B_ia_Q ...
!> \param BIb_C_2D ...
!> \param homo ...
!> \param my_B_size ...
!> \param virtual ...
!> \param my_B_virtual_start ...
!> \param my_ia_start ...
!> \param my_ia_end ...
!> \param my_ia_size ...
!> \param my_group_L_size ...
!> \param para_env_sub ...
!> \param gd_B_virtual ...
! **************************************************************************************************
   SUBROUTINE mat_3d_to_2d_gamma(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
                                 my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
         INTENT(INOUT) :: b_ia_q
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
         INTENT(OUT) :: bib_c_2d
      INTEGER, INTENT(IN) :: homo, my_b_size, virtual, &
                             my_b_virtual_start, my_ia_start, &
                             my_ia_end, my_ia_size, my_group_l_size
      TYPE(mp_para_env_type), INTENT(IN) :: para_env_sub
      TYPE(group_dist_d1_type), INTENT(IN) :: gd_b_virtual

      CHARACTER(LEN=*), PARAMETER :: routineN = 'mat_3d_to_2d_gamma'

      INTEGER :: handle, ia_global, iib, jjb, proc_receive, proc_send, proc_shift, rec_b_size, &
                 rec_b_virtual_end, rec_b_virtual_start
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: bib_c_rec

      CALL timeset(routineN, handle)

      ALLOCATE (bib_c_2d(my_ia_size, my_group_l_size))
      bib_c_2d = 0.0_dp

      DO iib = 1, homo
         DO jjb = 1, my_b_size
            ia_global = (iib - 1)*virtual + my_b_virtual_start + jjb - 1
            IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
               bib_c_2d(ia_global - my_ia_start + 1, :) = b_ia_q(jjb, iib, 1:my_group_l_size)
            END IF
         END DO
      END DO

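      ! same round-robin exchange as in mat_3d_to_2d; note that here the incoming
      ! buffer is indexed (jjb, iib), i.e. virtual index first, matching Gamma_P_ia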
      DO proc_shift = 1, para_env_sub%num_pe - 1
         proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
         proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)

         CALL get_group_dist(gd_b_virtual, proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)

         ALLOCATE (bib_c_rec(rec_b_size, homo, my_group_l_size))
         bib_c_rec = 0.0_dp

         CALL para_env_sub%sendrecv(b_ia_q, proc_send, &
                                    bib_c_rec, proc_receive)

         DO iib = 1, homo
            DO jjb = 1, rec_b_size
               ia_global = (iib - 1)*virtual + rec_b_virtual_start + jjb - 1
               IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
                  bib_c_2d(ia_global - my_ia_start + 1, :) = bib_c_rec(jjb, iib, 1:my_group_l_size)
               END IF
            END DO
         END DO

         DEALLOCATE (bib_c_rec)
      END DO
      DEALLOCATE (b_ia_q)

      CALL timestop(handle)
   END SUBROUTINE mat_3d_to_2d_gamma

! **************************************************************************************************
!> \brief redistribute local part of array to fm
!> \param mat2D ...
!> \param fm_struct ...
!> \param my_start_row ...
!> \param my_end_row ...
!> \param my_start_col ...
!> \param my_end_col ...
!> \param gd_row ...
!> \param gd_col ...
!> \param group_grid_2_mepos ...
!> \param ngroup_row ...
!> \param ngroup_col ...
!> \param fm_mat ...
!> \param integ_group_size ...
!> \param color_group ...
!> \param do_release_mat whether to release the array (default: yes)
! **************************************************************************************************
   SUBROUTINE array2fm(mat2D, fm_struct, my_start_row, my_end_row, &
                       my_start_col, my_end_col, &
                       gd_row, gd_col, &
                       group_grid_2_mepos, ngroup_row, ngroup_col, &
                       fm_mat, integ_group_size, color_group, do_release_mat)

      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
         INTENT(INOUT) :: mat2d
      TYPE(cp_fm_struct_type), POINTER :: fm_struct
      INTEGER, INTENT(IN) :: my_start_row, my_end_row, my_start_col, &
                             my_end_col
      TYPE(group_dist_d1_type), INTENT(IN) :: gd_row, gd_col
      INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN) :: group_grid_2_mepos
      INTEGER, INTENT(IN) :: ngroup_row, ngroup_col
      TYPE(cp_fm_type), INTENT(OUT) :: fm_mat
      INTEGER, INTENT(IN), OPTIONAL :: integ_group_size, color_group
      LOGICAL, INTENT(IN), OPTIONAL :: do_release_mat

      CHARACTER(LEN=*), PARAMETER :: routineN = 'array2fm'

      INTEGER :: dummy_proc, end_col_block, end_row_block, handle, handle2, i_global, i_local, &
                 i_sub, iib, iii, itmp(2), j_global, j_local, j_sub, jjb, my_num_col_blocks, &
                 my_num_row_blocks, mypcol, myprow, ncol_block, ncol_local, npcol, nprow, nrow_block, &
                 nrow_local, num_cols, num_rec_cols, num_rows, number_of_rec, number_of_send, &
                 proc_receive, proc_send, proc_shift, rec_col_end, rec_col_size, rec_col_start, &
                 rec_counter, rec_pcol, rec_prow, rec_row_end, rec_row_size, rec_row_start, ref_send_pcol, &
                 ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
      INTEGER :: start_col_block, start_row_block
      INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, index_col_rec, map_rec_size, &
                                            map_send_size
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blocks_ranges_col, blocks_ranges_row, &
                                               grid_2_mepos, grid_ref_2_send_pos, &
                                               mepos_2_grid
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
      LOGICAL :: convert_pos, my_do_release_mat
      REAL(kind=dp) :: part_col, part_row
      TYPE(integ_mat_buffer_type), ALLOCATABLE, &
         DIMENSION(:) :: buffer_rec, buffer_send
      TYPE(mp_para_env_type), POINTER :: para_env
      TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send

      CALL timeset(routineN, handle)

      my_do_release_mat = .TRUE.
      IF (PRESENT(do_release_mat)) my_do_release_mat = do_release_mat

      CALL cp_fm_struct_get(fm_struct, para_env=para_env, nrow_global=num_rows, ncol_global=num_cols)

      ! create the full matrix, (num_rows x num_cols)
      CALL cp_fm_create(fm_mat, fm_struct, name="fm_mat")
      CALL cp_fm_set_all(matrix=fm_mat, alpha=0.0_dp)

      ! start filling procedure
      ! fill the matrix
      CALL cp_fm_get_info(matrix=fm_mat, &
                          nrow_local=nrow_local, &
                          ncol_local=ncol_local, &
                          row_indices=row_indices, &
                          col_indices=col_indices, &
                          nrow_block=nrow_block, &
                          ncol_block=ncol_block)
      myprow = fm_mat%matrix_struct%context%mepos(1)
      mypcol = fm_mat%matrix_struct%context%mepos(2)
      nprow = fm_mat%matrix_struct%context%num_pe(1)
      npcol = fm_mat%matrix_struct%context%num_pe(2)

      ! start the communication part
      ! 0) create an array containing the processes' positions
      !    and supporting info
      CALL timeset(routineN//"_info", handle2)
      ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
      grid_2_mepos = 0
      ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))
      grid_2_mepos(myprow, mypcol) = para_env%mepos
      ! sum infos
      CALL para_env%sum(grid_2_mepos)
      CALL para_env%allgather([myprow, mypcol], mepos_2_grid)
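      ! after the reduction, grid_2_mepos(prow, pcol) holds the global rank of the
      ! process at that BLACS grid position, and mepos_2_grid is the inverse mapping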

      ! 1) loop over my local data and define a map of the processes to send data to
      ALLOCATE (map_send_size(0:para_env%num_pe - 1))
      map_send_size = 0
      dummy_proc = 0
      DO jjb = my_start_col, my_end_col
         send_pcol = cp_fm_indxg2p(jjb, ncol_block, dummy_proc, &
                                   fm_mat%matrix_struct%first_p_pos(2), npcol)
         DO iib = my_start_row, my_end_row
            send_prow = cp_fm_indxg2p(iib, nrow_block, dummy_proc, &
                                      fm_mat%matrix_struct%first_p_pos(1), nprow)
            proc_send = grid_2_mepos(send_prow, send_pcol)
            map_send_size(proc_send) = map_send_size(proc_send) + 1
         END DO
      END DO

      ! 2) loop over my local data of fm_mat and define a map of the processes to receive data from
      ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
      map_rec_size = 0
      part_row = REAL(num_rows, kind=dp)/REAL(ngroup_row, kind=dp)
      part_col = REAL(num_cols, kind=dp)/REAL(ngroup_col, kind=dp)

      ! In some cases we have to convert global positions to positions in a subgroup (RPA)
      convert_pos = .FALSE.
      IF (PRESENT(integ_group_size) .AND. PRESENT(color_group)) convert_pos = .TRUE.

      DO jjb = 1, nrow_local
         j_global = row_indices(jjb)
         ! check the group holding this element
         ! dirty way, if someone has a better idea ...
         rec_prow = INT(REAL(j_global - 1, kind=dp)/part_row)
         rec_prow = MAX(0, rec_prow)
         rec_prow = MIN(rec_prow, ngroup_row - 1)
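         ! refine the guess: step rec_prow up or down until the ownership range
         ! returned by get_limit brackets j_global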
         DO
            itmp = get_limit(num_rows, ngroup_row, rec_prow)
            IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
            IF (j_global < itmp(1)) rec_prow = rec_prow - 1
            IF (j_global > itmp(2)) rec_prow = rec_prow + 1
         END DO

         IF (convert_pos) THEN
            ! if the group is not in the same RPA group, cycle
            IF ((rec_prow/integ_group_size) .NE. color_group) CYCLE
            ! convert global position to position into the RPA group
            rec_prow = MOD(rec_prow, integ_group_size)
         END IF

         DO iib = 1, ncol_local
            i_global = col_indices(iib)
            ! check the process in the group holding this element
            rec_pcol = INT(REAL(i_global - 1, kind=dp)/part_col)
            rec_pcol = MAX(0, rec_pcol)
            rec_pcol = MIN(rec_pcol, ngroup_col - 1)
            DO
               itmp = get_limit(num_cols, ngroup_col, rec_pcol)
               IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
               IF (i_global < itmp(1)) rec_pcol = rec_pcol - 1
               IF (i_global > itmp(2)) rec_pcol = rec_pcol + 1
            END DO

            proc_receive = group_grid_2_mepos(rec_prow, rec_pcol)

            map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1

         END DO ! i_global
      END DO ! j_global

      ! 3) check if the local data has to be stored in the new fm_mat
      IF (map_rec_size(para_env%mepos) > 0) THEN
         DO jjb = 1, ncol_local
            j_global = col_indices(jjb)
            IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
               DO iib = 1, nrow_local
                  i_global = row_indices(iib)
                  IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
                     fm_mat%local_data(iib, jjb) = mat2d(i_global - my_start_row + 1, j_global - my_start_col + 1)
                  END IF
               END DO
            END IF
         END DO
      END IF
      CALL timestop(handle2)

      ! 4) reorder data in the send_buffer
      CALL timeset(routineN//"_buffer_send", handle2)
      ! count the number of messages to send
      number_of_send = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
         IF (map_send_size(proc_send) > 0) THEN
            number_of_send = number_of_send + 1
         END IF
      END DO
      ! allocate the structure that will hold the messages to be sent
      ALLOCATE (buffer_send(number_of_send))
      ! grid_ref_2_send_pos is an array (map) that, given a pair
      ! (ref_send_prow, ref_send_pcol), returns
      ! the position in buffer_send associated with that process
      ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
      grid_ref_2_send_pos = 0
      ! finalize the allocation of buffer_send with the actual size
      ! of each message (actual size is size_send_buffer)
      send_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
         size_send_buffer = map_send_size(proc_send)
         IF (map_send_size(proc_send) > 0) THEN
            send_counter = send_counter + 1
            ! allocate the sending buffer (msg)
            ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
            buffer_send(send_counter)%msg = 0.0_dp
            buffer_send(send_counter)%proc = proc_send
            ! get the pointer to prow, pcol of the process that has
            ! to receive this message
            ref_send_prow = mepos_2_grid(1, proc_send)
            ref_send_pcol = mepos_2_grid(2, proc_send)
            ! save the rank of the process that has to receive this message
            grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
         END IF
      END DO

      ! loop over the locally held data and fill the buffer_send;
      ! for doing that we need an array that keeps track of the
      ! sequential increase of the index for each message
      ALLOCATE (iii_vet(number_of_send))
      iii_vet = 0
      DO iib = my_start_row, my_end_row
         send_prow = cp_fm_indxg2p(iib, nrow_block, dummy_proc, &
                                   fm_mat%matrix_struct%first_p_pos(1), nprow)
         DO jjb = my_start_col, my_end_col
            send_pcol = cp_fm_indxg2p(jjb, ncol_block, dummy_proc, &
                                      fm_mat%matrix_struct%first_p_pos(2), npcol)
            ! we don't need to send to ourselves
            IF (grid_2_mepos(send_prow, send_pcol) == para_env%mepos) CYCLE

            send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
            iii_vet(send_counter) = iii_vet(send_counter) + 1
            iii = iii_vet(send_counter)
            buffer_send(send_counter)%msg(iii) = mat2d(iib - my_start_row + 1, jjb - my_start_col + 1)
         END DO
      END DO

      DEALLOCATE (iii_vet)
      DEALLOCATE (grid_ref_2_send_pos)
      IF (my_do_release_mat) DEALLOCATE (mat2d)
      CALL timestop(handle2)

      ! 5) similarly to what was done for buffer_send,
      ! create the receive buffers, post the receives with irecv,
      ! and send the messages non-blocking
      CALL timeset(routineN//"_isendrecv", handle2)
      ! count the number of messages to be received
      number_of_rec = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
         IF (map_rec_size(proc_receive) > 0) THEN
            number_of_rec = number_of_rec + 1
         END IF
      END DO

      ALLOCATE (buffer_rec(number_of_rec))

      rec_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
         size_rec_buffer = map_rec_size(proc_receive)
         IF (map_rec_size(proc_receive) > 0) THEN
            rec_counter = rec_counter + 1
            ! prepare the buffer for receive
            ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
            buffer_rec(rec_counter)%msg = 0.0_dp
            buffer_rec(rec_counter)%proc = proc_receive
            ! post the message to be received
            CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
                                buffer_rec(rec_counter)%msg_req)
         END IF
      END DO

      ! send messages
      ALLOCATE (req_send(number_of_send))
      send_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
         IF (map_send_size(proc_send) > 0) THEN
            send_counter = send_counter + 1
            CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
                                buffer_send(send_counter)%msg_req)
            req_send(send_counter) = buffer_send(send_counter)%msg_req
         END IF
      END DO
      CALL timestop(handle2)

      ! 6) fill the fm_mat matrix with the messages received from the
      ! other processes
      CALL timeset(routineN//"_fill", handle2)
      ! In order to perform this step efficiently, first we have to know the
      ! ranges of the blocks over which a given process holds its local data.
      ! Start with the rows ...
      my_num_row_blocks = 1
      DO iib = 1, nrow_local - 1
         IF (ABS(row_indices(iib + 1) - row_indices(iib)) == 1) CYCLE
         my_num_row_blocks = my_num_row_blocks + 1
      END DO
      ALLOCATE (blocks_ranges_row(2, my_num_row_blocks))
      blocks_ranges_row = 0
      blocks_ranges_row(1, 1) = row_indices(1)
      iii = 1
      DO iib = 1, nrow_local - 1
         IF (ABS(row_indices(iib + 1) - row_indices(iib)) == 1) CYCLE
         iii = iii + 1
         blocks_ranges_row(2, iii - 1) = row_indices(iib)
         blocks_ranges_row(1, iii) = row_indices(iib + 1)
      END DO
      blocks_ranges_row(2, my_num_row_blocks) = row_indices(MAX(nrow_local, 1))
      ! ... and columns
      my_num_col_blocks = 1
      DO jjb = 1, ncol_local - 1
         IF (ABS(col_indices(jjb + 1) - col_indices(jjb)) == 1) CYCLE
         my_num_col_blocks = my_num_col_blocks + 1
      END DO
      ALLOCATE (blocks_ranges_col(2, my_num_col_blocks))
      blocks_ranges_col = 0
      blocks_ranges_col(1, 1) = col_indices(1)
      iii = 1
      DO jjb = 1, ncol_local - 1
         IF (ABS(col_indices(jjb + 1) - col_indices(jjb)) == 1) CYCLE
         iii = iii + 1
         blocks_ranges_col(2, iii - 1) = col_indices(jjb)
         blocks_ranges_col(1, iii) = col_indices(jjb + 1)
      END DO
      blocks_ranges_col(2, my_num_col_blocks) = col_indices(MAX(ncol_local, 1))

      rec_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
         size_rec_buffer = map_rec_size(proc_receive)

         IF (map_rec_size(proc_receive) > 0) THEN
            rec_counter = rec_counter + 1

            CALL get_group_dist(gd_col, proc_receive, rec_col_start, rec_col_end, rec_col_size)

            ! precompute the number of received columns and the corresponding local indices
            num_rec_cols = 0
            DO jjb = 1, my_num_col_blocks
               start_col_block = MAX(blocks_ranges_col(1, jjb), rec_col_start)
               end_col_block = MIN(blocks_ranges_col(2, jjb), rec_col_end)
               DO j_sub = start_col_block, end_col_block
                  num_rec_cols = num_rec_cols + 1
               END DO
            END DO
            ALLOCATE (index_col_rec(num_rec_cols))
            index_col_rec = 0
            iii = 0
            DO jjb = 1, my_num_col_blocks
               start_col_block = MAX(blocks_ranges_col(1, jjb), rec_col_start)
               end_col_block = MIN(blocks_ranges_col(2, jjb), rec_col_end)
               DO j_sub = start_col_block, end_col_block
                  iii = iii + 1
                  j_local = cp_fm_indxg2l(j_sub, ncol_block, dummy_proc, &
                                          fm_mat%matrix_struct%first_p_pos(2), npcol)
                  index_col_rec(iii) = j_local
               END DO
            END DO

            CALL get_group_dist(gd_row, proc_receive, rec_row_start, rec_row_end, rec_row_size)

            ! wait for the message
            CALL buffer_rec(rec_counter)%msg_req%wait()

            ! fill the local data in fm_mat
            iii = 0
            DO iib = 1, my_num_row_blocks
               start_row_block = MAX(blocks_ranges_row(1, iib), rec_row_start)
               end_row_block = MIN(blocks_ranges_row(2, iib), rec_row_end)
               DO i_sub = start_row_block, end_row_block
                  i_local = cp_fm_indxg2l(i_sub, nrow_block, dummy_proc, &
                                          fm_mat%matrix_struct%first_p_pos(1), nprow)
                  DO jjb = 1, num_rec_cols
                     iii = iii + 1
                     j_local = index_col_rec(jjb)
                     fm_mat%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
                  END DO
               END DO
            END DO

            DEALLOCATE (buffer_rec(rec_counter)%msg)
            DEALLOCATE (index_col_rec)
         END IF
      END DO
      DEALLOCATE (buffer_rec)

      DEALLOCATE (blocks_ranges_row)
      DEALLOCATE (blocks_ranges_col)

      CALL timestop(handle2)

      ! 7) Finally wait for all messages to be sent
      CALL timeset(routineN//"_waitall", handle2)
      CALL mp_waitall(req_send(:))
      DO send_counter = 1, number_of_send
         DEALLOCATE (buffer_send(send_counter)%msg)
      END DO
      DEALLOCATE (buffer_send)
      CALL timestop(handle2)

      DEALLOCATE (map_send_size)
      DEALLOCATE (map_rec_size)
      DEALLOCATE (grid_2_mepos)
      DEALLOCATE (mepos_2_grid)

      CALL timestop(handle)

   END SUBROUTINE array2fm

! **************************************************************************************************
!> \brief redistribute fm to local part of array
!> \param mat2D ...
!> \param my_rows ...
!> \param my_start_row ...
!> \param my_end_row ...
!> \param my_cols ...
!> \param my_start_col ...
!> \param my_end_col ...
!> \param group_grid_2_mepos ...
!> \param mepos_2_grid_group ...
!> \param ngroup_row ...
!> \param ngroup_col ...
!> \param fm_mat ...
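!> \note fm_mat is released (hence the INTENT(INOUT)) before this routine returns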
! **************************************************************************************************
   SUBROUTINE fm2array(mat2D, my_rows, my_start_row, my_end_row, &
                       my_cols, my_start_col, my_end_col, &
                       group_grid_2_mepos, mepos_2_grid_group, &
                       ngroup_row, ngroup_col, &
                       fm_mat)

      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
         INTENT(OUT) :: mat2d
      INTEGER, INTENT(IN) :: my_rows, my_start_row, my_end_row, &
                             my_cols, my_start_col, my_end_col
      INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN) :: group_grid_2_mepos, mepos_2_grid_group
      INTEGER, INTENT(IN) :: ngroup_row, ngroup_col
      TYPE(cp_fm_type), INTENT(INOUT) :: fm_mat

      CHARACTER(LEN=*), PARAMETER :: routineN = 'fm2array'

      INTEGER :: dummy_proc, handle, handle2, i_global, iib, iii, itmp(2), j_global, jjb, mypcol, &
                 myprow, ncol_block, ncol_local, npcol, nprow, nrow_block, nrow_local, num_cols, &
                 num_rec_rows, num_rows, number_of_rec, number_of_send, proc_receive, proc_send, &
                 proc_shift, rec_col_size, rec_counter, rec_pcol, rec_prow, rec_row_size, ref_send_pcol, &
                 ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
      INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, index_row_rec, map_rec_size, &
                                            map_send_size
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, grid_ref_2_send_pos, &
                                               mepos_2_grid, sizes
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
      REAL(kind=dp) :: part_col, part_row
      TYPE(integ_mat_buffer_type), ALLOCATABLE, &
         DIMENSION(:) :: buffer_rec, buffer_send
      TYPE(mp_para_env_type), POINTER :: para_env
      TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send

      CALL timeset(routineN, handle)

      ! allocate the array
      ALLOCATE (mat2d(my_rows, my_cols))
      mat2d = 0.0_dp

      ! start procedure
      ! get information from the full matrix
      CALL cp_fm_get_info(matrix=fm_mat, &
                          nrow_local=nrow_local, &
                          ncol_local=ncol_local, &
                          row_indices=row_indices, &
                          col_indices=col_indices, &
                          nrow_block=nrow_block, &
                          ncol_block=ncol_block, &
                          nrow_global=num_rows, &
                          ncol_global=num_cols, &
                          para_env=para_env)
      myprow = fm_mat%matrix_struct%context%mepos(1)
      mypcol = fm_mat%matrix_struct%context%mepos(2)
      nprow = fm_mat%matrix_struct%context%num_pe(1)
      npcol = fm_mat%matrix_struct%context%num_pe(2)

      ! start the communication part
      ! 0) create an array containing the processes' positions
      !    and supporting info
      CALL timeset(routineN//"_info", handle2)
      ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
      grid_2_mepos = 0
      ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))

      ! fill the info array
      grid_2_mepos(myprow, mypcol) = para_env%mepos

      ! sum infos
      CALL para_env%sum(grid_2_mepos)
      CALL para_env%allgather([myprow, mypcol], mepos_2_grid)

      ! 1) loop over my local data and define a map of the processes to send data to
      ALLOCATE (map_send_size(0:para_env%num_pe - 1))
      map_send_size = 0
      part_row = REAL(num_rows, kind=dp)/REAL(ngroup_row, kind=dp)
      part_col = REAL(num_cols, kind=dp)/REAL(ngroup_col, kind=dp)
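      ! part_row/part_col give a first guess for the 1D group that owns a given
      ! global row/column index; the exact owner is confirmed with get_limit below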

      DO jjb = 1, ncol_local
         j_global = col_indices(jjb)
         ! check the group holding this element
         ! dirty way, if someone has a better idea ...
         send_pcol = INT(REAL(j_global - 1, kind=dp)/part_col)
         send_pcol = MAX(0, send_pcol)
         send_pcol = MIN(send_pcol, ngroup_col - 1)
         DO
            itmp = get_limit(num_cols, ngroup_col, send_pcol)
            IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
            IF (j_global < itmp(1)) send_pcol = send_pcol - 1
            IF (j_global > itmp(2)) send_pcol = send_pcol + 1
         END DO

         DO iib = 1, nrow_local
            i_global = row_indices(iib)
            ! check the process in the group holding this element
            send_prow = INT(REAL(i_global - 1, kind=dp)/part_row)
            send_prow = MAX(0, send_prow)
            send_prow = MIN(send_prow, ngroup_row - 1)
            DO
               itmp = get_limit(num_rows, ngroup_row, send_prow)
               IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
               IF (i_global < itmp(1)) send_prow = send_prow - 1
               IF (i_global > itmp(2)) send_prow = send_prow + 1
            END DO

            proc_send = group_grid_2_mepos(send_prow, send_pcol)

            map_send_size(proc_send) = map_send_size(proc_send) + 1

         END DO ! i_global
      END DO ! j_global

      ! 2) loop over my local data of the array and define a map of the processes to receive data from
      ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
      map_rec_size = 0
      dummy_proc = 0
      DO jjb = my_start_col, my_end_col
         rec_pcol = cp_fm_indxg2p(jjb, ncol_block, dummy_proc, &
                                  fm_mat%matrix_struct%first_p_pos(2), npcol)
         DO iib = my_start_row, my_end_row
            rec_prow = cp_fm_indxg2p(iib, nrow_block, dummy_proc, &
                                     fm_mat%matrix_struct%first_p_pos(1), nprow)
            proc_receive = grid_2_mepos(rec_prow, rec_pcol)
            map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
         END DO
      END DO

      ! 3) check if the local data in the fm_mat has to be stored in the new array
      IF (map_rec_size(para_env%mepos) > 0) THEN
         DO jjb = 1, ncol_local
            j_global = col_indices(jjb)
            IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
               DO iib = 1, nrow_local
                  i_global = row_indices(iib)
                  IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
                     mat2d(i_global - my_start_row + 1, j_global - my_start_col + 1) = fm_mat%local_data(iib, jjb)
                  END IF
               END DO
            END IF
         END DO
      END IF
      CALL timestop(handle2)

      ! 4) reorder data in the send_buffer
      CALL timeset(routineN//"_buffer_send", handle2)
      ! count the number of messages to send
      number_of_send = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
         IF (map_send_size(proc_send) > 0) THEN
            number_of_send = number_of_send + 1
         END IF
      END DO
      ! allocate the structure that will hold the messages to be sent
      ALLOCATE (buffer_send(number_of_send))
      ! grid_ref_2_send_pos is an array (map) that, given a pair
      ! (ref_send_prow, ref_send_pcol), returns
      ! the position in buffer_send associated with that process

      ALLOCATE (grid_ref_2_send_pos(0:ngroup_row - 1, 0:ngroup_col - 1))
      grid_ref_2_send_pos = 0

      ! finalize the allocation of buffer_send with the actual size
      ! of each message (actual size is size_send_buffer)
      send_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
         size_send_buffer = map_send_size(proc_send)
         IF (map_send_size(proc_send) > 0) THEN
            send_counter = send_counter + 1
            ! allocate the sending buffer (msg)
            ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
            buffer_send(send_counter)%msg = 0.0_dp
            buffer_send(send_counter)%proc = proc_send
            ! get the pointer to prow, pcol of the process that has
            ! to receive this message
            ref_send_prow = mepos_2_grid_group(1, proc_send)
            ref_send_pcol = mepos_2_grid_group(2, proc_send)
            ! save the rank of the process that has to receive this message
            grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
         END IF
      END DO

      ! loop over the locally held data and fill the buffer_send;
      ! for doing that we need an array that keeps track of the
      ! sequential increase of the index for each message
      ALLOCATE (iii_vet(number_of_send))
      iii_vet = 0
      DO jjb = 1, ncol_local
         j_global = col_indices(jjb)
         ! check the group holding this element
         ! dirty way, if someone has a better idea ...
         send_pcol = INT(REAL(j_global - 1, kind=dp)/part_col)
         send_pcol = MAX(0, send_pcol)
         send_pcol = MIN(send_pcol, ngroup_col - 1)
         DO
            itmp = get_limit(num_cols, ngroup_col, send_pcol)
            IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
            IF (j_global < itmp(1)) send_pcol = send_pcol - 1
            IF (j_global > itmp(2)) send_pcol = send_pcol + 1
         END DO

         DO iib = 1, nrow_local
            i_global = row_indices(iib)
            ! check the process in the group holding this element
            send_prow = INT(REAL(i_global - 1, kind=dp)/part_row)
            send_prow = MAX(0, send_prow)
            send_prow = MIN(send_prow, ngroup_row - 1)
            DO
               itmp = get_limit(num_rows, ngroup_row, send_prow)
               IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
               IF (i_global < itmp(1)) send_prow = send_prow - 1
               IF (i_global > itmp(2)) send_prow = send_prow + 1
            END DO
            ! we don't need to send to ourselves
            IF (group_grid_2_mepos(send_prow, send_pcol) == para_env%mepos) CYCLE

            send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
            iii_vet(send_counter) = iii_vet(send_counter) + 1
            iii = iii_vet(send_counter)
            buffer_send(send_counter)%msg(iii) = fm_mat%local_data(iib, jjb)
         END DO
      END DO

      DEALLOCATE (iii_vet)
      DEALLOCATE (grid_ref_2_send_pos)
      CALL timestop(handle2)

      ! 5) similarly to what was done for buffer_send,
      ! create the receive buffers, post the receives with irecv,
      ! and send the messages non-blocking
      CALL timeset(routineN//"_isendrecv", handle2)
      ! count the number of messages to be received
      number_of_rec = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
         IF (map_rec_size(proc_receive) > 0) THEN
            number_of_rec = number_of_rec + 1
         END IF
      END DO

      ALLOCATE (buffer_rec(number_of_rec))

      rec_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
         size_rec_buffer = map_rec_size(proc_receive)
         IF (map_rec_size(proc_receive) > 0) THEN
            rec_counter = rec_counter + 1
            ! prepare the buffer for receive
            ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
            buffer_rec(rec_counter)%msg = 0.0_dp
            buffer_rec(rec_counter)%proc = proc_receive
            ! post the message to be received
            CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
                                buffer_rec(rec_counter)%msg_req)
         END IF
      END DO

      ! send messages
      ALLOCATE (req_send(number_of_send))
      send_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
         IF (map_send_size(proc_send) > 0) THEN
            send_counter = send_counter + 1
            CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
                                buffer_send(send_counter)%msg_req)
            req_send(send_counter) = buffer_send(send_counter)%msg_req
         END IF
      END DO
      CALL timestop(handle2)

      ! 6) fill the local array mat2d with the messages received from the
      ! other processes
      CALL timeset(routineN//"_fill", handle2)
      CALL cp_fm_get_info(matrix=fm_mat, &
                          nrow_local=nrow_local, &
                          ncol_local=ncol_local)
      ALLOCATE (sizes(2, 0:para_env%num_pe - 1))
      CALL para_env%allgather([nrow_local, ncol_local], sizes)
      iib = MAXVAL(sizes(1, :))
      ALLOCATE (index_row_rec(iib))
      index_row_rec = 0
      rec_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
         size_rec_buffer = map_rec_size(proc_receive)

         IF (map_rec_size(proc_receive) > 0) THEN
            rec_counter = rec_counter + 1

            rec_col_size = sizes(2, proc_receive)
            rec_row_size = sizes(1, proc_receive)

            ! precompute the indices of the rows
            num_rec_rows = 0
            DO iib = 1, rec_row_size
               i_global = cp_fm_indxl2g(iib, nrow_block, mepos_2_grid(1, proc_receive), &
                                        fm_mat%matrix_struct%first_p_pos(1), nprow)
               IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
                  num_rec_rows = num_rec_rows + 1
                  index_row_rec(num_rec_rows) = i_global
               END IF
            END DO

            CALL buffer_rec(rec_counter)%msg_req%wait()

            iii = 0
            DO jjb = 1, rec_col_size
               j_global = cp_fm_indxl2g(jjb, ncol_block, mepos_2_grid(2, proc_receive), &
                                        fm_mat%matrix_struct%first_p_pos(2), npcol)
               IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
                  DO iib = 1, num_rec_rows
                     i_global = index_row_rec(iib)
                     iii = iii + 1
                     mat2d(i_global - my_start_row + 1, j_global - my_start_col + 1) = buffer_rec(rec_counter)%msg(iii)
                  END DO
               END IF
            END DO

            DEALLOCATE (buffer_rec(rec_counter)%msg)
         END IF
      END DO
      DEALLOCATE (buffer_rec)
      DEALLOCATE (index_row_rec)
      CALL cp_fm_release(fm_mat)
      CALL timestop(handle2)

      ! 7) Finally wait for all messages to be sent
      CALL timeset(routineN//"_waitall", handle2)
      CALL mp_waitall(req_send(:))
      DO send_counter = 1, number_of_send
         DEALLOCATE (buffer_send(send_counter)%msg)
      END DO
      DEALLOCATE (buffer_send)
      CALL timestop(handle2)

      CALL timestop(handle)

   END SUBROUTINE fm2array

! **************************************************************************************************
!> \brief redistribute 2D representation of 3d tensor to a set of dbcsr matrices
!> \param Gamma_2D ...
!> \param homo ...
!> \param virtual ...
!> \param dimen_ia ...
!> \param para_env_sub ...
!> \param my_ia_start ...
!> \param my_ia_end ...
!> \param my_group_L_size ...
!> \param gd_ia ...
!> \param dbcsr_Gamma ...
!> \param mo_coeff_o ...
! **************************************************************************************************
   SUBROUTINE create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, &
                                 my_ia_start, my_ia_end, my_group_L_size, &
                                 gd_ia, dbcsr_Gamma, mo_coeff_o)
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: gamma_2d
      INTEGER, INTENT(IN) :: homo, virtual, dimen_ia
      TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env_sub
      INTEGER, INTENT(IN) :: my_ia_start, my_ia_end, my_group_l_size
      TYPE(group_dist_d1_type), INTENT(IN) :: gd_ia
      TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT) :: dbcsr_gamma
      TYPE(dbcsr_type), INTENT(INOUT) :: mo_coeff_o

      CHARACTER(LEN=*), PARAMETER :: routineN = 'create_dbcsr_gamma'

      INTEGER :: dummy_proc, handle, i_global, i_local, iaia, iib, iii, itmp(2), j_global, &
                 j_local, jjb, jjj, kkb, mypcol, myprow, ncol_block, ncol_local, npcol, nprow, nrow_block, &
                 nrow_local, number_of_rec, number_of_send, proc_receive, proc_send, proc_shift, &
                 rec_counter, rec_iaia_end, rec_iaia_size, rec_iaia_start, rec_pcol, rec_prow, &
                 ref_send_pcol, ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, &
                 size_send_buffer
      INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, map_rec_size, map_send_size
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, grid_ref_2_send_pos, &
                                               indeces_map_my, mepos_2_grid
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
      REAL(kind=dp) :: part_ia
      TYPE(cp_blacs_env_type), POINTER :: blacs_env
      TYPE(cp_fm_struct_type), POINTER :: fm_struct
      TYPE(cp_fm_type) :: fm_ia
      TYPE(index_map), ALLOCATABLE, DIMENSION(:) :: indeces_rec
      TYPE(integ_mat_buffer_type), ALLOCATABLE, &
         DIMENSION(:) :: buffer_rec, buffer_send
      TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send

      CALL timeset(routineN, handle)

      ! create sub blacs env
      NULLIFY (blacs_env)
      CALL cp_blacs_env_create(blacs_env=blacs_env, para_env=para_env_sub)

      ! create the fm_ia buffer matrix
      NULLIFY (fm_struct)
      CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=homo, &
                               ncol_global=virtual, para_env=para_env_sub)
      CALL cp_fm_create(fm_ia, fm_struct, name="fm_ia")
      ! release structure
      CALL cp_fm_struct_release(fm_struct)
      ! release blacs_env
      CALL cp_blacs_env_release(blacs_env)

      ! get array information
      CALL cp_fm_get_info(matrix=fm_ia, &
                          nrow_local=nrow_local, &
                          ncol_local=ncol_local, &
                          row_indices=row_indices, &
                          col_indices=col_indices, &
                          nrow_block=nrow_block, &
                          ncol_block=ncol_block)
      myprow = fm_ia%matrix_struct%context%mepos(1)
      mypcol = fm_ia%matrix_struct%context%mepos(2)
      nprow = fm_ia%matrix_struct%context%num_pe(1)
      npcol = fm_ia%matrix_struct%context%num_pe(2)

      ! 0) create an array containing the processes' positions and supporting info
      ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
      grid_2_mepos = 0
      ALLOCATE (mepos_2_grid(2, 0:para_env_sub%num_pe - 1))
      ! fill the info array
      grid_2_mepos(myprow, mypcol) = para_env_sub%mepos
      ! sum infos
      CALL para_env_sub%sum(grid_2_mepos)
      CALL para_env_sub%allgather([myprow, mypcol], mepos_2_grid)

      ! loop over local index range and define the sending map
      ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1))
      map_send_size = 0
      dummy_proc = 0
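      ! decompose the compound index iaia = (i - 1)*virtual + a into its occupied (i)
      ! and virtual (a) parts and locate the BLACS owner of element (i, a) in fm_ia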
      DO iaia = my_ia_start, my_ia_end
         i_global = (iaia - 1)/virtual + 1
         j_global = MOD(iaia - 1, virtual) + 1
         send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
                                   fm_ia%matrix_struct%first_p_pos(1), nprow)
         send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
                                   fm_ia%matrix_struct%first_p_pos(2), npcol)
         proc_send = grid_2_mepos(send_prow, send_pcol)
         map_send_size(proc_send) = map_send_size(proc_send) + 1
      END DO

      ! loop over local data of fm_ia and define the receiving map
      ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1))
      map_rec_size = 0
      part_ia = REAL(dimen_ia, kind=dp)/REAL(para_env_sub%num_pe, kind=dp)

      DO iib = 1, nrow_local
         i_global = row_indices(iib)
         DO jjb = 1, ncol_local
            j_global = col_indices(jjb)
            iaia = (i_global - 1)*virtual + j_global
            proc_receive = INT(REAL(iaia - 1, kind=dp)/part_ia)
            proc_receive = MAX(0, proc_receive)
            proc_receive = MIN(proc_receive, para_env_sub%num_pe - 1)
            DO
               itmp = get_limit(dimen_ia, para_env_sub%num_pe, proc_receive)
               IF (iaia >= itmp(1) .AND. iaia <= itmp(2)) EXIT
               IF (iaia < itmp(1)) proc_receive = proc_receive - 1
               IF (iaia > itmp(2)) proc_receive = proc_receive + 1
            END DO
            map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
         END DO
      END DO

      ! allocate the buffer for sending data
      number_of_send = 0
      DO proc_shift = 1, para_env_sub%num_pe - 1
         proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
         IF (map_send_size(proc_send) > 0) THEN
            number_of_send = number_of_send + 1
         END IF
      END DO
      ! allocate the structure that will hold the messages to be sent
      ALLOCATE (buffer_send(number_of_send))
      ! and the map from the grid of processes to the message position
      ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
      grid_ref_2_send_pos = 0
      ! finally allocate each message
      send_counter = 0
      DO proc_shift = 1, para_env_sub%num_pe - 1
         proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
         size_send_buffer = map_send_size(proc_send)
         IF (map_send_size(proc_send) > 0) THEN
            send_counter = send_counter + 1
            ! allocate the sending buffer (msg)
            ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
            buffer_send(send_counter)%proc = proc_send
            ! get the pointer to prow, pcol of the process that has
            ! to receive this message
            ref_send_prow = mepos_2_grid(1, proc_send)
            ref_send_pcol = mepos_2_grid(2, proc_send)
            ! save the rank of the process that has to receive this message
            grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
         END IF
      END DO

      ! allocate the buffer for receiving data
      number_of_rec = 0
      DO proc_shift = 1, para_env_sub%num_pe - 1
         proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
         IF (map_rec_size(proc_receive) > 0) THEN
            number_of_rec = number_of_rec + 1
         END IF
      END DO
      ! allocate the structure that will hold the messages to be received
      ! and the related index arrays
      ALLOCATE (buffer_rec(number_of_rec))
      ALLOCATE (indeces_rec(number_of_rec))
      ! finally allocate each message and fill the index arrays
      rec_counter = 0
      DO proc_shift = 1, para_env_sub%num_pe - 1
         proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
         size_rec_buffer = map_rec_size(proc_receive)
         IF (map_rec_size(proc_receive) > 0) THEN
            rec_counter = rec_counter + 1
            ! prepare the buffer for receive
            ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
            buffer_rec(rec_counter)%proc = proc_receive
            ! create the index array
            ALLOCATE (indeces_rec(rec_counter)%map(2, size_rec_buffer))
            indeces_rec(rec_counter)%map = 0
            CALL get_group_dist(gd_ia, proc_receive, rec_iaia_start, rec_iaia_end, rec_iaia_size)
            iii = 0
            DO iaia = rec_iaia_start, rec_iaia_end
               i_global = (iaia - 1)/virtual + 1
               j_global = MOD(iaia - 1, virtual) + 1
               rec_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
                                        fm_ia%matrix_struct%first_p_pos(1), nprow)
               rec_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
                                        fm_ia%matrix_struct%first_p_pos(2), npcol)
               IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) CYCLE
               iii = iii + 1
               i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, &
                                       fm_ia%matrix_struct%first_p_pos(1), nprow)
               j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, &
                                       fm_ia%matrix_struct%first_p_pos(2), npcol)
               indeces_rec(rec_counter)%map(1, iii) = i_local
               indeces_rec(rec_counter)%map(2, iii) = j_local
            END DO
         END IF
      END DO
1437 ! and create the index map for my local data
1438 IF (map_rec_size(para_env_sub%mepos) > 0) THEN
1439 size_rec_buffer = map_rec_size(para_env_sub%mepos)
1440 ALLOCATE (indeces_map_my(2, size_rec_buffer))
1441 indeces_map_my = 0
1442 iii = 0
1443 DO iaia = my_ia_start, my_ia_end
1444 i_global = (iaia - 1)/virtual + 1
1445 j_global = mod(iaia - 1, virtual) + 1
1446 rec_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
1447 fm_ia%matrix_struct%first_p_pos(1), nprow)
1448 rec_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
1449 fm_ia%matrix_struct%first_p_pos(2), npcol)
1450 IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) cycle
1451 iii = iii + 1
1452 i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, &
1453 fm_ia%matrix_struct%first_p_pos(1), nprow)
1454 j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, &
1455 fm_ia%matrix_struct%first_p_pos(2), npcol)
1456 indeces_map_my(1, iii) = i_local
1457 indeces_map_my(2, iii) = j_local
1458 END DO
1459 END IF
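! indeces_map_my lists the local destinations in exactly the order in which
! the loop over iaia meets the locally owned pairs, so the filling loop below
! can consume it sequentially with the counter jjj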
1460
1461 ! auxiliary vector of indices for the send buffers
1462 ALLOCATE (iii_vet(number_of_send))
1463 ! vector for the send requests
1464 ALLOCATE (req_send(number_of_send))
1465 ! loop over the auxiliary basis functions and redistribute each slice into a fm,
1466 ! then copy the fm into a dbcsr matrix
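! communication pattern for each kkb: post all non-blocking receives first,
! pack the send buffers while copying the locally owned elements, post the
! non-blocking sends, then drain the receives, so that packing overlaps with
! message transfer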
1467 DO kkb = 1, my_group_l_size
1468 ! zero the target matrix and the receive buffers, and post the non-blocking receives
1469 CALL cp_fm_set_all(matrix=fm_ia, alpha=0.0_dp)
1470 rec_counter = 0
1471 DO proc_shift = 1, para_env_sub%num_pe - 1
1472 proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1473 IF (map_rec_size(proc_receive) > 0) THEN
1474 rec_counter = rec_counter + 1
1475 buffer_rec(rec_counter)%msg = 0.0_dp
1476 CALL para_env_sub%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
1477 buffer_rec(rec_counter)%msg_req)
1478 END IF
1479 END DO
1480 ! fill the send buffers and post the messages
1481 DO send_counter = 1, number_of_send
1482 buffer_send(send_counter)%msg = 0.0_dp
1483 END DO
1484 iii_vet = 0
1485 jjj = 0
1486 DO iaia = my_ia_start, my_ia_end
1487 i_global = (iaia - 1)/virtual + 1
1488 j_global = mod(iaia - 1, virtual) + 1
1489 send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
1490 fm_ia%matrix_struct%first_p_pos(1), nprow)
1491 send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
1492 fm_ia%matrix_struct%first_p_pos(2), npcol)
1493 proc_send = grid_2_mepos(send_prow, send_pcol)
1494 ! we don't need to send to ourselves
1495 IF (grid_2_mepos(send_prow, send_pcol) == para_env_sub%mepos) THEN
1496 ! fill fm_ia directly with the local data
1497 jjj = jjj + 1
1498 i_local = indeces_map_my(1, jjj)
1499 j_local = indeces_map_my(2, jjj)
1500 fm_ia%local_data(i_local, j_local) = gamma_2d(iaia - my_ia_start + 1, kkb)
1501 ELSE
1502 send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
1503 iii_vet(send_counter) = iii_vet(send_counter) + 1
1504 iii = iii_vet(send_counter)
1505 buffer_send(send_counter)%msg(iii) = gamma_2d(iaia - my_ia_start + 1, kkb)
1506 END IF
1507 END DO
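! pre-set all requests to mp_request_null so that mp_waitall below remains
! well defined even if a slot is never assigned a real send request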
1508 req_send = mp_request_null
1509 send_counter = 0
1510 DO proc_shift = 1, para_env_sub%num_pe - 1
1511 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1512 IF (map_send_size(proc_send) > 0) THEN
1513 send_counter = send_counter + 1
1514 CALL para_env_sub%isend(buffer_send(send_counter)%msg, proc_send, &
1515 buffer_send(send_counter)%msg_req)
1516 req_send(send_counter) = buffer_send(send_counter)%msg_req
1517 END IF
1518 END DO
1519
1520 ! receive the messages and fill fm_ia
1521 rec_counter = 0
1522 DO proc_shift = 1, para_env_sub%num_pe - 1
1523 proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1524 size_rec_buffer = map_rec_size(proc_receive)
1525 IF (map_rec_size(proc_receive) > 0) THEN
1526 rec_counter = rec_counter + 1
1527 ! wait for the message
1528 CALL buffer_rec(rec_counter)%msg_req%wait()
1529 DO iii = 1, size_rec_buffer
1530 i_local = indeces_rec(rec_counter)%map(1, iii)
1531 j_local = indeces_rec(rec_counter)%map(2, iii)
1532 fm_ia%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
1533 END DO
1534 END IF
1535 END DO
1536
1537 ! wait for all outstanding sends to complete
1538 CALL mp_waitall(req_send(:))
1539
1540 ! now create the DBCSR matrix (homo x virtual, distributed like the template mo_coeff_o) and copy fm_ia into it
1541 ALLOCATE (dbcsr_gamma(kkb)%matrix)
1542 CALL cp_dbcsr_m_by_n_from_template(dbcsr_gamma(kkb)%matrix, &
1543 template=mo_coeff_o, &
1544 m=homo, n=virtual, sym=dbcsr_type_no_symmetry)
1545 CALL copy_fm_to_dbcsr(fm_ia, dbcsr_gamma(kkb)%matrix, keep_sparsity=.false.)
1546
1547 END DO
1548
1549 ! deallocate work arrays and communication buffers
1550 DEALLOCATE (gamma_2d)
1551 DEALLOCATE (iii_vet)
1552 DEALLOCATE (req_send)
1553 IF (map_rec_size(para_env_sub%mepos) > 0) THEN
1554 DEALLOCATE (indeces_map_my)
1555 END IF
1556 DO rec_counter = 1, number_of_rec
1557 DEALLOCATE (indeces_rec(rec_counter)%map)
1558 DEALLOCATE (buffer_rec(rec_counter)%msg)
1559 END DO
1560 DEALLOCATE (indeces_rec)
1561 DEALLOCATE (buffer_rec)
1562 DO send_counter = 1, number_of_send
1563 DEALLOCATE (buffer_send(send_counter)%msg)
1564 END DO
1565 DEALLOCATE (buffer_send)
1566 DEALLOCATE (map_send_size)
1567 DEALLOCATE (map_rec_size)
1568 DEALLOCATE (grid_2_mepos)
1569 DEALLOCATE (mepos_2_grid)
1570
1571 ! release buffer matrix
1572 CALL cp_fm_release(fm_ia)
1573
1574 CALL timestop(handle)
1575
1576 END SUBROUTINE create_dbcsr_gamma
1577
1578! **************************************************************************************************
1579!> \brief prepare the maps between the global process grid and the subgroup layout used for redistribution
1580!> \param para_env the global parallel (MPI) environment
1581!> \param para_env_sub the parallel environment of the local subgroup
1582!> \param ngroup number of subgroups
1583!> \param group_grid_2_mepos map from (position in subgroup, group index) to the global rank
1584!> \param mepos_2_grid_group map from the global rank to (position in subgroup, group index)
1585!> \param pos_info if present, for every global rank its position within its own subgroup
1586! **************************************************************************************************
1587 SUBROUTINE prepare_redistribution(para_env, para_env_sub, ngroup, &
1588 group_grid_2_mepos, mepos_2_grid_group, &
1589 pos_info)
1590 TYPE(mp_para_env_type), INTENT(IN) :: para_env, para_env_sub
1591 INTEGER, INTENT(IN) :: ngroup
1592 INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(OUT) :: group_grid_2_mepos, mepos_2_grid_group
1593 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT), &
1594 OPTIONAL :: pos_info
1595
1596 INTEGER :: i, pos_group, pos_sub
1597 INTEGER, ALLOCATABLE, DIMENSION(:) :: my_pos_info
1598
1599 ALLOCATE (my_pos_info(0:para_env%num_pe - 1))
1600 CALL para_env%allgather(para_env_sub%mepos, my_pos_info)
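! after the allgather, my_pos_info(i) holds the position (mepos) of global
! rank i within its own subgroup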
1601
1602 ALLOCATE (group_grid_2_mepos(0:para_env_sub%num_pe - 1, 0:ngroup - 1))
1603 group_grid_2_mepos = 0
1604 ALLOCATE (mepos_2_grid_group(2, 0:para_env%num_pe - 1))
1605 mepos_2_grid_group = 0
1606
1607 DO i = 0, para_env%num_pe - 1
1608 ! calculate position of the group
1609 pos_group = i/para_env_sub%num_pe
1610 ! calculate position in the subgroup
1611 pos_sub = my_pos_info(i)
1612 ! fill the map from the grid of groups to process
1613 group_grid_2_mepos(pos_sub, pos_group) = i
1614 ! and the opposite, from the global pos to the grid pos
1615 mepos_2_grid_group(1, i) = pos_sub
1616 mepos_2_grid_group(2, i) = pos_group
1617 END DO
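! illustration (assuming the ngroup subgroups are built from consecutive
! blocks of global ranks): with para_env%num_pe = 8 and
! para_env_sub%num_pe = 4, global rank 6 sits in group 6/4 = 1 at subgroup
! position 2, hence group_grid_2_mepos(2, 1) = 6 and
! mepos_2_grid_group(:, 6) = (/2, 1/)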
1618
1619 IF (PRESENT(pos_info)) CALL move_alloc(my_pos_info, pos_info)
1620
1621 END SUBROUTINE prepare_redistribution
1622
1623END MODULE mp2_ri_grad_util