(git:374b731)
Loading...
Searching...
No Matches
mp2_ri_gpw.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Routines to calculate RI-GPW-MP2 energy using pw
10!> \par History
11!> 06.2012 created [Mauro Del Ben]
12!> 03.2019 Refactored from mp2_ri_gpw [Frederick Stein]
13! **************************************************************************************************
15 USE iso_c_binding, ONLY: c_null_ptr,&
16 c_associated
25 maxsize,&
27 USE kinds, ONLY: dp,&
28 int_8
35 USE machine, ONLY: m_flush,&
36 m_memory,&
38 USE message_passing, ONLY: mp_comm_type,&
41 USE mp2_types, ONLY: mp2_type,&
43
44!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num
45#include "./base/base_uses.f90"
46
47 IMPLICIT NONE
48
49 PRIVATE
50
51 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_gpw'
52
53 PUBLIC :: mp2_ri_gpw_compute_en
54
55CONTAINS
56
! **************************************************************************************************
!> \brief Computes the RI-MP2 energy contributions (Coulomb and, optionally, exchange) from the
!>        distributed three-index integrals, looping over blocks of occupied ij pairs with a
!>        replication-blocked communication scheme. If requested, gradient intermediates
!>        (t_ab amplitudes, P_ij, P_ab, Gamma_P_ia) are accumulated as well.
!> \param Emp2_Cou Coulomb-like MP2 energy contribution (accumulated, INOUT)
!> \param Emp2_EX exchange-like MP2 energy contribution (accumulated, INOUT)
!> \param Emp2_S opposite-spin component, updated in the alpha-beta case (INOUT)
!> \param Emp2_T same-spin component, updated in the open-shell same-spin case (INOUT)
!> \param BIb_C three-index (ia|K) integral blocks, one array per spin; replicated within the
!>        integration groups here and repurposed/deallocated before returning
!> \param mp2_env MP2 environment (scaling factors scale_S/scale_T, gradient storage, GEMM context)
!> \param para_env global parallel environment
!> \param para_env_sub parallel environment of this subgroup
!> \param color_sub color (index) identifying this subgroup
!> \param gd_array distribution of the RI/auxiliary index over subgroups
!> \param gd_B_virtual per-spin distribution of the virtual index within a subgroup
!> \param Eigenval orbital eigenvalues, indexed (orbital, spin)
!> \param nmo total number of molecular orbitals
!> \param homo number of occupied orbitals per spin
!> \param dimen_RI total size of the RI basis
!> \param unit_nr output unit number (> 0 only on the rank that prints)
!> \param calc_forces if .TRUE., accumulate gradient intermediates
!> \param calc_ex if .TRUE., compute the exchange contribution
! **************************************************************************************************
   SUBROUTINE mp2_ri_gpw_compute_en(Emp2_Cou, Emp2_EX, Emp2_S, Emp2_T, BIb_C, mp2_env, para_env, para_env_sub, color_sub, &
                                    gd_array, gd_B_virtual, &
                                    Eigenval, nmo, homo, dimen_RI, unit_nr, calc_forces, calc_ex)
      REAL(kind=dp), INTENT(INOUT) :: emp2_cou, emp2_ex, emp2_s, emp2_t
      TYPE(three_dim_real_array), DIMENSION(:), &
         INTENT(INOUT) :: bib_c
      TYPE(mp2_type) :: mp2_env
      TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env, para_env_sub
      INTEGER, INTENT(IN) :: color_sub
      TYPE(group_dist_d1_type), INTENT(INOUT) :: gd_array
      INTEGER, DIMENSION(:), INTENT(IN) :: homo
      INTEGER, INTENT(IN) :: nmo
      REAL(kind=dp), DIMENSION(:, :), INTENT(IN) :: eigenval
      TYPE(group_dist_d1_type), DIMENSION(SIZE(homo)), &
         INTENT(INOUT) :: gd_b_virtual
      INTEGER, INTENT(IN) :: dimen_ri, unit_nr
      LOGICAL, INTENT(IN) :: calc_forces, calc_ex

      CHARACTER(LEN=*), PARAMETER :: routinen = 'mp2_ri_gpw_compute_en'

      INTEGER :: a, a_global, b, b_global, block_size, decil, end_point, handle, handle2, handle3, &
         iib, ij_counter, ij_counter_send, ij_index, integ_group_size, ispin, jjb, jspin, &
         max_ij_pairs, my_block_size, my_group_l_end, my_group_l_size, my_group_l_size_orig, &
         my_group_l_start, my_i, my_ij_pairs, my_j, my_new_group_l_size, ngroup, nspins, &
         num_integ_group, proc_receive, proc_send, proc_shift, rec_b_size, rec_b_virtual_end, &
         rec_b_virtual_start, rec_l_size, send_b_size, send_b_virtual_end, send_b_virtual_start, &
         send_i, send_ij_index, send_j, start_point, tag, total_ij_pairs
      INTEGER, ALLOCATABLE, DIMENSION(:) :: integ_group_pos2color_sub, my_b_size, &
         my_b_virtual_end, my_b_virtual_start, num_ij_pairs, sizes_array_orig, virtual
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ij_map
      INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: ranges_info_array
      LOGICAL :: my_alpha_beta_case, my_beta_beta_case, &
         my_open_shell_ss
      REAL(kind=dp) :: amp_fac, my_emp2_cou, my_emp2_ex, &
         sym_fac, t_new, t_start
      ! buffer_1d is a single flat scratch buffer; 2-D/3-D views are mapped onto it below
      ! via contiguous pointer remapping (bi_c_rec, external_ab, external_i_al)
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:), TARGET :: buffer_1d
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
         TARGET :: local_ab, local_ba, t_ab
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
         TARGET :: local_i_al, local_j_al, y_i_ap, y_j_ap
      REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
         POINTER :: external_ab, external_i_al
      REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :, :), &
         POINTER :: bi_c_rec
      TYPE(dgemm_counter_type) :: dgemm_counter
      TYPE(mp_comm_type) :: comm_exchange, comm_rep
      TYPE(three_dim_real_array), ALLOCATABLE, &
         DIMENSION(:) :: b_ia_q

      CALL timeset(routinen, handle)

      ! number of spin channels (2 in the open-shell case, see the ispin/jspin loops below)
      nspins = SIZE(homo)

      ALLOCATE (virtual(nspins))
      virtual(:) = nmo - homo(:)

      ! local share of the virtual-orbital distribution for each spin
      ALLOCATE (my_b_size(nspins), my_b_virtual_start(nspins), my_b_virtual_end(nspins))
      DO ispin = 1, nspins
         CALL get_group_dist(gd_b_virtual(ispin), para_env_sub%mepos, &
                             my_b_virtual_start(ispin), my_b_virtual_end(ispin), my_b_size(ispin))
      END DO

      ! local share of the RI/auxiliary index held by this subgroup
      CALL get_group_dist(gd_array, color_sub, my_group_l_start, my_group_l_end, my_group_l_size)

      CALL dgemm_counter_init(dgemm_counter, unit_nr, mp2_env%ri_mp2%print_dgemm_info)

      ! local_gemm_ctx has a very large footprint the first time this routine is
      ! called.
      IF (.NOT. c_associated(mp2_env%local_gemm_ctx)) THEN
         CALL local_gemm_create(mp2_env%local_gemm_ctx, local_gemm_pu_gpu)
         CALL local_gemm_set_op_threshold_gpu(mp2_env%local_gemm_ctx, 128*128*128*2)
      END IF

      CALL mp2_ri_get_integ_group_size( &
         mp2_env, para_env, para_env_sub, gd_array, gd_b_virtual, &
         homo, dimen_ri, unit_nr, &
         integ_group_size, ngroup, &
         num_integ_group, virtual, calc_forces)

      ! now create a group that contains all the proc that have the same virtual starting point
      ! in the integ group
      CALL mp2_ri_create_group( &
         para_env, para_env_sub, color_sub, &
         gd_array%sizes, calc_forces, &
         integ_group_size, my_group_l_end, &
         my_group_l_size, my_group_l_size_orig, my_group_l_start, my_new_group_l_size, &
         integ_group_pos2color_sub, sizes_array_orig, &
         ranges_info_array, comm_exchange, comm_rep, num_integ_group)

      ! We cannot fix the tag because of the recv routine
      tag = 42

      ! loop over spin-pair channels: (1,1), (1,2), (2,2) for open shell, (1,1) for closed shell
      DO jspin = 1, nspins

         ! replicate the (ia|K) integrals of spin jspin within the integration group
         CALL replicate_iak_2intgroup(bib_c(jspin)%array, comm_exchange, comm_rep, &
                                      homo(jspin), gd_array%sizes, my_b_size(jspin), &
                                      my_group_l_size, ranges_info_array)

         DO ispin = 1, jspin

            IF (unit_nr > 0) THEN
               IF (nspins == 1) THEN
                  WRITE (unit_nr, *) "Start loop run"
               ELSE IF (ispin == 1 .AND. jspin == 1) THEN
                  WRITE (unit_nr, *) "Start loop run alpha-alpha"
               ELSE IF (ispin == 1 .AND. jspin == 2) THEN
                  WRITE (unit_nr, *) "Start loop run alpha-beta"
               ELSE IF (ispin == 2 .AND. jspin == 2) THEN
                  WRITE (unit_nr, *) "Start loop run beta-beta"
               END IF
               CALL m_flush(unit_nr)
            END IF

            my_open_shell_ss = (nspins == 2) .AND. (ispin == jspin)

            ! t_ab = amp_fac*(:,a|:,b)-(:,b|:,a)
            ! If we calculate the gradient we need to distinguish
            ! between alpha-alpha and beta-beta cases for UMP2

            my_beta_beta_case = .false.
            my_alpha_beta_case = .false.
            IF (ispin /= jspin) THEN
               my_alpha_beta_case = .true.
            ELSE IF (my_open_shell_ss) THEN
               IF (ispin == 2) my_beta_beta_case = .true.
            END IF

            ! amplitude prefactor: full scale_S+scale_T for closed-shell same-spin,
            ! only scale_T for the alpha-beta and open-shell same-spin cases
            amp_fac = mp2_env%scale_S + mp2_env%scale_T
            IF (my_alpha_beta_case .OR. my_open_shell_ss) amp_fac = mp2_env%scale_T

            CALL mp2_ri_allocate_no_blk(local_ab, t_ab, mp2_env, homo, virtual, my_b_size, &
                                        my_group_l_size, calc_forces, ispin, jspin, local_ba)

            CALL mp2_ri_get_block_size( &
               mp2_env, para_env, para_env_sub, gd_array, gd_b_virtual(ispin:jspin), &
               homo(ispin:jspin), virtual(ispin:jspin), dimen_ri, unit_nr, block_size, &
               ngroup, num_integ_group, my_open_shell_ss, calc_forces, buffer_1d)

            ! *****************************************************************
            ! **********  REPLICATION-BLOCKED COMMUNICATION SCHEME  ***********
            ! *****************************************************************
            ! introduce block size, the number of occupied orbitals has to be a
            ! multiple of the block size

            ! Calculate the maximum number of ij pairs that have to be computed
            ! among groups
            CALL mp2_ri_communication(my_alpha_beta_case, total_ij_pairs, homo(ispin), homo(jspin), &
                                      block_size, ngroup, ij_map, color_sub, my_ij_pairs, my_open_shell_ss, unit_nr)

            ! every rank learns how many ij pairs each exchange-group member owns
            ALLOCATE (num_ij_pairs(0:comm_exchange%num_pe - 1))
            CALL comm_exchange%allgather(my_ij_pairs, num_ij_pairs)

            max_ij_pairs = maxval(num_ij_pairs)

            ! start real stuff
            CALL mp2_ri_allocate_blk(dimen_ri, my_b_size, block_size, local_i_al, &
                                     local_j_al, calc_forces, y_i_ap, y_j_ap, ispin, jspin)

            CALL timeset(routinen//"_RI_loop", handle2)
            my_emp2_cou = 0.0_dp
            my_emp2_ex = 0.0_dp
            t_start = m_walltime()
            ! main loop over ij-pair blocks; all ranks iterate to max_ij_pairs so that
            ! communication partners stay matched even when a rank has no local pair left
            DO ij_index = 1, max_ij_pairs

               ! Prediction is unreliable if we are in the first step of the loop
               IF (unit_nr > 0 .AND. ij_index > 1) THEN
                  decil = ij_index*10/max_ij_pairs
                  IF (decil /= (ij_index - 1)*10/max_ij_pairs) THEN
                     t_new = m_walltime()
                     t_new = (t_new - t_start)/60.0_dp*(max_ij_pairs - ij_index + 1)/(ij_index - 1)
                     WRITE (unit_nr, fmt="(T3,A)") "Percentage of finished loop: "// &
                        cp_to_string(decil*10)//". Minutes left: "//cp_to_string(t_new)
                     CALL m_flush(unit_nr)
                  END IF
               END IF

               IF (calc_forces) THEN
                  y_i_ap = 0.0_dp
                  y_j_ap = 0.0_dp
               END IF

               IF (ij_index <= my_ij_pairs) THEN
                  ! We have work to do
                  ij_counter = (ij_index - min(1, color_sub))*ngroup + color_sub
                  my_i = ij_map(1, ij_counter)
                  my_j = ij_map(2, ij_counter)
                  my_block_size = ij_map(3, ij_counter)

                  ! assemble the full (RI x virtual) panels for occupied i and j, starting
                  ! from the locally held slices ...
                  local_i_al = 0.0_dp
                  CALL fill_local_i_al(local_i_al(:, :, 1:my_block_size), ranges_info_array(:, :, comm_exchange%mepos), &
                                       bib_c(ispin)%array(:, :, my_i:my_i + my_block_size - 1))

                  local_j_al = 0.0_dp
                  CALL fill_local_i_al(local_j_al(:, :, 1:my_block_size), ranges_info_array(:, :, comm_exchange%mepos), &
                                       bib_c(jspin)%array(:, :, my_j:my_j + my_block_size - 1))

                  ! collect data from other proc
                  CALL timeset(routinen//"_comm", handle3)
                  DO proc_shift = 1, comm_exchange%num_pe - 1
                     proc_send = modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
                     proc_receive = modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)

                     send_ij_index = num_ij_pairs(proc_send)

                     CALL get_group_dist(gd_array, proc_receive, sizes=rec_l_size)

                     IF (ij_index <= send_ij_index) THEN
                        ! partner also has work: exchange blocks both ways (sendrecv)
                        ij_counter_send = (ij_index - min(1, integ_group_pos2color_sub(proc_send)))*ngroup + &
                                          integ_group_pos2color_sub(proc_send)
                        send_i = ij_map(1, ij_counter_send)
                        send_j = ij_map(2, ij_counter_send)

                        ! occupied i
                        bi_c_rec(1:rec_l_size, 1:my_b_size(ispin), 1:my_block_size) => &
                           buffer_1d(1:rec_l_size*my_b_size(ispin)*my_block_size)
                        bi_c_rec = 0.0_dp
                        CALL comm_exchange%sendrecv(bib_c(ispin)%array(:, :, send_i:send_i + my_block_size - 1), &
                                                    proc_send, bi_c_rec, proc_receive, tag)

                        CALL fill_local_i_al(local_i_al(:, :, 1:my_block_size), ranges_info_array(:, :, proc_receive), &
                                             bi_c_rec(:, 1:my_b_size(ispin), :))

                        ! occupied j
                        bi_c_rec(1:rec_l_size, 1:my_b_size(jspin), 1:my_block_size) => &
                           buffer_1d(1:int(rec_l_size, int_8)*my_b_size(jspin)*my_block_size)
                        bi_c_rec = 0.0_dp
                        CALL comm_exchange%sendrecv(bib_c(jspin)%array(:, :, send_j:send_j + my_block_size - 1), &
                                                    proc_send, bi_c_rec, proc_receive, tag)

                        CALL fill_local_i_al(local_j_al(:, :, 1:my_block_size), ranges_info_array(:, :, proc_receive), &
                                             bi_c_rec(:, 1:my_b_size(jspin), :))

                     ELSE
                        ! we send nothing while we know that we have to receive something

                        ! occupied i
                        bi_c_rec(1:rec_l_size, 1:my_b_size(ispin), 1:my_block_size) => &
                           buffer_1d(1:int(rec_l_size, int_8)*my_b_size(ispin)*my_block_size)
                        bi_c_rec = 0.0_dp
                        CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)

                        CALL fill_local_i_al(local_i_al(:, :, 1:my_block_size), ranges_info_array(:, :, proc_receive), &
                                             bi_c_rec(:, 1:my_b_size(ispin), 1:my_block_size))

                        ! occupied j
                        bi_c_rec(1:rec_l_size, 1:my_b_size(jspin), 1:my_block_size) => &
                           buffer_1d(1:int(rec_l_size, int_8)*my_b_size(jspin)*my_block_size)
                        bi_c_rec = 0.0_dp
                        CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)

                        CALL fill_local_i_al(local_j_al(:, :, 1:my_block_size), ranges_info_array(:, :, proc_receive), &
                                             bi_c_rec(:, 1:my_b_size(jspin), 1:my_block_size))

                     END IF

                  END DO

                  CALL timestop(handle3)

                  ! loop over the block elements
                  DO iib = 1, my_block_size
                     DO jjb = 1, my_block_size
                        CALL timeset(routinen//"_expansion", handle3)
                        associate(my_local_i_al => local_i_al(:, :, iib), my_local_j_al => local_j_al(:, :, jjb))

                           ! calculate the integrals (ia|jb) starting from my local data ...
                           local_ab = 0.0_dp
                           IF ((my_alpha_beta_case) .AND. (calc_forces)) THEN
                              local_ba = 0.0_dp
                           END IF
                           CALL dgemm_counter_start(dgemm_counter)
                           CALL local_gemm('T', 'N', my_b_size(ispin), my_b_size(jspin), dimen_ri, 1.0_dp, &
                                           my_local_i_al, dimen_ri, my_local_j_al, dimen_ri, &
                                           0.0_dp, local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), &
                                           my_b_size(ispin), mp2_env%local_gemm_ctx)
                           ! Additional integrals only for alpha_beta case and forces
                           IF (my_alpha_beta_case .AND. calc_forces) THEN
                              local_ba(my_b_virtual_start(jspin):my_b_virtual_end(jspin), :) = &
                                 transpose(local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :))
                           END IF
                           ! ... and from the others of my subgroup
                           DO proc_shift = 1, para_env_sub%num_pe - 1
                              proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
                              proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)

                              CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, &
                                                  rec_b_virtual_end, rec_b_size)

                              external_i_al(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri, int_8)*rec_b_size)
                              external_i_al = 0.0_dp

                              CALL para_env_sub%sendrecv(my_local_i_al, proc_send, &
                                                         external_i_al, proc_receive, tag)

                              CALL local_gemm('T', 'N', rec_b_size, my_b_size(jspin), dimen_ri, 1.0_dp, &
                                              external_i_al, dimen_ri, my_local_j_al, dimen_ri, &
                                              0.0_dp, local_ab(rec_b_virtual_start:rec_b_virtual_end, 1:my_b_size(jspin)), rec_b_size, &
                                              mp2_env%local_gemm_ctx)

                              ! Additional integrals only for alpha_beta case and forces
                              IF (my_alpha_beta_case .AND. calc_forces) THEN

                                 CALL get_group_dist(gd_b_virtual(jspin), proc_receive, rec_b_virtual_start, &
                                                     rec_b_virtual_end, rec_b_size)

                                 external_i_al(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri, int_8)*rec_b_size)
                                 external_i_al = 0.0_dp

                                 CALL para_env_sub%sendrecv(my_local_j_al, proc_send, &
                                                            external_i_al, proc_receive, tag)

                                 CALL local_gemm('T', 'N', rec_b_size, my_b_size(ispin), dimen_ri, 1.0_dp, &
                                                 external_i_al, dimen_ri, my_local_i_al, dimen_ri, &
                                                 0.0_dp, local_ba(rec_b_virtual_start:rec_b_virtual_end, 1:my_b_size(ispin)), rec_b_size, &
                                                 mp2_env%local_gemm_ctx)
                              END IF
                           END DO
                           IF (my_alpha_beta_case .AND. calc_forces) THEN
                              ! Is just an approximation, but the call does not allow it, it ought to be (virtual_i*B_size_j+virtual_j*B_size_i)*dimen_RI
                              CALL dgemm_counter_stop(dgemm_counter, virtual(ispin), my_b_size(ispin) + my_b_size(jspin), dimen_ri)
                           ELSE
                              CALL dgemm_counter_stop(dgemm_counter, virtual(ispin), my_b_size(jspin), dimen_ri)
                           END IF
                           CALL timestop(handle3)

                           !sample peak memory
                           CALL m_memory()

                           CALL timeset(routinen//"_ener", handle3)
                           ! calculate coulomb only MP2
                           ! symmetry factor: off-diagonal ij pairs count twice; the alpha-beta
                           ! channel uses 0.5 (both spin orders are visited)
                           sym_fac = 2.0_dp
                           IF (my_i == my_j) sym_fac = 1.0_dp
                           IF (my_alpha_beta_case) sym_fac = 0.5_dp
                           DO b = 1, my_b_size(jspin)
                              b_global = b + my_b_virtual_start(jspin) - 1
                              DO a = 1, virtual(ispin)
                                 ! MP2 denominator: eps_a + eps_b - eps_i - eps_j
                                 my_emp2_cou = my_emp2_cou - sym_fac*2.0_dp*local_ab(a, b)**2/ &
                                               (eigenval(homo(ispin) + a, ispin) + eigenval(homo(jspin) + b_global, jspin) - &
                                                eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, jspin))
                              END DO
                           END DO

                           IF (calc_ex) THEN
                              ! contract integrals with orbital energies for exchange MP2 energy
                              ! starting with local ...
                              IF (calc_forces .AND. (.NOT. my_alpha_beta_case)) t_ab = 0.0_dp
                              DO b = 1, my_b_size(ispin)
                                 b_global = b + my_b_virtual_start(ispin) - 1
                                 DO a = 1, my_b_size(ispin)
                                    a_global = a + my_b_virtual_start(ispin) - 1
                                    my_emp2_ex = my_emp2_ex + sym_fac*local_ab(a_global, b)*local_ab(b_global, a)/ &
                                                 (eigenval(homo(ispin) + a_global, ispin) + eigenval(homo(ispin) + b_global, ispin) - &
                                                  eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, ispin))
                                    IF (calc_forces .AND. (.NOT. my_alpha_beta_case)) THEN
                                       ! amplitude t_ab = -(amp_fac*(ia|jb) - scale_T*(ib|ja)) / denominator
                                       t_ab(a_global, b) = -(amp_fac*local_ab(a_global, b) - mp2_env%scale_T*local_ab(b_global, a))/ &
                                                           (eigenval(homo(ispin) + a_global, ispin) + &
                                                            eigenval(homo(ispin) + b_global, ispin) - &
                                                            eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, ispin))
                                    END IF
                                 END DO
                              END DO
                              ! ... and then with external data
                              DO proc_shift = 1, para_env_sub%num_pe - 1
                                 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
                                 proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)

                                 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, &
                                                     rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
                                 CALL get_group_dist(gd_b_virtual(ispin), proc_send, &
                                                     send_b_virtual_start, send_b_virtual_end, send_b_size)

                                 external_ab(1:my_b_size(ispin), 1:rec_b_size) => &
                                    buffer_1d(1:int(rec_b_size, int_8)*my_b_size(ispin))
                                 external_ab = 0.0_dp

                                 CALL para_env_sub%sendrecv(local_ab(send_b_virtual_start:send_b_virtual_end, 1:my_b_size(ispin)), proc_send, &
                                                            external_ab(1:my_b_size(ispin), 1:rec_b_size), proc_receive, tag)

                                 DO b = 1, my_b_size(ispin)
                                    b_global = b + my_b_virtual_start(ispin) - 1
                                    DO a = 1, rec_b_size
                                       a_global = a + rec_b_virtual_start - 1
                                       my_emp2_ex = my_emp2_ex + sym_fac*local_ab(a_global, b)*external_ab(b, a)/ &
                                                    (eigenval(homo(ispin) + a_global, ispin) + eigenval(homo(ispin) + b_global, ispin) - &
                                                     eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, ispin))
                                       IF (calc_forces .AND. (.NOT. my_alpha_beta_case)) &
                                          t_ab(a_global, b) = -(amp_fac*local_ab(a_global, b) - mp2_env%scale_T*external_ab(b, a))/ &
                                                              (eigenval(homo(ispin) + a_global, ispin) + &
                                                               eigenval(homo(ispin) + b_global, ispin) - &
                                                               eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, ispin))
                                    END DO
                                 END DO
                              END DO
                           END IF
                           CALL timestop(handle3)

                           IF (calc_forces) THEN
                              ! update P_ab, Gamma_P_ia
                              CALL mp2_update_p_gamma(mp2_env, para_env_sub, gd_b_virtual, &
                                                      eigenval, homo, dimen_ri, iib, jjb, my_b_size, &
                                                      my_b_virtual_end, my_b_virtual_start, my_i, my_j, virtual, &
                                                      local_ab, t_ab, my_local_i_al, my_local_j_al, &
                                                      my_open_shell_ss, y_i_ap(:, :, iib), y_j_ap(:, :, jjb), local_ba, &
                                                      ispin, jspin, dgemm_counter, buffer_1d)

                           END IF

                        END associate

                     END DO ! jjB
                  END DO ! iiB

               ELSE
                  ! We need it later in case of gradients
                  my_block_size = 1

                  CALL timeset(routinen//"_comm", handle3)
                  ! No work to do and we know that we have to receive nothing, but send something
                  ! send data to other proc
                  DO proc_shift = 1, comm_exchange%num_pe - 1
                     proc_send = modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
                     proc_receive = modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)

                     send_ij_index = num_ij_pairs(proc_send)

                     IF (ij_index <= send_ij_index) THEN
                        ! something to send
                        ij_counter_send = (ij_index - min(1, integ_group_pos2color_sub(proc_send)))*ngroup + &
                                          integ_group_pos2color_sub(proc_send)
                        send_i = ij_map(1, ij_counter_send)
                        send_j = ij_map(2, ij_counter_send)

                        ! occupied i
                        CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_i:send_i + my_block_size - 1), &
                                                proc_send, tag)
                        ! occupied j
                        CALL comm_exchange%send(bib_c(jspin)%array(:, :, send_j:send_j + my_block_size - 1), &
                                                proc_send, tag)
                     END IF
                  END DO
                  CALL timestop(handle3)
               END IF

               ! redistribute gamma
               IF (calc_forces) THEN
                  CALL mp2_redistribute_gamma(mp2_env%ri_grad%Gamma_P_ia(ispin)%array, ij_index, my_b_size(ispin), &
                                              my_block_size, my_group_l_size, my_i, my_ij_pairs, ngroup, &
                                              num_integ_group, integ_group_pos2color_sub, num_ij_pairs, &
                                              ij_map, ranges_info_array, y_i_ap(:, :, 1:my_block_size), comm_exchange, &
                                              gd_array%sizes, 1, buffer_1d)
                  CALL mp2_redistribute_gamma(mp2_env%ri_grad%Gamma_P_ia(jspin)%array, ij_index, my_b_size(jspin), &
                                              my_block_size, my_group_l_size, my_j, my_ij_pairs, ngroup, &
                                              num_integ_group, integ_group_pos2color_sub, num_ij_pairs, &
                                              ij_map, ranges_info_array, y_j_ap(:, :, 1:my_block_size), comm_exchange, &
                                              gd_array%sizes, 2, buffer_1d)
               END IF

            END DO
            CALL timestop(handle2)

            ! release per-channel work arrays
            DEALLOCATE (local_i_al)
            DEALLOCATE (local_j_al)
            DEALLOCATE (ij_map)
            DEALLOCATE (num_ij_pairs)
            DEALLOCATE (local_ab)

            IF (calc_forces) THEN
               DEALLOCATE (y_i_ap)
               DEALLOCATE (y_j_ap)
               IF (ALLOCATED(t_ab)) THEN
                  DEALLOCATE (t_ab)
               END IF
               DEALLOCATE (local_ba)

               ! here we check if there are almost degenerate ij
               ! pairs and we update P_ij with these contribution.
               ! If all pairs are degenerate with each other this step will scale O(N^6),
               ! if the number of degenerate pairs scales linearly with the system size
               ! this step will scale O(N^5).
               ! Start counting the number of almost degenerate ij pairs according
               ! to eps_canonical
               CALL quasi_degenerate_p_ij( &
                  mp2_env, eigenval(:, ispin:jspin), homo(ispin:jspin), virtual(ispin:jspin), my_open_shell_ss, &
                  my_beta_beta_case, bib_c(ispin:jspin), unit_nr, dimen_ri, &
                  my_b_size(ispin:jspin), ngroup, my_group_l_size, &
                  color_sub, ranges_info_array, comm_exchange, para_env_sub, para_env, &
                  my_b_virtual_start(ispin:jspin), my_b_virtual_end(ispin:jspin), gd_array%sizes, gd_b_virtual(ispin:jspin), &
                  integ_group_pos2color_sub, dgemm_counter, buffer_1d)

            END IF

            DEALLOCATE (buffer_1d)

            ! Dereplicate BIb_C and Gamma_P_ia to save memory
            ! These matrices will not be needed in that fashion anymore
            ! B_ia_Q will be needed later
            IF (calc_forces .AND. jspin == nspins) THEN
               IF (.NOT. ALLOCATED(b_ia_q)) ALLOCATE (b_ia_q(nspins))
               ! transpose BIb_C (L,a,i) into B_ia_Q (i,a,L) restricted to the original
               ! (un-replicated) L range
               ALLOCATE (b_ia_q(ispin)%array(homo(ispin), my_b_size(ispin), my_group_l_size_orig))
               b_ia_q(ispin)%array = 0.0_dp
               DO jjb = 1, homo(ispin)
                  DO iib = 1, my_b_size(ispin)
                     b_ia_q(ispin)%array(jjb, iib, 1:my_group_l_size_orig) = &
                        bib_c(ispin)%array(1:my_group_l_size_orig, iib, jjb)
                  END DO
               END DO
               DEALLOCATE (bib_c(ispin)%array)

               ! sum Gamma and dereplicate
               ALLOCATE (bib_c(ispin)%array(my_b_size(ispin), homo(ispin), my_group_l_size_orig))
               DO proc_shift = 1, comm_rep%num_pe - 1
                  ! invert order
                  proc_send = modulo(comm_rep%mepos - proc_shift, comm_rep%num_pe)
                  proc_receive = modulo(comm_rep%mepos + proc_shift, comm_rep%num_pe)

                  start_point = ranges_info_array(3, proc_shift, comm_exchange%mepos)
                  end_point = ranges_info_array(4, proc_shift, comm_exchange%mepos)

                  CALL comm_rep%sendrecv(mp2_env%ri_grad%Gamma_P_ia(ispin)%array(:, :, start_point:end_point), &
                                         proc_send, bib_c(ispin)%array, proc_receive, tag)
!$OMP PARALLEL WORKSHARE DEFAULT(NONE) &
!$OMP SHARED(mp2_env,BIb_C,ispin,homo,my_B_size,my_group_L_size_orig)
                  mp2_env%ri_grad%Gamma_P_ia(ispin)%array(:, :, 1:my_group_l_size_orig) = &
                     mp2_env%ri_grad%Gamma_P_ia(ispin)%array(:, :, 1:my_group_l_size_orig) &
                     + bib_c(ispin)%array(:, :, :)
!$OMP END PARALLEL WORKSHARE
               END DO

               ! shrink Gamma_P_ia to the original L range without an extra copy
               bib_c(ispin)%array(:, :, :) = mp2_env%ri_grad%Gamma_P_ia(ispin)%array(:, :, 1:my_group_l_size_orig)
               DEALLOCATE (mp2_env%ri_grad%Gamma_P_ia(ispin)%array)
               CALL move_alloc(bib_c(ispin)%array, mp2_env%ri_grad%Gamma_P_ia(ispin)%array)
            ELSE IF (jspin == nspins) THEN
               DEALLOCATE (bib_c(ispin)%array)
            END IF

            ! reduce the partial energies over all ranks
            CALL para_env%sum(my_emp2_cou)
            CALL para_env%sum(my_emp2_ex)

            ! accumulate into the spin-resolved totals for this channel
            IF (my_open_shell_ss .OR. my_alpha_beta_case) THEN
               IF (my_alpha_beta_case) THEN
                  emp2_s = emp2_s + my_emp2_cou
                  emp2_cou = emp2_cou + my_emp2_cou
               ELSE
                  my_emp2_cou = my_emp2_cou*0.25_dp
                  my_emp2_ex = my_emp2_ex*0.5_dp
                  emp2_t = emp2_t + my_emp2_cou + my_emp2_ex
                  emp2_cou = emp2_cou + my_emp2_cou
                  emp2_ex = emp2_ex + my_emp2_ex
               END IF
            ELSE
               emp2_cou = emp2_cou + my_emp2_cou
               emp2_ex = emp2_ex + my_emp2_ex
            END IF
         END DO

      END DO

      DEALLOCATE (integ_group_pos2color_sub)
      DEALLOCATE (ranges_info_array)

      CALL comm_exchange%free()
      CALL comm_rep%free()

      IF (calc_forces) THEN
         ! recover original information (before replication)
         DEALLOCATE (gd_array%sizes)
         iib = SIZE(sizes_array_orig)
         ALLOCATE (gd_array%sizes(0:iib - 1))
         gd_array%sizes(:) = sizes_array_orig
         DEALLOCATE (sizes_array_orig)

         ! Remove replication from BIb_C and reorder the matrix
         my_group_l_size = my_group_l_size_orig

         ! B_ia_Q(ispin)%array will be deallocated inside of complete_gamma
         DO ispin = 1, nspins
            CALL complete_gamma(mp2_env, b_ia_q(ispin)%array, dimen_ri, homo(ispin), &
                                virtual(ispin), para_env, para_env_sub, ngroup, &
                                my_group_l_size, my_group_l_start, my_group_l_end, &
                                my_b_size(ispin), my_b_virtual_start(ispin), &
                                gd_array, gd_b_virtual(ispin), &
                                ispin)
         END DO
         DEALLOCATE (b_ia_q)

         ! closed shell: the single spin channel carries both spins
         IF (nspins == 1) mp2_env%ri_grad%P_ab(1)%array(:, :) = mp2_env%ri_grad%P_ab(1)%array(:, :)*2.0_dp
         block
            TYPE(mp_comm_type) :: comm
            CALL comm%from_split(para_env, para_env_sub%mepos)
            DO ispin = 1, nspins
               ! P_ab is only replicated over all subgroups
               CALL comm%sum(mp2_env%ri_grad%P_ab(ispin)%array)
               ! P_ij is replicated over all processes
               CALL para_env%sum(mp2_env%ri_grad%P_ij(ispin)%array)
            END DO
            CALL comm%free()
         END block
      END IF

      CALL release_group_dist(gd_array)
      DO ispin = 1, nspins
         IF (ALLOCATED(bib_c(ispin)%array)) DEALLOCATE (bib_c(ispin)%array)
         CALL release_group_dist(gd_b_virtual(ispin))
      END DO

      ! We do not need this matrix later, so deallocate it here to save memory
      IF (calc_forces) DEALLOCATE (mp2_env%ri_grad%PQ_half)
      IF (calc_forces .AND. .NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) &
         DEALLOCATE (mp2_env%ri_grad%operator_half)

      CALL dgemm_counter_write(dgemm_counter, para_env)

      ! release memory allocated by local_gemm when run on GPU. local_gemm_ctx is null on cpu only runs
      CALL local_gemm_destroy(mp2_env%local_gemm_ctx)
      mp2_env%local_gemm_ctx = c_null_ptr
      CALL timestop(handle)

   END SUBROUTINE mp2_ri_gpw_compute_en
696
! **************************************************************************************************
!> \brief Scatters a received 3-D integral block into the replicated local panel: for every
!>        replication range, rows [start:end] of BIb_C_rec are copied into the destination
!>        RI rows [Lstart:Lend] of local_i_aL.
!> \param local_i_aL destination panel, updated in place
!> \param ranges_info_array per-range bounds: rows 1:2 destination RI range, rows 3:4 source range
!> \param BIb_C_rec received integral block to scatter
! **************************************************************************************************
   SUBROUTINE fill_local_i_al(local_i_aL, ranges_info_array, BIb_C_rec)
      REAL(kind=dp), DIMENSION(:, :, :), INTENT(INOUT) :: local_i_al
      INTEGER, DIMENSION(:, :), INTENT(IN) :: ranges_info_array
      REAL(kind=dp), DIMENSION(:, :, :), INTENT(IN) :: bib_c_rec

      CHARACTER(LEN=*), PARAMETER :: routinen = 'fill_local_i_aL'

      INTEGER :: dst_first, dst_last, handle, rep, &
                 src_first, src_last

      CALL timeset(routinen, handle)

      ! one copy per replication range described by ranges_info_array
      DO rep = 1, SIZE(ranges_info_array, 2)
         dst_first = ranges_info_array(1, rep)
         dst_last = ranges_info_array(2, rep)
         src_first = ranges_info_array(3, rep)
         src_last = ranges_info_array(4, rep)

!$OMP PARALLEL WORKSHARE DEFAULT(NONE) &
!$OMP SHARED(BIb_C_rec,local_i_aL,dst_first,dst_last,src_first,src_last)
         local_i_al(dst_first:dst_last, :, :) = bib_c_rec(src_first:src_last, :, :)
!$OMP END PARALLEL WORKSHARE
      END DO

      CALL timestop(handle)

   END SUBROUTINE fill_local_i_al
730
! **************************************************************************************************
!> \brief 2-D variant of fill_local_i_aL: scatters a received matrix block into the replicated
!>        local panel, copying source rows [start:end] into destination RI rows [Lstart:Lend]
!>        for every replication range.
!> \param local_i_aL destination panel, updated in place
!> \param ranges_info_array per-range bounds: rows 1:2 destination RI range, rows 3:4 source range
!> \param BIb_C_rec received matrix block to scatter
! **************************************************************************************************
   SUBROUTINE fill_local_i_al_2d(local_i_aL, ranges_info_array, BIb_C_rec)
      REAL(kind=dp), DIMENSION(:, :), INTENT(INOUT) :: local_i_al
      INTEGER, DIMENSION(:, :), INTENT(IN) :: ranges_info_array
      REAL(kind=dp), DIMENSION(:, :), INTENT(IN) :: bib_c_rec

      CHARACTER(LEN=*), PARAMETER :: routinen = 'fill_local_i_aL_2D'

      INTEGER :: dst_first, dst_last, handle, rep, &
                 src_first, src_last

      CALL timeset(routinen, handle)

      ! one copy per replication range described by ranges_info_array
      DO rep = 1, SIZE(ranges_info_array, 2)
         dst_first = ranges_info_array(1, rep)
         dst_last = ranges_info_array(2, rep)
         src_first = ranges_info_array(3, rep)
         src_last = ranges_info_array(4, rep)

!$OMP PARALLEL WORKSHARE DEFAULT(NONE) &
!$OMP SHARED(BIb_C_rec,local_i_aL,dst_first,dst_last,src_first,src_last)
         local_i_al(dst_first:dst_last, :) = bib_c_rec(src_first:src_last, :)
!$OMP END PARALLEL WORKSHARE
      END DO

      CALL timestop(handle)

   END SUBROUTINE fill_local_i_al_2d
764
! **************************************************************************************************
!> \brief Replicates the (ia|K) integral block within the integration group: every rank pads its
!>        local block to the maximum L size, allgathers the padded blocks over comm_rep, and then
!>        reassembles BIb_C in the replicated layout using the ranges in ranges_info_array.
!> \param BIb_C on entry the local integral block; on exit the replicated block
!>        of shape (my_group_L_size, my_B_size, homo)
!> \param comm_exchange exchange communicator (its mepos selects the range table column)
!> \param comm_rep replication communicator over which the allgather runs
!> \param homo number of occupied orbitals
!> \param sizes_array L-block sizes of all groups (maximum determines the padding)
!> \param my_B_size local number of virtual orbitals
!> \param my_group_L_size replicated L size of this group
!> \param ranges_info_array rows 3:4 give, per replica, the destination L range in BIb_C
! **************************************************************************************************
   SUBROUTINE replicate_iak_2intgroup(BIb_C, comm_exchange, comm_rep, homo, sizes_array, my_B_size, &
                                      my_group_L_size, ranges_info_array)
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
         INTENT(INOUT) :: bib_c
      TYPE(mp_comm_type), INTENT(IN) :: comm_exchange, comm_rep
      INTEGER, INTENT(IN) :: homo
      INTEGER, DIMENSION(:), INTENT(IN) :: sizes_array
      INTEGER, INTENT(IN) :: my_b_size, my_group_l_size
      INTEGER, DIMENSION(:, 0:, 0:), INTENT(IN) :: ranges_info_array

      CHARACTER(LEN=*), PARAMETER :: routinen = 'replicate_iaK_2intgroup'

      INTEGER :: handle, l_size_max, row_first, row_last, &
                 shift, src_proc
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: padded_block
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :, :) :: gathered_blocks

      CALL timeset(routinen, handle)

      ! replication scheme using mpi_allgather
      ! pad every contribution to the largest L size so the gather is uniform
      l_size_max = maxval(sizes_array)

      ALLOCATE (padded_block(l_size_max, my_b_size, homo))
      padded_block = 0.0_dp
      padded_block(1:SIZE(bib_c, 1), 1:my_b_size, 1:homo) = bib_c

      DEALLOCATE (bib_c)

      ALLOCATE (gathered_blocks(l_size_max, my_b_size, homo, 0:comm_rep%num_pe - 1))
      gathered_blocks = 0.0_dp

      CALL comm_rep%allgather(padded_block, gathered_blocks)

      DEALLOCATE (padded_block)

      ALLOCATE (bib_c(my_group_l_size, my_b_size, homo))
      bib_c = 0.0_dp

      ! reassemble: place each replica's rows into its destination L range
      DO shift = 0, comm_rep%num_pe - 1
         src_proc = modulo(comm_rep%mepos - shift, comm_rep%num_pe)

         row_first = ranges_info_array(3, shift, comm_exchange%mepos)
         row_last = ranges_info_array(4, shift, comm_exchange%mepos)

         bib_c(row_first:row_last, 1:my_b_size, 1:homo) = &
            gathered_blocks(1:row_last - row_first + 1, 1:my_b_size, 1:homo, src_proc)

      END DO

      DEALLOCATE (gathered_blocks)

      CALL timestop(handle)

   END SUBROUTINE replicate_iak_2intgroup
832
833! **************************************************************************************************
!> \brief Allocates the work arrays and gradient accumulators whose sizes do
!>        not depend on the ij-pair block size
!> \param local_ab integral work array (full virtual range x local virtual block)
!> \param t_ab amplitude array; allocated only for same-spin pairs with forces
!> \param mp2_env MP2 environment holding the gradient accumulators
!> \param homo number of occupied orbitals per spin
!> \param virtual number of virtual orbitals per spin
!> \param my_B_size local virtual block sizes per spin
!> \param my_group_L_size local number of auxiliary basis functions
!> \param calc_forces whether the gradient arrays have to be allocated
!> \param ispin spin channel of orbital i
!> \param jspin spin channel of orbital j
!> \param local_ba extra integral array for the alpha-beta case (1x1 dummy otherwise)
846! **************************************************************************************************
847 SUBROUTINE mp2_ri_allocate_no_blk(local_ab, t_ab, mp2_env, homo, virtual, my_B_size, &
848 my_group_L_size, calc_forces, ispin, jspin, local_ba)
849 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
850 INTENT(OUT) :: local_ab, t_ab
851 TYPE(mp2_type) :: mp2_env
852 INTEGER, INTENT(IN) :: homo(2), virtual(2), my_b_size(2), &
853 my_group_l_size
854 LOGICAL, INTENT(IN) :: calc_forces
855 INTEGER, INTENT(IN) :: ispin, jspin
856 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
857 INTENT(OUT) :: local_ba
858
859 CHARACTER(LEN=*), PARAMETER :: routinen = 'mp2_ri_allocate_no_blk'
860
861 INTEGER :: handle
862
863 CALL timeset(routinen, handle)
864
865 ALLOCATE (local_ab(virtual(ispin), my_b_size(jspin)))
866 local_ab = 0.0_dp
867
868 IF (calc_forces) THEN
869 IF (.NOT. ALLOCATED(mp2_env%ri_grad%P_ij(jspin)%array)) THEN
870 ALLOCATE (mp2_env%ri_grad%P_ij(jspin)%array(homo(ispin), homo(ispin)))
871 mp2_env%ri_grad%P_ij(jspin)%array = 0.0_dp
872 END IF
873 IF (.NOT. ALLOCATED(mp2_env%ri_grad%P_ab(jspin)%array)) THEN
874 ALLOCATE (mp2_env%ri_grad%P_ab(jspin)%array(my_b_size(jspin), virtual(jspin)))
875 mp2_env%ri_grad%P_ab(jspin)%array = 0.0_dp
876 END IF
877 IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_P_ia(jspin)%array)) THEN
878 ALLOCATE (mp2_env%ri_grad%Gamma_P_ia(jspin)%array(my_b_size(jspin), homo(jspin), my_group_l_size))
879 mp2_env%ri_grad%Gamma_P_ia(jspin)%array = 0.0_dp
880 END IF
881
882 IF (ispin == jspin) THEN
883 ! For non-alpha-beta case we need amplitudes
884 ALLOCATE (t_ab(virtual(ispin), my_b_size(jspin)))
885
886 ! That is just a dummy. In that way, we can pass it as array to other routines w/o requirement for allocatable array
887 ALLOCATE (local_ba(1, 1))
888 ELSE
889 ! We need more integrals
890 ALLOCATE (local_ba(virtual(jspin), my_b_size(ispin)))
891 END IF
892 END IF
893 !
894
895 CALL timestop(handle)
896
897 END SUBROUTINE mp2_ri_allocate_no_blk
898
899! **************************************************************************************************
900!> \brief ...
901!> \param dimen_RI ...
902!> \param my_B_size ...
903!> \param block_size ...
904!> \param local_i_aL ...
905!> \param local_j_aL ...
906!> \param calc_forces ...
907!> \param Y_i_aP ...
908!> \param Y_j_aP ...
909!> \param ispin ...
910!> \param jspin ...
911! **************************************************************************************************
912 SUBROUTINE mp2_ri_allocate_blk(dimen_RI, my_B_size, block_size, &
913 local_i_aL, local_j_aL, calc_forces, &
914 Y_i_aP, Y_j_aP, ispin, jspin)
915 INTEGER, INTENT(IN) :: dimen_ri, my_b_size(2), block_size
916 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
917 INTENT(OUT) :: local_i_al, local_j_al
918 LOGICAL, INTENT(IN) :: calc_forces
919 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
920 INTENT(OUT) :: y_i_ap, y_j_ap
921 INTEGER, INTENT(IN) :: ispin, jspin
922
923 CHARACTER(LEN=*), PARAMETER :: routinen = 'mp2_ri_allocate_blk'
924
925 INTEGER :: handle
926
927 CALL timeset(routinen, handle)
928
929 ALLOCATE (local_i_al(dimen_ri, my_b_size(ispin), block_size))
930 local_i_al = 0.0_dp
931 ALLOCATE (local_j_al(dimen_ri, my_b_size(jspin), block_size))
932 local_j_al = 0.0_dp
933
934 IF (calc_forces) THEN
935 ALLOCATE (y_i_ap(my_b_size(ispin), dimen_ri, block_size))
936 y_i_ap = 0.0_dp
937 ! For closed-shell, alpha-alpha and beta-beta my_B_size_beta=my_b_size
938 ! Not for alpha-beta case: Y_j_aP_beta is sent and received as Y_j_aP
939 ALLOCATE (y_j_ap(my_b_size(jspin), dimen_ri, block_size))
940 y_j_ap = 0.0_dp
941 END IF
942 !
943
944 CALL timestop(handle)
945
946 END SUBROUTINE mp2_ri_allocate_blk
947
948! **************************************************************************************************
949!> \brief ...
950!> \param my_alpha_beta_case ...
951!> \param total_ij_pairs ...
952!> \param homo ...
953!> \param homo_beta ...
954!> \param block_size ...
955!> \param ngroup ...
956!> \param ij_map ...
957!> \param color_sub ...
958!> \param my_ij_pairs ...
959!> \param my_open_shell_SS ...
960!> \param unit_nr ...
961! **************************************************************************************************
   SUBROUTINE mp2_ri_communication(my_alpha_beta_case, total_ij_pairs, homo, homo_beta, &
                                   block_size, ngroup, ij_map, color_sub, my_ij_pairs, my_open_shell_SS, unit_nr)
      ! Build the global list of occupied-orbital pairs (i,j) and distribute the
      ! work items round-robin over the ngroup subgroups. Pairs are first grouped
      ! into block_size x block_size blocks; pairs not covered by a full block are
      ! appended as single pairs of size 1.
      LOGICAL, INTENT(IN) :: my_alpha_beta_case
      INTEGER, INTENT(OUT) :: total_ij_pairs
      INTEGER, INTENT(IN) :: homo, homo_beta, block_size, ngroup
      ! ij_map(1:2, n) = (i, j) start indices of the n-th work item, ij_map(3, n) = its block size
      INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(OUT) :: ij_map
      INTEGER, INTENT(IN) :: color_sub
      ! number of work items assigned to this subgroup (color_sub)
      INTEGER, INTENT(OUT) :: my_ij_pairs
      LOGICAL, INTENT(IN) :: my_open_shell_ss
      INTEGER, INTENT(IN) :: unit_nr

      CHARACTER(LEN=*), PARAMETER :: routinen = 'mp2_ri_communication'

      INTEGER :: assigned_blocks, first_i_block, first_j_block, handle, iib, ij_block_counter, &
                 ij_counter, jjb, last_i_block, last_j_block, num_block_per_group, num_ij_blocks, &
                 num_ij_blocks_beta, total_ij_block, total_ij_pairs_blocks
      LOGICAL, ALLOCATABLE, DIMENSION(:, :) :: ij_marker

! Calculate the maximum number of ij pairs that have to be computed
! among groups

      CALL timeset(routinen, handle)

      IF (.NOT. my_open_shell_ss .AND. .NOT. my_alpha_beta_case) THEN
         ! Same-spin case including the diagonal (closed shell):
         ! the unique pairs are the upper triangle i <= j
         total_ij_pairs = homo*(1 + homo)/2
         num_ij_blocks = homo/block_size - 1

         first_i_block = 1
         last_i_block = block_size*(num_ij_blocks - 1)

         ! j blocks start one block further, i.e. strictly above the diagonal
         first_j_block = block_size + 1
         last_j_block = block_size*(num_ij_blocks + 1)

         ! count the full off-diagonal blocks
         ij_block_counter = 0
         DO iib = first_i_block, last_i_block, block_size
            DO jjb = iib + block_size, last_j_block, block_size
               ij_block_counter = ij_block_counter + 1
            END DO
         END DO

         total_ij_block = ij_block_counter
         ! only a multiple of ngroup blocks is distributed as blocks so that every
         ! subgroup receives the same number of them
         num_block_per_group = total_ij_block/ngroup
         assigned_blocks = num_block_per_group*ngroup

         ! remaining pairs (unassigned blocks, diagonal region, edges) are single pairs
         total_ij_pairs_blocks = assigned_blocks + (total_ij_pairs - assigned_blocks*(block_size**2))

         ALLOCATE (ij_marker(homo, homo))
         ij_marker = .true.
         ALLOCATE (ij_map(3, total_ij_pairs_blocks))
         ij_map = 0
         ij_counter = 0
         my_ij_pairs = 0
         ! first pass: assign full blocks and mark the pairs they cover
         DO iib = first_i_block, last_i_block, block_size
            DO jjb = iib + block_size, last_j_block, block_size
               ! quota reached: EXIT leaves only the inner loop, the outer loop falls through
               IF (ij_counter + 1 > assigned_blocks) EXIT
               ij_counter = ij_counter + 1
               ij_marker(iib:iib + block_size - 1, jjb:jjb + block_size - 1) = .false.
               ij_map(1, ij_counter) = iib
               ij_map(2, ij_counter) = jjb
               ij_map(3, ij_counter) = block_size
               ! round-robin assignment of work items to subgroups
               IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
            END DO
         END DO
         ! second pass: every pair still marked becomes a size-1 work item
         DO iib = 1, homo
            DO jjb = iib, homo
               IF (ij_marker(iib, jjb)) THEN
                  ij_counter = ij_counter + 1
                  ij_map(1, ij_counter) = iib
                  ij_map(2, ij_counter) = jjb
                  ij_map(3, ij_counter) = 1
                  IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
               END IF
            END DO
         END DO
         DEALLOCATE (ij_marker)

      ELSE IF (.NOT. my_alpha_beta_case) THEN
         ! These are the cases alpha/alpha and beta/beta
         ! We do not have to consider the diagonal elements
         total_ij_pairs = homo*(homo - 1)/2
         num_ij_blocks = (homo - 1)/block_size - 1

         first_i_block = 1
         last_i_block = block_size*(num_ij_blocks - 1)

         ! We shift the blocks to prevent the calculation of the diagonal elements which always give zero
         first_j_block = block_size + 2
         last_j_block = block_size*(num_ij_blocks + 1) + 1

         ! count the full blocks strictly above the (excluded) diagonal
         ij_block_counter = 0
         DO iib = first_i_block, last_i_block, block_size
            DO jjb = iib + block_size + 1, last_j_block, block_size
               ij_block_counter = ij_block_counter + 1
            END DO
         END DO

         total_ij_block = ij_block_counter
         num_block_per_group = total_ij_block/ngroup
         assigned_blocks = num_block_per_group*ngroup

         total_ij_pairs_blocks = assigned_blocks + (total_ij_pairs - assigned_blocks*(block_size**2))

         ALLOCATE (ij_marker(homo, homo))
         ij_marker = .true.
         ALLOCATE (ij_map(3, total_ij_pairs_blocks))
         ij_map = 0
         ij_counter = 0
         my_ij_pairs = 0
         ! first pass: full blocks (round-robin over the subgroups)
         DO iib = first_i_block, last_i_block, block_size
            DO jjb = iib + block_size + 1, last_j_block, block_size
               IF (ij_counter + 1 > assigned_blocks) EXIT
               ij_counter = ij_counter + 1
               ij_marker(iib:iib + block_size - 1, jjb:jjb + block_size - 1) = .false.
               ij_map(1, ij_counter) = iib
               ij_map(2, ij_counter) = jjb
               ij_map(3, ij_counter) = block_size
               IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
            END DO
         END DO
         ! second pass: remaining pairs (j > i only) as single pairs
         DO iib = 1, homo
            DO jjb = iib + 1, homo
               IF (ij_marker(iib, jjb)) THEN
                  ij_counter = ij_counter + 1
                  ij_map(1, ij_counter) = iib
                  ij_map(2, ij_counter) = jjb
                  ij_map(3, ij_counter) = 1
                  IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
               END IF
            END DO
         END DO
         DEALLOCATE (ij_marker)

      ELSE
         ! alpha-beta case: the full rectangular homo x homo_beta grid is needed
         total_ij_pairs = homo*homo_beta
         num_ij_blocks = homo/block_size
         num_ij_blocks_beta = homo_beta/block_size

         first_i_block = 1
         last_i_block = block_size*(num_ij_blocks - 1)

         first_j_block = 1
         last_j_block = block_size*(num_ij_blocks_beta - 1)

         ij_block_counter = 0
         DO iib = first_i_block, last_i_block, block_size
            DO jjb = first_j_block, last_j_block, block_size
               ij_block_counter = ij_block_counter + 1
            END DO
         END DO

         total_ij_block = ij_block_counter
         num_block_per_group = total_ij_block/ngroup
         assigned_blocks = num_block_per_group*ngroup

         total_ij_pairs_blocks = assigned_blocks + (total_ij_pairs - assigned_blocks*(block_size**2))

         ALLOCATE (ij_marker(homo, homo_beta))
         ij_marker = .true.
         ALLOCATE (ij_map(3, total_ij_pairs_blocks))
         ij_map = 0
         ij_counter = 0
         my_ij_pairs = 0
         ! first pass: full blocks (round-robin over the subgroups)
         DO iib = first_i_block, last_i_block, block_size
            DO jjb = first_j_block, last_j_block, block_size
               IF (ij_counter + 1 > assigned_blocks) EXIT
               ij_counter = ij_counter + 1
               ij_marker(iib:iib + block_size - 1, jjb:jjb + block_size - 1) = .false.
               ij_map(1, ij_counter) = iib
               ij_map(2, ij_counter) = jjb
               ij_map(3, ij_counter) = block_size
               IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
            END DO
         END DO
         ! second pass: remaining pairs as single pairs
         DO iib = 1, homo
            DO jjb = 1, homo_beta
               IF (ij_marker(iib, jjb)) THEN
                  ij_counter = ij_counter + 1
                  ij_map(1, ij_counter) = iib
                  ij_map(2, ij_counter) = jjb
                  ij_map(3, ij_counter) = 1
                  IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
               END IF
            END DO
         END DO
         DEALLOCATE (ij_marker)
      END IF

      ! report the fraction of pairs that could not be grouped into blocks
      IF (unit_nr > 0) THEN
         IF (block_size == 1) THEN
            WRITE (unit=unit_nr, fmt="(T3,A,T66,F15.1)") &
               "RI_INFO| Percentage of ij pairs communicated with block size 1:", 100.0_dp
         ELSE
            WRITE (unit=unit_nr, fmt="(T3,A,T66,F15.1)") &
               "RI_INFO| Percentage of ij pairs communicated with block size 1:", &
               100.0_dp*real((total_ij_pairs - assigned_blocks*(block_size**2)), kind=dp)/real(total_ij_pairs, kind=dp)
         END IF
         CALL m_flush(unit_nr)
      END IF

      CALL timestop(handle)

   END SUBROUTINE mp2_ri_communication
1164
1165! **************************************************************************************************
1166!> \brief ...
1167!> \param para_env ...
1168!> \param para_env_sub ...
1169!> \param color_sub ...
1170!> \param sizes_array ...
1171!> \param calc_forces ...
1172!> \param integ_group_size ...
1173!> \param my_group_L_end ...
1174!> \param my_group_L_size ...
1175!> \param my_group_L_size_orig ...
1176!> \param my_group_L_start ...
1177!> \param my_new_group_L_size ...
1178!> \param integ_group_pos2color_sub ...
1179!> \param sizes_array_orig ...
1180!> \param ranges_info_array ...
1181!> \param comm_exchange ...
1182!> \param comm_rep ...
1183!> \param num_integ_group ...
1184! **************************************************************************************************
   SUBROUTINE mp2_ri_create_group(para_env, para_env_sub, color_sub, &
                                  sizes_array, calc_forces, &
                                  integ_group_size, my_group_L_end, &
                                  my_group_L_size, my_group_L_size_orig, my_group_L_start, my_new_group_L_size, &
                                  integ_group_pos2color_sub, &
                                  sizes_array_orig, ranges_info_array, comm_exchange, comm_rep, num_integ_group)
      ! Create the exchange and replication communicators of the integration groups
      ! and gather, for every rank, the auxiliary-function ranges that will be
      ! combined by the replication (consumed later by replicate_iak_2intgroup).
      TYPE(mp_para_env_type), INTENT(IN) :: para_env, para_env_sub
      INTEGER, INTENT(IN) :: color_sub
      ! replaced on exit by the enlarged per-integration-group sizes
      INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(INOUT) :: sizes_array
      LOGICAL, INTENT(IN) :: calc_forces
      INTEGER, INTENT(IN) :: integ_group_size, my_group_l_end
      INTEGER, INTENT(INOUT) :: my_group_l_size
      INTEGER, INTENT(OUT) :: my_group_l_size_orig
      INTEGER, INTENT(IN) :: my_group_l_start
      INTEGER, INTENT(INOUT) :: my_new_group_l_size
      INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT) :: integ_group_pos2color_sub, &
                                                         sizes_array_orig
      ! rows 1:2 = global start/end of each replicated contribution,
      ! rows 3:4 = its local start/end position in the reordered array
      INTEGER, ALLOCATABLE, DIMENSION(:, :, :), &
         INTENT(OUT) :: ranges_info_array
      TYPE(mp_comm_type), INTENT(OUT) :: comm_exchange, comm_rep
      INTEGER, INTENT(IN) :: num_integ_group

      CHARACTER(LEN=*), PARAMETER :: routinen = 'mp2_ri_create_group'

      INTEGER :: handle, iib, proc_receive, proc_shift, &
                 sub_sub_color
      INTEGER, ALLOCATABLE, DIMENSION(:) :: new_sizes_array, rep_ends_array, &
                                            rep_sizes_array, rep_starts_array
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: my_info

      CALL timeset(routinen, handle)
      !
      ! ranks with the same position in their subgroup and the same integration
      ! group end up in one exchange communicator
      sub_sub_color = para_env_sub%mepos*num_integ_group + color_sub/integ_group_size
      CALL comm_exchange%from_split(para_env, sub_sub_color)

      ! create the replication group
      sub_sub_color = para_env_sub%mepos*comm_exchange%num_pe + comm_exchange%mepos
      CALL comm_rep%from_split(para_env, sub_sub_color)

      ! create the new limits for K according to the size
      ! of the integral group

      ! info array for replication
      ALLOCATE (rep_ends_array(0:comm_rep%num_pe - 1))
      ALLOCATE (rep_starts_array(0:comm_rep%num_pe - 1))
      ALLOCATE (rep_sizes_array(0:comm_rep%num_pe - 1))

      CALL comm_rep%allgather(my_group_l_size, rep_sizes_array)
      CALL comm_rep%allgather(my_group_l_start, rep_starts_array)
      CALL comm_rep%allgather(my_group_l_end, rep_ends_array)

      ! calculate my_new_group_L_size according to sizes_array
      my_new_group_l_size = my_group_l_size

      ! Info of this process: global start/end (rows 1:2) and the local
      ! start/end (rows 3:4) of each contribution in the replicated array
      ALLOCATE (my_info(4, 0:comm_rep%num_pe - 1))
      my_info(1, 0) = my_group_l_start
      my_info(2, 0) = my_group_l_end
      my_info(3, 0) = 1
      my_info(4, 0) = my_group_l_size

      ! accumulate the contributions of the other replication-group members using
      ! the same shifted rank order applied later when reordering the gathered data
      DO proc_shift = 1, comm_rep%num_pe - 1
         proc_receive = modulo(comm_rep%mepos - proc_shift, comm_rep%num_pe)

         my_new_group_l_size = my_new_group_l_size + rep_sizes_array(proc_receive)

         my_info(1, proc_shift) = rep_starts_array(proc_receive)
         my_info(2, proc_shift) = rep_ends_array(proc_receive)
         my_info(3, proc_shift) = my_info(4, proc_shift - 1) + 1
         my_info(4, proc_shift) = my_new_group_l_size

      END DO

      ! make the sizes and ranges of all exchange-group members available everywhere
      ALLOCATE (new_sizes_array(0:comm_exchange%num_pe - 1))
      ALLOCATE (ranges_info_array(4, 0:comm_rep%num_pe - 1, 0:comm_exchange%num_pe - 1))
      CALL comm_exchange%allgather(my_new_group_l_size, new_sizes_array)
      CALL comm_exchange%allgather(my_info, ranges_info_array)

      DEALLOCATE (rep_sizes_array)
      DEALLOCATE (rep_starts_array)
      DEALLOCATE (rep_ends_array)

      ! map exchange-communicator position -> original subgroup color
      ALLOCATE (integ_group_pos2color_sub(0:comm_exchange%num_pe - 1))
      CALL comm_exchange%allgather(color_sub, integ_group_pos2color_sub)

      ! keep a copy of the pre-replication sizes (only required with forces)
      IF (calc_forces) THEN
         iib = SIZE(sizes_array)
         ALLOCATE (sizes_array_orig(0:iib - 1))
         sizes_array_orig(:) = sizes_array
      END IF

      ! switch the outputs to the enlarged (replicated) sizes
      my_group_l_size_orig = my_group_l_size
      my_group_l_size = my_new_group_l_size
      DEALLOCATE (sizes_array)

      ALLOCATE (sizes_array(0:integ_group_size - 1))
      sizes_array(:) = new_sizes_array

      DEALLOCATE (new_sizes_array)
      !
      CALL timestop(handle)

   END SUBROUTINE mp2_ri_create_group
1288
1289! **************************************************************************************************
1290!> \brief ...
1291!> \param mp2_env ...
1292!> \param para_env ...
1293!> \param para_env_sub ...
1294!> \param gd_array ...
1295!> \param gd_B_virtual ...
1296!> \param homo ...
1297!> \param dimen_RI ...
1298!> \param unit_nr ...
1299!> \param integ_group_size ...
1300!> \param ngroup ...
1301!> \param num_integ_group ...
1302!> \param virtual ...
1303!> \param calc_forces ...
1304! **************************************************************************************************
   SUBROUTINE mp2_ri_get_integ_group_size(mp2_env, para_env, para_env_sub, gd_array, gd_B_virtual, &
                                          homo, dimen_RI, unit_nr, &
                                          integ_group_size, &
                                          ngroup, num_integ_group, &
                                          virtual, calc_forces)
      ! Determine the integration group size (number of subgroups whose integrals
      ! are replicated together) from a memory estimate and a communication-cost
      ! model, unless the user fixed the number of integration groups in the input.
      TYPE(mp2_type) :: mp2_env
      TYPE(mp_para_env_type), INTENT(IN) :: para_env, para_env_sub
      TYPE(group_dist_d1_type), INTENT(IN) :: gd_array
      TYPE(group_dist_d1_type), DIMENSION(:), INTENT(IN) :: gd_b_virtual
      INTEGER, DIMENSION(:), INTENT(IN) :: homo
      INTEGER, INTENT(IN) :: dimen_ri, unit_nr
      INTEGER, INTENT(OUT) :: integ_group_size, ngroup, num_integ_group
      INTEGER, DIMENSION(:), INTENT(IN) :: virtual
      LOGICAL, INTENT(IN) :: calc_forces

      CHARACTER(LEN=*), PARAMETER :: routinen = 'mp2_ri_get_integ_group_size'

      INTEGER :: block_size, handle, iib, &
                 max_repl_group_size, &
                 min_integ_group_size
      INTEGER(KIND=int_8) :: mem
      LOGICAL :: calc_group_size
      REAL(kind=dp) :: factor, mem_base, mem_min, mem_per_blk, &
                       mem_per_repl, mem_per_repl_blk, &
                       mem_real

      CALL timeset(routinen, handle)

      ngroup = para_env%num_pe/para_env_sub%num_pe

      ! a non-positive user setting, or one that does not divide ngroup,
      ! triggers the automatic determination of the group size
      calc_group_size = mp2_env%ri_mp2%number_integration_groups <= 0
      IF (.NOT. calc_group_size) THEN
         IF (mod(ngroup, mp2_env%ri_mp2%number_integration_groups) /= 0) calc_group_size = .true.
      END IF

      IF (calc_group_size) THEN
         ! smallest available memory (in MiB, rounded up) over all MPI processes
         CALL m_memory(mem)
         mem_real = (mem + 1024*1024 - 1)/(1024*1024)
         CALL para_env%min(mem_real)

         ! memory estimates in MiB: fixed (mem_base), per ij block (mem_per_blk),
         ! per replication-group member (mem_per_repl), per member and block (mem_per_repl_blk)
         mem_base = 0.0_dp
         mem_per_blk = 0.0_dp
         mem_per_repl = 0.0_dp
         mem_per_repl_blk = 0.0_dp

         ! BIB_C_copy
         mem_per_repl = mem_per_repl + maxval(max(real(homo, kind=dp)*maxsize(gd_array), real(dimen_ri, kind=dp))* &
                                              maxsize(gd_b_virtual))*8.0_dp/(1024**2)
         ! BIB_C
         mem_per_repl = mem_per_repl + sum(real(homo, kind=dp)*maxsize(gd_b_virtual))*maxsize(gd_array)*8.0_dp/(1024**2)
         ! BIB_C_rec
         mem_per_repl_blk = mem_per_repl_blk + real(maxval(maxsize(gd_b_virtual)), kind=dp)*maxsize(gd_array)*8.0_dp/(1024**2)
         ! local_i_aL+local_j_aL
         mem_per_blk = mem_per_blk + 2.0_dp*maxval(maxsize(gd_b_virtual))*real(dimen_ri, kind=dp)*8.0_dp/(1024**2)
         ! local_ab
         mem_base = mem_base + maxval(real(virtual, kind=dp)*maxsize(gd_b_virtual))*8.0_dp/(1024**2)
         ! external_ab/external_i_aL
         mem_base = mem_base + real(max(dimen_ri, maxval(virtual)), kind=dp)*maxval(maxsize(gd_b_virtual))*8.0_dp/(1024**2)

         IF (calc_forces) THEN
            ! Gamma_P_ia
            mem_per_repl = mem_per_repl + sum(real(homo, kind=dp)*maxsize(gd_array)* &
                                              maxsize(gd_b_virtual))*8.0_dp/(1024**2)
            ! Y_i_aP+Y_j_aP
            mem_per_blk = mem_per_blk + 2.0_dp*maxval(maxsize(gd_b_virtual))*dimen_ri*8.0_dp/(1024**2)
            ! local_ba/t_ab
            mem_base = mem_base + real(maxval(maxsize(gd_b_virtual)), kind=dp)*max(dimen_ri, maxval(virtual))*8.0_dp/(1024**2)
            ! P_ij
            mem_base = mem_base + sum(real(homo, kind=dp)*homo)*8.0_dp/(1024**2)
            ! P_ab
            mem_base = mem_base + sum(real(virtual, kind=dp)*maxsize(gd_b_virtual))*8.0_dp/(1024**2)
            ! send_ab/send_i_aL
            mem_base = mem_base + real(max(dimen_ri, maxval(virtual)), kind=dp)*maxval(maxsize(gd_b_virtual))*8.0_dp/(1024**2)
         END IF

         ! This a first guess based on the assumption of optimal block sizes
         block_size = max(1, min(floor(sqrt(real(minval(homo), kind=dp))), floor(minval(homo)/sqrt(2.0_dp*ngroup))))
         IF (mp2_env%ri_mp2%block_size > 0) block_size = mp2_env%ri_mp2%block_size

         mem_min = mem_base + mem_per_repl + (mem_per_blk + mem_per_repl_blk)*block_size

         IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T68,F9.2,A4)') 'RI_INFO| Minimum available memory per MPI process:', &
            mem_real, ' MiB'
         IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T68,F9.2,A4)') 'RI_INFO| Minimum required memory per MPI process:', &
            mem_min, ' MiB'

         ! We use the following communication model
         ! Comm(replication)+Comm(collection of data for ij pair)+Comm(contraction)
         ! One can show that the costs of the contraction step are independent of the block size and the replication group size
         ! With gradients, the other two steps are carried out twice (Y_i_aP -> Gamma_i_aP, and dereplication)
         ! NL ... number of RI basis functions
         ! NR ... replication group size
         ! NG ... number of sub groups
         ! NB ... Block size
         ! o ... number of occupied orbitals
         ! Then, we have the communication costs (in multiples of the original BIb_C matrix)
         ! (NR/NG)+(1-(NR/NG))*(o/NB+NB-2)/NG = (NR/NG)*(1-(o/NB+NB-2)/NG)+(o/NB+NB-2)/NG
         ! and with gradients
         ! 2*(NR/NG)+2*(1-(NR/NG))*(o/NB+NB-2)/NG = 2*((NR/NG)*(1-(o/NB+NB-2)/NG)+(o/NB+NB-2)/NG)
         ! We are looking for the minimum of the communication volume,
         ! thus, if the prefactor of (NR/NG) is smaller than zero, use the largest possible replication group size.
         ! If the factor is larger than zero, set the replication group size to 1. (For small systems and a large number of subgroups)
         ! Replication group size = 1 implies that the integration group size equals the number of subgroups

         integ_group_size = ngroup

         ! Multiply everything by homo*virtual to consider differences between spin channels in case of open-shell calculations
         factor = real(sum(homo*virtual), kind=dp) &
                  - sum((real(maxval(homo), kind=dp)/block_size + block_size - 2.0_dp)*homo*virtual)/ngroup
         IF (SIZE(homo) == 2) factor = factor - 2.0_dp*product(homo)/block_size/ngroup*sum(homo*virtual)

         IF (factor <= 0.0_dp) THEN
            ! Remove the fixed memory and divide by the memory per replication group size
            max_repl_group_size = min(max(floor((mem_real - mem_base - mem_per_blk*block_size)/ &
                                                (mem_per_repl + mem_per_repl_blk*block_size)), 1), ngroup)
            ! Convert to an integration group size
            min_integ_group_size = ngroup/max_repl_group_size

            ! Ensure that the integration group size is a divisor of the number of sub groups
            ! (the search terminates at iib = ngroup at the latest, which always divides ngroup)
            DO iib = max(min(min_integ_group_size, ngroup), 1), ngroup
               ! check that the ngroup is a multiple of integ_group_size
               IF (mod(ngroup, iib) == 0) THEN
                  integ_group_size = iib
                  EXIT
               END IF
               integ_group_size = integ_group_size + 1
            END DO
         END IF
      ELSE ! We take the user provided group size
         integ_group_size = ngroup/mp2_env%ri_mp2%number_integration_groups
      END IF

      IF (unit_nr > 0) THEN
         WRITE (unit=unit_nr, fmt="(T3,A,T75,i6)") &
            "RI_INFO| Group size for integral replication:", integ_group_size*para_env_sub%num_pe
         CALL m_flush(unit_nr)
      END IF

      num_integ_group = ngroup/integ_group_size

      CALL timestop(handle)

   END SUBROUTINE mp2_ri_get_integ_group_size
1448
1449! **************************************************************************************************
1450!> \brief ...
1451!> \param mp2_env ...
1452!> \param para_env ...
1453!> \param para_env_sub ...
1454!> \param gd_array ...
1455!> \param gd_B_virtual ...
1456!> \param homo ...
1457!> \param virtual ...
1458!> \param dimen_RI ...
1459!> \param unit_nr ...
1460!> \param block_size ...
1461!> \param ngroup ...
1462!> \param num_integ_group ...
1463!> \param my_open_shell_ss ...
1464!> \param calc_forces ...
1465!> \param buffer_1D ...
1466! **************************************************************************************************
   SUBROUTINE mp2_ri_get_block_size(mp2_env, para_env, para_env_sub, gd_array, gd_B_virtual, &
                                    homo, virtual, dimen_RI, unit_nr, &
                                    block_size, ngroup, num_integ_group, &
                                    my_open_shell_ss, calc_forces, buffer_1D)
      ! Determine the ij-pair block size from the available memory (unless the
      ! user fixed it in the input) and allocate the send/receive buffer buffer_1D.
      TYPE(mp2_type) :: mp2_env
      TYPE(mp_para_env_type), INTENT(IN) :: para_env, para_env_sub
      TYPE(group_dist_d1_type), INTENT(IN) :: gd_array
      TYPE(group_dist_d1_type), DIMENSION(:), INTENT(IN) :: gd_b_virtual
      INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual
      INTEGER, INTENT(IN) :: dimen_ri, unit_nr
      INTEGER, INTENT(OUT) :: block_size, ngroup
      INTEGER, INTENT(IN) :: num_integ_group
      LOGICAL, INTENT(IN) :: my_open_shell_ss, calc_forces
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:), &
         INTENT(OUT) :: buffer_1d

      CHARACTER(LEN=*), PARAMETER :: routinen = 'mp2_ri_get_block_size'

      INTEGER :: best_block_size, handle, num_ij_blocks
      INTEGER(KIND=int_8) :: buffer_size, mem
      REAL(kind=dp) :: mem_base, mem_per_blk, mem_per_repl_blk, &
                       mem_real

      CALL timeset(routinen, handle)

      ngroup = para_env%num_pe/para_env_sub%num_pe

      ! smallest available memory (in MiB, rounded up) over all MPI processes
      CALL m_memory(mem)
      mem_real = (mem + 1024*1024 - 1)/(1024*1024)
      CALL para_env%min(mem_real)

      ! memory estimates in MiB: fixed (mem_base), per ij block (mem_per_blk),
      ! per block and replication-group member (mem_per_repl_blk)
      mem_base = 0.0_dp
      mem_per_blk = 0.0_dp
      mem_per_repl_blk = 0.0_dp

      ! external_ab
      mem_base = mem_base + maxval(maxsize(gd_b_virtual))*max(dimen_ri, maxval(virtual))*8.0_dp/(1024**2)
      ! BIB_C_rec
      mem_per_repl_blk = mem_per_repl_blk + real(maxval(maxsize(gd_b_virtual)), kind=dp)*maxsize(gd_array)*8.0_dp/(1024**2)
      ! local_i_aL+local_j_aL
      mem_per_blk = mem_per_blk + 2.0_dp*maxval(maxsize(gd_b_virtual))*real(dimen_ri, kind=dp)*8.0_dp/(1024**2)
      ! Copy to keep arrays contiguous
      mem_base = mem_base + maxval(maxsize(gd_b_virtual))*max(dimen_ri, maxval(virtual))*8.0_dp/(1024**2)

      IF (calc_forces) THEN
         ! Y_i_aP+Y_j_aP+BIb_C_send
         mem_per_blk = mem_per_blk + 3.0_dp*maxval(maxsize(gd_b_virtual))*dimen_ri*8.0_dp/(1024**2)
         ! send_ab
         mem_base = mem_base + maxval(maxsize(gd_b_virtual))*max(dimen_ri, maxval(virtual))*8.0_dp/(1024**2)
      END IF

      best_block_size = 1

      ! Here we split the memory half for the communication, half for replication
      IF (mp2_env%ri_mp2%block_size > 0) THEN
         ! block size forced by the user input
         best_block_size = mp2_env%ri_mp2%block_size
      ELSE
         ! largest block size that fits into the available memory
         best_block_size = max(floor((mem_real - mem_base)/(mem_per_blk + mem_per_repl_blk*ngroup/num_integ_group)), 1)

         ! shrink the block size until there is at least one full block per subgroup
         DO
            IF (SIZE(homo) == 1) THEN
               IF (.NOT. my_open_shell_ss) THEN
                  ! closed shell: blocks strictly above the diagonal
                  num_ij_blocks = (homo(1)/best_block_size)
                  num_ij_blocks = (num_ij_blocks*num_ij_blocks - num_ij_blocks)/2
               ELSE
                  ! same-spin open shell: the diagonal is excluded
                  num_ij_blocks = ((homo(1) - 1)/best_block_size)
                  num_ij_blocks = (num_ij_blocks*num_ij_blocks - num_ij_blocks)/2
               END IF
            ELSE
               ! alpha-beta case: full rectangular grid of blocks
               num_ij_blocks = product(homo/best_block_size)
            END IF
            ! Enforce at least one large block for each subgroup
            IF ((num_ij_blocks >= ngroup .AND. num_ij_blocks > 0) .OR. best_block_size == 1) THEN
               EXIT
            ELSE
               best_block_size = best_block_size - 1
            END IF
         END DO

         IF (SIZE(homo) == 1) THEN
            IF (my_open_shell_ss) THEN
               ! check that best_block_size is not bigger than sqrt(homo-1)
               ! Diagonal elements do not have to be considered
               best_block_size = min(floor(sqrt(real(homo(1) - 1, kind=dp))), best_block_size)
            ELSE
               ! check that best_block_size is not bigger than sqrt(homo)
               best_block_size = min(floor(sqrt(real(homo(1), kind=dp))), best_block_size)
            END IF
         END IF
      END IF
      block_size = max(1, best_block_size)

      IF (unit_nr > 0) THEN
         WRITE (unit=unit_nr, fmt="(T3,A,T75,i6)") &
            "RI_INFO| Block size:", block_size
         CALL m_flush(unit_nr)
      END IF

      ! Determine recv buffer size (BI_C_recv, external_i_aL, external_ab)
      buffer_size = max(int(maxsize(gd_array), kind=int_8)*block_size, int(max(dimen_ri, maxval(virtual)), kind=int_8)) &
                    *maxval(maxsize(gd_b_virtual))
      ! The send buffer has the same size as the recv buffer
      IF (calc_forces) buffer_size = buffer_size*2
      ALLOCATE (buffer_1d(buffer_size))

      CALL timestop(handle)

   END SUBROUTINE mp2_ri_get_block_size
1575
1576! **************************************************************************************************
1577!> \brief ...
1578!> \param mp2_env ...
1579!> \param para_env_sub ...
1580!> \param gd_B_virtual ...
1581!> \param Eigenval ...
1582!> \param homo ...
1583!> \param dimen_RI ...
1584!> \param iiB ...
1585!> \param jjB ...
1586!> \param my_B_size ...
1587!> \param my_B_virtual_end ...
1588!> \param my_B_virtual_start ...
1589!> \param my_i ...
1590!> \param my_j ...
1591!> \param virtual ...
1592!> \param local_ab ...
1593!> \param t_ab ...
1594!> \param my_local_i_aL ...
1595!> \param my_local_j_aL ...
1596!> \param open_ss ...
1597!> \param Y_i_aP ...
1598!> \param Y_j_aP ...
1599!> \param local_ba ...
1600!> \param ispin ...
1601!> \param jspin ...
1602!> \param dgemm_counter ...
1603!> \param buffer_1D ...
1604! **************************************************************************************************
   SUBROUTINE mp2_update_p_gamma(mp2_env, para_env_sub, gd_B_virtual, &
                                 Eigenval, homo, dimen_RI, iiB, jjB, my_B_size, &
                                 my_B_virtual_end, my_B_virtual_start, my_i, my_j, virtual, local_ab, &
                                 t_ab, my_local_i_aL, my_local_j_aL, open_ss, Y_i_aP, Y_j_aP, &
                                 local_ba, ispin, jspin, dgemm_counter, buffer_1D)
      TYPE(mp2_type) :: mp2_env
      TYPE(mp_para_env_type), INTENT(IN) :: para_env_sub
      TYPE(group_dist_d1_type), DIMENSION(:), INTENT(IN) :: gd_b_virtual
      REAL(kind=dp), DIMENSION(:, :), INTENT(IN) :: eigenval
      INTEGER, DIMENSION(:), INTENT(IN) :: homo
      INTEGER, INTENT(IN) :: dimen_ri, iib, jjb
      INTEGER, DIMENSION(:), INTENT(IN) :: my_b_size, my_b_virtual_end, &
                                           my_b_virtual_start
      INTEGER, INTENT(IN) :: my_i, my_j
      INTEGER, DIMENSION(:), INTENT(IN) :: virtual
      REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
         INTENT(INOUT), TARGET :: local_ab
      REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
         INTENT(IN), TARGET :: t_ab, my_local_i_al, my_local_j_al
      LOGICAL, INTENT(IN) :: open_ss
      REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
         INTENT(INOUT), TARGET :: y_i_ap, y_j_ap, local_ba
      INTEGER, INTENT(IN) :: ispin, jspin
      TYPE(dgemm_counter_type), INTENT(INOUT) :: dgemm_counter
      REAL(kind=dp), CONTIGUOUS, DIMENSION(:), TARGET :: buffer_1d

      CHARACTER(LEN=*), PARAMETER :: routinen = 'mp2_update_P_gamma'

      INTEGER :: a, b, b_global, handle, proc_receive, proc_send, proc_shift, rec_b_size, &
                 rec_b_virtual_end, rec_b_virtual_start, send_b_size, send_b_virtual_end, &
                 send_b_virtual_start
      INTEGER(KIND=int_8) :: offset
      LOGICAL :: alpha_beta
      REAL(kind=dp) :: factor, p_ij_diag
      ! external_ab/send_ab are rank-2 views remapped onto sections of buffer_1D;
      ! buffer_1D must be large enough for the largest (possibly two, separated
      ! by "offset") slabs used below - this is assumed, not checked here.
      REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
         POINTER :: external_ab, send_ab

      CALL timeset(routinen//"_Pia", handle)

      ! Mixed-spin (alpha-beta) pairs take a different code path than
      ! same-spin (closed-shell, alpha-alpha, beta-beta) pairs throughout.
      alpha_beta = .NOT. (ispin == jspin)
      ! Closed shell carries a factor 2 from the spin summation; open shell does not.
      IF (open_ss) THEN
         factor = 1.0_dp
      ELSE
         factor = 2.0_dp
      END IF
      ! divide the (ia|jb) integrals by Delta_ij^ab:
      ! local_ab(a,b) <- -(ia|jb)/(eps_a + eps_b - eps_i - eps_j)
      DO b = 1, my_b_size(jspin)
         b_global = b + my_b_virtual_start(jspin) - 1
         DO a = 1, virtual(ispin)
            local_ab(a, b) = -local_ab(a, b)/ &
                             (eigenval(homo(ispin) + a, ispin) + eigenval(homo(jspin) + b_global, jspin) - &
                              eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, jspin))
         END DO
      END DO
      IF (.NOT. (alpha_beta)) THEN
         ! diagonal pair contribution to the occupied-occupied density block,
         ! contracted against the amplitudes t_ab
         p_ij_diag = -sum(local_ab*t_ab)*factor
      ELSE
         ! update diagonal part of P_ij
         p_ij_diag = -sum(local_ab*local_ab)*mp2_env%scale_S
         ! More integrals needed only for alpha-beta case: local_ba
         ! (same energy denominator, with the roles of the two spins swapped)
         DO b = 1, my_b_size(ispin)
            b_global = b + my_b_virtual_start(ispin) - 1
            DO a = 1, virtual(jspin)
               local_ba(a, b) = -local_ba(a, b)/ &
                                (eigenval(homo(jspin) + a, jspin) + eigenval(homo(ispin) + b_global, ispin) - &
                                 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, jspin))
            END DO
         END DO
      END IF

      ! P_ab and add diagonal part of P_ij

      CALL dgemm_counter_start(dgemm_counter)
      IF (.NOT. (alpha_beta)) THEN
         ! P_ab += t_ab^T * local_ab, restricted to this rank's virtual block
         CALL local_gemm('T', 'N', my_b_size(ispin), my_b_size(ispin), virtual(ispin), 1.0_dp, &
                         t_ab, virtual(ispin), local_ab, virtual(ispin), &
                         1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array(:, &
                my_b_virtual_start(ispin):my_b_virtual_end(ispin)), my_b_size(ispin), mp2_env%local_gemm_ctx)
         mp2_env%ri_grad%P_ij(ispin)%array(my_i + iib - 1, my_i + iib - 1) = &
            mp2_env%ri_grad%P_ij(ispin)%array(my_i + iib - 1, my_i + iib - 1) + p_ij_diag
      ELSE
         ! alpha-beta: both spin channels of P_ab get a contribution,
         ! from local_ba (ispin part) and local_ab (jspin part)
         CALL local_gemm('T', 'N', my_b_size(ispin), my_b_size(ispin), virtual(jspin), mp2_env%scale_S, &
                         local_ba, virtual(jspin), local_ba, virtual(jspin), 1.0_dp, &
                mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_virtual_start(ispin):my_b_virtual_end(ispin)), my_b_size(ispin), &
                         mp2_env%local_gemm_ctx)

         mp2_env%ri_grad%P_ij(ispin)%array(my_i + iib - 1, my_i + iib - 1) = &
            mp2_env%ri_grad%P_ij(ispin)%array(my_i + iib - 1, my_i + iib - 1) + p_ij_diag

         CALL local_gemm('T', 'N', my_b_size(jspin), my_b_size(jspin), virtual(ispin), mp2_env%scale_S, &
                         local_ab, virtual(ispin), local_ab, virtual(ispin), 1.0_dp, &
                mp2_env%ri_grad%P_ab(jspin)%array(:, my_b_virtual_start(jspin):my_b_virtual_end(jspin)), my_b_size(jspin), &
                         mp2_env%local_gemm_ctx)

         mp2_env%ri_grad%P_ij(jspin)%array(my_j + jjb - 1, my_j + jjb - 1) = &
            mp2_env%ri_grad%P_ij(jspin)%array(my_j + jjb - 1, my_j + jjb - 1) + p_ij_diag
      END IF
      ! The summation is over unique pairs. In alpha-beta case, all pairs are unique: subroutine is called for
      ! both i^alpha,j^beta and i^beta,j^alpha. Formally, my_i can be equal to my_j, but they are different
      ! due to spin in alpha-beta case.
      IF ((my_i /= my_j) .AND. (.NOT. alpha_beta)) THEN

         ! off-diagonal pair (i /= j): transpose contribution to P_ab ...
         CALL local_gemm('N', 'T', my_b_size(ispin), virtual(ispin), my_b_size(ispin), 1.0_dp, &
                         t_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
                         local_ab, virtual(ispin), &
                         1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array, my_b_size(ispin), &
                         mp2_env%local_gemm_ctx)

         ! ... and the same diagonal increment on the j entry of P_ij
         mp2_env%ri_grad%P_ij(ispin)%array(my_j + jjb - 1, my_j + jjb - 1) = &
            mp2_env%ri_grad%P_ij(ispin)%array(my_j + jjb - 1, my_j + jjb - 1) + p_ij_diag
      END IF
      ! Ring exchange over the subgroup: in step proc_shift every rank sends its
      ! local integral slab "proc_shift" positions forward and receives the slab
      ! from "proc_shift" positions back, so each rank eventually contracts
      ! against every remote virtual block.
      DO proc_shift = 1, para_env_sub%num_pe - 1
         proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
         proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)

         CALL get_group_dist(gd_b_virtual(jspin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
         CALL get_group_dist(gd_b_virtual(jspin), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)

         ! receive buffer view sized for the incoming slab
         external_ab(1:virtual(ispin), 1:rec_b_size) => buffer_1d(1:int(virtual(ispin), int_8)*rec_b_size)
         external_ab = 0.0_dp

         CALL para_env_sub%sendrecv(local_ab, proc_send, &
                                    external_ab, proc_receive)

         IF (.NOT. (alpha_beta)) THEN
            CALL local_gemm('T', 'N', my_b_size(ispin), rec_b_size, virtual(ispin), 1.0_dp, &
                            t_ab, virtual(ispin), external_ab, virtual(ispin), &
                1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array(:, rec_b_virtual_start:rec_b_virtual_end), my_b_size(ispin), &
                            mp2_env%local_gemm_ctx)
         ELSE
            CALL local_gemm('T', 'N', my_b_size(jspin), rec_b_size, virtual(ispin), mp2_env%scale_S, &
                            local_ab, virtual(ispin), external_ab, virtual(ispin), &
                            1.0_dp, mp2_env%ri_grad%P_ab(jspin)%array(:, rec_b_virtual_start:rec_b_virtual_end), &
                            my_b_size(jspin), mp2_env%local_gemm_ctx)

            ! For alpha-beta part of alpha-density we need a new parallel code
            ! And new external_ab (of a different size)
            CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
            CALL get_group_dist(gd_b_virtual(ispin), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)
            external_ab(1:virtual(jspin), 1:rec_b_size) => buffer_1d(1:int(virtual(jspin), int_8)*rec_b_size)
            external_ab = 0.0_dp
            CALL para_env_sub%sendrecv(local_ba, proc_send, &
                                       external_ab, proc_receive)
            CALL local_gemm('T', 'N', my_b_size(ispin), rec_b_size, virtual(jspin), mp2_env%scale_S, &
                            local_ba, virtual(jspin), external_ab, virtual(jspin), &
                1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array(:, rec_b_virtual_start:rec_b_virtual_end), my_b_size(ispin), &
                            mp2_env%local_gemm_ctx)
         END IF

         IF ((my_i /= my_j) .AND. (.NOT. alpha_beta)) THEN
            ! off-diagonal pair: the transpose contribution belongs to the remote
            ! rank's virtual block, so it is computed here into send_ab, shipped
            ! over, and the matching incoming piece is added to the local P_ab.
            ! external_ab and send_ab share buffer_1D, separated by "offset".
            external_ab(1:my_b_size(ispin), 1:virtual(ispin)) => &
               buffer_1d(1:int(virtual(ispin), int_8)*my_b_size(ispin))
            external_ab = 0.0_dp

            offset = int(virtual(ispin), int_8)*my_b_size(ispin)

            send_ab(1:send_b_size, 1:virtual(ispin)) => buffer_1d(offset + 1:offset + int(send_b_size, int_8)*virtual(ispin))
            send_ab = 0.0_dp

            CALL local_gemm('N', 'T', send_b_size, virtual(ispin), my_b_size(ispin), 1.0_dp, &
                            t_ab(send_b_virtual_start:send_b_virtual_end, :), send_b_size, &
                            local_ab, virtual(ispin), &
                            0.0_dp, send_ab, send_b_size, &
                            mp2_env%local_gemm_ctx)
            CALL para_env_sub%sendrecv(send_ab, proc_send, &
                                       external_ab, proc_receive)

            mp2_env%ri_grad%P_ab(ispin)%array(:, :) = mp2_env%ri_grad%P_ab(ispin)%array + external_ab
         END IF

      END DO
      IF (.NOT. alpha_beta) THEN
         IF (my_i /= my_j) THEN
            CALL dgemm_counter_stop(dgemm_counter, 2*my_b_size(ispin), virtual(ispin), virtual(ispin))
         ELSE
            CALL dgemm_counter_stop(dgemm_counter, my_b_size(ispin), virtual(ispin), virtual(ispin))
         END IF
      ELSE
         CALL dgemm_counter_stop(dgemm_counter, sum(my_b_size), virtual(ispin), virtual(jspin))
      END IF
      CALL timestop(handle)

      ! Now, Gamma_P_ia (made of Y_ia_P)

      CALL timeset(routinen//"_Gamma", handle)
      CALL dgemm_counter_start(dgemm_counter)
      IF (.NOT. alpha_beta) THEN
         ! Alpha-alpha, beta-beta and closed shell
         ! Y_i_aP += t_ab(local block)^T-free contraction with the j integrals
         CALL local_gemm('N', 'T', my_b_size(ispin), dimen_ri, my_b_size(ispin), 1.0_dp, &
                         t_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
                         my_local_j_al, dimen_ri, 1.0_dp, y_i_ap, my_b_size(ispin), &
                         mp2_env%local_gemm_ctx)
      ELSE ! Alpha-beta
         CALL local_gemm('N', 'T', my_b_size(ispin), dimen_ri, my_b_size(jspin), mp2_env%scale_S, &
                         local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
                         my_local_j_al, dimen_ri, 1.0_dp, y_i_ap, my_b_size(ispin), mp2_env%local_gemm_ctx)
         CALL local_gemm('T', 'T', my_b_size(jspin), dimen_ri, my_b_size(ispin), mp2_env%scale_S, &
                         local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
                         my_local_i_al, dimen_ri, 1.0_dp, y_j_ap, my_b_size(jspin), mp2_env%local_gemm_ctx)
      END IF

      ! receive view for the Y exchange below; only needed when there is
      ! more than one rank in the subgroup (offset is set here and reused
      ! inside the following loop)
      IF (para_env_sub%num_pe > 1) THEN
         external_ab(1:my_b_size(ispin), 1:dimen_ri) => buffer_1d(1:int(my_b_size(ispin), int_8)*dimen_ri)
         external_ab = 0.0_dp

         offset = int(my_b_size(ispin), int_8)*dimen_ri
      END IF
      !
      ! ring exchange for the remote contributions to Y_i_aP
      DO proc_shift = 1, para_env_sub%num_pe - 1
         proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
         proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)

         CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
         CALL get_group_dist(gd_b_virtual(ispin), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)

         send_ab(1:send_b_size, 1:dimen_ri) => buffer_1d(offset + 1:offset + int(dimen_ri, int_8)*send_b_size)
         send_ab = 0.0_dp
         IF (.NOT. alpha_beta) THEN
            CALL local_gemm('N', 'T', send_b_size, dimen_ri, my_b_size(ispin), 1.0_dp, &
                            t_ab(send_b_virtual_start:send_b_virtual_end, :), send_b_size, &
                            my_local_j_al, dimen_ri, 0.0_dp, send_ab, send_b_size, &
                            mp2_env%local_gemm_ctx)
            CALL para_env_sub%sendrecv(send_ab, proc_send, external_ab, proc_receive)

            y_i_ap(:, :) = y_i_ap + external_ab

         ELSE ! Alpha-beta case
            ! Alpha-alpha part
            CALL local_gemm('N', 'T', send_b_size, dimen_ri, my_b_size(jspin), mp2_env%scale_S, &
                            local_ab(send_b_virtual_start:send_b_virtual_end, :), send_b_size, &
                            my_local_j_al, dimen_ri, 0.0_dp, send_ab, send_b_size, &
                            mp2_env%local_gemm_ctx)
            CALL para_env_sub%sendrecv(send_ab, proc_send, external_ab, proc_receive)
            y_i_ap(:, :) = y_i_ap + external_ab
         END IF
      END DO

      IF (alpha_beta) THEN
         ! For beta-beta part (in alpha-beta case) we need a new parallel code
         ! (buffer views re-sized for the jspin block)
         IF (para_env_sub%num_pe > 1) THEN
            external_ab(1:my_b_size(jspin), 1:dimen_ri) => buffer_1d(1:int(my_b_size(jspin), int_8)*dimen_ri)
            external_ab = 0.0_dp

            offset = int(my_b_size(jspin), int_8)*dimen_ri
         END IF
         DO proc_shift = 1, para_env_sub%num_pe - 1
            proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
            proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)

            CALL get_group_dist(gd_b_virtual(jspin), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)
            send_ab(1:send_b_size, 1:dimen_ri) => buffer_1d(offset + 1:offset + int(dimen_ri, int_8)*send_b_size)
            send_ab = 0.0_dp
            CALL local_gemm('N', 'T', send_b_size, dimen_ri, my_b_size(ispin), mp2_env%scale_S, &
                            local_ba(send_b_virtual_start:send_b_virtual_end, :), send_b_size, &
                            my_local_i_al, dimen_ri, 0.0_dp, send_ab, send_b_size, &
                            mp2_env%local_gemm_ctx)
            CALL para_env_sub%sendrecv(send_ab, proc_send, external_ab, proc_receive)
            y_j_ap(:, :) = y_j_ap + external_ab

         END DO

         ! Here, we just use approximate bounds. For large systems virtual(ispin) is approx virtual(jspin), same for B_size
         CALL dgemm_counter_stop(dgemm_counter, 3*virtual(ispin), dimen_ri, my_b_size(jspin))
      ELSE
         CALL dgemm_counter_stop(dgemm_counter, virtual(ispin), dimen_ri, my_b_size(ispin))
      END IF

      IF ((my_i /= my_j) .AND. (.NOT. alpha_beta)) THEN
         ! Alpha-alpha, beta-beta and closed shell
         ! off-diagonal pair: Y_j_aP gets the transpose contribution, first from
         ! the local i integrals, then from each remote rank's slab
         CALL dgemm_counter_start(dgemm_counter)
         CALL local_gemm('T', 'T', my_b_size(ispin), dimen_ri, my_b_size(ispin), 1.0_dp, &
                         t_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
                         my_local_i_al, dimen_ri, 1.0_dp, y_j_ap, my_b_size(ispin), &
                         mp2_env%local_gemm_ctx)
         DO proc_shift = 1, para_env_sub%num_pe - 1
            proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
            proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)

            CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)

            external_ab(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri, int_8)*rec_b_size)
            external_ab = 0.0_dp

            CALL para_env_sub%sendrecv(my_local_i_al, proc_send, &
                                       external_ab, proc_receive)

            ! Alpha-alpha, beta-beta and closed shell
            CALL local_gemm('T', 'T', my_b_size(ispin), dimen_ri, rec_b_size, 1.0_dp, &
                            t_ab(rec_b_virtual_start:rec_b_virtual_end, :), rec_b_size, &
                            external_ab, dimen_ri, 1.0_dp, y_j_ap, my_b_size(ispin), mp2_env%local_gemm_ctx)
         END DO

         CALL dgemm_counter_stop(dgemm_counter, my_b_size(ispin), dimen_ri, virtual(ispin))
      END IF

      CALL timestop(handle)
   END SUBROUTINE mp2_update_p_gamma
1902
1903! **************************************************************************************************
1904!> \brief ...
1905!> \param Gamma_P_ia ...
1906!> \param ij_index ...
1907!> \param my_B_size ...
1908!> \param my_block_size ...
1909!> \param my_group_L_size ...
1910!> \param my_i ...
1911!> \param my_ij_pairs ...
1912!> \param ngroup ...
1913!> \param num_integ_group ...
1914!> \param integ_group_pos2color_sub ...
1915!> \param num_ij_pairs ...
1916!> \param ij_map ...
1917!> \param ranges_info_array ...
1918!> \param Y_i_aP ...
1919!> \param comm_exchange ...
1920!> \param sizes_array ...
1921!> \param spin ...
1922!> \param buffer_1D ...
1923! **************************************************************************************************
   SUBROUTINE mp2_redistribute_gamma(Gamma_P_ia, ij_index, my_B_size, &
                                     my_block_size, my_group_L_size, my_i, my_ij_pairs, ngroup, &
                                     num_integ_group, integ_group_pos2color_sub, num_ij_pairs, &
                                     ij_map, ranges_info_array, Y_i_aP, comm_exchange, &
                                     sizes_array, spin, buffer_1D)

      REAL(kind=dp), DIMENSION(:, :, :), INTENT(INOUT) :: gamma_p_ia
      INTEGER, INTENT(IN) :: ij_index, my_b_size, my_block_size, &
                             my_group_l_size, my_i, my_ij_pairs, &
                             ngroup, num_integ_group
      INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(IN) :: integ_group_pos2color_sub, num_ij_pairs
      INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN) :: ij_map
      INTEGER, ALLOCATABLE, DIMENSION(:, :, :), &
         INTENT(IN) :: ranges_info_array
      REAL(kind=dp), DIMENSION(:, :, :), INTENT(IN) :: y_i_ap
      TYPE(mp_comm_type), INTENT(IN) :: comm_exchange
      INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(IN) :: sizes_array
      INTEGER, INTENT(IN) :: spin
      REAL(kind=dp), CONTIGUOUS, DIMENSION(:), TARGET :: buffer_1d

      CHARACTER(LEN=*), PARAMETER :: routinen = 'mp2_redistribute_gamma'

      INTEGER :: end_point, handle, handle2, iib, ij_counter_rec, irep, kkk, lll, lstart_pos, &
                 proc_receive, proc_send, proc_shift, rec_i, rec_ij_index, send_l_size, start_point, tag
      INTEGER(KIND=int_8) :: offset
      ! bi_c_send/bi_c_rec are rank-3 views remapped onto sections of buffer_1D;
      ! buffer_1D must be large enough to hold both (separated by "offset").
      REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :, :), &
         POINTER :: bi_c_rec, bi_c_send

! In alpha-beta case Y_i_aP_beta is sent as Y_j_aP

      CALL timeset(routinen//"_comm2", handle)

      ! fixed message tag for this exchange pattern
      tag = 43

      IF (ij_index <= my_ij_pairs) THEN
         ! something to send
         ! start with myself: add my own Y slab into Gamma for every RI range
         ! this process owns (one range per integration-group replica)
         CALL timeset(routinen//"_comm2_w", handle2)
         DO irep = 0, num_integ_group - 1
            lstart_pos = ranges_info_array(1, irep, comm_exchange%mepos)
            start_point = ranges_info_array(3, irep, comm_exchange%mepos)
            end_point = ranges_info_array(4, irep, comm_exchange%mepos)
!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(kkk,lll,iiB) &
!$OMP SHARED(start_point,end_point,Lstart_pos,my_block_size,&
!$OMP Gamma_P_ia,my_i,my_B_size,Y_i_aP)
            DO kkk = start_point, end_point
               ! lll: position within Y_i_aP corresponding to global RI index kkk
               lll = kkk - start_point + lstart_pos
               DO iib = 1, my_block_size
                  gamma_p_ia(1:my_b_size, my_i + iib - 1, kkk) = &
                     gamma_p_ia(1:my_b_size, my_i + iib - 1, kkk) + &
                     y_i_ap(1:my_b_size, lll, iib)
               END DO
            END DO
!$OMP END PARALLEL DO
         END DO
         CALL timestop(handle2)

         ! Y_i_aP(my_B_size,dimen_RI,block_size)

         ! ring exchange: pack the Y slab destined for proc_send, ship it, and
         ! accumulate whatever arrives from proc_receive into the local Gamma
         DO proc_shift = 1, comm_exchange%num_pe - 1
            proc_send = modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
            proc_receive = modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)

            send_l_size = sizes_array(proc_send)
            bi_c_send(1:my_b_size, 1:my_block_size, 1:send_l_size) => &
               buffer_1d(1:int(my_b_size, int_8)*my_block_size*send_l_size)

            offset = int(my_b_size, int_8)*my_block_size*send_l_size

            CALL timeset(routinen//"_comm2_w", handle2)
            bi_c_send = 0.0_dp
            ! pack: copy the parts of Y_i_aP falling into proc_send's RI ranges
            DO irep = 0, num_integ_group - 1
               lstart_pos = ranges_info_array(1, irep, proc_send)
               start_point = ranges_info_array(3, irep, proc_send)
               end_point = ranges_info_array(4, irep, proc_send)
!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(kkk,lll,iiB) &
!$OMP SHARED(start_point,end_point,Lstart_pos,my_block_size,&
!$OMP BI_C_send,my_B_size,Y_i_aP)
               DO kkk = start_point, end_point
                  lll = kkk - start_point + lstart_pos
                  DO iib = 1, my_block_size
                     bi_c_send(1:my_b_size, iib, kkk) = y_i_ap(1:my_b_size, lll, iib)
                  END DO
               END DO
!$OMP END PARALLEL DO
            END DO
            CALL timestop(handle2)

            rec_ij_index = num_ij_pairs(proc_receive)

            IF (ij_index <= rec_ij_index) THEN
               ! we know that proc_receive has something to send for us, let's see what
               ij_counter_rec = &
                  (ij_index - min(1, integ_group_pos2color_sub(proc_receive)))*ngroup + integ_group_pos2color_sub(proc_receive)

               ! rec_i: first occupied orbital index of the incoming block
               rec_i = ij_map(spin, ij_counter_rec)

               bi_c_rec(1:my_b_size, 1:my_block_size, 1:my_group_l_size) => &
                  buffer_1d(offset + 1:offset + int(my_b_size, int_8)*my_block_size*my_group_l_size)
               bi_c_rec = 0.0_dp

               CALL comm_exchange%sendrecv(bi_c_send, proc_send, &
                                           bi_c_rec, proc_receive, tag)

               ! unpack: accumulate the received slab into the RI ranges this
               ! process owns
               CALL timeset(routinen//"_comm2_w", handle2)
               DO irep = 0, num_integ_group - 1
                  start_point = ranges_info_array(3, irep, comm_exchange%mepos)
                  end_point = ranges_info_array(4, irep, comm_exchange%mepos)
!$OMP PARALLEL WORKSHARE DEFAULT(NONE) SHARED(start_point,end_point,my_block_size,&
!$OMP Gamma_P_ia,rec_i,iiB,my_B_size,BI_C_rec)
                  gamma_p_ia(:, rec_i:rec_i + my_block_size - 1, start_point:end_point) = &
                     gamma_p_ia(:, rec_i:rec_i + my_block_size - 1, start_point:end_point) + &
                     bi_c_rec(1:my_b_size, :, start_point:end_point)
!$OMP END PARALLEL WORKSHARE
               END DO
               CALL timestop(handle2)

            ELSE
               ! we have something to send but nothing to receive
               CALL comm_exchange%send(bi_c_send, proc_send, tag)

            END IF

         END DO

      ELSE
         ! nothing to send, check if we have to receive
         ! (this rank ran out of ij pairs, but partners may still ship data)
         DO proc_shift = 1, comm_exchange%num_pe - 1
            proc_send = modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
            proc_receive = modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
            rec_ij_index = num_ij_pairs(proc_receive)

            IF (ij_index <= rec_ij_index) THEN
               ! we know that proc_receive has something to send for us, let's see what
               ij_counter_rec = &
                  (ij_index - min(1, integ_group_pos2color_sub(proc_receive)))*ngroup + integ_group_pos2color_sub(proc_receive)

               rec_i = ij_map(spin, ij_counter_rec)

               bi_c_rec(1:my_b_size, 1:my_block_size, 1:my_group_l_size) => &
                  buffer_1d(1:int(my_b_size, int_8)*my_block_size*my_group_l_size)

               bi_c_rec = 0.0_dp

               CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)

               ! unpack into the locally owned RI ranges (same as above)
               CALL timeset(routinen//"_comm2_w", handle2)
               DO irep = 0, num_integ_group - 1
                  start_point = ranges_info_array(3, irep, comm_exchange%mepos)
                  end_point = ranges_info_array(4, irep, comm_exchange%mepos)
!$OMP PARALLEL WORKSHARE DEFAULT(NONE) SHARED(start_point,end_point,my_block_size,&
!$OMP Gamma_P_ia,rec_i,iiB,my_B_size,BI_C_rec)
                  gamma_p_ia(:, rec_i:rec_i + my_block_size - 1, start_point:end_point) = &
                     gamma_p_ia(:, rec_i:rec_i + my_block_size - 1, start_point:end_point) + &
                     bi_c_rec(1:my_b_size, :, start_point:end_point)
!$OMP END PARALLEL WORKSHARE
               END DO
               CALL timestop(handle2)

            END IF
         END DO

      END IF
      CALL timestop(handle)

   END SUBROUTINE mp2_redistribute_gamma
2090
2091! **************************************************************************************************
2092!> \brief ...
2093!> \param mp2_env ...
2094!> \param Eigenval ...
2095!> \param homo ...
2096!> \param virtual ...
2097!> \param open_shell ...
2098!> \param beta_beta ...
2099!> \param Bib_C ...
2100!> \param unit_nr ...
2101!> \param dimen_RI ...
2102!> \param my_B_size ...
2103!> \param ngroup ...
2104!> \param my_group_L_size ...
2105!> \param color_sub ...
2106!> \param ranges_info_array ...
2107!> \param comm_exchange ...
2108!> \param para_env_sub ...
2109!> \param para_env ...
2110!> \param my_B_virtual_start ...
2111!> \param my_B_virtual_end ...
2112!> \param sizes_array ...
2113!> \param gd_B_virtual ...
2114!> \param integ_group_pos2color_sub ...
2115!> \param dgemm_counter ...
2116!> \param buffer_1D ...
2117! **************************************************************************************************
2118 SUBROUTINE quasi_degenerate_p_ij(mp2_env, Eigenval, homo, virtual, open_shell, &
2119 beta_beta, Bib_C, unit_nr, dimen_RI, &
2120 my_B_size, ngroup, my_group_L_size, &
2121 color_sub, ranges_info_array, comm_exchange, para_env_sub, para_env, &
2122 my_B_virtual_start, my_B_virtual_end, sizes_array, gd_B_virtual, &
2123 integ_group_pos2color_sub, dgemm_counter, buffer_1D)
2124 TYPE(mp2_type) :: mp2_env
2125 REAL(kind=dp), DIMENSION(:, :), INTENT(IN) :: eigenval
2126 INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual
2127 LOGICAL, INTENT(IN) :: open_shell, beta_beta
2128 TYPE(three_dim_real_array), DIMENSION(:), &
2129 INTENT(IN) :: bib_c
2130 INTEGER, INTENT(IN) :: unit_nr, dimen_ri
2131 INTEGER, DIMENSION(:), INTENT(IN) :: my_b_size
2132 INTEGER, INTENT(IN) :: ngroup, my_group_l_size, color_sub
2133 INTEGER, ALLOCATABLE, DIMENSION(:, :, :), &
2134 INTENT(IN) :: ranges_info_array
2135 TYPE(mp_comm_type), INTENT(IN) :: comm_exchange
2136 TYPE(mp_para_env_type), INTENT(IN) :: para_env_sub, para_env
2137 INTEGER, DIMENSION(:), INTENT(IN) :: my_b_virtual_start, my_b_virtual_end
2138 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(IN) :: sizes_array
2139 TYPE(group_dist_d1_type), DIMENSION(:), INTENT(IN) :: gd_b_virtual
2140 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(IN) :: integ_group_pos2color_sub
2141 TYPE(dgemm_counter_type), INTENT(INOUT) :: dgemm_counter
2142 REAL(kind=dp), CONTIGUOUS, DIMENSION(:), TARGET :: buffer_1d
2143
2144 CHARACTER(LEN=*), PARAMETER :: routinen = 'quasi_degenerate_P_ij'
2145
2146 INTEGER :: a, a_global, b, b_global, block_size, decil, handle, handle2, ijk_counter, &
2147 ijk_counter_send, ijk_index, ispin, kkb, kspin, max_block_size, max_ijk, my_i, my_ijk, &
2148 my_j, my_k, my_last_k(2), my_virtual, nspins, proc_receive, proc_send, proc_shift, &
2149 rec_b_size, rec_b_virtual_end, rec_b_virtual_start, rec_l_size, send_b_size, &
2150 send_b_virtual_end, send_b_virtual_start, send_i, send_ijk_index, send_j, send_k, &
2151 size_b_i, size_b_k, tag, tag2
2152 INTEGER, ALLOCATABLE, DIMENSION(:) :: num_ijk
2153 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ijk_map, send_last_k
2154 LOGICAL :: alpha_beta, do_recv_i, do_recv_j, &
2155 do_recv_k, do_send_i, do_send_j, &
2156 do_send_k
2157 REAL(kind=dp) :: amp_fac, p_ij_elem, t_new, t_start
2158 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
2159 TARGET :: local_ab, local_al_i, local_al_j, t_ab
2160 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: local_al_k
2161 REAL(kind=dp), DIMENSION(:, :), POINTER :: bi_c_rec, external_ab, external_al
2162 REAL(kind=dp), DIMENSION(:, :, :), POINTER :: bi_c_rec_3d
2163
2164 CALL timeset(routinen//"_ij_sing", handle)
2165
2166 tag = 44
2167 tag2 = 45
2168
2169 nspins = SIZE(bib_c)
2170 alpha_beta = (nspins == 2)
2171
2172 ! Set amplitude factor
2173 amp_fac = mp2_env%scale_S + mp2_env%scale_T
2174 IF (open_shell) amp_fac = mp2_env%scale_T
2175
2176 ALLOCATE (send_last_k(2, comm_exchange%num_pe - 1))
2177
2178 ! Loop(s) over orbital triplets
2179 DO ispin = 1, nspins
2180 size_b_i = my_b_size(ispin)
2181 IF (ispin == 1 .AND. alpha_beta) THEN
2182 kspin = 2
2183 ELSE
2184 kspin = 1
2185 END IF
2186 size_b_k = my_b_size(kspin)
2187
2188 ! Find the number of quasi-degenerate orbitals and orbital triplets
2189
2190 CALL find_quasi_degenerate_ij(my_ijk, homo(ispin), homo(kspin), eigenval(:, ispin), mp2_env, ijk_map, unit_nr, ngroup, &
2191 .NOT. beta_beta .AND. ispin /= 2, comm_exchange, num_ijk, max_ijk, color_sub, &
2192 SIZE(buffer_1d), my_group_l_size, size_b_k, para_env, virtual(ispin), size_b_i)
2193
2194 my_virtual = virtual(ispin)
2195 IF (SIZE(ijk_map, 2) > 0) THEN
2196 max_block_size = ijk_map(4, 1)
2197 ELSE
2198 max_block_size = 1
2199 END IF
2200
2201 ALLOCATE (local_al_i(dimen_ri, size_b_i))
2202 ALLOCATE (local_al_j(dimen_ri, size_b_i))
2203 ALLOCATE (local_al_k(dimen_ri, size_b_k, max_block_size))
2204 ALLOCATE (t_ab(my_virtual, size_b_k))
2205
2206 my_last_k = -1
2207 send_last_k = -1
2208
2209 t_start = m_walltime()
2210 DO ijk_index = 1, max_ijk
2211
2212 ! Prediction is unreliable if we are in the first step of the loop
2213 IF (unit_nr > 0 .AND. ijk_index > 1) THEN
2214 decil = ijk_index*10/max_ijk
2215 IF (decil /= (ijk_index - 1)*10/max_ijk) THEN
2216 t_new = m_walltime()
2217 t_new = (t_new - t_start)/60.0_dp*(max_ijk - ijk_index + 1)/(ijk_index - 1)
2218 WRITE (unit_nr, fmt="(T3,A)") "Percentage of finished loop: "// &
2219 cp_to_string(decil*10)//". Minutes left: "//cp_to_string(t_new)
2220 CALL m_flush(unit_nr)
2221 END IF
2222 END IF
2223
2224 IF (ijk_index <= my_ijk) THEN
2225 ! work to be done
2226 ijk_counter = (ijk_index - min(1, color_sub))*ngroup + color_sub
2227 my_i = ijk_map(1, ijk_counter)
2228 my_j = ijk_map(2, ijk_counter)
2229 my_k = ijk_map(3, ijk_counter)
2230 block_size = ijk_map(4, ijk_counter)
2231
2232 do_recv_i = (ispin /= kspin) .OR. my_i < my_k .OR. my_i > my_k + block_size - 1
2233 do_recv_j = (ispin /= kspin) .OR. my_j < my_k .OR. my_j > my_k + block_size - 1
2234 do_recv_k = my_k /= my_last_k(1) .OR. my_k + block_size - 1 /= my_last_k(2)
2235 my_last_k(1) = my_k
2236 my_last_k(2) = my_k + block_size - 1
2237
2238 local_al_i = 0.0_dp
2239 IF (do_recv_i) THEN
2240 CALL fill_local_i_al_2d(local_al_i, ranges_info_array(:, :, comm_exchange%mepos), &
2241 bib_c(ispin)%array(:, :, my_i))
2242 END IF
2243
2244 local_al_j = 0.0_dp
2245 IF (do_recv_j) THEN
2246 CALL fill_local_i_al_2d(local_al_j, ranges_info_array(:, :, comm_exchange%mepos), &
2247 bib_c(ispin)%array(:, :, my_j))
2248 END IF
2249
2250 IF (do_recv_k) THEN
2251 local_al_k = 0.0_dp
2252 CALL fill_local_i_al(local_al_k(:, :, 1:block_size), ranges_info_array(:, :, comm_exchange%mepos), &
2253 bib_c(kspin)%array(:, :, my_k:my_k + block_size - 1))
2254 END IF
2255
2256 CALL timeset(routinen//"_comm", handle2)
2257 DO proc_shift = 1, comm_exchange%num_pe - 1
2258 proc_send = modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
2259 proc_receive = modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
2260
2261 send_ijk_index = num_ijk(proc_send)
2262
2263 rec_l_size = sizes_array(proc_receive)
2264 bi_c_rec(1:rec_l_size, 1:size_b_i) => buffer_1d(1:int(rec_l_size, kind=int_8)*size_b_i)
2265
2266 do_send_i = .false.
2267 do_send_j = .false.
2268 do_send_k = .false.
2269 IF (ijk_index <= send_ijk_index) THEN
2270 ! something to send
2271 ijk_counter_send = (ijk_index - min(1, integ_group_pos2color_sub(proc_send)))* &
2272 ngroup + integ_group_pos2color_sub(proc_send)
2273 send_i = ijk_map(1, ijk_counter_send)
2274 send_j = ijk_map(2, ijk_counter_send)
2275 send_k = ijk_map(3, ijk_counter_send)
2276
2277 do_send_i = (ispin /= kspin) .OR. send_i < send_k .OR. send_i > send_k + block_size - 1
2278 do_send_j = (ispin /= kspin) .OR. send_j < send_k .OR. send_j > send_k + block_size - 1
2279 do_send_k = send_k /= send_last_k(1, proc_shift) .OR. send_k + block_size - 1 /= send_last_k(2, proc_shift)
2280 send_last_k(1, proc_shift) = send_k
2281 send_last_k(2, proc_shift) = send_k + block_size - 1
2282 END IF
2283
2284 ! occupied i
2285 bi_c_rec = 0.0_dp
2286 IF (do_send_i) THEN
2287 IF (do_recv_i) THEN
2288 CALL comm_exchange%sendrecv(bib_c(ispin)%array(:, :, send_i), proc_send, &
2289 bi_c_rec, proc_receive, tag)
2290 ELSE
2291 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_i), proc_send, tag)
2292 END IF
2293 ELSE IF (do_recv_i) THEN
2294 CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)
2295 END IF
2296 IF (do_recv_i) THEN
2297 CALL fill_local_i_al_2d(local_al_i, ranges_info_array(:, :, proc_receive), bi_c_rec)
2298 END IF
2299
2300 ! occupied j
2301 bi_c_rec = 0.0_dp
2302 IF (do_send_j) THEN
2303 IF (do_recv_j) THEN
2304 CALL comm_exchange%sendrecv(bib_c(ispin)%array(:, :, send_j), proc_send, &
2305 bi_c_rec, proc_receive, tag)
2306 ELSE
2307 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_j), proc_send, tag)
2308 END IF
2309 ELSE IF (do_recv_j) THEN
2310 CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)
2311 END IF
2312 IF (do_recv_j) THEN
2313 CALL fill_local_i_al_2d(local_al_j, ranges_info_array(:, :, proc_receive), bi_c_rec)
2314 END IF
2315
2316 ! occupied k
2317 bi_c_rec_3d(1:rec_l_size, 1:size_b_k, 1:block_size) => &
2318 buffer_1d(1:int(rec_l_size, kind=int_8)*size_b_k*block_size)
2319 IF (do_send_k) THEN
2320 IF (do_recv_k) THEN
2321 CALL comm_exchange%sendrecv(bib_c(kspin)%array(:, :, send_k:send_k + block_size - 1), proc_send, &
2322 bi_c_rec_3d, proc_receive, tag)
2323 ELSE
2324 CALL comm_exchange%send(bi_c_rec, proc_receive, tag)
2325 END IF
2326 ELSE IF (do_recv_k) THEN
2327 CALL comm_exchange%recv(bi_c_rec_3d, proc_receive, tag)
2328 END IF
2329 IF (do_recv_k) THEN
2330 CALL fill_local_i_al(local_al_k(:, :, 1:block_size), ranges_info_array(:, :, proc_receive), bi_c_rec_3d)
2331 END IF
2332 END DO
2333
2334 IF (.NOT. do_recv_i) local_al_i(:, :) = local_al_k(:, :, my_i - my_k + 1)
2335 IF (.NOT. do_recv_j) local_al_j(:, :) = local_al_k(:, :, my_j - my_k + 1)
2336 CALL timestop(handle2)
2337
2338 ! expand integrals
2339 DO kkb = 1, block_size
2340 CALL timeset(routinen//"_exp_ik", handle2)
2341 CALL dgemm_counter_start(dgemm_counter)
2342 ALLOCATE (local_ab(my_virtual, size_b_k))
2343 local_ab = 0.0_dp
2344 CALL local_gemm('T', 'N', size_b_i, size_b_k, dimen_ri, 1.0_dp, &
2345 local_al_i, dimen_ri, local_al_k(:, :, kkb), dimen_ri, &
2346 0.0_dp, local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), 1:size_b_k), size_b_i, &
2347 mp2_env%local_gemm_ctx)
2348 DO proc_shift = 1, para_env_sub%num_pe - 1
2349 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2350 proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2351
2352 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
2353
2354 external_al(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri, kind=int_8)*rec_b_size)
2355
2356 CALL comm_exchange%sendrecv(local_al_i, proc_send, &
2357 external_al, proc_receive, tag)
2358
2359 CALL local_gemm('T', 'N', rec_b_size, size_b_k, dimen_ri, 1.0_dp, &
2360 external_al, dimen_ri, local_al_k(:, :, kkb), dimen_ri, &
2361 0.0_dp, local_ab(rec_b_virtual_start:rec_b_virtual_end, 1:size_b_k), rec_b_size, &
2362 mp2_env%local_gemm_ctx)
2363 END DO
2364 CALL dgemm_counter_stop(dgemm_counter, my_virtual, size_b_k, dimen_ri)
2365 CALL timestop(handle2)
2366
2367 ! Amplitudes
2368 CALL timeset(routinen//"_tab", handle2)
2369 t_ab = 0.0_dp
2370 ! Alpha-alpha, beta-beta and closed shell
2371 IF (.NOT. alpha_beta) THEN
2372 DO b = 1, size_b_k
2373 b_global = b + my_b_virtual_start(1) - 1
2374 DO a = 1, my_b_size(1)
2375 a_global = a + my_b_virtual_start(1) - 1
2376 t_ab(a_global, b) = (amp_fac*local_ab(a_global, b) - mp2_env%scale_T*local_ab(b_global, a))/ &
2377 (eigenval(my_i, 1) + eigenval(my_k + kkb - 1, 1) &
2378 - eigenval(homo(1) + a_global, 1) - eigenval(homo(1) + b_global, 1))
2379 END DO
2380 END DO
2381 ELSE
2382 DO b = 1, size_b_k
2383 b_global = b + my_b_virtual_start(kspin) - 1
2384 DO a = 1, my_b_size(ispin)
2385 a_global = a + my_b_virtual_start(ispin) - 1
2386 t_ab(a_global, b) = mp2_env%scale_S*local_ab(a_global, b)/ &
2387 (eigenval(my_i, ispin) + eigenval(my_k + kkb - 1, kspin) &
2388 - eigenval(homo(ispin) + a_global, ispin) - eigenval(homo(kspin) + b_global, kspin))
2389 END DO
2390 END DO
2391 END IF
2392
2393 IF (.NOT. alpha_beta) THEN
2394 DO proc_shift = 1, para_env_sub%num_pe - 1
2395 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2396 proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2397 CALL get_group_dist(gd_b_virtual(1), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
2398 CALL get_group_dist(gd_b_virtual(1), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)
2399
2400 external_ab(1:size_b_i, 1:rec_b_size) => buffer_1d(1:int(size_b_i, kind=int_8)*rec_b_size)
2401 CALL para_env_sub%sendrecv(local_ab(send_b_virtual_start:send_b_virtual_end, 1:size_b_k), proc_send, &
2402 external_ab(1:size_b_i, 1:rec_b_size), proc_receive, tag)
2403
2404 DO b = 1, my_b_size(1)
2405 b_global = b + my_b_virtual_start(1) - 1
2406 DO a = 1, rec_b_size
2407 a_global = a + rec_b_virtual_start - 1
2408 t_ab(a_global, b) = (amp_fac*local_ab(a_global, b) - mp2_env%scale_T*external_ab(b, a))/ &
2409 (eigenval(my_i, 1) + eigenval(my_k + kkb - 1, 1) &
2410 - eigenval(homo(1) + a_global, 1) - eigenval(homo(1) + b_global, 1))
2411 END DO
2412 END DO
2413 END DO
2414 END IF
2415 CALL timestop(handle2)
2416
2417 ! Expand the second set of integrals
2418 CALL timeset(routinen//"_exp_jk", handle2)
2419 local_ab = 0.0_dp
2420 CALL dgemm_counter_start(dgemm_counter)
2421 CALL local_gemm('T', 'N', size_b_i, size_b_k, dimen_ri, 1.0_dp, &
2422 local_al_j, dimen_ri, local_al_k(:, :, kkb), dimen_ri, &
2423 0.0_dp, local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), 1:size_b_k), size_b_i, &
2424 mp2_env%local_gemm_ctx)
2425 DO proc_shift = 1, para_env_sub%num_pe - 1
2426 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2427 proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2428
2429 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
2430
2431 external_al(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri, kind=int_8)*rec_b_size)
2432
2433 CALL comm_exchange%sendrecv(local_al_j, proc_send, &
2434 external_al, proc_receive, tag)
2435 CALL local_gemm('T', 'N', rec_b_size, size_b_k, dimen_ri, 1.0_dp, &
2436 external_al, dimen_ri, local_al_k(:, :, kkb), dimen_ri, &
2437 0.0_dp, local_ab(rec_b_virtual_start:rec_b_virtual_end, 1:size_b_k), rec_b_size, &
2438 mp2_env%local_gemm_ctx)
2439 END DO
2440 CALL dgemm_counter_stop(dgemm_counter, my_virtual, size_b_k, dimen_ri)
2441 CALL timestop(handle2)
2442
2443 CALL timeset(routinen//"_Pij", handle2)
2444 DO b = 1, size_b_k
2445 b_global = b + my_b_virtual_start(kspin) - 1
2446 DO a = 1, my_b_size(ispin)
2447 a_global = a + my_b_virtual_start(ispin) - 1
2448 local_ab(a_global, b) = &
2449 local_ab(a_global, b)/(eigenval(my_j, ispin) + eigenval(my_k + kkb - 1, kspin) &
2450 - eigenval(homo(ispin) + a_global, ispin) - eigenval(homo(kspin) + b_global, kspin))
2451 END DO
2452 END DO
2453 !
2454 p_ij_elem = sum(local_ab*t_ab)
2455 DEALLOCATE (local_ab)
2456 IF ((.NOT. open_shell) .AND. (.NOT. alpha_beta)) THEN
2457 p_ij_elem = p_ij_elem*2.0_dp
2458 END IF
2459 IF (beta_beta) THEN
2460 mp2_env%ri_grad%P_ij(2)%array(my_i, my_j) = mp2_env%ri_grad%P_ij(2)%array(my_i, my_j) - p_ij_elem
2461 mp2_env%ri_grad%P_ij(2)%array(my_j, my_i) = mp2_env%ri_grad%P_ij(2)%array(my_j, my_i) - p_ij_elem
2462 ELSE
2463 mp2_env%ri_grad%P_ij(ispin)%array(my_i, my_j) = mp2_env%ri_grad%P_ij(ispin)%array(my_i, my_j) - p_ij_elem
2464 mp2_env%ri_grad%P_ij(ispin)%array(my_j, my_i) = mp2_env%ri_grad%P_ij(ispin)%array(my_j, my_i) - p_ij_elem
2465 END IF
2466 CALL timestop(handle2)
2467 END DO
2468 ELSE
2469 CALL timeset(routinen//"_comm", handle2)
2470               ! no work to be done, possible messages to be exchanged
2471 DO proc_shift = 1, comm_exchange%num_pe - 1
2472 proc_send = modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
2473 proc_receive = modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
2474
2475 send_ijk_index = num_ijk(proc_send)
2476
2477 IF (ijk_index <= send_ijk_index) THEN
2478                  ! something to send
2479 ijk_counter_send = (ijk_index - min(1, integ_group_pos2color_sub(proc_send)))*ngroup + &
2480 integ_group_pos2color_sub(proc_send)
2481 send_i = ijk_map(1, ijk_counter_send)
2482 send_j = ijk_map(2, ijk_counter_send)
2483 send_k = ijk_map(3, ijk_counter_send)
2484 block_size = ijk_map(4, ijk_counter_send)
2485
2486 do_send_i = (ispin /= kspin) .OR. send_i < send_k .OR. send_i > send_k + block_size - 1
2487 do_send_j = (ispin /= kspin) .OR. send_j < send_k .OR. send_j > send_k + block_size - 1
2488 ! occupied i
2489 IF (do_send_i) THEN
2490 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_i), proc_send, tag)
2491 END IF
2492 ! occupied j
2493 IF (do_send_j) THEN
2494 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_j), proc_send, tag)
2495 END IF
2496 ! occupied k
2497 CALL comm_exchange%send(bib_c(kspin)%array(:, :, send_k:send_k + block_size - 1), proc_send, tag)
2498 END IF
2499
2500 END DO ! proc loop
2501 CALL timestop(handle2)
2502 END IF
2503 END DO ! ijk_index loop
2504 DEALLOCATE (local_al_i)
2505 DEALLOCATE (local_al_j)
2506 DEALLOCATE (local_al_k)
2507 DEALLOCATE (t_ab)
2508 DEALLOCATE (ijk_map)
2509 END DO ! over number of loops (ispin)
2510 CALL timestop(handle)
2511
2512 END SUBROUTINE quasi_degenerate_p_ij
2513
! **************************************************************************************************
!> \brief Builds the list of ijk tasks needed for the non-canonical treatment of
!>        quasi-degenerate occupied pairs (i,j) and distributes the tasks
!>        round-robin over the subgroups; the k index is blocked with a block
!>        size chosen to minimize the communication volume
!> \param my_ijk number of ijk tasks assigned to this subgroup
!> \param homo number of occupied orbitals of the spin channel of i and j
!> \param homo_beta number of occupied orbitals of the spin channel of k
!> \param Eigenval orbital energies of the spin channel of i and j
!> \param mp2_env MP2 environment (provides the threshold eps_canonical)
!> \param ijk_map for each task: i index, j index, first k index, k-block size
!> \param unit_nr output unit (greater than zero on the I/O rank only)
!> \param ngroup number of subgroups sharing the tasks
!> \param do_print_alpha whether the printed pair count is labelled alpha spin
!> \param comm_exchange communicator over which the task counts are gathered
!> \param num_ijk number of tasks of every rank in comm_exchange
!> \param max_ijk largest entry of num_ijk
!> \param color_sub position of this subgroup in the round-robin distribution
!> \param buffer_size size of the available communication buffer in doubles
!> \param my_group_L_size local number of RI basis functions
!> \param B_size_k local virtual block size of the spin channel of k
!> \param para_env global parallel environment (used to agree on the block size)
!> \param virtual total number of virtual orbitals of the spin channel of k
!> \param B_size_i local virtual block size of the spin channel of i and j
! **************************************************************************************************
   SUBROUTINE find_quasi_degenerate_ij(my_ijk, homo, homo_beta, Eigenval, mp2_env, ijk_map, unit_nr, ngroup, &
                                       do_print_alpha, comm_exchange, num_ijk, max_ijk, color_sub, &
                                       buffer_size, my_group_L_size, B_size_k, para_env, virtual, B_size_i)

      INTEGER, INTENT(OUT)                               :: my_ijk
      INTEGER, INTENT(IN)                                :: homo, homo_beta
      REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: eigenval
      TYPE(mp2_type), INTENT(IN)                         :: mp2_env
      INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(OUT) :: ijk_map
      INTEGER, INTENT(IN)                                :: unit_nr, ngroup
      LOGICAL, INTENT(IN)                                :: do_print_alpha
      TYPE(mp_comm_type), INTENT(IN)                     :: comm_exchange
      INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT)    :: num_ijk
      INTEGER, INTENT(OUT)                               :: max_ijk
      INTEGER, INTENT(IN)                                :: color_sub, buffer_size, my_group_l_size, &
                                                            b_size_k
      TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
      INTEGER, INTENT(IN)                                :: virtual, b_size_i

      INTEGER :: block_size, communication_steps, communication_volume, iib, ij_counter, &
                 ijk_counter, jjb, kkb, max_block_size, max_num_k_blocks, min_communication_volume, &
                 my_steps, num_k_blocks, num_sing_ij, total_ijk
      INTEGER(KIND=int_8)                                :: mem
      LOGICAL, ALLOCATABLE, DIMENSION(:, :)              :: ijk_marker

      ALLOCATE (num_ijk(0:comm_exchange%num_pe - 1))

      ! Count the pairs i<j whose orbital-energy gap is below eps_canonical
      num_sing_ij = 0
      DO iib = 1, homo
         ! diagonal elements already updated
         DO jjb = iib + 1, homo
            IF (abs(eigenval(jjb) - eigenval(iib)) < mp2_env%ri_grad%eps_canonical) &
               num_sing_ij = num_sing_ij + 1
         END DO
      END DO

      IF (unit_nr > 0) THEN
         IF (do_print_alpha) THEN
            WRITE (unit=unit_nr, fmt="(T3,A,T75,i6)") &
               "MO_INFO| Number of ij pairs below EPS_CANONICAL:", num_sing_ij
         ELSE
            WRITE (unit=unit_nr, fmt="(T3,A,T75,i6)") &
               "MO_INFO| Number of ij pairs (spin beta) below EPS_CANONICAL:", num_sing_ij
         END IF
      END IF

      ! Determine the block size, first guess: use available buffer
      max_block_size = buffer_size/(my_group_l_size*b_size_k)

      ! Second limit: memory
      CALL m_memory(mem)
      ! Convert to number of doubles
      mem = mem/8
      ! Remove local_ab (2x) and local_aL_i (2x); promote the factors to INT_8
      ! BEFORE multiplying: the previous default-integer products could
      ! silently overflow for large systems (the 2_int_8 factor promoted the
      ! parenthesized expression only after it had been evaluated)
      mem = mem - 2_int_8*(int(virtual, kind=int_8)*b_size_k + int(b_size_i, kind=int_8)*my_group_l_size)
      ! Number of k blocks fitting into the remaining memory; clamp in INT_8 to
      ! [1, max_block_size] so that the narrowing conversion cannot overflow
      mem = mem/(int(my_group_l_size, kind=int_8)*b_size_k)
      max_block_size = int(min(max(mem, 1_int_8), int(max_block_size, kind=int_8)), kind(max_block_size))

      ! Exchange the limit
      CALL para_env%min(max_block_size)

      ! Find now the block size which minimizes the communication volume and then the number of communication steps
      block_size = 1
      min_communication_volume = 3*homo_beta*num_sing_ij
      communication_steps = 3*homo_beta*num_sing_ij
      DO iib = max_block_size, 2, -1
         ! Only a multiple of ngroup blocked tasks can be distributed evenly
         max_num_k_blocks = homo_beta/iib*num_sing_ij
         num_k_blocks = max_num_k_blocks - mod(max_num_k_blocks, ngroup)
         ! A blocked task moves 2 single + iib blocked quantities, every
         ! remaining (i,j,k) triple is treated as a single task (3 quantities)
         communication_volume = num_k_blocks*(2 + iib) + 3*(homo_beta*num_sing_ij - iib*num_k_blocks)
         my_steps = num_k_blocks + homo_beta*num_sing_ij - iib*num_k_blocks
         IF (communication_volume < min_communication_volume) THEN
            block_size = iib
            min_communication_volume = communication_volume
            communication_steps = my_steps
         ELSE IF (communication_volume == min_communication_volume .AND. my_steps < communication_steps) THEN
            block_size = iib
            communication_steps = my_steps
         END IF
      END DO

      IF (unit_nr > 0) THEN
         WRITE (unit=unit_nr, fmt="(T3,A,T75,i6)") &
            "MO_INFO| Block size:", block_size
         CALL m_flush(unit_nr)
      END IF

      ! Calculate number of large blocks
      max_num_k_blocks = homo_beta/block_size*num_sing_ij
      num_k_blocks = max_num_k_blocks - mod(max_num_k_blocks, ngroup)

      total_ijk = num_k_blocks + homo_beta*num_sing_ij - num_k_blocks*block_size
      ALLOCATE (ijk_map(4, total_ijk))
      ijk_map = 0
      ALLOCATE (ijk_marker(homo_beta, num_sing_ij))
      ijk_marker = .true.

      ! First pass: assign the blocked k ranges and mark the covered k values
      my_ijk = 0
      ijk_counter = 0
      ij_counter = 0
      DO iib = 1, homo
         ! diagonal elements already updated
         DO jjb = iib + 1, homo
            IF (abs(eigenval(jjb) - eigenval(iib)) >= mp2_env%ri_grad%eps_canonical) cycle
            ij_counter = ij_counter + 1
            DO kkb = 1, homo_beta - mod(homo_beta, block_size), block_size
               IF (ijk_counter + 1 > num_k_blocks) EXIT
               ijk_counter = ijk_counter + 1
               ijk_marker(kkb:kkb + block_size - 1, ij_counter) = .false.
               ijk_map(1, ijk_counter) = iib
               ijk_map(2, ijk_counter) = jjb
               ijk_map(3, ijk_counter) = kkb
               ijk_map(4, ijk_counter) = block_size
               IF (mod(ijk_counter, ngroup) == color_sub) my_ijk = my_ijk + 1
            END DO
         END DO
      END DO
      ! Second pass: every k value not covered by a block becomes a single task
      ij_counter = 0
      DO iib = 1, homo
         ! diagonal elements already updated
         DO jjb = iib + 1, homo
            IF (abs(eigenval(jjb) - eigenval(iib)) >= mp2_env%ri_grad%eps_canonical) cycle
            ij_counter = ij_counter + 1
            DO kkb = 1, homo_beta
               IF (ijk_marker(kkb, ij_counter)) THEN
                  ijk_counter = ijk_counter + 1
                  ijk_map(1, ijk_counter) = iib
                  ijk_map(2, ijk_counter) = jjb
                  ijk_map(3, ijk_counter) = kkb
                  ijk_map(4, ijk_counter) = 1
                  IF (mod(ijk_counter, ngroup) == color_sub) my_ijk = my_ijk + 1
               END IF
            END DO
         END DO
      END DO

      DEALLOCATE (ijk_marker)

      ! Make the per-rank task counts available to all ranks
      CALL comm_exchange%allgather(my_ijk, num_ijk)
      max_ijk = maxval(num_ijk)

   END SUBROUTINE find_quasi_degenerate_ij
2676
2677END MODULE mp2_ri_gpw
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always returns a positive number. https://gcc....
various routines to log and control the output. The idea is that decisions about where to log should ...
Counters to determine the performance of parallel DGEMMs.
elemental subroutine, public dgemm_counter_init(dgemm_counter, unit_nr, print_info)
Initialize a dgemm_counter.
subroutine, public dgemm_counter_write(dgemm_counter, para_env)
calculate and print flop rates
subroutine, public dgemm_counter_start(dgemm_counter)
start timer of the counter
subroutine, public dgemm_counter_stop(dgemm_counter, size1, size2, size3)
stop timer of the counter and provide matrix sizes
Types to describe group distributions.
elemental integer function, public maxsize(this)
...
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public int_8
Definition kinds.F:54
integer, parameter, public dp
Definition kinds.F:34
2- and 3-center electron repulsion integral routines based on libint2 Currently available operators: ...
pure logical function, public compare_potential_types(potential1, potential2)
Helper function to compare libint_potential_types.
subroutine, public local_gemm(opa, opb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, ctx)
...
integer, parameter, public local_gemm_pu_gpu
subroutine, public local_gemm_set_op_threshold_gpu(ctx, opthresholdgpu)
...
subroutine, public local_gemm_destroy(ctx)
release resources associated to a gemm context
subroutine, public local_gemm_create(ctx, pu)
create a context for handling gemm offloading
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
subroutine, public m_memory(mem)
Returns the total amount of memory [bytes] in use, if known, zero otherwise.
Definition machine.F:332
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition machine.F:106
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition machine.F:123
Interface to the message passing library MPI.
Routines to calculate RI-GPW-MP2 energy using pw.
Definition mp2_ri_gpw.F:14
subroutine, public mp2_ri_gpw_compute_en(emp2_cou, emp2_ex, emp2_s, emp2_t, bib_c, mp2_env, para_env, para_env_sub, color_sub, gd_array, gd_b_virtual, eigenval, nmo, homo, dimen_ri, unit_nr, calc_forces, calc_ex)
...
Definition mp2_ri_gpw.F:81
Routines for calculating RI-MP2 gradients.
subroutine, public complete_gamma(mp2_env, b_ia_q, dimen_ri, homo, virtual, para_env, para_env_sub, ngroup, my_group_l_size, my_group_l_start, my_group_l_end, my_b_size, my_b_virtual_start, gd_array, gd_b_virtual, kspin)
complete the calculation of the Gamma matrices
Types needed for MP2 calculations.
Definition mp2_types.F:14
stores all the information relevant to an MPI environment