39#include "./base/base_uses.f90"
45 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'mp2_ri_gpw'
72 SUBROUTINE mp2_ri_gpw_compute_en(Emp2_Cou, Emp2_EX, Emp2_S, Emp2_T, BIb_C, mp2_env, para_env, para_env_sub, color_sub, &
73 gd_array, gd_B_virtual, &
74 Eigenval, nmo, homo, dimen_RI, unit_nr, calc_forces, calc_ex)
75 REAL(kind=
dp),
INTENT(INOUT) :: emp2_cou, emp2_ex, emp2_s, emp2_t
77 INTENT(INOUT) :: bib_c
80 INTEGER,
INTENT(IN) :: color_sub
82 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo
83 INTEGER,
INTENT(IN) :: nmo
84 REAL(kind=
dp),
DIMENSION(:, :),
INTENT(IN) :: eigenval
86 INTENT(INOUT) :: gd_b_virtual
87 INTEGER,
INTENT(IN) :: dimen_ri, unit_nr
88 LOGICAL,
INTENT(IN) :: calc_forces, calc_ex
90 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_gpw_compute_en'
92 INTEGER :: a, a_global, b, b_global, block_size, decil, end_point, handle, handle2, handle3, &
93 iib, ij_counter, ij_counter_send, ij_index, integ_group_size, ispin, jjb, jspin, &
94 max_ij_pairs, my_block_size, my_group_l_end, my_group_l_size, my_group_l_size_orig, &
95 my_group_l_start, my_i, my_ij_pairs, my_j, my_new_group_l_size, ngroup, nspins, &
96 num_integ_group, proc_receive, proc_send, proc_shift, rec_b_size, rec_b_virtual_end, &
97 rec_b_virtual_start, rec_l_size, send_b_size, send_b_virtual_end, send_b_virtual_start, &
98 send_i, send_ij_index, send_j, start_point, tag, total_ij_pairs
99 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: integ_group_pos2color_sub, my_b_size, &
100 my_b_virtual_end, my_b_virtual_start, num_ij_pairs, sizes_array_orig, virtual
101 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: ij_map
102 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :) :: ranges_info_array
103 LOGICAL :: my_alpha_beta_case, my_beta_beta_case, &
105 REAL(kind=
dp) :: amp_fac, my_emp2_cou, my_emp2_ex, &
106 sym_fac, t_new, t_start
107 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:),
TARGET :: buffer_1d
108 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :), &
109 TARGET :: local_ab, local_ba, t_ab
110 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
111 TARGET :: local_i_al, local_j_al, y_i_ap, y_j_ap
112 REAL(kind=
dp),
CONTIGUOUS,
DIMENSION(:, :), &
113 POINTER :: external_ab, external_i_al
114 REAL(kind=
dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
119 DIMENSION(:) :: b_ia_q
121 CALL timeset(routinen, handle)
125 ALLOCATE (virtual(nspins))
126 virtual(:) = nmo - homo(:)
128 ALLOCATE (my_b_size(nspins), my_b_virtual_start(nspins), my_b_virtual_end(nspins))
131 my_b_virtual_start(ispin), my_b_virtual_end(ispin), my_b_size(ispin))
134 CALL get_group_dist(gd_array, color_sub, my_group_l_start, my_group_l_end, my_group_l_size)
141 CALL mp2_env%local_gemm_ctx%set_op_threshold_gpu(128*128*128*2)
143 CALL mp2_ri_get_integ_group_size( &
144 mp2_env, para_env, para_env_sub, gd_array, gd_b_virtual, &
145 homo, dimen_ri, unit_nr, &
146 integ_group_size, ngroup, &
147 num_integ_group, virtual, calc_forces)
151 CALL mp2_ri_create_group( &
152 para_env, para_env_sub, color_sub, &
153 gd_array%sizes, calc_forces, &
154 integ_group_size, my_group_l_end, &
155 my_group_l_size, my_group_l_size_orig, my_group_l_start, my_new_group_l_size, &
156 integ_group_pos2color_sub, sizes_array_orig, &
157 ranges_info_array, comm_exchange, comm_rep, num_integ_group)
164 CALL replicate_iak_2intgroup(bib_c(jspin)%array, comm_exchange, comm_rep, &
165 homo(jspin), gd_array%sizes, my_b_size(jspin), &
166 my_group_l_size, ranges_info_array)
170 IF (unit_nr > 0)
THEN
171 IF (nspins == 1)
THEN
172 WRITE (unit_nr, *)
"Start loop run"
173 ELSE IF (ispin == 1 .AND. jspin == 1)
THEN
174 WRITE (unit_nr, *)
"Start loop run alpha-alpha"
175 ELSE IF (ispin == 1 .AND. jspin == 2)
THEN
176 WRITE (unit_nr, *)
"Start loop run alpha-beta"
177 ELSE IF (ispin == 2 .AND. jspin == 2)
THEN
178 WRITE (unit_nr, *)
"Start loop run beta-beta"
183 my_open_shell_ss = (nspins == 2) .AND. (ispin == jspin)
189 my_beta_beta_case = .false.
190 my_alpha_beta_case = .false.
191 IF (ispin /= jspin)
THEN
192 my_alpha_beta_case = .true.
193 ELSE IF (my_open_shell_ss)
THEN
194 IF (ispin == 2) my_beta_beta_case = .true.
197 amp_fac = mp2_env%scale_S + mp2_env%scale_T
198 IF (my_alpha_beta_case .OR. my_open_shell_ss) amp_fac = mp2_env%scale_T
200 CALL mp2_ri_allocate_no_blk(local_ab, t_ab, mp2_env, homo, virtual, my_b_size, &
201 my_group_l_size, calc_forces, ispin, jspin, local_ba)
203 CALL mp2_ri_get_block_size( &
204 mp2_env, para_env, para_env_sub, gd_array, gd_b_virtual(ispin:jspin), &
205 homo(ispin:jspin), virtual(ispin:jspin), dimen_ri, unit_nr, block_size, &
206 ngroup, num_integ_group, my_open_shell_ss, calc_forces, buffer_1d)
216 CALL mp2_ri_communication(my_alpha_beta_case, total_ij_pairs, homo(ispin), homo(jspin), &
217 block_size, ngroup, ij_map, color_sub, my_ij_pairs, my_open_shell_ss, unit_nr)
219 ALLOCATE (num_ij_pairs(0:comm_exchange%num_pe - 1))
220 CALL comm_exchange%allgather(my_ij_pairs, num_ij_pairs)
222 max_ij_pairs = maxval(num_ij_pairs)
225 CALL mp2_ri_allocate_blk(dimen_ri, my_b_size, block_size, local_i_al, &
226 local_j_al, calc_forces, y_i_ap, y_j_ap, ispin, jspin)
228 CALL timeset(routinen//
"_RI_loop", handle2)
232 DO ij_index = 1, max_ij_pairs
235 IF (unit_nr > 0 .AND. ij_index > 1)
THEN
236 decil = ij_index*10/max_ij_pairs
237 IF (decil /= (ij_index - 1)*10/max_ij_pairs)
THEN
239 t_new = (t_new - t_start)/60.0_dp*(max_ij_pairs - ij_index + 1)/(ij_index - 1)
240 WRITE (unit_nr, fmt=
"(T3,A)")
"Percentage of finished loop: "// &
246 IF (calc_forces)
THEN
251 IF (ij_index <= my_ij_pairs)
THEN
253 ij_counter = (ij_index - min(1, color_sub))*ngroup + color_sub
254 my_i = ij_map(1, ij_counter)
255 my_j = ij_map(2, ij_counter)
256 my_block_size = ij_map(3, ij_counter)
259 CALL fill_local_i_al(local_i_al(:, :, 1:my_block_size), ranges_info_array(:, :, comm_exchange%mepos), &
260 bib_c(ispin)%array(:, :, my_i:my_i + my_block_size - 1))
263 CALL fill_local_i_al(local_j_al(:, :, 1:my_block_size), ranges_info_array(:, :, comm_exchange%mepos), &
264 bib_c(jspin)%array(:, :, my_j:my_j + my_block_size - 1))
267 CALL timeset(routinen//
"_comm", handle3)
268 DO proc_shift = 1, comm_exchange%num_pe - 1
269 proc_send =
modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
270 proc_receive =
modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
272 send_ij_index = num_ij_pairs(proc_send)
276 IF (ij_index <= send_ij_index)
THEN
277 ij_counter_send = (ij_index - min(1, integ_group_pos2color_sub(proc_send)))*ngroup + &
278 integ_group_pos2color_sub(proc_send)
279 send_i = ij_map(1, ij_counter_send)
280 send_j = ij_map(2, ij_counter_send)
283 bi_c_rec(1:rec_l_size, 1:my_b_size(ispin), 1:my_block_size) => &
284 buffer_1d(1:rec_l_size*my_b_size(ispin)*my_block_size)
286 CALL comm_exchange%sendrecv(bib_c(ispin)%array(:, :, send_i:send_i + my_block_size - 1), &
287 proc_send, bi_c_rec, proc_receive, tag)
289 CALL fill_local_i_al(local_i_al(:, :, 1:my_block_size), ranges_info_array(:, :, proc_receive), &
290 bi_c_rec(:, 1:my_b_size(ispin), :))
293 bi_c_rec(1:rec_l_size, 1:my_b_size(jspin), 1:my_block_size) => &
294 buffer_1d(1:int(rec_l_size,
int_8)*my_b_size(jspin)*my_block_size)
296 CALL comm_exchange%sendrecv(bib_c(jspin)%array(:, :, send_j:send_j + my_block_size - 1), &
297 proc_send, bi_c_rec, proc_receive, tag)
299 CALL fill_local_i_al(local_j_al(:, :, 1:my_block_size), ranges_info_array(:, :, proc_receive), &
300 bi_c_rec(:, 1:my_b_size(jspin), :))
306 bi_c_rec(1:rec_l_size, 1:my_b_size(ispin), 1:my_block_size) => &
307 buffer_1d(1:int(rec_l_size,
int_8)*my_b_size(ispin)*my_block_size)
309 CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)
311 CALL fill_local_i_al(local_i_al(:, :, 1:my_block_size), ranges_info_array(:, :, proc_receive), &
312 bi_c_rec(:, 1:my_b_size(ispin), 1:my_block_size))
315 bi_c_rec(1:rec_l_size, 1:my_b_size(jspin), 1:my_block_size) => &
316 buffer_1d(1:int(rec_l_size,
int_8)*my_b_size(jspin)*my_block_size)
318 CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)
320 CALL fill_local_i_al(local_j_al(:, :, 1:my_block_size), ranges_info_array(:, :, proc_receive), &
321 bi_c_rec(:, 1:my_b_size(jspin), 1:my_block_size))
327 CALL timestop(handle3)
330 DO iib = 1, my_block_size
331 DO jjb = 1, my_block_size
332 CALL timeset(routinen//
"_expansion", handle3)
333 associate(my_local_i_al => local_i_al(:, :, iib), my_local_j_al => local_j_al(:, :, jjb))
337 IF ((my_alpha_beta_case) .AND. (calc_forces))
THEN
341 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', my_b_size(ispin), my_b_size(jspin), dimen_ri, 1.0_dp, &
342 my_local_i_al, dimen_ri, my_local_j_al, dimen_ri, &
343 0.0_dp, local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), &
346 IF (my_alpha_beta_case .AND. calc_forces)
THEN
347 local_ba(my_b_virtual_start(jspin):my_b_virtual_end(jspin), :) = &
348 transpose(local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :))
351 DO proc_shift = 1, para_env_sub%num_pe - 1
352 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
353 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
355 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, &
356 rec_b_virtual_end, rec_b_size)
358 external_i_al(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri,
int_8)*rec_b_size)
359 external_i_al = 0.0_dp
361 CALL para_env_sub%sendrecv(my_local_i_al, proc_send, &
362 external_i_al, proc_receive, tag)
364 CALL mp2_env%local_gemm_ctx%gemm( &
365 'T',
'N', rec_b_size, my_b_size(jspin), dimen_ri, 1.0_dp, &
366 external_i_al, dimen_ri, my_local_j_al, dimen_ri, &
367 0.0_dp, local_ab(rec_b_virtual_start:rec_b_virtual_end, 1:my_b_size(jspin)), rec_b_size)
370 IF (my_alpha_beta_case .AND. calc_forces)
THEN
372 CALL get_group_dist(gd_b_virtual(jspin), proc_receive, rec_b_virtual_start, &
373 rec_b_virtual_end, rec_b_size)
375 external_i_al(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri,
int_8)*rec_b_size)
376 external_i_al = 0.0_dp
378 CALL para_env_sub%sendrecv(my_local_j_al, proc_send, &
379 external_i_al, proc_receive, tag)
381 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', rec_b_size, my_b_size(ispin), dimen_ri, 1.0_dp, &
382 external_i_al, dimen_ri, my_local_i_al, dimen_ri, &
383 0.0_dp, local_ba(rec_b_virtual_start:rec_b_virtual_end, 1:my_b_size(ispin)), rec_b_size)
386 IF (my_alpha_beta_case .AND. calc_forces)
THEN
388 CALL dgemm_counter_stop(dgemm_counter, virtual(ispin), my_b_size(ispin) + my_b_size(jspin), dimen_ri)
392 CALL timestop(handle3)
397 CALL timeset(routinen//
"_ener", handle3)
400 IF (my_i == my_j) sym_fac = 1.0_dp
401 IF (my_alpha_beta_case) sym_fac = 0.5_dp
402 DO b = 1, my_b_size(jspin)
403 b_global = b + my_b_virtual_start(jspin) - 1
404 DO a = 1, virtual(ispin)
405 my_emp2_cou = my_emp2_cou - sym_fac*2.0_dp*local_ab(a, b)**2/ &
406 (eigenval(homo(ispin) + a, ispin) + eigenval(homo(jspin) + b_global, jspin) - &
407 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, jspin))
414 IF (calc_forces .AND. (.NOT. my_alpha_beta_case)) t_ab = 0.0_dp
415 DO b = 1, my_b_size(ispin)
416 b_global = b + my_b_virtual_start(ispin) - 1
417 DO a = 1, my_b_size(ispin)
418 a_global = a + my_b_virtual_start(ispin) - 1
419 my_emp2_ex = my_emp2_ex + sym_fac*local_ab(a_global, b)*local_ab(b_global, a)/ &
420 (eigenval(homo(ispin) + a_global, ispin) + eigenval(homo(ispin) + b_global, ispin) - &
421 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, ispin))
422 IF (calc_forces .AND. (.NOT. my_alpha_beta_case))
THEN
423 t_ab(a_global, b) = -(amp_fac*local_ab(a_global, b) - mp2_env%scale_T*local_ab(b_global, a))/ &
424 (eigenval(homo(ispin) + a_global, ispin) + &
425 eigenval(homo(ispin) + b_global, ispin) - &
426 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, ispin))
431 DO proc_shift = 1, para_env_sub%num_pe - 1
432 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
433 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
436 rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
438 send_b_virtual_start, send_b_virtual_end, send_b_size)
440 external_ab(1:my_b_size(ispin), 1:rec_b_size) => &
441 buffer_1d(1:int(rec_b_size,
int_8)*my_b_size(ispin))
444 CALL para_env_sub%sendrecv(local_ab(send_b_virtual_start:send_b_virtual_end, 1:my_b_size(ispin)), proc_send, &
445 external_ab(1:my_b_size(ispin), 1:rec_b_size), proc_receive, tag)
447 DO b = 1, my_b_size(ispin)
448 b_global = b + my_b_virtual_start(ispin) - 1
450 a_global = a + rec_b_virtual_start - 1
451 my_emp2_ex = my_emp2_ex + sym_fac*local_ab(a_global, b)*external_ab(b, a)/ &
452 (eigenval(homo(ispin) + a_global, ispin) + eigenval(homo(ispin) + b_global, ispin) - &
453 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, ispin))
454 IF (calc_forces .AND. (.NOT. my_alpha_beta_case)) &
455 t_ab(a_global, b) = -(amp_fac*local_ab(a_global, b) - mp2_env%scale_T*external_ab(b, a))/ &
456 (eigenval(homo(ispin) + a_global, ispin) + &
457 eigenval(homo(ispin) + b_global, ispin) - &
458 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, ispin))
463 CALL timestop(handle3)
465 IF (calc_forces)
THEN
467 CALL mp2_update_p_gamma(mp2_env, para_env_sub, gd_b_virtual, &
468 eigenval, homo, dimen_ri, iib, jjb, my_b_size, &
469 my_b_virtual_end, my_b_virtual_start, my_i, my_j, virtual, &
470 local_ab, t_ab, my_local_i_al, my_local_j_al, &
471 my_open_shell_ss, y_i_ap(:, :, iib), y_j_ap(:, :, jjb), local_ba, &
472 ispin, jspin, dgemm_counter, buffer_1d)
485 CALL timeset(routinen//
"_comm", handle3)
488 DO proc_shift = 1, comm_exchange%num_pe - 1
489 proc_send =
modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
490 proc_receive =
modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
492 send_ij_index = num_ij_pairs(proc_send)
494 IF (ij_index <= send_ij_index)
THEN
496 ij_counter_send = (ij_index - min(1, integ_group_pos2color_sub(proc_send)))*ngroup + &
497 integ_group_pos2color_sub(proc_send)
498 send_i = ij_map(1, ij_counter_send)
499 send_j = ij_map(2, ij_counter_send)
502 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_i:send_i + my_block_size - 1), &
505 CALL comm_exchange%send(bib_c(jspin)%array(:, :, send_j:send_j + my_block_size - 1), &
509 CALL timestop(handle3)
513 IF (calc_forces)
THEN
514 CALL mp2_redistribute_gamma(mp2_env%ri_grad%Gamma_P_ia(ispin)%array, ij_index, my_b_size(ispin), &
515 my_block_size, my_group_l_size, my_i, my_ij_pairs, ngroup, &
516 num_integ_group, integ_group_pos2color_sub, num_ij_pairs, &
517 ij_map, ranges_info_array, y_i_ap(:, :, 1:my_block_size), comm_exchange, &
518 gd_array%sizes, 1, buffer_1d)
519 CALL mp2_redistribute_gamma(mp2_env%ri_grad%Gamma_P_ia(jspin)%array, ij_index, my_b_size(jspin), &
520 my_block_size, my_group_l_size, my_j, my_ij_pairs, ngroup, &
521 num_integ_group, integ_group_pos2color_sub, num_ij_pairs, &
522 ij_map, ranges_info_array, y_j_ap(:, :, 1:my_block_size), comm_exchange, &
523 gd_array%sizes, 2, buffer_1d)
527 CALL timestop(handle2)
529 DEALLOCATE (local_i_al)
530 DEALLOCATE (local_j_al)
532 DEALLOCATE (num_ij_pairs)
533 DEALLOCATE (local_ab)
535 IF (calc_forces)
THEN
538 IF (
ALLOCATED(t_ab))
THEN
541 DEALLOCATE (local_ba)
550 CALL quasi_degenerate_p_ij( &
551 mp2_env, eigenval(:, ispin:jspin), homo(ispin:jspin), virtual(ispin:jspin), my_open_shell_ss, &
552 my_beta_beta_case, bib_c(ispin:jspin), unit_nr, dimen_ri, &
553 my_b_size(ispin:jspin), ngroup, my_group_l_size, &
554 color_sub, ranges_info_array, comm_exchange, para_env_sub, para_env, &
555 my_b_virtual_start(ispin:jspin), my_b_virtual_end(ispin:jspin), gd_array%sizes, gd_b_virtual(ispin:jspin), &
556 integ_group_pos2color_sub, dgemm_counter, buffer_1d)
560 DEALLOCATE (buffer_1d)
565 IF (calc_forces .AND. jspin == nspins)
THEN
566 IF (.NOT.
ALLOCATED(b_ia_q))
ALLOCATE (b_ia_q(nspins))
567 ALLOCATE (b_ia_q(ispin)%array(homo(ispin), my_b_size(ispin), my_group_l_size_orig))
568 b_ia_q(ispin)%array = 0.0_dp
569 DO jjb = 1, homo(ispin)
570 DO iib = 1, my_b_size(ispin)
571 b_ia_q(ispin)%array(jjb, iib, 1:my_group_l_size_orig) = &
572 bib_c(ispin)%array(1:my_group_l_size_orig, iib, jjb)
575 DEALLOCATE (bib_c(ispin)%array)
578 ALLOCATE (bib_c(ispin)%array(my_b_size(ispin), homo(ispin), my_group_l_size_orig))
579 DO proc_shift = 1, comm_rep%num_pe - 1
581 proc_send =
modulo(comm_rep%mepos - proc_shift, comm_rep%num_pe)
582 proc_receive =
modulo(comm_rep%mepos + proc_shift, comm_rep%num_pe)
584 start_point = ranges_info_array(3, proc_shift, comm_exchange%mepos)
585 end_point = ranges_info_array(4, proc_shift, comm_exchange%mepos)
587 CALL comm_rep%sendrecv(mp2_env%ri_grad%Gamma_P_ia(ispin)%array(:, :, start_point:end_point), &
588 proc_send, bib_c(ispin)%array, proc_receive, tag)
591 mp2_env%ri_grad%Gamma_P_ia(ispin)%array(:, :, 1:my_group_l_size_orig) = &
592 mp2_env%ri_grad%Gamma_P_ia(ispin)%array(:, :, 1:my_group_l_size_orig) &
593 + bib_c(ispin)%array(:, :, :)
597 bib_c(ispin)%array(:, :, :) = mp2_env%ri_grad%Gamma_P_ia(ispin)%array(:, :, 1:my_group_l_size_orig)
598 DEALLOCATE (mp2_env%ri_grad%Gamma_P_ia(ispin)%array)
599 CALL move_alloc(bib_c(ispin)%array, mp2_env%ri_grad%Gamma_P_ia(ispin)%array)
600 ELSE IF (jspin == nspins)
THEN
601 DEALLOCATE (bib_c(ispin)%array)
604 CALL para_env%sum(my_emp2_cou)
605 CALL para_env%sum(my_emp2_ex)
607 IF (my_open_shell_ss .OR. my_alpha_beta_case)
THEN
608 IF (my_alpha_beta_case)
THEN
609 emp2_s = emp2_s + my_emp2_cou
610 emp2_cou = emp2_cou + my_emp2_cou
612 my_emp2_cou = my_emp2_cou*0.25_dp
613 my_emp2_ex = my_emp2_ex*0.5_dp
614 emp2_t = emp2_t + my_emp2_cou + my_emp2_ex
615 emp2_cou = emp2_cou + my_emp2_cou
616 emp2_ex = emp2_ex + my_emp2_ex
619 emp2_cou = emp2_cou + my_emp2_cou
620 emp2_ex = emp2_ex + my_emp2_ex
626 DEALLOCATE (integ_group_pos2color_sub)
627 DEALLOCATE (ranges_info_array)
629 CALL comm_exchange%free()
632 IF (calc_forces)
THEN
634 DEALLOCATE (gd_array%sizes)
635 iib =
SIZE(sizes_array_orig)
636 ALLOCATE (gd_array%sizes(0:iib - 1))
637 gd_array%sizes(:) = sizes_array_orig
638 DEALLOCATE (sizes_array_orig)
641 my_group_l_size = my_group_l_size_orig
645 CALL complete_gamma(mp2_env, b_ia_q(ispin)%array, dimen_ri, homo(ispin), &
646 virtual(ispin), para_env, para_env_sub, ngroup, &
647 my_group_l_size, my_group_l_start, my_group_l_end, &
648 my_b_size(ispin), my_b_virtual_start(ispin), &
649 gd_array, gd_b_virtual(ispin), &
654 IF (nspins == 1) mp2_env%ri_grad%P_ab(1)%array(:, :) = mp2_env%ri_grad%P_ab(1)%array(:, :)*2.0_dp
657 CALL comm%from_split(para_env, para_env_sub%mepos)
660 CALL comm%sum(mp2_env%ri_grad%P_ab(ispin)%array)
662 CALL para_env%sum(mp2_env%ri_grad%P_ij(ispin)%array)
668 CALL release_group_dist(gd_array)
670 IF (
ALLOCATED(bib_c(ispin)%array))
DEALLOCATE (bib_c(ispin)%array)
671 CALL release_group_dist(gd_b_virtual(ispin))
675 IF (calc_forces)
DEALLOCATE (mp2_env%ri_grad%PQ_half)
676 IF (calc_forces .AND. .NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) &
677 DEALLOCATE (mp2_env%ri_grad%operator_half)
679 CALL dgemm_counter_write(dgemm_counter, para_env)
682 CALL mp2_env%local_gemm_ctx%destroy()
683 CALL timestop(handle)
693 SUBROUTINE fill_local_i_al(local_i_aL, ranges_info_array, BIb_C_rec)
694 REAL(kind=dp),
DIMENSION(:, :, :),
INTENT(INOUT) :: local_i_al
695 INTEGER,
DIMENSION(:, :),
INTENT(IN) :: ranges_info_array
696 REAL(kind=dp),
DIMENSION(:, :, :),
INTENT(IN) :: bib_c_rec
698 CHARACTER(LEN=*),
PARAMETER :: routinen =
'fill_local_i_aL'
700 INTEGER :: end_point, handle, irep, lend_pos, &
701 lstart_pos, start_point
703 CALL timeset(routinen, handle)
705 DO irep = 1,
SIZE(ranges_info_array, 2)
706 lstart_pos = ranges_info_array(1, irep)
707 lend_pos = ranges_info_array(2, irep)
708 start_point = ranges_info_array(3, irep)
709 end_point = ranges_info_array(4, irep)
713 local_i_al(lstart_pos:lend_pos, :, :) = bib_c_rec(start_point:end_point, :, :)
717 CALL timestop(handle)
719 END SUBROUTINE fill_local_i_al
727 SUBROUTINE fill_local_i_al_2d(local_i_aL, ranges_info_array, BIb_C_rec)
728 REAL(kind=dp),
DIMENSION(:, :),
INTENT(INOUT) :: local_i_al
729 INTEGER,
DIMENSION(:, :),
INTENT(IN) :: ranges_info_array
730 REAL(kind=dp),
DIMENSION(:, :),
INTENT(IN) :: bib_c_rec
732 CHARACTER(LEN=*),
PARAMETER :: routinen =
'fill_local_i_aL_2D'
734 INTEGER :: end_point, handle, irep, lend_pos, &
735 lstart_pos, start_point
737 CALL timeset(routinen, handle)
739 DO irep = 1,
SIZE(ranges_info_array, 2)
740 lstart_pos = ranges_info_array(1, irep)
741 lend_pos = ranges_info_array(2, irep)
742 start_point = ranges_info_array(3, irep)
743 end_point = ranges_info_array(4, irep)
747 local_i_al(lstart_pos:lend_pos, :) = bib_c_rec(start_point:end_point, :)
751 CALL timestop(handle)
753 END SUBROUTINE fill_local_i_al_2d
766 SUBROUTINE replicate_iak_2intgroup(BIb_C, comm_exchange, comm_rep, homo, sizes_array, my_B_size, &
767 my_group_L_size, ranges_info_array)
768 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
769 INTENT(INOUT) :: bib_c
770 TYPE(mp_comm_type),
INTENT(IN) :: comm_exchange, comm_rep
771 INTEGER,
INTENT(IN) :: homo
772 INTEGER,
DIMENSION(:),
INTENT(IN) :: sizes_array
773 INTEGER,
INTENT(IN) :: my_b_size, my_group_l_size
774 INTEGER,
DIMENSION(:, 0:, 0:),
INTENT(IN) :: ranges_info_array
776 CHARACTER(LEN=*),
PARAMETER :: routinen =
'replicate_iaK_2intgroup'
778 INTEGER :: end_point, handle, max_l_size, &
779 proc_receive, proc_shift, start_point
780 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: bib_c_copy
781 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :, :) :: bib_c_gather
783 CALL timeset(routinen, handle)
787 max_l_size = maxval(sizes_array)
789 ALLOCATE (bib_c_copy(max_l_size, my_b_size, homo))
791 bib_c_copy(1:
SIZE(bib_c, 1), 1:my_b_size, 1:homo) = bib_c
795 ALLOCATE (bib_c_gather(max_l_size, my_b_size, homo, 0:comm_rep%num_pe - 1))
796 bib_c_gather = 0.0_dp
798 CALL comm_rep%allgather(bib_c_copy, bib_c_gather)
800 DEALLOCATE (bib_c_copy)
802 ALLOCATE (bib_c(my_group_l_size, my_b_size, homo))
806 DO proc_shift = 0, comm_rep%num_pe - 1
807 proc_receive =
modulo(comm_rep%mepos - proc_shift, comm_rep%num_pe)
809 start_point = ranges_info_array(3, proc_shift, comm_exchange%mepos)
810 end_point = ranges_info_array(4, proc_shift, comm_exchange%mepos)
812 bib_c(start_point:end_point, 1:my_b_size, 1:homo) = &
813 bib_c_gather(1:end_point - start_point + 1, 1:my_b_size, 1:homo, proc_receive)
817 DEALLOCATE (bib_c_gather)
819 CALL timestop(handle)
821 END SUBROUTINE replicate_iak_2intgroup
837 SUBROUTINE mp2_ri_allocate_no_blk(local_ab, t_ab, mp2_env, homo, virtual, my_B_size, &
838 my_group_L_size, calc_forces, ispin, jspin, local_ba)
839 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :), &
840 INTENT(OUT) :: local_ab, t_ab
841 TYPE(mp2_type) :: mp2_env
842 INTEGER,
INTENT(IN) :: homo(2), virtual(2), my_b_size(2), &
844 LOGICAL,
INTENT(IN) :: calc_forces
845 INTEGER,
INTENT(IN) :: ispin, jspin
846 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :), &
847 INTENT(OUT) :: local_ba
849 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_allocate_no_blk'
853 CALL timeset(routinen, handle)
855 ALLOCATE (local_ab(virtual(ispin), my_b_size(jspin)))
858 IF (calc_forces)
THEN
859 IF (.NOT.
ALLOCATED(mp2_env%ri_grad%P_ij(jspin)%array))
THEN
860 ALLOCATE (mp2_env%ri_grad%P_ij(jspin)%array(homo(ispin), homo(ispin)))
861 mp2_env%ri_grad%P_ij(jspin)%array = 0.0_dp
863 IF (.NOT.
ALLOCATED(mp2_env%ri_grad%P_ab(jspin)%array))
THEN
864 ALLOCATE (mp2_env%ri_grad%P_ab(jspin)%array(my_b_size(jspin), virtual(jspin)))
865 mp2_env%ri_grad%P_ab(jspin)%array = 0.0_dp
867 IF (.NOT.
ALLOCATED(mp2_env%ri_grad%Gamma_P_ia(jspin)%array))
THEN
868 ALLOCATE (mp2_env%ri_grad%Gamma_P_ia(jspin)%array(my_b_size(jspin), homo(jspin), my_group_l_size))
869 mp2_env%ri_grad%Gamma_P_ia(jspin)%array = 0.0_dp
872 IF (ispin == jspin)
THEN
874 ALLOCATE (t_ab(virtual(ispin), my_b_size(jspin)))
877 ALLOCATE (local_ba(1, 1))
880 ALLOCATE (local_ba(virtual(jspin), my_b_size(ispin)))
885 CALL timestop(handle)
887 END SUBROUTINE mp2_ri_allocate_no_blk
902 SUBROUTINE mp2_ri_allocate_blk(dimen_RI, my_B_size, block_size, &
903 local_i_aL, local_j_aL, calc_forces, &
904 Y_i_aP, Y_j_aP, ispin, jspin)
905 INTEGER,
INTENT(IN) :: dimen_ri, my_b_size(2), block_size
906 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
907 INTENT(OUT) :: local_i_al, local_j_al
908 LOGICAL,
INTENT(IN) :: calc_forces
909 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
910 INTENT(OUT) :: y_i_ap, y_j_ap
911 INTEGER,
INTENT(IN) :: ispin, jspin
913 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_allocate_blk'
917 CALL timeset(routinen, handle)
919 ALLOCATE (local_i_al(dimen_ri, my_b_size(ispin), block_size))
921 ALLOCATE (local_j_al(dimen_ri, my_b_size(jspin), block_size))
924 IF (calc_forces)
THEN
925 ALLOCATE (y_i_ap(my_b_size(ispin), dimen_ri, block_size))
929 ALLOCATE (y_j_ap(my_b_size(jspin), dimen_ri, block_size))
934 CALL timestop(handle)
936 END SUBROUTINE mp2_ri_allocate_blk
952 SUBROUTINE mp2_ri_communication(my_alpha_beta_case, total_ij_pairs, homo, homo_beta, &
953 block_size, ngroup, ij_map, color_sub, my_ij_pairs, my_open_shell_SS, unit_nr)
954 LOGICAL,
INTENT(IN) :: my_alpha_beta_case
955 INTEGER,
INTENT(OUT) :: total_ij_pairs
956 INTEGER,
INTENT(IN) :: homo, homo_beta, block_size, ngroup
957 INTEGER,
ALLOCATABLE,
DIMENSION(:, :),
INTENT(OUT) :: ij_map
958 INTEGER,
INTENT(IN) :: color_sub
959 INTEGER,
INTENT(OUT) :: my_ij_pairs
960 LOGICAL,
INTENT(IN) :: my_open_shell_ss
961 INTEGER,
INTENT(IN) :: unit_nr
963 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_communication'
965 INTEGER :: assigned_blocks, first_i_block, first_j_block, handle, iib, ij_block_counter, &
966 ij_counter, jjb, last_i_block, last_j_block, num_block_per_group, num_ij_blocks, &
967 num_ij_blocks_beta, total_ij_block, total_ij_pairs_blocks
968 LOGICAL,
ALLOCATABLE,
DIMENSION(:, :) :: ij_marker
973 CALL timeset(routinen, handle)
975 IF (.NOT. my_open_shell_ss .AND. .NOT. my_alpha_beta_case)
THEN
976 total_ij_pairs = homo*(1 + homo)/2
977 num_ij_blocks = homo/block_size - 1
980 last_i_block = block_size*(num_ij_blocks - 1)
982 first_j_block = block_size + 1
983 last_j_block = block_size*(num_ij_blocks + 1)
986 DO iib = first_i_block, last_i_block, block_size
987 DO jjb = iib + block_size, last_j_block, block_size
988 ij_block_counter = ij_block_counter + 1
992 total_ij_block = ij_block_counter
993 num_block_per_group = total_ij_block/ngroup
994 assigned_blocks = num_block_per_group*ngroup
996 total_ij_pairs_blocks = assigned_blocks + (total_ij_pairs - assigned_blocks*(block_size**2))
998 ALLOCATE (ij_marker(homo, homo))
1000 ALLOCATE (ij_map(3, total_ij_pairs_blocks))
1004 DO iib = first_i_block, last_i_block, block_size
1005 DO jjb = iib + block_size, last_j_block, block_size
1006 IF (ij_counter + 1 > assigned_blocks)
EXIT
1007 ij_counter = ij_counter + 1
1008 ij_marker(iib:iib + block_size - 1, jjb:jjb + block_size - 1) = .false.
1009 ij_map(1, ij_counter) = iib
1010 ij_map(2, ij_counter) = jjb
1011 ij_map(3, ij_counter) = block_size
1012 IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1017 IF (ij_marker(iib, jjb))
THEN
1018 ij_counter = ij_counter + 1
1019 ij_map(1, ij_counter) = iib
1020 ij_map(2, ij_counter) = jjb
1021 ij_map(3, ij_counter) = 1
1022 IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1026 DEALLOCATE (ij_marker)
1028 ELSE IF (.NOT. my_alpha_beta_case)
THEN
1031 total_ij_pairs = homo*(homo - 1)/2
1032 num_ij_blocks = (homo - 1)/block_size - 1
1035 last_i_block = block_size*(num_ij_blocks - 1)
1038 first_j_block = block_size + 2
1039 last_j_block = block_size*(num_ij_blocks + 1) + 1
1041 ij_block_counter = 0
1042 DO iib = first_i_block, last_i_block, block_size
1043 DO jjb = iib + block_size + 1, last_j_block, block_size
1044 ij_block_counter = ij_block_counter + 1
1048 total_ij_block = ij_block_counter
1049 num_block_per_group = total_ij_block/ngroup
1050 assigned_blocks = num_block_per_group*ngroup
1052 total_ij_pairs_blocks = assigned_blocks + (total_ij_pairs - assigned_blocks*(block_size**2))
1054 ALLOCATE (ij_marker(homo, homo))
1056 ALLOCATE (ij_map(3, total_ij_pairs_blocks))
1060 DO iib = first_i_block, last_i_block, block_size
1061 DO jjb = iib + block_size + 1, last_j_block, block_size
1062 IF (ij_counter + 1 > assigned_blocks)
EXIT
1063 ij_counter = ij_counter + 1
1064 ij_marker(iib:iib + block_size - 1, jjb:jjb + block_size - 1) = .false.
1065 ij_map(1, ij_counter) = iib
1066 ij_map(2, ij_counter) = jjb
1067 ij_map(3, ij_counter) = block_size
1068 IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1072 DO jjb = iib + 1, homo
1073 IF (ij_marker(iib, jjb))
THEN
1074 ij_counter = ij_counter + 1
1075 ij_map(1, ij_counter) = iib
1076 ij_map(2, ij_counter) = jjb
1077 ij_map(3, ij_counter) = 1
1078 IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1082 DEALLOCATE (ij_marker)
1085 total_ij_pairs = homo*homo_beta
1086 num_ij_blocks = homo/block_size
1087 num_ij_blocks_beta = homo_beta/block_size
1090 last_i_block = block_size*(num_ij_blocks - 1)
1093 last_j_block = block_size*(num_ij_blocks_beta - 1)
1095 ij_block_counter = 0
1096 DO iib = first_i_block, last_i_block, block_size
1097 DO jjb = first_j_block, last_j_block, block_size
1098 ij_block_counter = ij_block_counter + 1
1102 total_ij_block = ij_block_counter
1103 num_block_per_group = total_ij_block/ngroup
1104 assigned_blocks = num_block_per_group*ngroup
1106 total_ij_pairs_blocks = assigned_blocks + (total_ij_pairs - assigned_blocks*(block_size**2))
1108 ALLOCATE (ij_marker(homo, homo_beta))
1110 ALLOCATE (ij_map(3, total_ij_pairs_blocks))
1114 DO iib = first_i_block, last_i_block, block_size
1115 DO jjb = first_j_block, last_j_block, block_size
1116 IF (ij_counter + 1 > assigned_blocks)
EXIT
1117 ij_counter = ij_counter + 1
1118 ij_marker(iib:iib + block_size - 1, jjb:jjb + block_size - 1) = .false.
1119 ij_map(1, ij_counter) = iib
1120 ij_map(2, ij_counter) = jjb
1121 ij_map(3, ij_counter) = block_size
1122 IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1126 DO jjb = 1, homo_beta
1127 IF (ij_marker(iib, jjb))
THEN
1128 ij_counter = ij_counter + 1
1129 ij_map(1, ij_counter) = iib
1130 ij_map(2, ij_counter) = jjb
1131 ij_map(3, ij_counter) = 1
1132 IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1136 DEALLOCATE (ij_marker)
1139 IF (unit_nr > 0)
THEN
1140 IF (block_size == 1)
THEN
1141 WRITE (unit=unit_nr, fmt=
"(T3,A,T66,F15.1)") &
1142 "RI_INFO| Percentage of ij pairs communicated with block size 1:", 100.0_dp
1144 WRITE (unit=unit_nr, fmt=
"(T3,A,T66,F15.1)") &
1145 "RI_INFO| Percentage of ij pairs communicated with block size 1:", &
1146 100.0_dp*real((total_ij_pairs - assigned_blocks*(block_size**2)), kind=dp)/real(total_ij_pairs, kind=dp)
1148 CALL m_flush(unit_nr)
1151 CALL timestop(handle)
1153 END SUBROUTINE mp2_ri_communication
1175 SUBROUTINE mp2_ri_create_group(para_env, para_env_sub, color_sub, &
1176 sizes_array, calc_forces, &
1177 integ_group_size, my_group_L_end, &
1178 my_group_L_size, my_group_L_size_orig, my_group_L_start, my_new_group_L_size, &
1179 integ_group_pos2color_sub, &
1180 sizes_array_orig, ranges_info_array, comm_exchange, comm_rep, num_integ_group)
1181 TYPE(mp_para_env_type),
INTENT(IN) :: para_env, para_env_sub
1182 INTEGER,
INTENT(IN) :: color_sub
1183 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(INOUT) :: sizes_array
1184 LOGICAL,
INTENT(IN) :: calc_forces
1185 INTEGER,
INTENT(IN) :: integ_group_size, my_group_l_end
1186 INTEGER,
INTENT(INOUT) :: my_group_l_size
1187 INTEGER,
INTENT(OUT) :: my_group_l_size_orig
1188 INTEGER,
INTENT(IN) :: my_group_l_start
1189 INTEGER,
INTENT(INOUT) :: my_new_group_l_size
1190 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(OUT) :: integ_group_pos2color_sub, &
1192 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :), &
1193 INTENT(OUT) :: ranges_info_array
1194 TYPE(mp_comm_type),
INTENT(OUT) :: comm_exchange, comm_rep
1195 INTEGER,
INTENT(IN) :: num_integ_group
1197 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_create_group'
1199 INTEGER :: handle, iib, proc_receive, proc_shift, &
1201 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: new_sizes_array, rep_ends_array, &
1202 rep_sizes_array, rep_starts_array
1203 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: my_info
1205 CALL timeset(routinen, handle)
1207 sub_sub_color = para_env_sub%mepos*num_integ_group + color_sub/integ_group_size
1208 CALL comm_exchange%from_split(para_env, sub_sub_color)
1211 sub_sub_color = para_env_sub%mepos*comm_exchange%num_pe + comm_exchange%mepos
1212 CALL comm_rep%from_split(para_env, sub_sub_color)
1218 ALLOCATE (rep_ends_array(0:comm_rep%num_pe - 1))
1219 ALLOCATE (rep_starts_array(0:comm_rep%num_pe - 1))
1220 ALLOCATE (rep_sizes_array(0:comm_rep%num_pe - 1))
1222 CALL comm_rep%allgather(my_group_l_size, rep_sizes_array)
1223 CALL comm_rep%allgather(my_group_l_start, rep_starts_array)
1224 CALL comm_rep%allgather(my_group_l_end, rep_ends_array)
1227 my_new_group_l_size = my_group_l_size
1230 ALLOCATE (my_info(4, 0:comm_rep%num_pe - 1))
1231 my_info(1, 0) = my_group_l_start
1232 my_info(2, 0) = my_group_l_end
1234 my_info(4, 0) = my_group_l_size
1236 DO proc_shift = 1, comm_rep%num_pe - 1
1237 proc_receive =
modulo(comm_rep%mepos - proc_shift, comm_rep%num_pe)
1239 my_new_group_l_size = my_new_group_l_size + rep_sizes_array(proc_receive)
1241 my_info(1, proc_shift) = rep_starts_array(proc_receive)
1242 my_info(2, proc_shift) = rep_ends_array(proc_receive)
1243 my_info(3, proc_shift) = my_info(4, proc_shift - 1) + 1
1244 my_info(4, proc_shift) = my_new_group_l_size
1248 ALLOCATE (new_sizes_array(0:comm_exchange%num_pe - 1))
1249 ALLOCATE (ranges_info_array(4, 0:comm_rep%num_pe - 1, 0:comm_exchange%num_pe - 1))
1250 CALL comm_exchange%allgather(my_new_group_l_size, new_sizes_array)
1251 CALL comm_exchange%allgather(my_info, ranges_info_array)
1253 DEALLOCATE (rep_sizes_array)
1254 DEALLOCATE (rep_starts_array)
1255 DEALLOCATE (rep_ends_array)
1257 ALLOCATE (integ_group_pos2color_sub(0:comm_exchange%num_pe - 1))
1258 CALL comm_exchange%allgather(color_sub, integ_group_pos2color_sub)
1260 IF (calc_forces)
THEN
1261 iib =
SIZE(sizes_array)
1262 ALLOCATE (sizes_array_orig(0:iib - 1))
1263 sizes_array_orig(:) = sizes_array
1266 my_group_l_size_orig = my_group_l_size
1267 my_group_l_size = my_new_group_l_size
1268 DEALLOCATE (sizes_array)
1270 ALLOCATE (sizes_array(0:integ_group_size - 1))
1271 sizes_array(:) = new_sizes_array
1273 DEALLOCATE (new_sizes_array)
1275 CALL timestop(handle)
1277 END SUBROUTINE mp2_ri_create_group
1295 SUBROUTINE mp2_ri_get_integ_group_size(mp2_env, para_env, para_env_sub, gd_array, gd_B_virtual, &
1296 homo, dimen_RI, unit_nr, &
1298 ngroup, num_integ_group, &
1299 virtual, calc_forces)
1300 TYPE(mp2_type) :: mp2_env
1301 TYPE(mp_para_env_type),
INTENT(IN) :: para_env, para_env_sub
1302 TYPE(group_dist_d1_type),
INTENT(IN) :: gd_array
1303 TYPE(group_dist_d1_type),
DIMENSION(:),
INTENT(IN) :: gd_b_virtual
1304 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo
1305 INTEGER,
INTENT(IN) :: dimen_ri, unit_nr
1306 INTEGER,
INTENT(OUT) :: integ_group_size, ngroup, num_integ_group
1307 INTEGER,
DIMENSION(:),
INTENT(IN) :: virtual
1308 LOGICAL,
INTENT(IN) :: calc_forces
1310 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_get_integ_group_size'
1312 INTEGER :: block_size, handle, iib, &
1313 max_repl_group_size, &
1314 min_integ_group_size
1315 INTEGER(KIND=int_8) :: mem
1316 LOGICAL :: calc_group_size
1317 REAL(kind=dp) :: factor, mem_base, mem_min, mem_per_blk, &
1318 mem_per_repl, mem_per_repl_blk, &
1321 CALL timeset(routinen, handle)
1323 ngroup = para_env%num_pe/para_env_sub%num_pe
1325 calc_group_size = mp2_env%ri_mp2%number_integration_groups <= 0
1326 IF (.NOT. calc_group_size)
THEN
1327 IF (mod(ngroup, mp2_env%ri_mp2%number_integration_groups) /= 0) calc_group_size = .true.
1330 IF (calc_group_size)
THEN
1332 mem_real = (mem + 1024*1024 - 1)/(1024*1024)
1333 CALL para_env%min(mem_real)
1336 mem_per_blk = 0.0_dp
1337 mem_per_repl = 0.0_dp
1338 mem_per_repl_blk = 0.0_dp
1341 mem_per_repl = mem_per_repl + maxval(max(real(homo, kind=dp)*maxsize(gd_array), real(dimen_ri, kind=dp))* &
1342 maxsize(gd_b_virtual))*8.0_dp/(1024**2)
1344 mem_per_repl = mem_per_repl + sum(real(homo, kind=dp)*maxsize(gd_b_virtual))*maxsize(gd_array)*8.0_dp/(1024**2)
1346 mem_per_repl_blk = mem_per_repl_blk + real(maxval(maxsize(gd_b_virtual)), kind=dp)*maxsize(gd_array)*8.0_dp/(1024**2)
1348 mem_per_blk = mem_per_blk + 2.0_dp*maxval(maxsize(gd_b_virtual))*real(dimen_ri, kind=dp)*8.0_dp/(1024**2)
1350 mem_base = mem_base + maxval(real(virtual, kind=dp)*maxsize(gd_b_virtual))*8.0_dp/(1024**2)
1352 mem_base = mem_base + real(max(dimen_ri, maxval(virtual)), kind=dp)*maxval(maxsize(gd_b_virtual))*8.0_dp/(1024**2)
1354 IF (calc_forces)
THEN
1356 mem_per_repl = mem_per_repl + sum(real(homo, kind=dp)*maxsize(gd_array)* &
1357 maxsize(gd_b_virtual))*8.0_dp/(1024**2)
1359 mem_per_blk = mem_per_blk + 2.0_dp*maxval(maxsize(gd_b_virtual))*dimen_ri*8.0_dp/(1024**2)
1361 mem_base = mem_base + real(maxval(maxsize(gd_b_virtual)), kind=dp)*max(dimen_ri, maxval(virtual))*8.0_dp/(1024**2)
1363 mem_base = mem_base + sum(real(homo, kind=dp)*homo)*8.0_dp/(1024**2)
1365 mem_base = mem_base + sum(real(virtual, kind=dp)*maxsize(gd_b_virtual))*8.0_dp/(1024**2)
1367 mem_base = mem_base + real(max(dimen_ri, maxval(virtual)), kind=dp)*maxval(maxsize(gd_b_virtual))*8.0_dp/(1024**2)
1371 block_size = max(1, min(floor(sqrt(real(minval(homo), kind=dp))), floor(minval(homo)/sqrt(2.0_dp*ngroup))))
1372 IF (mp2_env%ri_mp2%block_size > 0) block_size = mp2_env%ri_mp2%block_size
1374 mem_min = mem_base + mem_per_repl + (mem_per_blk + mem_per_repl_blk)*block_size
1376 IF (unit_nr > 0)
WRITE (unit_nr,
'(T3,A,T68,F9.2,A4)')
'RI_INFO| Minimum available memory per MPI process:', &
1378 IF (unit_nr > 0)
WRITE (unit_nr,
'(T3,A,T68,F9.2,A4)')
'RI_INFO| Minimum required memory per MPI process:', &
1399 integ_group_size = ngroup
1402 factor = real(sum(homo*virtual), kind=dp) &
1403 - sum((real(maxval(homo), kind=dp)/block_size + block_size - 2.0_dp)*homo*virtual)/ngroup
1404 IF (
SIZE(homo) == 2) factor = factor - 2.0_dp*product(homo)/block_size/ngroup*sum(homo*virtual)
1406 IF (factor <= 0.0_dp)
THEN
1408 max_repl_group_size = min(max(floor((mem_real - mem_base - mem_per_blk*block_size)/ &
1409 (mem_per_repl + mem_per_repl_blk*block_size)), 1), ngroup)
1411 min_integ_group_size = ngroup/max_repl_group_size
1414 DO iib = max(min(min_integ_group_size, ngroup), 1), ngroup
1416 IF (mod(ngroup, iib) == 0)
THEN
1417 integ_group_size = iib
1420 integ_group_size = integ_group_size + 1
1424 integ_group_size = ngroup/mp2_env%ri_mp2%number_integration_groups
1427 IF (unit_nr > 0)
THEN
1428 WRITE (unit=unit_nr, fmt=
"(T3,A,T75,i6)") &
1429 "RI_INFO| Group size for integral replication:", integ_group_size*para_env_sub%num_pe
1430 CALL m_flush(unit_nr)
1433 num_integ_group = ngroup/integ_group_size
1435 CALL timestop(handle)
1437 END SUBROUTINE mp2_ri_get_integ_group_size
1457 SUBROUTINE mp2_ri_get_block_size(mp2_env, para_env, para_env_sub, gd_array, gd_B_virtual, &
1458 homo, virtual, dimen_RI, unit_nr, &
1459 block_size, ngroup, num_integ_group, &
1460 my_open_shell_ss, calc_forces, buffer_1D)
1461 TYPE(mp2_type) :: mp2_env
1462 TYPE(mp_para_env_type),
INTENT(IN) :: para_env, para_env_sub
1463 TYPE(group_dist_d1_type),
INTENT(IN) :: gd_array
1464 TYPE(group_dist_d1_type),
DIMENSION(:),
INTENT(IN) :: gd_b_virtual
1465 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
1466 INTEGER,
INTENT(IN) :: dimen_ri, unit_nr
1467 INTEGER,
INTENT(OUT) :: block_size, ngroup
1468 INTEGER,
INTENT(IN) :: num_integ_group
1469 LOGICAL,
INTENT(IN) :: my_open_shell_ss, calc_forces
1470 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:), &
1471 INTENT(OUT) :: buffer_1d
1473 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_get_block_size'
1475 INTEGER :: best_block_size, handle, num_ij_blocks
1476 INTEGER(KIND=int_8) :: buffer_size, mem
1477 REAL(kind=dp) :: mem_base, mem_per_blk, mem_per_repl_blk, &
1480 CALL timeset(routinen, handle)
1482 ngroup = para_env%num_pe/para_env_sub%num_pe
1485 mem_real = (mem + 1024*1024 - 1)/(1024*1024)
1486 CALL para_env%min(mem_real)
1489 mem_per_blk = 0.0_dp
1490 mem_per_repl_blk = 0.0_dp
1493 mem_base = mem_base + maxval(maxsize(gd_b_virtual))*max(dimen_ri, maxval(virtual))*8.0_dp/(1024**2)
1495 mem_per_repl_blk = mem_per_repl_blk + real(maxval(maxsize(gd_b_virtual)), kind=dp)*maxsize(gd_array)*8.0_dp/(1024**2)
1497 mem_per_blk = mem_per_blk + 2.0_dp*maxval(maxsize(gd_b_virtual))*real(dimen_ri, kind=dp)*8.0_dp/(1024**2)
1499 mem_base = mem_base + maxval(maxsize(gd_b_virtual))*max(dimen_ri, maxval(virtual))*8.0_dp/(1024**2)
1501 IF (calc_forces)
THEN
1503 mem_per_blk = mem_per_blk + 3.0_dp*maxval(maxsize(gd_b_virtual))*dimen_ri*8.0_dp/(1024**2)
1505 mem_base = mem_base + maxval(maxsize(gd_b_virtual))*max(dimen_ri, maxval(virtual))*8.0_dp/(1024**2)
1511 IF (mp2_env%ri_mp2%block_size > 0)
THEN
1512 best_block_size = mp2_env%ri_mp2%block_size
1514 best_block_size = max(floor((mem_real - mem_base)/(mem_per_blk + mem_per_repl_blk*ngroup/num_integ_group)), 1)
1517 IF (
SIZE(homo) == 1)
THEN
1518 IF (.NOT. my_open_shell_ss)
THEN
1519 num_ij_blocks = (homo(1)/best_block_size)
1520 num_ij_blocks = (num_ij_blocks*num_ij_blocks - num_ij_blocks)/2
1522 num_ij_blocks = ((homo(1) - 1)/best_block_size)
1523 num_ij_blocks = (num_ij_blocks*num_ij_blocks - num_ij_blocks)/2
1526 num_ij_blocks = product(homo/best_block_size)
1529 IF ((num_ij_blocks >= ngroup .AND. num_ij_blocks > 0) .OR. best_block_size == 1)
THEN
1532 best_block_size = best_block_size - 1
1536 IF (
SIZE(homo) == 1)
THEN
1537 IF (my_open_shell_ss)
THEN
1540 best_block_size = min(floor(sqrt(real(homo(1) - 1, kind=dp))), best_block_size)
1543 best_block_size = min(floor(sqrt(real(homo(1), kind=dp))), best_block_size)
1547 block_size = max(1, best_block_size)
1549 IF (unit_nr > 0)
THEN
1550 WRITE (unit=unit_nr, fmt=
"(T3,A,T75,i6)") &
1551 "RI_INFO| Block size:", block_size
1552 CALL m_flush(unit_nr)
1556 buffer_size = max(int(maxsize(gd_array), kind=int_8)*block_size, int(max(dimen_ri, maxval(virtual)), kind=int_8)) &
1557 *maxval(maxsize(gd_b_virtual))
1559 IF (calc_forces) buffer_size = buffer_size*2
1560 ALLOCATE (buffer_1d(buffer_size))
1562 CALL timestop(handle)
1564 END SUBROUTINE mp2_ri_get_block_size
1595 SUBROUTINE mp2_update_p_gamma(mp2_env, para_env_sub, gd_B_virtual, &
1596 Eigenval, homo, dimen_RI, iiB, jjB, my_B_size, &
1597 my_B_virtual_end, my_B_virtual_start, my_i, my_j, virtual, local_ab, &
1598 t_ab, my_local_i_aL, my_local_j_aL, open_ss, Y_i_aP, Y_j_aP, &
1599 local_ba, ispin, jspin, dgemm_counter, buffer_1D)
1600 TYPE(mp2_type) :: mp2_env
1601 TYPE(mp_para_env_type),
INTENT(IN) :: para_env_sub
1602 TYPE(group_dist_d1_type),
DIMENSION(:),
INTENT(IN) :: gd_b_virtual
1603 REAL(kind=dp),
DIMENSION(:, :),
INTENT(IN) :: eigenval
1604 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo
1605 INTEGER,
INTENT(IN) :: dimen_ri, iib, jjb
1606 INTEGER,
DIMENSION(:),
INTENT(IN) :: my_b_size, my_b_virtual_end, &
1608 INTEGER,
INTENT(IN) :: my_i, my_j
1609 INTEGER,
DIMENSION(:),
INTENT(IN) :: virtual
1610 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1611 INTENT(INOUT),
TARGET :: local_ab
1612 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1613 INTENT(IN),
TARGET :: t_ab, my_local_i_al, my_local_j_al
1614 LOGICAL,
INTENT(IN) :: open_ss
1615 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1616 INTENT(INOUT),
TARGET :: y_i_ap, y_j_ap, local_ba
1617 INTEGER,
INTENT(IN) :: ispin, jspin
1618 TYPE(dgemm_counter_type),
INTENT(INOUT) :: dgemm_counter
1619 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:),
TARGET :: buffer_1d
1621 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_update_P_gamma'
1623 INTEGER :: a, b, b_global, handle, proc_receive, proc_send, proc_shift, rec_b_size, &
1624 rec_b_virtual_end, rec_b_virtual_start, send_b_size, send_b_virtual_end, &
1625 send_b_virtual_start
1626 INTEGER(KIND=int_8) :: offset
1627 LOGICAL :: alpha_beta
1628 REAL(kind=dp) :: factor, p_ij_diag
1629 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1630 POINTER :: external_ab, send_ab
1632 CALL timeset(routinen//
"_Pia", handle)
1634 alpha_beta = .NOT. (ispin == jspin)
1641 DO b = 1, my_b_size(jspin)
1642 b_global = b + my_b_virtual_start(jspin) - 1
1643 DO a = 1, virtual(ispin)
1644 local_ab(a, b) = -local_ab(a, b)/ &
1645 (eigenval(homo(ispin) + a, ispin) + eigenval(homo(jspin) + b_global, jspin) - &
1646 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, jspin))
1649 IF (.NOT. (alpha_beta))
THEN
1650 p_ij_diag = -sum(local_ab*t_ab)*factor
1653 p_ij_diag = -sum(local_ab*local_ab)*mp2_env%scale_S
1655 DO b = 1, my_b_size(ispin)
1656 b_global = b + my_b_virtual_start(ispin) - 1
1657 DO a = 1, virtual(jspin)
1658 local_ba(a, b) = -local_ba(a, b)/ &
1659 (eigenval(homo(jspin) + a, jspin) + eigenval(homo(ispin) + b_global, ispin) - &
1660 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, jspin))
1667 CALL dgemm_counter_start(dgemm_counter)
1668 IF (.NOT. (alpha_beta))
THEN
1669 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', my_b_size(ispin), my_b_size(ispin), virtual(ispin), 1.0_dp, &
1670 t_ab, virtual(ispin), local_ab, virtual(ispin), &
1671 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array(:, &
1672 my_b_virtual_start(ispin):my_b_virtual_end(ispin)), my_b_size(ispin))
1673 mp2_env%ri_grad%P_ij(ispin)%array(my_i + iib - 1, my_i + iib - 1) = &
1674 mp2_env%ri_grad%P_ij(ispin)%array(my_i + iib - 1, my_i + iib - 1) + p_ij_diag
1676 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', my_b_size(ispin), my_b_size(ispin), virtual(jspin), mp2_env%scale_S, &
1677 local_ba, virtual(jspin), local_ba, virtual(jspin), 1.0_dp, &
1678 mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_virtual_start(ispin):my_b_virtual_end(ispin)), my_b_size(ispin))
1680 mp2_env%ri_grad%P_ij(ispin)%array(my_i + iib - 1, my_i + iib - 1) = &
1681 mp2_env%ri_grad%P_ij(ispin)%array(my_i + iib - 1, my_i + iib - 1) + p_ij_diag
1683 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', my_b_size(jspin), my_b_size(jspin), virtual(ispin), mp2_env%scale_S, &
1684 local_ab, virtual(ispin), local_ab, virtual(ispin), 1.0_dp, &
1685 mp2_env%ri_grad%P_ab(jspin)%array(:, my_b_virtual_start(jspin):my_b_virtual_end(jspin)), my_b_size(jspin))
1687 mp2_env%ri_grad%P_ij(jspin)%array(my_j + jjb - 1, my_j + jjb - 1) = &
1688 mp2_env%ri_grad%P_ij(jspin)%array(my_j + jjb - 1, my_j + jjb - 1) + p_ij_diag
1693 IF ((my_i /= my_j) .AND. (.NOT. alpha_beta))
THEN
1695 CALL mp2_env%local_gemm_ctx%gemm(
'N',
'T', my_b_size(ispin), virtual(ispin), my_b_size(ispin), 1.0_dp, &
1696 t_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
1697 local_ab, virtual(ispin), &
1698 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array, my_b_size(ispin))
1700 mp2_env%ri_grad%P_ij(ispin)%array(my_j + jjb - 1, my_j + jjb - 1) = &
1701 mp2_env%ri_grad%P_ij(ispin)%array(my_j + jjb - 1, my_j + jjb - 1) + p_ij_diag
1703 DO proc_shift = 1, para_env_sub%num_pe - 1
1704 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1705 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1707 CALL get_group_dist(gd_b_virtual(jspin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
1708 CALL get_group_dist(gd_b_virtual(jspin), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)
1710 external_ab(1:virtual(ispin), 1:rec_b_size) => buffer_1d(1:int(virtual(ispin), int_8)*rec_b_size)
1711 external_ab = 0.0_dp
1713 CALL para_env_sub%sendrecv(local_ab, proc_send, &
1714 external_ab, proc_receive)
1716 IF (.NOT. (alpha_beta))
THEN
1717 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', my_b_size(ispin), rec_b_size, virtual(ispin), 1.0_dp, &
1718 t_ab, virtual(ispin), external_ab, virtual(ispin), &
1719 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array(:, rec_b_virtual_start:rec_b_virtual_end), my_b_size(ispin))
1721 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', my_b_size(jspin), rec_b_size, virtual(ispin), mp2_env%scale_S, &
1722 local_ab, virtual(ispin), external_ab, virtual(ispin), &
1723 1.0_dp, mp2_env%ri_grad%P_ab(jspin)%array(:, rec_b_virtual_start:rec_b_virtual_end), &
1728 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
1729 CALL get_group_dist(gd_b_virtual(ispin), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)
1730 external_ab(1:virtual(jspin), 1:rec_b_size) => buffer_1d(1:int(virtual(jspin), int_8)*rec_b_size)
1731 external_ab = 0.0_dp
1732 CALL para_env_sub%sendrecv(local_ba, proc_send, &
1733 external_ab, proc_receive)
1734 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', my_b_size(ispin), rec_b_size, virtual(jspin), mp2_env%scale_S, &
1735 local_ba, virtual(jspin), external_ab, virtual(jspin), &
1736 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array(:, rec_b_virtual_start:rec_b_virtual_end), my_b_size(ispin))
1739 IF ((my_i /= my_j) .AND. (.NOT. alpha_beta))
THEN
1740 external_ab(1:my_b_size(ispin), 1:virtual(ispin)) => &
1741 buffer_1d(1:int(virtual(ispin), int_8)*my_b_size(ispin))
1742 external_ab = 0.0_dp
1744 offset = int(virtual(ispin), int_8)*my_b_size(ispin)
1746 send_ab(1:send_b_size, 1:virtual(ispin)) => buffer_1d(offset + 1:offset + int(send_b_size, int_8)*virtual(ispin))
1749 CALL mp2_env%local_gemm_ctx%gemm(
'N',
'T', send_b_size, virtual(ispin), my_b_size(ispin), 1.0_dp, &
1750 t_ab(send_b_virtual_start:send_b_virtual_end, :), send_b_size, &
1751 local_ab, virtual(ispin), 0.0_dp, send_ab, send_b_size)
1752 CALL para_env_sub%sendrecv(send_ab, proc_send, &
1753 external_ab, proc_receive)
1755 mp2_env%ri_grad%P_ab(ispin)%array(:, :) = mp2_env%ri_grad%P_ab(ispin)%array + external_ab
1759 IF (.NOT. alpha_beta)
THEN
1760 IF (my_i /= my_j)
THEN
1761 CALL dgemm_counter_stop(dgemm_counter, 2*my_b_size(ispin), virtual(ispin), virtual(ispin))
1763 CALL dgemm_counter_stop(dgemm_counter, my_b_size(ispin), virtual(ispin), virtual(ispin))
1766 CALL dgemm_counter_stop(dgemm_counter, sum(my_b_size), virtual(ispin), virtual(jspin))
1768 CALL timestop(handle)
1772 CALL timeset(routinen//
"_Gamma", handle)
1773 CALL dgemm_counter_start(dgemm_counter)
1774 IF (.NOT. alpha_beta)
THEN
1776 CALL mp2_env%local_gemm_ctx%gemm(
'N',
'T', my_b_size(ispin), dimen_ri, my_b_size(ispin), 1.0_dp, &
1777 t_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
1778 my_local_j_al, dimen_ri, 1.0_dp, y_i_ap, my_b_size(ispin))
1780 CALL mp2_env%local_gemm_ctx%gemm(
'N',
'T', my_b_size(ispin), dimen_ri, my_b_size(jspin), mp2_env%scale_S, &
1781 local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
1782 my_local_j_al, dimen_ri, 1.0_dp, y_i_ap, my_b_size(ispin))
1783 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'T', my_b_size(jspin), dimen_ri, my_b_size(ispin), mp2_env%scale_S, &
1784 local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
1785 my_local_i_al, dimen_ri, 1.0_dp, y_j_ap, my_b_size(jspin))
1788 IF (para_env_sub%num_pe > 1)
THEN
1789 external_ab(1:my_b_size(ispin), 1:dimen_ri) => buffer_1d(1:int(my_b_size(ispin), int_8)*dimen_ri)
1790 external_ab = 0.0_dp
1792 offset = int(my_b_size(ispin), int_8)*dimen_ri
1795 DO proc_shift = 1, para_env_sub%num_pe - 1
1796 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1797 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1799 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
1800 CALL get_group_dist(gd_b_virtual(ispin), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)
1802 send_ab(1:send_b_size, 1:dimen_ri) => buffer_1d(offset + 1:offset + int(dimen_ri, int_8)*send_b_size)
1804 IF (.NOT. alpha_beta)
THEN
1805 CALL mp2_env%local_gemm_ctx%gemm(
'N',
'T', send_b_size, dimen_ri, my_b_size(ispin), 1.0_dp, &
1806 t_ab(send_b_virtual_start:send_b_virtual_end, :), send_b_size, &
1807 my_local_j_al, dimen_ri, 0.0_dp, send_ab, send_b_size)
1808 CALL para_env_sub%sendrecv(send_ab, proc_send, external_ab, proc_receive)
1810 y_i_ap(:, :) = y_i_ap + external_ab
1814 CALL mp2_env%local_gemm_ctx%gemm(
'N',
'T', send_b_size, dimen_ri, my_b_size(jspin), mp2_env%scale_S, &
1815 local_ab(send_b_virtual_start:send_b_virtual_end, :), send_b_size, &
1816 my_local_j_al, dimen_ri, 0.0_dp, send_ab, send_b_size)
1817 CALL para_env_sub%sendrecv(send_ab, proc_send, external_ab, proc_receive)
1818 y_i_ap(:, :) = y_i_ap + external_ab
1822 IF (alpha_beta)
THEN
1824 IF (para_env_sub%num_pe > 1)
THEN
1825 external_ab(1:my_b_size(jspin), 1:dimen_ri) => buffer_1d(1:int(my_b_size(jspin), int_8)*dimen_ri)
1826 external_ab = 0.0_dp
1828 offset = int(my_b_size(jspin), int_8)*dimen_ri
1830 DO proc_shift = 1, para_env_sub%num_pe - 1
1831 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1832 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1834 CALL get_group_dist(gd_b_virtual(jspin), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)
1835 send_ab(1:send_b_size, 1:dimen_ri) => buffer_1d(offset + 1:offset + int(dimen_ri, int_8)*send_b_size)
1837 CALL mp2_env%local_gemm_ctx%gemm(
'N',
'T', send_b_size, dimen_ri, my_b_size(ispin), mp2_env%scale_S, &
1838 local_ba(send_b_virtual_start:send_b_virtual_end, :), send_b_size, &
1839 my_local_i_al, dimen_ri, 0.0_dp, send_ab, send_b_size)
1840 CALL para_env_sub%sendrecv(send_ab, proc_send, external_ab, proc_receive)
1841 y_j_ap(:, :) = y_j_ap + external_ab
1846 CALL dgemm_counter_stop(dgemm_counter, 3*virtual(ispin), dimen_ri, my_b_size(jspin))
1848 CALL dgemm_counter_stop(dgemm_counter, virtual(ispin), dimen_ri, my_b_size(ispin))
1851 IF ((my_i /= my_j) .AND. (.NOT. alpha_beta))
THEN
1853 CALL dgemm_counter_start(dgemm_counter)
1854 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'T', my_b_size(ispin), dimen_ri, my_b_size(ispin), 1.0_dp, &
1855 t_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
1856 my_local_i_al, dimen_ri, 1.0_dp, y_j_ap, my_b_size(ispin))
1857 DO proc_shift = 1, para_env_sub%num_pe - 1
1858 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1859 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1861 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
1863 external_ab(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri, int_8)*rec_b_size)
1864 external_ab = 0.0_dp
1866 CALL para_env_sub%sendrecv(my_local_i_al, proc_send, &
1867 external_ab, proc_receive)
1870 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'T', my_b_size(ispin), dimen_ri, rec_b_size, 1.0_dp, &
1871 t_ab(rec_b_virtual_start:rec_b_virtual_end, :), rec_b_size, &
1872 external_ab, dimen_ri, 1.0_dp, y_j_ap, my_b_size(ispin))
1875 CALL dgemm_counter_stop(dgemm_counter, my_b_size(ispin), dimen_ri, virtual(ispin))
1878 CALL timestop(handle)
1879 END SUBROUTINE mp2_update_p_gamma
1902 SUBROUTINE mp2_redistribute_gamma(Gamma_P_ia, ij_index, my_B_size, &
1903 my_block_size, my_group_L_size, my_i, my_ij_pairs, ngroup, &
1904 num_integ_group, integ_group_pos2color_sub, num_ij_pairs, &
1905 ij_map, ranges_info_array, Y_i_aP, comm_exchange, &
1906 sizes_array, spin, buffer_1D)
1908 REAL(kind=dp),
DIMENSION(:, :, :),
INTENT(INOUT) :: gamma_p_ia
1909 INTEGER,
INTENT(IN) :: ij_index, my_b_size, my_block_size, &
1910 my_group_l_size, my_i, my_ij_pairs, &
1911 ngroup, num_integ_group
1912 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(IN) :: integ_group_pos2color_sub, num_ij_pairs
1913 INTEGER,
ALLOCATABLE,
DIMENSION(:, :),
INTENT(IN) :: ij_map
1914 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :), &
1915 INTENT(IN) :: ranges_info_array
1916 REAL(kind=dp),
DIMENSION(:, :, :),
INTENT(IN) :: y_i_ap
1917 TYPE(mp_comm_type),
INTENT(IN) :: comm_exchange
1918 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(IN) :: sizes_array
1919 INTEGER,
INTENT(IN) :: spin
1920 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:),
TARGET :: buffer_1d
1922 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_redistribute_gamma'
1924 INTEGER :: end_point, handle, handle2, iib, ij_counter_rec, irep, kkk, lll, lstart_pos, &
1925 proc_receive, proc_send, proc_shift, rec_i, rec_ij_index, send_l_size, start_point, tag
1926 INTEGER(KIND=int_8) :: offset
1927 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
1928 POINTER :: bi_c_rec, bi_c_send
1932 CALL timeset(routinen//
"_comm2", handle)
1936 IF (ij_index <= my_ij_pairs)
THEN
1939 CALL timeset(routinen//
"_comm2_w", handle2)
1940 DO irep = 0, num_integ_group - 1
1941 lstart_pos = ranges_info_array(1, irep, comm_exchange%mepos)
1942 start_point = ranges_info_array(3, irep, comm_exchange%mepos)
1943 end_point = ranges_info_array(4, irep, comm_exchange%mepos)
1948 DO kkk = start_point, end_point
1949 lll = kkk - start_point + lstart_pos
1950 DO iib = 1, my_block_size
1951 gamma_p_ia(1:my_b_size, my_i + iib - 1, kkk) = &
1952 gamma_p_ia(1:my_b_size, my_i + iib - 1, kkk) + &
1953 y_i_ap(1:my_b_size, lll, iib)
1958 CALL timestop(handle2)
1962 DO proc_shift = 1, comm_exchange%num_pe - 1
1963 proc_send =
modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
1964 proc_receive =
modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
1966 send_l_size = sizes_array(proc_send)
1967 bi_c_send(1:my_b_size, 1:my_block_size, 1:send_l_size) => &
1968 buffer_1d(1:int(my_b_size, int_8)*my_block_size*send_l_size)
1970 offset = int(my_b_size, int_8)*my_block_size*send_l_size
1972 CALL timeset(routinen//
"_comm2_w", handle2)
1974 DO irep = 0, num_integ_group - 1
1975 lstart_pos = ranges_info_array(1, irep, proc_send)
1976 start_point = ranges_info_array(3, irep, proc_send)
1977 end_point = ranges_info_array(4, irep, proc_send)
1982 DO kkk = start_point, end_point
1983 lll = kkk - start_point + lstart_pos
1984 DO iib = 1, my_block_size
1985 bi_c_send(1:my_b_size, iib, kkk) = y_i_ap(1:my_b_size, lll, iib)
1990 CALL timestop(handle2)
1992 rec_ij_index = num_ij_pairs(proc_receive)
1994 IF (ij_index <= rec_ij_index)
THEN
1997 (ij_index - min(1, integ_group_pos2color_sub(proc_receive)))*ngroup + integ_group_pos2color_sub(proc_receive)
1999 rec_i = ij_map(spin, ij_counter_rec)
2001 bi_c_rec(1:my_b_size, 1:my_block_size, 1:my_group_l_size) => &
2002 buffer_1d(offset + 1:offset + int(my_b_size, int_8)*my_block_size*my_group_l_size)
2005 CALL comm_exchange%sendrecv(bi_c_send, proc_send, &
2006 bi_c_rec, proc_receive, tag)
2008 CALL timeset(routinen//
"_comm2_w", handle2)
2009 DO irep = 0, num_integ_group - 1
2010 start_point = ranges_info_array(3, irep, comm_exchange%mepos)
2011 end_point = ranges_info_array(4, irep, comm_exchange%mepos)
2015 gamma_p_ia(:, rec_i:rec_i + my_block_size - 1, start_point:end_point) = &
2016 gamma_p_ia(:, rec_i:rec_i + my_block_size - 1, start_point:end_point) + &
2017 bi_c_rec(1:my_b_size, :, start_point:end_point)
2020 CALL timestop(handle2)
2024 CALL comm_exchange%send(bi_c_send, proc_send, tag)
2032 DO proc_shift = 1, comm_exchange%num_pe - 1
2033 proc_send =
modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
2034 proc_receive =
modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
2035 rec_ij_index = num_ij_pairs(proc_receive)
2037 IF (ij_index <= rec_ij_index)
THEN
2040 (ij_index - min(1, integ_group_pos2color_sub(proc_receive)))*ngroup + integ_group_pos2color_sub(proc_receive)
2042 rec_i = ij_map(spin, ij_counter_rec)
2044 bi_c_rec(1:my_b_size, 1:my_block_size, 1:my_group_l_size) => &
2045 buffer_1d(1:int(my_b_size, int_8)*my_block_size*my_group_l_size)
2049 CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)
2051 CALL timeset(routinen//
"_comm2_w", handle2)
2052 DO irep = 0, num_integ_group - 1
2053 start_point = ranges_info_array(3, irep, comm_exchange%mepos)
2054 end_point = ranges_info_array(4, irep, comm_exchange%mepos)
2055#if !defined(__INTEL_LLVM_COMPILER) || (20250000 <= __INTEL_LLVM_COMPILER)
2060 gamma_p_ia(:, rec_i:rec_i + my_block_size - 1, start_point:end_point) = &
2061 gamma_p_ia(:, rec_i:rec_i + my_block_size - 1, start_point:end_point) + &
2062 bi_c_rec(1:my_b_size, :, start_point:end_point)
2063#if !defined(__INTEL_LLVM_COMPILER) || (20250000 <= __INTEL_LLVM_COMPILER)
2067 CALL timestop(handle2)
2073 CALL timestop(handle)
2075 END SUBROUTINE mp2_redistribute_gamma
2104 SUBROUTINE quasi_degenerate_p_ij(mp2_env, Eigenval, homo, virtual, open_shell, &
2105 beta_beta, Bib_C, unit_nr, dimen_RI, &
2106 my_B_size, ngroup, my_group_L_size, &
2107 color_sub, ranges_info_array, comm_exchange, para_env_sub, para_env, &
2108 my_B_virtual_start, my_B_virtual_end, sizes_array, gd_B_virtual, &
2109 integ_group_pos2color_sub, dgemm_counter, buffer_1D)
2110 TYPE(mp2_type) :: mp2_env
2111 REAL(kind=dp),
DIMENSION(:, :),
INTENT(IN) :: eigenval
2112 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
2113 LOGICAL,
INTENT(IN) :: open_shell, beta_beta
2114 TYPE(three_dim_real_array),
DIMENSION(:), &
2116 INTEGER,
INTENT(IN) :: unit_nr, dimen_ri
2117 INTEGER,
DIMENSION(:),
INTENT(IN) :: my_b_size
2118 INTEGER,
INTENT(IN) :: ngroup, my_group_l_size, color_sub
2119 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :), &
2120 INTENT(IN) :: ranges_info_array
2121 TYPE(mp_comm_type),
INTENT(IN) :: comm_exchange
2122 TYPE(mp_para_env_type),
INTENT(IN) :: para_env_sub, para_env
2123 INTEGER,
DIMENSION(:),
INTENT(IN) :: my_b_virtual_start, my_b_virtual_end
2124 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(IN) :: sizes_array
2125 TYPE(group_dist_d1_type),
DIMENSION(:),
INTENT(IN) :: gd_b_virtual
2126 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(IN) :: integ_group_pos2color_sub
2127 TYPE(dgemm_counter_type),
INTENT(INOUT) :: dgemm_counter
2128 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:),
TARGET :: buffer_1d
2130 CHARACTER(LEN=*),
PARAMETER :: routinen =
'quasi_degenerate_P_ij'
2132 INTEGER :: a, a_global, b, b_global, block_size, decil, handle, handle2, ijk_counter, &
2133 ijk_counter_send, ijk_index, ispin, kkb, kspin, max_block_size, max_ijk, my_i, my_ijk, &
2134 my_j, my_k, my_last_k(2), my_virtual, nspins, proc_receive, proc_send, proc_shift, &
2135 rec_b_size, rec_b_virtual_end, rec_b_virtual_start, rec_l_size, send_b_size, &
2136 send_b_virtual_end, send_b_virtual_start, send_i, send_ijk_index, send_j, send_k, &
2137 size_b_i, size_b_k, tag, tag2
2138 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: num_ijk
2139 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: ijk_map, send_last_k
2140 LOGICAL :: alpha_beta, do_recv_i, do_recv_j, &
2141 do_recv_k, do_send_i, do_send_j, &
2143 REAL(kind=dp) :: amp_fac, p_ij_elem, t_new, t_start
2144 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :), &
2145 TARGET :: local_ab, local_al_i, local_al_j, t_ab
2146 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: local_al_k
2147 REAL(kind=dp),
DIMENSION(:, :),
POINTER :: bi_c_rec, external_ab, external_al
2148 REAL(kind=dp),
DIMENSION(:, :, :),
POINTER :: bi_c_rec_3d
2150 CALL timeset(routinen//
"_ij_sing", handle)
2155 nspins =
SIZE(bib_c)
2156 alpha_beta = (nspins == 2)
2159 amp_fac = mp2_env%scale_S + mp2_env%scale_T
2160 IF (open_shell) amp_fac = mp2_env%scale_T
2162 ALLOCATE (send_last_k(2, comm_exchange%num_pe - 1))
2165 DO ispin = 1, nspins
2166 size_b_i = my_b_size(ispin)
2167 IF (ispin == 1 .AND. alpha_beta)
THEN
2172 size_b_k = my_b_size(kspin)
2176 CALL find_quasi_degenerate_ij(my_ijk, homo(ispin), homo(kspin), eigenval(:, ispin), mp2_env, ijk_map, unit_nr, ngroup, &
2177 .NOT. beta_beta .AND. ispin /= 2, comm_exchange, num_ijk, max_ijk, color_sub, &
2178 SIZE(buffer_1d), my_group_l_size, size_b_k, para_env, virtual(ispin), size_b_i)
2180 my_virtual = virtual(ispin)
2181 IF (
SIZE(ijk_map, 2) > 0)
THEN
2182 max_block_size = ijk_map(4, 1)
2187 ALLOCATE (local_al_i(dimen_ri, size_b_i))
2188 ALLOCATE (local_al_j(dimen_ri, size_b_i))
2189 ALLOCATE (local_al_k(dimen_ri, size_b_k, max_block_size))
2190 ALLOCATE (t_ab(my_virtual, size_b_k))
2195 t_start = m_walltime()
2196 DO ijk_index = 1, max_ijk
2199 IF (unit_nr > 0 .AND. ijk_index > 1)
THEN
2200 decil = ijk_index*10/max_ijk
2201 IF (decil /= (ijk_index - 1)*10/max_ijk)
THEN
2202 t_new = m_walltime()
2203 t_new = (t_new - t_start)/60.0_dp*(max_ijk - ijk_index + 1)/(ijk_index - 1)
2204 WRITE (unit_nr, fmt=
"(T3,A)")
"Percentage of finished loop: "// &
2205 cp_to_string(decil*10)//
". Minutes left: "//cp_to_string(t_new)
2206 CALL m_flush(unit_nr)
2210 IF (ijk_index <= my_ijk)
THEN
2212 ijk_counter = (ijk_index - min(1, color_sub))*ngroup + color_sub
2213 my_i = ijk_map(1, ijk_counter)
2214 my_j = ijk_map(2, ijk_counter)
2215 my_k = ijk_map(3, ijk_counter)
2216 block_size = ijk_map(4, ijk_counter)
2218 do_recv_i = (ispin /= kspin) .OR. my_i < my_k .OR. my_i > my_k + block_size - 1
2219 do_recv_j = (ispin /= kspin) .OR. my_j < my_k .OR. my_j > my_k + block_size - 1
2220 do_recv_k = my_k /= my_last_k(1) .OR. my_k + block_size - 1 /= my_last_k(2)
2222 my_last_k(2) = my_k + block_size - 1
2226 CALL fill_local_i_al_2d(local_al_i, ranges_info_array(:, :, comm_exchange%mepos), &
2227 bib_c(ispin)%array(:, :, my_i))
2232 CALL fill_local_i_al_2d(local_al_j, ranges_info_array(:, :, comm_exchange%mepos), &
2233 bib_c(ispin)%array(:, :, my_j))
2238 CALL fill_local_i_al(local_al_k(:, :, 1:block_size), ranges_info_array(:, :, comm_exchange%mepos), &
2239 bib_c(kspin)%array(:, :, my_k:my_k + block_size - 1))
2242 CALL timeset(routinen//
"_comm", handle2)
2243 DO proc_shift = 1, comm_exchange%num_pe - 1
2244 proc_send =
modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
2245 proc_receive =
modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
2247 send_ijk_index = num_ijk(proc_send)
2249 rec_l_size = sizes_array(proc_receive)
2250 bi_c_rec(1:rec_l_size, 1:size_b_i) => buffer_1d(1:int(rec_l_size, kind=int_8)*size_b_i)
2255 IF (ijk_index <= send_ijk_index)
THEN
2257 ijk_counter_send = (ijk_index - min(1, integ_group_pos2color_sub(proc_send)))* &
2258 ngroup + integ_group_pos2color_sub(proc_send)
2259 send_i = ijk_map(1, ijk_counter_send)
2260 send_j = ijk_map(2, ijk_counter_send)
2261 send_k = ijk_map(3, ijk_counter_send)
2263 do_send_i = (ispin /= kspin) .OR. send_i < send_k .OR. send_i > send_k + block_size - 1
2264 do_send_j = (ispin /= kspin) .OR. send_j < send_k .OR. send_j > send_k + block_size - 1
2265 do_send_k = send_k /= send_last_k(1, proc_shift) .OR. send_k + block_size - 1 /= send_last_k(2, proc_shift)
2266 send_last_k(1, proc_shift) = send_k
2267 send_last_k(2, proc_shift) = send_k + block_size - 1
2274 CALL comm_exchange%sendrecv(bib_c(ispin)%array(:, :, send_i), proc_send, &
2275 bi_c_rec, proc_receive, tag)
2277 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_i), proc_send, tag)
2279 ELSE IF (do_recv_i)
THEN
2280 CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)
2283 CALL fill_local_i_al_2d(local_al_i, ranges_info_array(:, :, proc_receive), bi_c_rec)
2290 CALL comm_exchange%sendrecv(bib_c(ispin)%array(:, :, send_j), proc_send, &
2291 bi_c_rec, proc_receive, tag)
2293 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_j), proc_send, tag)
2295 ELSE IF (do_recv_j)
THEN
2296 CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)
2299 CALL fill_local_i_al_2d(local_al_j, ranges_info_array(:, :, proc_receive), bi_c_rec)
2303 bi_c_rec_3d(1:rec_l_size, 1:size_b_k, 1:block_size) => &
2304 buffer_1d(1:int(rec_l_size, kind=int_8)*size_b_k*block_size)
2307 CALL comm_exchange%sendrecv(bib_c(kspin)%array(:, :, send_k:send_k + block_size - 1), proc_send, &
2308 bi_c_rec_3d, proc_receive, tag)
2310 CALL comm_exchange%send(bi_c_rec, proc_receive, tag)
2312 ELSE IF (do_recv_k)
THEN
2313 CALL comm_exchange%recv(bi_c_rec_3d, proc_receive, tag)
2316 CALL fill_local_i_al(local_al_k(:, :, 1:block_size), ranges_info_array(:, :, proc_receive), bi_c_rec_3d)
2320 IF (.NOT. do_recv_i) local_al_i(:, :) = local_al_k(:, :, my_i - my_k + 1)
2321 IF (.NOT. do_recv_j) local_al_j(:, :) = local_al_k(:, :, my_j - my_k + 1)
2322 CALL timestop(handle2)
2325 DO kkb = 1, block_size
2326 CALL timeset(routinen//
"_exp_ik", handle2)
2327 CALL dgemm_counter_start(dgemm_counter)
2328 ALLOCATE (local_ab(my_virtual, size_b_k))
2330 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', size_b_i, size_b_k, dimen_ri, 1.0_dp, &
2331 local_al_i, dimen_ri, local_al_k(:, :, kkb), dimen_ri, &
2332 0.0_dp, local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), 1:size_b_k), size_b_i)
2333 DO proc_shift = 1, para_env_sub%num_pe - 1
2334 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2335 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2337 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
2339 external_al(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri, kind=int_8)*rec_b_size)
2341 CALL comm_exchange%sendrecv(local_al_i, proc_send, &
2342 external_al, proc_receive, tag)
2344 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', rec_b_size, size_b_k, dimen_ri, 1.0_dp, &
2345 external_al, dimen_ri, local_al_k(:, :, kkb), dimen_ri, &
2346 0.0_dp, local_ab(rec_b_virtual_start:rec_b_virtual_end, 1:size_b_k), rec_b_size)
2348 CALL dgemm_counter_stop(dgemm_counter, my_virtual, size_b_k, dimen_ri)
2349 CALL timestop(handle2)
2352 CALL timeset(routinen//
"_tab", handle2)
2355 IF (.NOT. alpha_beta)
THEN
2357 b_global = b + my_b_virtual_start(1) - 1
2358 DO a = 1, my_b_size(1)
2359 a_global = a + my_b_virtual_start(1) - 1
2360 t_ab(a_global, b) = (amp_fac*local_ab(a_global, b) - mp2_env%scale_T*local_ab(b_global, a))/ &
2361 (eigenval(my_i, 1) + eigenval(my_k + kkb - 1, 1) &
2362 - eigenval(homo(1) + a_global, 1) - eigenval(homo(1) + b_global, 1))
2367 b_global = b + my_b_virtual_start(kspin) - 1
2368 DO a = 1, my_b_size(ispin)
2369 a_global = a + my_b_virtual_start(ispin) - 1
2370 t_ab(a_global, b) = mp2_env%scale_S*local_ab(a_global, b)/ &
2371 (eigenval(my_i, ispin) + eigenval(my_k + kkb - 1, kspin) &
2372 - eigenval(homo(ispin) + a_global, ispin) - eigenval(homo(kspin) + b_global, kspin))
2377 IF (.NOT. alpha_beta)
THEN
2378 DO proc_shift = 1, para_env_sub%num_pe - 1
2379 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2380 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2381 CALL get_group_dist(gd_b_virtual(1), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
2382 CALL get_group_dist(gd_b_virtual(1), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)
2384 external_ab(1:size_b_i, 1:rec_b_size) => buffer_1d(1:int(size_b_i, kind=int_8)*rec_b_size)
2385 CALL para_env_sub%sendrecv(local_ab(send_b_virtual_start:send_b_virtual_end, 1:size_b_k), proc_send, &
2386 external_ab(1:size_b_i, 1:rec_b_size), proc_receive, tag)
2388 DO b = 1, my_b_size(1)
2389 b_global = b + my_b_virtual_start(1) - 1
2390 DO a = 1, rec_b_size
2391 a_global = a + rec_b_virtual_start - 1
2392 t_ab(a_global, b) = (amp_fac*local_ab(a_global, b) - mp2_env%scale_T*external_ab(b, a))/ &
2393 (eigenval(my_i, 1) + eigenval(my_k + kkb - 1, 1) &
2394 - eigenval(homo(1) + a_global, 1) - eigenval(homo(1) + b_global, 1))
2399 CALL timestop(handle2)
2402 CALL timeset(routinen//
"_exp_jk", handle2)
2404 CALL dgemm_counter_start(dgemm_counter)
2405 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', size_b_i, size_b_k, dimen_ri, 1.0_dp, &
2406 local_al_j, dimen_ri, local_al_k(:, :, kkb), dimen_ri, &
2407 0.0_dp, local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), 1:size_b_k), size_b_i)
2408 DO proc_shift = 1, para_env_sub%num_pe - 1
2409 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2410 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2412 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
2414 external_al(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri, kind=int_8)*rec_b_size)
2416 CALL comm_exchange%sendrecv(local_al_j, proc_send, &
2417 external_al, proc_receive, tag)
2418 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', rec_b_size, size_b_k, dimen_ri, 1.0_dp, &
2419 external_al, dimen_ri, local_al_k(:, :, kkb), dimen_ri, &
2420 0.0_dp, local_ab(rec_b_virtual_start:rec_b_virtual_end, 1:size_b_k), rec_b_size)
2422 CALL dgemm_counter_stop(dgemm_counter, my_virtual, size_b_k, dimen_ri)
2423 CALL timestop(handle2)
2425 CALL timeset(routinen//
"_Pij", handle2)
2427 b_global = b + my_b_virtual_start(kspin) - 1
2428 DO a = 1, my_b_size(ispin)
2429 a_global = a + my_b_virtual_start(ispin) - 1
2430 local_ab(a_global, b) = &
2431 local_ab(a_global, b)/(eigenval(my_j, ispin) + eigenval(my_k + kkb - 1, kspin) &
2432 - eigenval(homo(ispin) + a_global, ispin) - eigenval(homo(kspin) + b_global, kspin))
2436 p_ij_elem = sum(local_ab*t_ab)
2437 DEALLOCATE (local_ab)
2438 IF ((.NOT. open_shell) .AND. (.NOT. alpha_beta))
THEN
2439 p_ij_elem = p_ij_elem*2.0_dp
2442 mp2_env%ri_grad%P_ij(2)%array(my_i, my_j) = mp2_env%ri_grad%P_ij(2)%array(my_i, my_j) - p_ij_elem
2443 mp2_env%ri_grad%P_ij(2)%array(my_j, my_i) = mp2_env%ri_grad%P_ij(2)%array(my_j, my_i) - p_ij_elem
2445 mp2_env%ri_grad%P_ij(ispin)%array(my_i, my_j) = mp2_env%ri_grad%P_ij(ispin)%array(my_i, my_j) - p_ij_elem
2446 mp2_env%ri_grad%P_ij(ispin)%array(my_j, my_i) = mp2_env%ri_grad%P_ij(ispin)%array(my_j, my_i) - p_ij_elem
2448 CALL timestop(handle2)
2451 CALL timeset(routinen//
"_comm", handle2)
2453 DO proc_shift = 1, comm_exchange%num_pe - 1
2454 proc_send =
modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
2455 proc_receive =
modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
2457 send_ijk_index = num_ijk(proc_send)
2459 IF (ijk_index <= send_ijk_index)
THEN
2461 ijk_counter_send = (ijk_index - min(1, integ_group_pos2color_sub(proc_send)))*ngroup + &
2462 integ_group_pos2color_sub(proc_send)
2463 send_i = ijk_map(1, ijk_counter_send)
2464 send_j = ijk_map(2, ijk_counter_send)
2465 send_k = ijk_map(3, ijk_counter_send)
2466 block_size = ijk_map(4, ijk_counter_send)
2468 do_send_i = (ispin /= kspin) .OR. send_i < send_k .OR. send_i > send_k + block_size - 1
2469 do_send_j = (ispin /= kspin) .OR. send_j < send_k .OR. send_j > send_k + block_size - 1
2472 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_i), proc_send, tag)
2476 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_j), proc_send, tag)
2479 CALL comm_exchange%send(bib_c(kspin)%array(:, :, send_k:send_k + block_size - 1), proc_send, tag)
2483 CALL timestop(handle2)
2486 DEALLOCATE (local_al_i)
2487 DEALLOCATE (local_al_j)
2488 DEALLOCATE (local_al_k)
2490 DEALLOCATE (ijk_map)
2492 CALL timestop(handle)
2494 END SUBROUTINE quasi_degenerate_p_ij
2518 SUBROUTINE find_quasi_degenerate_ij(my_ijk, homo, homo_beta, Eigenval, mp2_env, ijk_map, unit_nr, ngroup, &
2519 do_print_alpha, comm_exchange, num_ijk, max_ijk, color_sub, &
2520 buffer_size, my_group_L_size, B_size_k, para_env, virtual, B_size_i)
2522 INTEGER,
INTENT(OUT) :: my_ijk
2523 INTEGER,
INTENT(IN) :: homo, homo_beta
2524 REAL(kind=dp),
DIMENSION(:),
INTENT(IN) :: eigenval
2525 TYPE(mp2_type),
INTENT(IN) :: mp2_env
2526 INTEGER,
ALLOCATABLE,
DIMENSION(:, :),
INTENT(OUT) :: ijk_map
2527 INTEGER,
INTENT(IN) :: unit_nr, ngroup
2528 LOGICAL,
INTENT(IN) :: do_print_alpha
2529 TYPE(mp_comm_type),
INTENT(IN) :: comm_exchange
2530 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(OUT) :: num_ijk
2531 INTEGER,
INTENT(OUT) :: max_ijk
2532 INTEGER,
INTENT(IN) :: color_sub, buffer_size, my_group_l_size, &
2534 TYPE(mp_para_env_type),
INTENT(IN) :: para_env
2535 INTEGER,
INTENT(IN) :: virtual, b_size_i
2537 INTEGER :: block_size, communication_steps, communication_volume, iib, ij_counter, &
2538 ijk_counter, jjb, kkb, max_block_size, max_num_k_blocks, min_communication_volume, &
2539 my_steps, num_k_blocks, num_sing_ij, total_ijk
2540 INTEGER(KIND=int_8) :: mem
2541 LOGICAL,
ALLOCATABLE,
DIMENSION(:, :) :: ijk_marker
2543 ALLOCATE (num_ijk(0:comm_exchange%num_pe - 1))
2548 DO jjb = iib + 1, homo
2549 IF (abs(eigenval(jjb) - eigenval(iib)) < mp2_env%ri_grad%eps_canonical) &
2550 num_sing_ij = num_sing_ij + 1
2554 IF (unit_nr > 0)
THEN
2555 IF (do_print_alpha)
THEN
2556 WRITE (unit=unit_nr, fmt=
"(T3,A,T75,i6)") &
2557 "MO_INFO| Number of ij pairs below EPS_CANONICAL:", num_sing_ij
2559 WRITE (unit=unit_nr, fmt=
"(T3,A,T75,i6)") &
2560 "MO_INFO| Number of ij pairs (spin beta) below EPS_CANONICAL:", num_sing_ij
2565 max_block_size = buffer_size/(my_group_l_size*b_size_k)
2572 mem = mem - 2_int_8*(virtual*b_size_k + b_size_i*my_group_l_size)
2573 max_block_size = min(max_block_size, max(1, int(mem/(my_group_l_size*b_size_k), kind(max_block_size))))
2576 CALL para_env%min(max_block_size)
2580 min_communication_volume = 3*homo_beta*num_sing_ij
2581 communication_steps = 3*homo_beta*num_sing_ij
2582 DO iib = max_block_size, 2, -1
2583 max_num_k_blocks = homo_beta/iib*num_sing_ij
2584 num_k_blocks = max_num_k_blocks - mod(max_num_k_blocks, ngroup)
2585 communication_volume = num_k_blocks*(2 + iib) + 3*(homo_beta*num_sing_ij - iib*num_k_blocks)
2586 my_steps = num_k_blocks + homo_beta*num_sing_ij - iib*num_k_blocks
2587 IF (communication_volume < min_communication_volume)
THEN
2589 min_communication_volume = communication_volume
2590 communication_steps = my_steps
2591 ELSE IF (communication_volume == min_communication_volume .AND. my_steps < communication_steps)
THEN
2593 communication_steps = my_steps
2597 IF (unit_nr > 0)
THEN
2598 WRITE (unit=unit_nr, fmt=
"(T3,A,T75,i6)") &
2599 "MO_INFO| Block size:", block_size
2600 CALL m_flush(unit_nr)
2604 max_num_k_blocks = homo_beta/block_size*num_sing_ij
2605 num_k_blocks = max_num_k_blocks - mod(max_num_k_blocks, ngroup)
2607 total_ijk = num_k_blocks + homo_beta*num_sing_ij - num_k_blocks*block_size
2608 ALLOCATE (ijk_map(4, total_ijk))
2610 ALLOCATE (ijk_marker(homo_beta, num_sing_ij))
2618 DO jjb = iib + 1, homo
2619 IF (abs(eigenval(jjb) - eigenval(iib)) >= mp2_env%ri_grad%eps_canonical) cycle
2620 ij_counter = ij_counter + 1
2621 DO kkb = 1, homo_beta - mod(homo_beta, block_size), block_size
2622 IF (ijk_counter + 1 > num_k_blocks)
EXIT
2623 ijk_counter = ijk_counter + 1
2624 ijk_marker(kkb:kkb + block_size - 1, ij_counter) = .false.
2625 ijk_map(1, ijk_counter) = iib
2626 ijk_map(2, ijk_counter) = jjb
2627 ijk_map(3, ijk_counter) = kkb
2628 ijk_map(4, ijk_counter) = block_size
2629 IF (mod(ijk_counter, ngroup) == color_sub) my_ijk = my_ijk + 1
2636 DO jjb = iib + 1, homo
2637 IF (abs(eigenval(jjb) - eigenval(iib)) >= mp2_env%ri_grad%eps_canonical) cycle
2638 ij_counter = ij_counter + 1
2639 DO kkb = 1, homo_beta
2640 IF (ijk_marker(kkb, ij_counter))
THEN
2641 ijk_counter = ijk_counter + 1
2642 ijk_map(1, ijk_counter) = iib
2643 ijk_map(2, ijk_counter) = jjb
2644 ijk_map(3, ijk_counter) = kkb
2645 ijk_map(4, ijk_counter) = 1
2646 IF (mod(ijk_counter, ngroup) == color_sub) my_ijk = my_ijk + 1
2652 DEALLOCATE (ijk_marker)
2654 CALL comm_exchange%allgather(my_ijk, num_ijk)
2655 max_ijk = maxval(num_ijk)
2657 END SUBROUTINE find_quasi_degenerate_ij
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
various routines to log and control the output. The idea is that decisions about where to log should ...
Counters to determine the performance of parallel DGEMMs.
elemental subroutine, public dgemm_counter_init(dgemm_counter, unit_nr, print_info)
Initialize a dgemm_counter.
subroutine, public dgemm_counter_write(dgemm_counter, para_env)
calculate and print flop rates
subroutine, public dgemm_counter_start(dgemm_counter)
start timer of the counter
subroutine, public dgemm_counter_stop(dgemm_counter, size1, size2, size3)
stop timer of the counter and provide matrix sizes
Types to describe group distributions.
elemental integer function, public maxsize(this)
...
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
2- and 3-center electron repulsion integral routines based on libint2 Currently available operators: ...
pure logical function, public compare_potential_types(potential1, potential2)
Helper function to compare libint_potential_types.
integer, parameter, public local_gemm_pu_gpu
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_memory(mem)
Returns the total amount of memory [bytes] in use, if known, zero otherwise.
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Interface to the message passing library MPI.
Routines to calculate RI-GPW-MP2 energy using pw.
subroutine, public mp2_ri_gpw_compute_en(emp2_cou, emp2_ex, emp2_s, emp2_t, bib_c, mp2_env, para_env, para_env_sub, color_sub, gd_array, gd_b_virtual, eigenval, nmo, homo, dimen_ri, unit_nr, calc_forces, calc_ex)
...
Routines for calculating RI-MP2 gradients.
subroutine, public complete_gamma(mp2_env, b_ia_q, dimen_ri, homo, virtual, para_env, para_env_sub, ngroup, my_group_l_size, my_group_l_start, my_group_l_end, my_b_size, my_b_virtual_start, gd_array, gd_b_virtual, kspin)
complete the calculation of the Gamma matrices
Types needed for MP2 calculations.
stores all the informations relevant to an mpi environment