39#include "./base/base_uses.f90"
45 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'mp2_ri_gpw'
72 SUBROUTINE mp2_ri_gpw_compute_en(Emp2_Cou, Emp2_EX, Emp2_S, Emp2_T, BIb_C, mp2_env, para_env, para_env_sub, color_sub, &
73 gd_array, gd_B_virtual, &
74 Eigenval, nmo, homo, dimen_RI, unit_nr, calc_forces, calc_ex)
75 REAL(kind=
dp),
INTENT(INOUT) :: emp2_cou, emp2_ex, emp2_s, emp2_t
77 INTENT(INOUT) :: bib_c
80 INTEGER,
INTENT(IN) :: color_sub
82 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo
83 INTEGER,
INTENT(IN) :: nmo
84 REAL(kind=
dp),
DIMENSION(:, :),
INTENT(IN) :: eigenval
86 INTENT(INOUT) :: gd_b_virtual
87 INTEGER,
INTENT(IN) :: dimen_ri, unit_nr
88 LOGICAL,
INTENT(IN) :: calc_forces, calc_ex
90 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_gpw_compute_en'
92 INTEGER :: a, a_global, b, b_global, block_size, decil, end_point, handle, handle2, handle3, &
93 iib, ij_counter, ij_counter_send, ij_index, integ_group_size, ispin, jjb, jspin, &
94 max_ij_pairs, my_block_size, my_group_l_end, my_group_l_size, my_group_l_size_orig, &
95 my_group_l_start, my_i, my_ij_pairs, my_j, my_new_group_l_size, ngroup, nspins, &
96 num_integ_group, proc_receive, proc_send, proc_shift, rec_b_size, rec_b_virtual_end, &
97 rec_b_virtual_start, rec_l_size, send_b_size, send_b_virtual_end, send_b_virtual_start, &
98 send_i, send_ij_index, send_j, start_point, tag, total_ij_pairs
99 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: integ_group_pos2color_sub, my_b_size, &
100 my_b_virtual_end, my_b_virtual_start, num_ij_pairs, sizes_array_orig, virtual
101 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: ij_map
102 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :) :: ranges_info_array
103 LOGICAL :: my_alpha_beta_case, my_beta_beta_case, &
105 REAL(kind=
dp) :: amp_fac, my_emp2_cou, my_emp2_ex, &
106 sym_fac, t_new, t_start
107 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:),
TARGET :: buffer_1d
108 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :), &
109 TARGET :: local_ab, local_ba, t_ab
110 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
111 TARGET :: local_i_al, local_j_al, y_i_ap, y_j_ap
112 REAL(kind=
dp),
CONTIGUOUS,
DIMENSION(:, :), &
113 POINTER :: external_ab, external_i_al
114 REAL(kind=
dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
119 DIMENSION(:) :: b_ia_q
121 CALL timeset(routinen, handle)
125 ALLOCATE (virtual(nspins))
126 virtual(:) = nmo - homo(:)
128 ALLOCATE (my_b_size(nspins), my_b_virtual_start(nspins), my_b_virtual_end(nspins))
131 my_b_virtual_start(ispin), my_b_virtual_end(ispin), my_b_size(ispin))
134 CALL get_group_dist(gd_array, color_sub, my_group_l_start, my_group_l_end, my_group_l_size)
141 CALL mp2_env%local_gemm_ctx%set_op_threshold_gpu(128*128*128*2)
143 CALL mp2_ri_get_integ_group_size( &
144 mp2_env, para_env, para_env_sub, gd_array, gd_b_virtual, &
145 homo, dimen_ri, unit_nr, &
146 integ_group_size, ngroup, &
147 num_integ_group, virtual, calc_forces)
151 CALL mp2_ri_create_group( &
152 para_env, para_env_sub, color_sub, &
153 gd_array%sizes, calc_forces, &
154 integ_group_size, my_group_l_end, &
155 my_group_l_size, my_group_l_size_orig, my_group_l_start, my_new_group_l_size, &
156 integ_group_pos2color_sub, sizes_array_orig, &
157 ranges_info_array, comm_exchange, comm_rep, num_integ_group)
164 CALL replicate_iak_2intgroup(bib_c(jspin)%array, comm_exchange, comm_rep, &
165 homo(jspin), gd_array%sizes, my_b_size(jspin), &
166 my_group_l_size, ranges_info_array)
170 IF (unit_nr > 0)
THEN
171 IF (nspins == 1)
THEN
172 WRITE (unit_nr, *)
"Start loop run"
173 ELSE IF (ispin == 1 .AND. jspin == 1)
THEN
174 WRITE (unit_nr, *)
"Start loop run alpha-alpha"
175 ELSE IF (ispin == 1 .AND. jspin == 2)
THEN
176 WRITE (unit_nr, *)
"Start loop run alpha-beta"
177 ELSE IF (ispin == 2 .AND. jspin == 2)
THEN
178 WRITE (unit_nr, *)
"Start loop run beta-beta"
183 my_open_shell_ss = (nspins == 2) .AND. (ispin == jspin)
189 my_beta_beta_case = .false.
190 my_alpha_beta_case = .false.
191 IF (ispin /= jspin)
THEN
192 my_alpha_beta_case = .true.
193 ELSE IF (my_open_shell_ss)
THEN
194 IF (ispin == 2) my_beta_beta_case = .true.
197 amp_fac = mp2_env%scale_S + mp2_env%scale_T
198 IF (my_alpha_beta_case .OR. my_open_shell_ss) amp_fac = mp2_env%scale_T
200 CALL mp2_ri_allocate_no_blk(local_ab, t_ab, mp2_env, homo, virtual, my_b_size, &
201 my_group_l_size, calc_forces, ispin, jspin, local_ba)
203 CALL mp2_ri_get_block_size( &
204 mp2_env, para_env, para_env_sub, gd_array, gd_b_virtual(ispin:jspin), &
205 homo(ispin:jspin), virtual(ispin:jspin), dimen_ri, unit_nr, block_size, &
206 ngroup, num_integ_group, my_open_shell_ss, calc_forces, buffer_1d)
216 CALL mp2_ri_communication(my_alpha_beta_case, total_ij_pairs, homo(ispin), homo(jspin), &
217 block_size, ngroup, ij_map, color_sub, my_ij_pairs, my_open_shell_ss, unit_nr)
219 ALLOCATE (num_ij_pairs(0:comm_exchange%num_pe - 1))
220 CALL comm_exchange%allgather(my_ij_pairs, num_ij_pairs)
222 max_ij_pairs = maxval(num_ij_pairs)
225 CALL mp2_ri_allocate_blk(dimen_ri, my_b_size, block_size, local_i_al, &
226 local_j_al, calc_forces, y_i_ap, y_j_ap, ispin, jspin)
228 CALL timeset(routinen//
"_RI_loop", handle2)
232 DO ij_index = 1, max_ij_pairs
235 IF (unit_nr > 0 .AND. ij_index > 1)
THEN
236 decil = ij_index*10/max_ij_pairs
237 IF (decil /= (ij_index - 1)*10/max_ij_pairs)
THEN
239 t_new = (t_new - t_start)/60.0_dp*(max_ij_pairs - ij_index + 1)/(ij_index - 1)
240 WRITE (unit_nr, fmt=
"(T3,A)")
"Percentage of finished loop: "// &
246 IF (calc_forces)
THEN
251 IF (ij_index <= my_ij_pairs)
THEN
253 ij_counter = (ij_index - min(1, color_sub))*ngroup + color_sub
254 my_i = ij_map(1, ij_counter)
255 my_j = ij_map(2, ij_counter)
256 my_block_size = ij_map(3, ij_counter)
259 CALL fill_local_i_al(local_i_al(:, :, 1:my_block_size), ranges_info_array(:, :, comm_exchange%mepos), &
260 bib_c(ispin)%array(:, :, my_i:my_i + my_block_size - 1))
263 CALL fill_local_i_al(local_j_al(:, :, 1:my_block_size), ranges_info_array(:, :, comm_exchange%mepos), &
264 bib_c(jspin)%array(:, :, my_j:my_j + my_block_size - 1))
267 CALL timeset(routinen//
"_comm", handle3)
268 DO proc_shift = 1, comm_exchange%num_pe - 1
269 proc_send =
modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
270 proc_receive =
modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
272 send_ij_index = num_ij_pairs(proc_send)
276 IF (ij_index <= send_ij_index)
THEN
277 ij_counter_send = (ij_index - min(1, integ_group_pos2color_sub(proc_send)))*ngroup + &
278 integ_group_pos2color_sub(proc_send)
279 send_i = ij_map(1, ij_counter_send)
280 send_j = ij_map(2, ij_counter_send)
283 bi_c_rec(1:rec_l_size, 1:my_b_size(ispin), 1:my_block_size) => &
284 buffer_1d(1:rec_l_size*my_b_size(ispin)*my_block_size)
286 CALL comm_exchange%sendrecv(bib_c(ispin)%array(:, :, send_i:send_i + my_block_size - 1), &
287 proc_send, bi_c_rec, proc_receive, tag)
289 CALL fill_local_i_al(local_i_al(:, :, 1:my_block_size), ranges_info_array(:, :, proc_receive), &
290 bi_c_rec(:, 1:my_b_size(ispin), :))
293 bi_c_rec(1:rec_l_size, 1:my_b_size(jspin), 1:my_block_size) => &
294 buffer_1d(1:int(rec_l_size,
int_8)*my_b_size(jspin)*my_block_size)
296 CALL comm_exchange%sendrecv(bib_c(jspin)%array(:, :, send_j:send_j + my_block_size - 1), &
297 proc_send, bi_c_rec, proc_receive, tag)
299 CALL fill_local_i_al(local_j_al(:, :, 1:my_block_size), ranges_info_array(:, :, proc_receive), &
300 bi_c_rec(:, 1:my_b_size(jspin), :))
306 bi_c_rec(1:rec_l_size, 1:my_b_size(ispin), 1:my_block_size) => &
307 buffer_1d(1:int(rec_l_size,
int_8)*my_b_size(ispin)*my_block_size)
309 CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)
311 CALL fill_local_i_al(local_i_al(:, :, 1:my_block_size), ranges_info_array(:, :, proc_receive), &
312 bi_c_rec(:, 1:my_b_size(ispin), 1:my_block_size))
315 bi_c_rec(1:rec_l_size, 1:my_b_size(jspin), 1:my_block_size) => &
316 buffer_1d(1:int(rec_l_size,
int_8)*my_b_size(jspin)*my_block_size)
318 CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)
320 CALL fill_local_i_al(local_j_al(:, :, 1:my_block_size), ranges_info_array(:, :, proc_receive), &
321 bi_c_rec(:, 1:my_b_size(jspin), 1:my_block_size))
327 CALL timestop(handle3)
330 DO iib = 1, my_block_size
331 DO jjb = 1, my_block_size
332 CALL timeset(routinen//
"_expansion", handle3)
333 associate(my_local_i_al => local_i_al(:, :, iib), my_local_j_al => local_j_al(:, :, jjb))
337 IF ((my_alpha_beta_case) .AND. (calc_forces))
THEN
341 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', my_b_size(ispin), my_b_size(jspin), dimen_ri, 1.0_dp, &
342 my_local_i_al, dimen_ri, my_local_j_al, dimen_ri, &
343 0.0_dp, local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), &
346 IF (my_alpha_beta_case .AND. calc_forces)
THEN
347 local_ba(my_b_virtual_start(jspin):my_b_virtual_end(jspin), :) = &
348 transpose(local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :))
351 DO proc_shift = 1, para_env_sub%num_pe - 1
352 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
353 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
355 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, &
356 rec_b_virtual_end, rec_b_size)
358 external_i_al(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri,
int_8)*rec_b_size)
359 external_i_al = 0.0_dp
361 CALL para_env_sub%sendrecv(my_local_i_al, proc_send, &
362 external_i_al, proc_receive, tag)
364 CALL mp2_env%local_gemm_ctx%gemm( &
365 'T',
'N', rec_b_size, my_b_size(jspin), dimen_ri, 1.0_dp, &
366 external_i_al, dimen_ri, my_local_j_al, dimen_ri, &
367 0.0_dp, local_ab(rec_b_virtual_start:rec_b_virtual_end, 1:my_b_size(jspin)), rec_b_size)
370 IF (my_alpha_beta_case .AND. calc_forces)
THEN
372 CALL get_group_dist(gd_b_virtual(jspin), proc_receive, rec_b_virtual_start, &
373 rec_b_virtual_end, rec_b_size)
375 external_i_al(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri,
int_8)*rec_b_size)
376 external_i_al = 0.0_dp
378 CALL para_env_sub%sendrecv(my_local_j_al, proc_send, &
379 external_i_al, proc_receive, tag)
381 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', rec_b_size, my_b_size(ispin), dimen_ri, 1.0_dp, &
382 external_i_al, dimen_ri, my_local_i_al, dimen_ri, &
383 0.0_dp, local_ba(rec_b_virtual_start:rec_b_virtual_end, 1:my_b_size(ispin)), rec_b_size)
386 IF (my_alpha_beta_case .AND. calc_forces)
THEN
388 CALL dgemm_counter_stop(dgemm_counter, virtual(ispin), my_b_size(ispin) + my_b_size(jspin), dimen_ri)
392 CALL timestop(handle3)
397 CALL timeset(routinen//
"_ener", handle3)
400 IF (my_i == my_j) sym_fac = 1.0_dp
401 IF (my_alpha_beta_case) sym_fac = 0.5_dp
402 DO b = 1, my_b_size(jspin)
403 b_global = b + my_b_virtual_start(jspin) - 1
404 DO a = 1, virtual(ispin)
405 my_emp2_cou = my_emp2_cou - sym_fac*2.0_dp*local_ab(a, b)**2/ &
406 (eigenval(homo(ispin) + a, ispin) + eigenval(homo(jspin) + b_global, jspin) - &
407 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, jspin))
414 IF (calc_forces .AND. (.NOT. my_alpha_beta_case)) t_ab = 0.0_dp
415 DO b = 1, my_b_size(ispin)
416 b_global = b + my_b_virtual_start(ispin) - 1
417 DO a = 1, my_b_size(ispin)
418 a_global = a + my_b_virtual_start(ispin) - 1
419 my_emp2_ex = my_emp2_ex + sym_fac*local_ab(a_global, b)*local_ab(b_global, a)/ &
420 (eigenval(homo(ispin) + a_global, ispin) + eigenval(homo(ispin) + b_global, ispin) - &
421 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, ispin))
422 IF (calc_forces .AND. (.NOT. my_alpha_beta_case))
THEN
423 t_ab(a_global, b) = -(amp_fac*local_ab(a_global, b) - mp2_env%scale_T*local_ab(b_global, a))/ &
424 (eigenval(homo(ispin) + a_global, ispin) + &
425 eigenval(homo(ispin) + b_global, ispin) - &
426 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, ispin))
431 DO proc_shift = 1, para_env_sub%num_pe - 1
432 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
433 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
436 rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
438 send_b_virtual_start, send_b_virtual_end, send_b_size)
440 external_ab(1:my_b_size(ispin), 1:rec_b_size) => &
441 buffer_1d(1:int(rec_b_size,
int_8)*my_b_size(ispin))
444 CALL para_env_sub%sendrecv(local_ab(send_b_virtual_start:send_b_virtual_end, 1:my_b_size(ispin)), proc_send, &
445 external_ab(1:my_b_size(ispin), 1:rec_b_size), proc_receive, tag)
447 DO b = 1, my_b_size(ispin)
448 b_global = b + my_b_virtual_start(ispin) - 1
450 a_global = a + rec_b_virtual_start - 1
451 my_emp2_ex = my_emp2_ex + sym_fac*local_ab(a_global, b)*external_ab(b, a)/ &
452 (eigenval(homo(ispin) + a_global, ispin) + eigenval(homo(ispin) + b_global, ispin) - &
453 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, ispin))
454 IF (calc_forces .AND. (.NOT. my_alpha_beta_case)) &
455 t_ab(a_global, b) = -(amp_fac*local_ab(a_global, b) - mp2_env%scale_T*external_ab(b, a))/ &
456 (eigenval(homo(ispin) + a_global, ispin) + &
457 eigenval(homo(ispin) + b_global, ispin) - &
458 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, ispin))
463 CALL timestop(handle3)
465 IF (calc_forces)
THEN
467 CALL mp2_update_p_gamma(mp2_env, para_env_sub, gd_b_virtual, &
468 eigenval, homo, dimen_ri, iib, jjb, my_b_size, &
469 my_b_virtual_end, my_b_virtual_start, my_i, my_j, virtual, &
470 local_ab, t_ab, my_local_i_al, my_local_j_al, &
471 my_open_shell_ss, y_i_ap(:, :, iib), y_j_ap(:, :, jjb), local_ba, &
472 ispin, jspin, dgemm_counter, buffer_1d)
485 CALL timeset(routinen//
"_comm", handle3)
488 DO proc_shift = 1, comm_exchange%num_pe - 1
489 proc_send =
modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
490 proc_receive =
modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
492 send_ij_index = num_ij_pairs(proc_send)
494 IF (ij_index <= send_ij_index)
THEN
496 ij_counter_send = (ij_index - min(1, integ_group_pos2color_sub(proc_send)))*ngroup + &
497 integ_group_pos2color_sub(proc_send)
498 send_i = ij_map(1, ij_counter_send)
499 send_j = ij_map(2, ij_counter_send)
502 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_i:send_i + my_block_size - 1), &
505 CALL comm_exchange%send(bib_c(jspin)%array(:, :, send_j:send_j + my_block_size - 1), &
509 CALL timestop(handle3)
513 IF (calc_forces)
THEN
514 CALL mp2_redistribute_gamma(mp2_env%ri_grad%Gamma_P_ia(ispin)%array, ij_index, my_b_size(ispin), &
515 my_block_size, my_group_l_size, my_i, my_ij_pairs, ngroup, &
516 num_integ_group, integ_group_pos2color_sub, num_ij_pairs, &
517 ij_map, ranges_info_array, y_i_ap(:, :, 1:my_block_size), comm_exchange, &
518 gd_array%sizes, 1, buffer_1d)
519 CALL mp2_redistribute_gamma(mp2_env%ri_grad%Gamma_P_ia(jspin)%array, ij_index, my_b_size(jspin), &
520 my_block_size, my_group_l_size, my_j, my_ij_pairs, ngroup, &
521 num_integ_group, integ_group_pos2color_sub, num_ij_pairs, &
522 ij_map, ranges_info_array, y_j_ap(:, :, 1:my_block_size), comm_exchange, &
523 gd_array%sizes, 2, buffer_1d)
527 CALL timestop(handle2)
529 DEALLOCATE (local_i_al)
530 DEALLOCATE (local_j_al)
532 DEALLOCATE (num_ij_pairs)
533 DEALLOCATE (local_ab)
535 IF (calc_forces)
THEN
538 IF (
ALLOCATED(t_ab))
THEN
541 DEALLOCATE (local_ba)
550 CALL quasi_degenerate_p_ij( &
551 mp2_env, eigenval(:, ispin:jspin), homo(ispin:jspin), virtual(ispin:jspin), my_open_shell_ss, &
552 my_beta_beta_case, bib_c(ispin:jspin), unit_nr, dimen_ri, &
553 my_b_size(ispin:jspin), ngroup, my_group_l_size, &
554 color_sub, ranges_info_array, comm_exchange, para_env_sub, para_env, &
555 my_b_virtual_start(ispin:jspin), my_b_virtual_end(ispin:jspin), gd_array%sizes, gd_b_virtual(ispin:jspin), &
556 integ_group_pos2color_sub, dgemm_counter, buffer_1d)
560 DEALLOCATE (buffer_1d)
565 IF (calc_forces .AND. jspin == nspins)
THEN
566 IF (.NOT.
ALLOCATED(b_ia_q))
ALLOCATE (b_ia_q(nspins))
567 ALLOCATE (b_ia_q(ispin)%array(homo(ispin), my_b_size(ispin), my_group_l_size_orig))
568 b_ia_q(ispin)%array = 0.0_dp
569 DO jjb = 1, homo(ispin)
570 DO iib = 1, my_b_size(ispin)
571 b_ia_q(ispin)%array(jjb, iib, 1:my_group_l_size_orig) = &
572 bib_c(ispin)%array(1:my_group_l_size_orig, iib, jjb)
575 DEALLOCATE (bib_c(ispin)%array)
578 ALLOCATE (bib_c(ispin)%array(my_b_size(ispin), homo(ispin), my_group_l_size_orig))
579 DO proc_shift = 1, comm_rep%num_pe - 1
581 proc_send =
modulo(comm_rep%mepos - proc_shift, comm_rep%num_pe)
582 proc_receive =
modulo(comm_rep%mepos + proc_shift, comm_rep%num_pe)
584 start_point = ranges_info_array(3, proc_shift, comm_exchange%mepos)
585 end_point = ranges_info_array(4, proc_shift, comm_exchange%mepos)
587 CALL comm_rep%sendrecv(mp2_env%ri_grad%Gamma_P_ia(ispin)%array(:, :, start_point:end_point), &
588 proc_send, bib_c(ispin)%array, proc_receive, tag)
591 mp2_env%ri_grad%Gamma_P_ia(ispin)%array(:, :, 1:my_group_l_size_orig) = &
592 mp2_env%ri_grad%Gamma_P_ia(ispin)%array(:, :, 1:my_group_l_size_orig) &
593 + bib_c(ispin)%array(:, :, :)
597 bib_c(ispin)%array(:, :, :) = mp2_env%ri_grad%Gamma_P_ia(ispin)%array(:, :, 1:my_group_l_size_orig)
598 DEALLOCATE (mp2_env%ri_grad%Gamma_P_ia(ispin)%array)
599 CALL move_alloc(bib_c(ispin)%array, mp2_env%ri_grad%Gamma_P_ia(ispin)%array)
600 ELSE IF (jspin == nspins)
THEN
601 DEALLOCATE (bib_c(ispin)%array)
604 CALL para_env%sum(my_emp2_cou)
605 CALL para_env%sum(my_emp2_ex)
607 IF (my_open_shell_ss .OR. my_alpha_beta_case)
THEN
608 IF (my_alpha_beta_case)
THEN
609 emp2_s = emp2_s + my_emp2_cou
610 emp2_cou = emp2_cou + my_emp2_cou
612 my_emp2_cou = my_emp2_cou*0.25_dp
613 my_emp2_ex = my_emp2_ex*0.5_dp
614 emp2_t = emp2_t + my_emp2_cou + my_emp2_ex
615 emp2_cou = emp2_cou + my_emp2_cou
616 emp2_ex = emp2_ex + my_emp2_ex
619 emp2_cou = emp2_cou + my_emp2_cou
620 emp2_ex = emp2_ex + my_emp2_ex
626 DEALLOCATE (integ_group_pos2color_sub)
627 DEALLOCATE (ranges_info_array)
629 CALL comm_exchange%free()
632 IF (calc_forces)
THEN
634 DEALLOCATE (gd_array%sizes)
635 iib =
SIZE(sizes_array_orig)
636 ALLOCATE (gd_array%sizes(0:iib - 1))
637 gd_array%sizes(:) = sizes_array_orig
638 DEALLOCATE (sizes_array_orig)
641 my_group_l_size = my_group_l_size_orig
645 CALL complete_gamma(mp2_env, b_ia_q(ispin)%array, dimen_ri, homo(ispin), &
646 virtual(ispin), para_env, para_env_sub, ngroup, &
647 my_group_l_size, my_group_l_start, my_group_l_end, &
648 my_b_size(ispin), my_b_virtual_start(ispin), &
649 gd_array, gd_b_virtual(ispin), &
654 IF (nspins == 1) mp2_env%ri_grad%P_ab(1)%array(:, :) = mp2_env%ri_grad%P_ab(1)%array(:, :)*2.0_dp
657 CALL comm%from_split(para_env, para_env_sub%mepos)
660 CALL comm%sum(mp2_env%ri_grad%P_ab(ispin)%array)
662 CALL para_env%sum(mp2_env%ri_grad%P_ij(ispin)%array)
668 CALL release_group_dist(gd_array)
670 IF (
ALLOCATED(bib_c(ispin)%array))
DEALLOCATE (bib_c(ispin)%array)
671 CALL release_group_dist(gd_b_virtual(ispin))
675 IF (calc_forces)
DEALLOCATE (mp2_env%ri_grad%PQ_half)
676 IF (calc_forces .AND. .NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) &
677 DEALLOCATE (mp2_env%ri_grad%operator_half)
679 CALL dgemm_counter_write(dgemm_counter, para_env)
682 CALL mp2_env%local_gemm_ctx%destroy()
683 CALL timestop(handle)
693 SUBROUTINE fill_local_i_al(local_i_aL, ranges_info_array, BIb_C_rec)
694 REAL(kind=dp),
DIMENSION(:, :, :),
INTENT(INOUT) :: local_i_al
695 INTEGER,
DIMENSION(:, :),
INTENT(IN) :: ranges_info_array
696 REAL(kind=dp),
DIMENSION(:, :, :),
INTENT(IN) :: bib_c_rec
698 CHARACTER(LEN=*),
PARAMETER :: routinen =
'fill_local_i_aL'
700 INTEGER :: end_point, handle, irep, lend_pos, &
701 lstart_pos, start_point
703 CALL timeset(routinen, handle)
705 DO irep = 1,
SIZE(ranges_info_array, 2)
706 lstart_pos = ranges_info_array(1, irep)
707 lend_pos = ranges_info_array(2, irep)
708 start_point = ranges_info_array(3, irep)
709 end_point = ranges_info_array(4, irep)
713 local_i_al(lstart_pos:lend_pos, :, :) = bib_c_rec(start_point:end_point, :, :)
717 CALL timestop(handle)
719 END SUBROUTINE fill_local_i_al
727 SUBROUTINE fill_local_i_al_2d(local_i_aL, ranges_info_array, BIb_C_rec)
728 REAL(kind=dp),
DIMENSION(:, :),
INTENT(INOUT) :: local_i_al
729 INTEGER,
DIMENSION(:, :),
INTENT(IN) :: ranges_info_array
730 REAL(kind=dp),
DIMENSION(:, :),
INTENT(IN) :: bib_c_rec
732 CHARACTER(LEN=*),
PARAMETER :: routinen =
'fill_local_i_aL_2D'
734 INTEGER :: end_point, handle, irep, lend_pos, &
735 lstart_pos, start_point
737 CALL timeset(routinen, handle)
739 DO irep = 1,
SIZE(ranges_info_array, 2)
740 lstart_pos = ranges_info_array(1, irep)
741 lend_pos = ranges_info_array(2, irep)
742 start_point = ranges_info_array(3, irep)
743 end_point = ranges_info_array(4, irep)
747 local_i_al(lstart_pos:lend_pos, :) = bib_c_rec(start_point:end_point, :)
751 CALL timestop(handle)
753 END SUBROUTINE fill_local_i_al_2d
766 SUBROUTINE replicate_iak_2intgroup(BIb_C, comm_exchange, comm_rep, homo, sizes_array, my_B_size, &
767 my_group_L_size, ranges_info_array)
768 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
769 INTENT(INOUT) :: bib_c
770 TYPE(mp_comm_type),
INTENT(IN) :: comm_exchange, comm_rep
771 INTEGER,
INTENT(IN) :: homo
772 INTEGER,
DIMENSION(:),
INTENT(IN) :: sizes_array
773 INTEGER,
INTENT(IN) :: my_b_size, my_group_l_size
774 INTEGER,
DIMENSION(:, 0:, 0:),
INTENT(IN) :: ranges_info_array
776 CHARACTER(LEN=*),
PARAMETER :: routinen =
'replicate_iaK_2intgroup'
778 INTEGER :: end_point, handle, max_l_size, &
779 proc_receive, proc_shift, start_point
780 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: bib_c_copy
781 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :, :) :: bib_c_gather
783 CALL timeset(routinen, handle)
787 max_l_size = maxval(sizes_array)
789 ALLOCATE (bib_c_copy(max_l_size, my_b_size, homo))
791 bib_c_copy(1:
SIZE(bib_c, 1), 1:my_b_size, 1:homo) = bib_c
795 ALLOCATE (bib_c_gather(max_l_size, my_b_size, homo, 0:comm_rep%num_pe - 1))
796 bib_c_gather = 0.0_dp
798 CALL comm_rep%allgather(bib_c_copy, bib_c_gather)
800 DEALLOCATE (bib_c_copy)
802 ALLOCATE (bib_c(my_group_l_size, my_b_size, homo))
806 DO proc_shift = 0, comm_rep%num_pe - 1
807 proc_receive =
modulo(comm_rep%mepos - proc_shift, comm_rep%num_pe)
809 start_point = ranges_info_array(3, proc_shift, comm_exchange%mepos)
810 end_point = ranges_info_array(4, proc_shift, comm_exchange%mepos)
812 bib_c(start_point:end_point, 1:my_b_size, 1:homo) = &
813 bib_c_gather(1:end_point - start_point + 1, 1:my_b_size, 1:homo, proc_receive)
817 DEALLOCATE (bib_c_gather)
819 CALL timestop(handle)
821 END SUBROUTINE replicate_iak_2intgroup
837 SUBROUTINE mp2_ri_allocate_no_blk(local_ab, t_ab, mp2_env, homo, virtual, my_B_size, &
838 my_group_L_size, calc_forces, ispin, jspin, local_ba)
839 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :), &
840 INTENT(OUT) :: local_ab, t_ab
841 TYPE(mp2_type) :: mp2_env
842 INTEGER,
INTENT(IN) :: homo(2), virtual(2), my_b_size(2), &
844 LOGICAL,
INTENT(IN) :: calc_forces
845 INTEGER,
INTENT(IN) :: ispin, jspin
846 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :), &
847 INTENT(OUT) :: local_ba
849 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_allocate_no_blk'
853 CALL timeset(routinen, handle)
855 ALLOCATE (local_ab(virtual(ispin), my_b_size(jspin)))
858 IF (calc_forces)
THEN
859 IF (.NOT.
ALLOCATED(mp2_env%ri_grad%P_ij(jspin)%array))
THEN
860 ALLOCATE (mp2_env%ri_grad%P_ij(jspin)%array(homo(ispin), homo(ispin)))
861 mp2_env%ri_grad%P_ij(jspin)%array = 0.0_dp
863 IF (.NOT.
ALLOCATED(mp2_env%ri_grad%P_ab(jspin)%array))
THEN
864 ALLOCATE (mp2_env%ri_grad%P_ab(jspin)%array(my_b_size(jspin), virtual(jspin)))
865 mp2_env%ri_grad%P_ab(jspin)%array = 0.0_dp
867 IF (.NOT.
ALLOCATED(mp2_env%ri_grad%Gamma_P_ia(jspin)%array))
THEN
868 ALLOCATE (mp2_env%ri_grad%Gamma_P_ia(jspin)%array(my_b_size(jspin), homo(jspin), my_group_l_size))
869 mp2_env%ri_grad%Gamma_P_ia(jspin)%array = 0.0_dp
872 IF (ispin == jspin)
THEN
874 ALLOCATE (t_ab(virtual(ispin), my_b_size(jspin)))
877 ALLOCATE (local_ba(1, 1))
880 ALLOCATE (local_ba(virtual(jspin), my_b_size(ispin)))
885 CALL timestop(handle)
887 END SUBROUTINE mp2_ri_allocate_no_blk
902 SUBROUTINE mp2_ri_allocate_blk(dimen_RI, my_B_size, block_size, &
903 local_i_aL, local_j_aL, calc_forces, &
904 Y_i_aP, Y_j_aP, ispin, jspin)
905 INTEGER,
INTENT(IN) :: dimen_ri, my_b_size(2), block_size
906 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
907 INTENT(OUT) :: local_i_al, local_j_al
908 LOGICAL,
INTENT(IN) :: calc_forces
909 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
910 INTENT(OUT) :: y_i_ap, y_j_ap
911 INTEGER,
INTENT(IN) :: ispin, jspin
913 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_allocate_blk'
917 CALL timeset(routinen, handle)
919 ALLOCATE (local_i_al(dimen_ri, my_b_size(ispin), block_size))
921 ALLOCATE (local_j_al(dimen_ri, my_b_size(jspin), block_size))
924 IF (calc_forces)
THEN
925 ALLOCATE (y_i_ap(my_b_size(ispin), dimen_ri, block_size))
929 ALLOCATE (y_j_ap(my_b_size(jspin), dimen_ri, block_size))
934 CALL timestop(handle)
936 END SUBROUTINE mp2_ri_allocate_blk
952 SUBROUTINE mp2_ri_communication(my_alpha_beta_case, total_ij_pairs, homo, homo_beta, &
953 block_size, ngroup, ij_map, color_sub, my_ij_pairs, my_open_shell_SS, unit_nr)
954 LOGICAL,
INTENT(IN) :: my_alpha_beta_case
955 INTEGER,
INTENT(OUT) :: total_ij_pairs
956 INTEGER,
INTENT(IN) :: homo, homo_beta, block_size, ngroup
957 INTEGER,
ALLOCATABLE,
DIMENSION(:, :),
INTENT(OUT) :: ij_map
958 INTEGER,
INTENT(IN) :: color_sub
959 INTEGER,
INTENT(OUT) :: my_ij_pairs
960 LOGICAL,
INTENT(IN) :: my_open_shell_ss
961 INTEGER,
INTENT(IN) :: unit_nr
963 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_communication'
965 INTEGER :: assigned_blocks, first_i_block, first_j_block, handle, iib, ij_block_counter, &
966 ij_counter, jjb, last_i_block, last_j_block, num_block_per_group, num_ij_blocks, &
967 num_ij_blocks_beta, total_ij_block, total_ij_pairs_blocks
968 LOGICAL,
ALLOCATABLE,
DIMENSION(:, :) :: ij_marker
973 CALL timeset(routinen, handle)
975 IF (.NOT. my_open_shell_ss .AND. .NOT. my_alpha_beta_case)
THEN
976 total_ij_pairs = homo*(1 + homo)/2
977 num_ij_blocks = homo/block_size - 1
980 last_i_block = block_size*(num_ij_blocks - 1)
982 first_j_block = block_size + 1
983 last_j_block = block_size*(num_ij_blocks + 1)
986 DO iib = first_i_block, last_i_block, block_size
987 DO jjb = iib + block_size, last_j_block, block_size
988 ij_block_counter = ij_block_counter + 1
992 total_ij_block = ij_block_counter
993 num_block_per_group = total_ij_block/ngroup
994 assigned_blocks = num_block_per_group*ngroup
996 total_ij_pairs_blocks = assigned_blocks + (total_ij_pairs - assigned_blocks*(block_size**2))
998 ALLOCATE (ij_marker(homo, homo))
1000 ALLOCATE (ij_map(3, total_ij_pairs_blocks))
1004 DO iib = first_i_block, last_i_block, block_size
1005 DO jjb = iib + block_size, last_j_block, block_size
1006 IF (ij_counter + 1 > assigned_blocks)
EXIT
1007 ij_counter = ij_counter + 1
1008 ij_marker(iib:iib + block_size - 1, jjb:jjb + block_size - 1) = .false.
1009 ij_map(1, ij_counter) = iib
1010 ij_map(2, ij_counter) = jjb
1011 ij_map(3, ij_counter) = block_size
1012 IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1017 IF (ij_marker(iib, jjb))
THEN
1018 ij_counter = ij_counter + 1
1019 ij_map(1, ij_counter) = iib
1020 ij_map(2, ij_counter) = jjb
1021 ij_map(3, ij_counter) = 1
1022 IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1026 DEALLOCATE (ij_marker)
1028 ELSE IF (.NOT. my_alpha_beta_case)
THEN
1031 total_ij_pairs = homo*(homo - 1)/2
1032 num_ij_blocks = (homo - 1)/block_size - 1
1035 last_i_block = block_size*(num_ij_blocks - 1)
1038 first_j_block = block_size + 2
1039 last_j_block = block_size*(num_ij_blocks + 1) + 1
1041 ij_block_counter = 0
1042 DO iib = first_i_block, last_i_block, block_size
1043 DO jjb = iib + block_size + 1, last_j_block, block_size
1044 ij_block_counter = ij_block_counter + 1
1048 total_ij_block = ij_block_counter
1049 num_block_per_group = total_ij_block/ngroup
1050 assigned_blocks = num_block_per_group*ngroup
1052 total_ij_pairs_blocks = assigned_blocks + (total_ij_pairs - assigned_blocks*(block_size**2))
1054 ALLOCATE (ij_marker(homo, homo))
1056 ALLOCATE (ij_map(3, total_ij_pairs_blocks))
1060 DO iib = first_i_block, last_i_block, block_size
1061 DO jjb = iib + block_size + 1, last_j_block, block_size
1062 IF (ij_counter + 1 > assigned_blocks)
EXIT
1063 ij_counter = ij_counter + 1
1064 ij_marker(iib:iib + block_size - 1, jjb:jjb + block_size - 1) = .false.
1065 ij_map(1, ij_counter) = iib
1066 ij_map(2, ij_counter) = jjb
1067 ij_map(3, ij_counter) = block_size
1068 IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1072 DO jjb = iib + 1, homo
1073 IF (ij_marker(iib, jjb))
THEN
1074 ij_counter = ij_counter + 1
1075 ij_map(1, ij_counter) = iib
1076 ij_map(2, ij_counter) = jjb
1077 ij_map(3, ij_counter) = 1
1078 IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1082 DEALLOCATE (ij_marker)
1085 total_ij_pairs = homo*homo_beta
1086 num_ij_blocks = homo/block_size
1087 num_ij_blocks_beta = homo_beta/block_size
1090 last_i_block = block_size*(num_ij_blocks - 1)
1093 last_j_block = block_size*(num_ij_blocks_beta - 1)
1095 ij_block_counter = 0
1096 DO iib = first_i_block, last_i_block, block_size
1097 DO jjb = first_j_block, last_j_block, block_size
1098 ij_block_counter = ij_block_counter + 1
1102 total_ij_block = ij_block_counter
1103 num_block_per_group = total_ij_block/ngroup
1104 assigned_blocks = num_block_per_group*ngroup
1106 total_ij_pairs_blocks = assigned_blocks + (total_ij_pairs - assigned_blocks*(block_size**2))
1108 ALLOCATE (ij_marker(homo, homo_beta))
1110 ALLOCATE (ij_map(3, total_ij_pairs_blocks))
1114 DO iib = first_i_block, last_i_block, block_size
1115 DO jjb = first_j_block, last_j_block, block_size
1116 IF (ij_counter + 1 > assigned_blocks)
EXIT
1117 ij_counter = ij_counter + 1
1118 ij_marker(iib:iib + block_size - 1, jjb:jjb + block_size - 1) = .false.
1119 ij_map(1, ij_counter) = iib
1120 ij_map(2, ij_counter) = jjb
1121 ij_map(3, ij_counter) = block_size
1122 IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1126 DO jjb = 1, homo_beta
1127 IF (ij_marker(iib, jjb))
THEN
1128 ij_counter = ij_counter + 1
1129 ij_map(1, ij_counter) = iib
1130 ij_map(2, ij_counter) = jjb
1131 ij_map(3, ij_counter) = 1
1132 IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1136 DEALLOCATE (ij_marker)
1139 IF (unit_nr > 0)
THEN
1140 IF (block_size == 1)
THEN
1141 WRITE (unit=unit_nr, fmt=
"(T3,A,T66,F15.1)") &
1142 "RI_INFO| Percentage of ij pairs communicated with block size 1:", 100.0_dp
1144 WRITE (unit=unit_nr, fmt=
"(T3,A,T66,F15.1)") &
1145 "RI_INFO| Percentage of ij pairs communicated with block size 1:", &
1146 100.0_dp*real((total_ij_pairs - assigned_blocks*(block_size**2)), kind=dp)/real(total_ij_pairs, kind=dp)
1148 CALL m_flush(unit_nr)
1151 CALL timestop(handle)
1153 END SUBROUTINE mp2_ri_communication
1175 SUBROUTINE mp2_ri_create_group(para_env, para_env_sub, color_sub, &
1176 sizes_array, calc_forces, &
1177 integ_group_size, my_group_L_end, &
1178 my_group_L_size, my_group_L_size_orig, my_group_L_start, my_new_group_L_size, &
1179 integ_group_pos2color_sub, &
1180 sizes_array_orig, ranges_info_array, comm_exchange, comm_rep, num_integ_group)
1181 TYPE(mp_para_env_type),
INTENT(IN) :: para_env, para_env_sub
1182 INTEGER,
INTENT(IN) :: color_sub
1183 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(INOUT) :: sizes_array
1184 LOGICAL,
INTENT(IN) :: calc_forces
1185 INTEGER,
INTENT(IN) :: integ_group_size, my_group_l_end
1186 INTEGER,
INTENT(INOUT) :: my_group_l_size
1187 INTEGER,
INTENT(OUT) :: my_group_l_size_orig
1188 INTEGER,
INTENT(IN) :: my_group_l_start
1189 INTEGER,
INTENT(INOUT) :: my_new_group_l_size
1190 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(OUT) :: integ_group_pos2color_sub, &
1192 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :), &
1193 INTENT(OUT) :: ranges_info_array
1194 TYPE(mp_comm_type),
INTENT(OUT) :: comm_exchange, comm_rep
1195 INTEGER,
INTENT(IN) :: num_integ_group
1197 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_create_group'
1199 INTEGER :: handle, iib, proc_receive, proc_shift, &
1201 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: new_sizes_array, rep_ends_array, &
1202 rep_sizes_array, rep_starts_array
1203 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: my_info
1205 CALL timeset(routinen, handle)
1207 sub_sub_color = para_env_sub%mepos*num_integ_group + color_sub/integ_group_size
1208 CALL comm_exchange%from_split(para_env, sub_sub_color)
1211 sub_sub_color = para_env_sub%mepos*comm_exchange%num_pe + comm_exchange%mepos
1212 CALL comm_rep%from_split(para_env, sub_sub_color)
1218 ALLOCATE (rep_ends_array(0:comm_rep%num_pe - 1))
1219 ALLOCATE (rep_starts_array(0:comm_rep%num_pe - 1))
1220 ALLOCATE (rep_sizes_array(0:comm_rep%num_pe - 1))
1222 CALL comm_rep%allgather(my_group_l_size, rep_sizes_array)
1223 CALL comm_rep%allgather(my_group_l_start, rep_starts_array)
1224 CALL comm_rep%allgather(my_group_l_end, rep_ends_array)
1227 my_new_group_l_size = my_group_l_size
1230 ALLOCATE (my_info(4, 0:comm_rep%num_pe - 1))
1231 my_info(1, 0) = my_group_l_start
1232 my_info(2, 0) = my_group_l_end
1234 my_info(4, 0) = my_group_l_size
1236 DO proc_shift = 1, comm_rep%num_pe - 1
1237 proc_receive =
modulo(comm_rep%mepos - proc_shift, comm_rep%num_pe)
1239 my_new_group_l_size = my_new_group_l_size + rep_sizes_array(proc_receive)
1241 my_info(1, proc_shift) = rep_starts_array(proc_receive)
1242 my_info(2, proc_shift) = rep_ends_array(proc_receive)
1243 my_info(3, proc_shift) = my_info(4, proc_shift - 1) + 1
1244 my_info(4, proc_shift) = my_new_group_l_size
1248 ALLOCATE (new_sizes_array(0:comm_exchange%num_pe - 1))
1249 ALLOCATE (ranges_info_array(4, 0:comm_rep%num_pe - 1, 0:comm_exchange%num_pe - 1))
1250 CALL comm_exchange%allgather(my_new_group_l_size, new_sizes_array)
1251 CALL comm_exchange%allgather(my_info, ranges_info_array)
1253 DEALLOCATE (rep_sizes_array)
1254 DEALLOCATE (rep_starts_array)
1255 DEALLOCATE (rep_ends_array)
1257 ALLOCATE (integ_group_pos2color_sub(0:comm_exchange%num_pe - 1))
1258 CALL comm_exchange%allgather(color_sub, integ_group_pos2color_sub)
1260 IF (calc_forces)
THEN
1261 iib =
SIZE(sizes_array)
1262 ALLOCATE (sizes_array_orig(0:iib - 1))
1263 sizes_array_orig(:) = sizes_array
1266 my_group_l_size_orig = my_group_l_size
1267 my_group_l_size = my_new_group_l_size
1268 DEALLOCATE (sizes_array)
1270 ALLOCATE (sizes_array(0:integ_group_size - 1))
1271 sizes_array(:) = new_sizes_array
1273 DEALLOCATE (new_sizes_array)
1275 CALL timestop(handle)
1277 END SUBROUTINE mp2_ri_create_group
1295 SUBROUTINE mp2_ri_get_integ_group_size(mp2_env, para_env, para_env_sub, gd_array, gd_B_virtual, &
1296 homo, dimen_RI, unit_nr, &
1298 ngroup, num_integ_group, &
1299 virtual, calc_forces)
1300 TYPE(mp2_type) :: mp2_env
1301 TYPE(mp_para_env_type),
INTENT(IN) :: para_env, para_env_sub
1302 TYPE(group_dist_d1_type),
INTENT(IN) :: gd_array
1303 TYPE(group_dist_d1_type),
DIMENSION(:),
INTENT(IN) :: gd_b_virtual
1304 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo
1305 INTEGER,
INTENT(IN) :: dimen_ri, unit_nr
1306 INTEGER,
INTENT(OUT) :: integ_group_size, ngroup, num_integ_group
1307 INTEGER,
DIMENSION(:),
INTENT(IN) :: virtual
1308 LOGICAL,
INTENT(IN) :: calc_forces
1310 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_get_integ_group_size'
1312 INTEGER :: block_size, handle, iib, &
1313 max_repl_group_size, &
1314 min_integ_group_size
1315 INTEGER(KIND=int_8) :: mem
1316 LOGICAL :: calc_group_size
1317 REAL(kind=dp) :: factor, mem_base, mem_min, mem_per_blk, &
1318 mem_per_repl, mem_per_repl_blk, &
1321 CALL timeset(routinen, handle)
1323 ngroup = para_env%num_pe/para_env_sub%num_pe
1325 calc_group_size = mp2_env%ri_mp2%number_integration_groups <= 0
1326 IF (.NOT. calc_group_size)
THEN
1327 IF (mod(ngroup, mp2_env%ri_mp2%number_integration_groups) /= 0) calc_group_size = .true.
1330 IF (calc_group_size)
THEN
1332 mem_real = (mem + 1024*1024 - 1)/(1024*1024)
1333 CALL para_env%min(mem_real)
1334 mem_real = min(mem_real, mp2_env%mp2_memory)
1337 mem_per_blk = 0.0_dp
1338 mem_per_repl = 0.0_dp
1339 mem_per_repl_blk = 0.0_dp
1342 mem_per_repl = mem_per_repl + maxval(max(real(homo, kind=dp)*maxsize(gd_array), real(dimen_ri, kind=dp))* &
1343 maxsize(gd_b_virtual))*8.0_dp/(1024**2)
1345 mem_per_repl = mem_per_repl + sum(real(homo, kind=dp)*maxsize(gd_b_virtual))*maxsize(gd_array)*8.0_dp/(1024**2)
1347 mem_per_repl_blk = mem_per_repl_blk + real(maxval(maxsize(gd_b_virtual)), kind=dp)*maxsize(gd_array)*8.0_dp/(1024**2)
1349 mem_per_blk = mem_per_blk + 2.0_dp*maxval(maxsize(gd_b_virtual))*real(dimen_ri, kind=dp)*8.0_dp/(1024**2)
1351 mem_base = mem_base + maxval(real(virtual, kind=dp)*maxsize(gd_b_virtual))*8.0_dp/(1024**2)
1353 mem_base = mem_base + real(max(dimen_ri, maxval(virtual)), kind=dp)*maxval(maxsize(gd_b_virtual))*8.0_dp/(1024**2)
1355 IF (calc_forces)
THEN
1357 mem_per_repl = mem_per_repl + sum(real(homo, kind=dp)*maxsize(gd_array)* &
1358 maxsize(gd_b_virtual))*8.0_dp/(1024**2)
1360 mem_per_blk = mem_per_blk + 2.0_dp*maxval(maxsize(gd_b_virtual))*dimen_ri*8.0_dp/(1024**2)
1362 mem_base = mem_base + real(maxval(maxsize(gd_b_virtual)), kind=dp)*max(dimen_ri, maxval(virtual))*8.0_dp/(1024**2)
1364 mem_base = mem_base + sum(real(homo, kind=dp)*homo)*8.0_dp/(1024**2)
1366 mem_base = mem_base + sum(real(virtual, kind=dp)*maxsize(gd_b_virtual))*8.0_dp/(1024**2)
1368 mem_base = mem_base + real(max(dimen_ri, maxval(virtual)), kind=dp)*maxval(maxsize(gd_b_virtual))*8.0_dp/(1024**2)
1372 block_size = max(1, min(floor(sqrt(real(minval(homo), kind=dp))), floor(minval(homo)/sqrt(2.0_dp*ngroup))))
1373 IF (mp2_env%ri_mp2%block_size > 0) block_size = mp2_env%ri_mp2%block_size
1375 mem_min = mem_base + mem_per_repl + (mem_per_blk + mem_per_repl_blk)*block_size
1377 IF (unit_nr > 0)
WRITE (unit_nr,
'(T3,A,T68,F9.2,A4)')
'RI_INFO| Minimum available memory per MPI process:', &
1379 IF (unit_nr > 0)
WRITE (unit_nr,
'(T3,A,T68,F9.2,A4)')
'RI_INFO| Minimum required memory per MPI process:', &
1400 integ_group_size = ngroup
1403 factor = real(sum(homo*virtual), kind=dp) &
1404 - sum((real(maxval(homo), kind=dp)/block_size + block_size - 2.0_dp)*homo*virtual)/ngroup
1405 IF (
SIZE(homo) == 2) factor = factor - 2.0_dp*product(homo)/block_size/ngroup*sum(homo*virtual)
1407 IF (factor <= 0.0_dp)
THEN
1409 max_repl_group_size = min(max(floor((mem_real - mem_base - mem_per_blk*block_size)/ &
1410 (mem_per_repl + mem_per_repl_blk*block_size)), 1), ngroup)
1412 min_integ_group_size = ngroup/max_repl_group_size
1415 DO iib = max(min(min_integ_group_size, ngroup), 1), ngroup
1417 IF (mod(ngroup, iib) == 0)
THEN
1418 integ_group_size = iib
1421 integ_group_size = integ_group_size + 1
1425 integ_group_size = ngroup/mp2_env%ri_mp2%number_integration_groups
1428 IF (unit_nr > 0)
THEN
1429 WRITE (unit=unit_nr, fmt=
"(T3,A,T75,i6)") &
1430 "RI_INFO| Group size for integral replication:", integ_group_size*para_env_sub%num_pe
1431 CALL m_flush(unit_nr)
1434 num_integ_group = ngroup/integ_group_size
1436 CALL timestop(handle)
1438 END SUBROUTINE mp2_ri_get_integ_group_size
1458 SUBROUTINE mp2_ri_get_block_size(mp2_env, para_env, para_env_sub, gd_array, gd_B_virtual, &
1459 homo, virtual, dimen_RI, unit_nr, &
1460 block_size, ngroup, num_integ_group, &
1461 my_open_shell_ss, calc_forces, buffer_1D)
1462 TYPE(mp2_type) :: mp2_env
1463 TYPE(mp_para_env_type),
INTENT(IN) :: para_env, para_env_sub
1464 TYPE(group_dist_d1_type),
INTENT(IN) :: gd_array
1465 TYPE(group_dist_d1_type),
DIMENSION(:),
INTENT(IN) :: gd_b_virtual
1466 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
1467 INTEGER,
INTENT(IN) :: dimen_ri, unit_nr
1468 INTEGER,
INTENT(OUT) :: block_size, ngroup
1469 INTEGER,
INTENT(IN) :: num_integ_group
1470 LOGICAL,
INTENT(IN) :: my_open_shell_ss, calc_forces
1471 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:), &
1472 INTENT(OUT) :: buffer_1d
1474 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_get_block_size'
1476 INTEGER :: best_block_size, handle, num_ij_blocks
1477 INTEGER(KIND=int_8) :: buffer_size, mem
1478 REAL(kind=dp) :: mem_base, mem_per_blk, mem_per_repl_blk, &
1481 CALL timeset(routinen, handle)
1483 ngroup = para_env%num_pe/para_env_sub%num_pe
1486 mem_real = (mem + 1024*1024 - 1)/(1024*1024)
1487 CALL para_env%min(mem_real)
1490 mem_per_blk = 0.0_dp
1491 mem_per_repl_blk = 0.0_dp
1494 mem_base = mem_base + maxval(maxsize(gd_b_virtual))*max(dimen_ri, maxval(virtual))*8.0_dp/(1024**2)
1496 mem_per_repl_blk = mem_per_repl_blk + real(maxval(maxsize(gd_b_virtual)), kind=dp)*maxsize(gd_array)*8.0_dp/(1024**2)
1498 mem_per_blk = mem_per_blk + 2.0_dp*maxval(maxsize(gd_b_virtual))*real(dimen_ri, kind=dp)*8.0_dp/(1024**2)
1500 mem_base = mem_base + maxval(maxsize(gd_b_virtual))*max(dimen_ri, maxval(virtual))*8.0_dp/(1024**2)
1502 IF (calc_forces)
THEN
1504 mem_per_blk = mem_per_blk + 3.0_dp*maxval(maxsize(gd_b_virtual))*dimen_ri*8.0_dp/(1024**2)
1506 mem_base = mem_base + maxval(maxsize(gd_b_virtual))*max(dimen_ri, maxval(virtual))*8.0_dp/(1024**2)
1512 IF (mp2_env%ri_mp2%block_size > 0)
THEN
1513 best_block_size = mp2_env%ri_mp2%block_size
1515 best_block_size = max(floor((mem_real - mem_base)/(mem_per_blk + mem_per_repl_blk*ngroup/num_integ_group)), 1)
1518 IF (
SIZE(homo) == 1)
THEN
1519 IF (.NOT. my_open_shell_ss)
THEN
1520 num_ij_blocks = (homo(1)/best_block_size)
1521 num_ij_blocks = (num_ij_blocks*num_ij_blocks - num_ij_blocks)/2
1523 num_ij_blocks = ((homo(1) - 1)/best_block_size)
1524 num_ij_blocks = (num_ij_blocks*num_ij_blocks - num_ij_blocks)/2
1527 num_ij_blocks = product(homo/best_block_size)
1530 IF ((num_ij_blocks >= ngroup .AND. num_ij_blocks > 0) .OR. best_block_size == 1)
THEN
1533 best_block_size = best_block_size - 1
1537 IF (
SIZE(homo) == 1)
THEN
1538 IF (my_open_shell_ss)
THEN
1541 best_block_size = min(floor(sqrt(real(homo(1) - 1, kind=dp))), best_block_size)
1544 best_block_size = min(floor(sqrt(real(homo(1), kind=dp))), best_block_size)
1548 block_size = max(1, best_block_size)
1550 IF (unit_nr > 0)
THEN
1551 WRITE (unit=unit_nr, fmt=
"(T3,A,T75,i6)") &
1552 "RI_INFO| Block size:", block_size
1553 CALL m_flush(unit_nr)
1557 buffer_size = max(int(maxsize(gd_array), kind=int_8)*block_size, int(max(dimen_ri, maxval(virtual)), kind=int_8)) &
1558 *maxval(maxsize(gd_b_virtual))
1560 IF (calc_forces) buffer_size = buffer_size*2
1561 ALLOCATE (buffer_1d(buffer_size))
1563 CALL timestop(handle)
1565 END SUBROUTINE mp2_ri_get_block_size
1596 SUBROUTINE mp2_update_p_gamma(mp2_env, para_env_sub, gd_B_virtual, &
1597 Eigenval, homo, dimen_RI, iiB, jjB, my_B_size, &
1598 my_B_virtual_end, my_B_virtual_start, my_i, my_j, virtual, local_ab, &
1599 t_ab, my_local_i_aL, my_local_j_aL, open_ss, Y_i_aP, Y_j_aP, &
1600 local_ba, ispin, jspin, dgemm_counter, buffer_1D)
1601 TYPE(mp2_type) :: mp2_env
1602 TYPE(mp_para_env_type),
INTENT(IN) :: para_env_sub
1603 TYPE(group_dist_d1_type),
DIMENSION(:),
INTENT(IN) :: gd_b_virtual
1604 REAL(kind=dp),
DIMENSION(:, :),
INTENT(IN) :: eigenval
1605 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo
1606 INTEGER,
INTENT(IN) :: dimen_ri, iib, jjb
1607 INTEGER,
DIMENSION(:),
INTENT(IN) :: my_b_size, my_b_virtual_end, &
1609 INTEGER,
INTENT(IN) :: my_i, my_j
1610 INTEGER,
DIMENSION(:),
INTENT(IN) :: virtual
1611 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1612 INTENT(INOUT),
TARGET :: local_ab
1613 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1614 INTENT(IN),
TARGET :: t_ab, my_local_i_al, my_local_j_al
1615 LOGICAL,
INTENT(IN) :: open_ss
1616 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1617 INTENT(INOUT),
TARGET :: y_i_ap, y_j_ap, local_ba
1618 INTEGER,
INTENT(IN) :: ispin, jspin
1619 TYPE(dgemm_counter_type),
INTENT(INOUT) :: dgemm_counter
1620 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:),
TARGET :: buffer_1d
1622 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_update_P_gamma'
1624 INTEGER :: a, b, b_global, handle, proc_receive, proc_send, proc_shift, rec_b_size, &
1625 rec_b_virtual_end, rec_b_virtual_start, send_b_size, send_b_virtual_end, &
1626 send_b_virtual_start
1627 INTEGER(KIND=int_8) :: offset
1628 LOGICAL :: alpha_beta
1629 REAL(kind=dp) :: factor, p_ij_diag
1630 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1631 POINTER :: external_ab, send_ab
1633 CALL timeset(routinen//
"_Pia", handle)
1635 alpha_beta = .NOT. (ispin == jspin)
1642 DO b = 1, my_b_size(jspin)
1643 b_global = b + my_b_virtual_start(jspin) - 1
1644 DO a = 1, virtual(ispin)
1645 local_ab(a, b) = -local_ab(a, b)/ &
1646 (eigenval(homo(ispin) + a, ispin) + eigenval(homo(jspin) + b_global, jspin) - &
1647 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, jspin))
1650 IF (.NOT. (alpha_beta))
THEN
1651 p_ij_diag = -sum(local_ab*t_ab)*factor
1654 p_ij_diag = -sum(local_ab*local_ab)*mp2_env%scale_S
1656 DO b = 1, my_b_size(ispin)
1657 b_global = b + my_b_virtual_start(ispin) - 1
1658 DO a = 1, virtual(jspin)
1659 local_ba(a, b) = -local_ba(a, b)/ &
1660 (eigenval(homo(jspin) + a, jspin) + eigenval(homo(ispin) + b_global, ispin) - &
1661 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, jspin))
1668 CALL dgemm_counter_start(dgemm_counter)
1669 IF (.NOT. (alpha_beta))
THEN
1670 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', my_b_size(ispin), my_b_size(ispin), virtual(ispin), 1.0_dp, &
1671 t_ab, virtual(ispin), local_ab, virtual(ispin), &
1672 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array(:, &
1673 my_b_virtual_start(ispin):my_b_virtual_end(ispin)), my_b_size(ispin))
1674 mp2_env%ri_grad%P_ij(ispin)%array(my_i + iib - 1, my_i + iib - 1) = &
1675 mp2_env%ri_grad%P_ij(ispin)%array(my_i + iib - 1, my_i + iib - 1) + p_ij_diag
1677 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', my_b_size(ispin), my_b_size(ispin), virtual(jspin), mp2_env%scale_S, &
1678 local_ba, virtual(jspin), local_ba, virtual(jspin), 1.0_dp, &
1679 mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_virtual_start(ispin):my_b_virtual_end(ispin)), my_b_size(ispin))
1681 mp2_env%ri_grad%P_ij(ispin)%array(my_i + iib - 1, my_i + iib - 1) = &
1682 mp2_env%ri_grad%P_ij(ispin)%array(my_i + iib - 1, my_i + iib - 1) + p_ij_diag
1684 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', my_b_size(jspin), my_b_size(jspin), virtual(ispin), mp2_env%scale_S, &
1685 local_ab, virtual(ispin), local_ab, virtual(ispin), 1.0_dp, &
1686 mp2_env%ri_grad%P_ab(jspin)%array(:, my_b_virtual_start(jspin):my_b_virtual_end(jspin)), my_b_size(jspin))
1688 mp2_env%ri_grad%P_ij(jspin)%array(my_j + jjb - 1, my_j + jjb - 1) = &
1689 mp2_env%ri_grad%P_ij(jspin)%array(my_j + jjb - 1, my_j + jjb - 1) + p_ij_diag
1694 IF ((my_i /= my_j) .AND. (.NOT. alpha_beta))
THEN
1696 CALL mp2_env%local_gemm_ctx%gemm(
'N',
'T', my_b_size(ispin), virtual(ispin), my_b_size(ispin), 1.0_dp, &
1697 t_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
1698 local_ab, virtual(ispin), &
1699 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array, my_b_size(ispin))
1701 mp2_env%ri_grad%P_ij(ispin)%array(my_j + jjb - 1, my_j + jjb - 1) = &
1702 mp2_env%ri_grad%P_ij(ispin)%array(my_j + jjb - 1, my_j + jjb - 1) + p_ij_diag
1704 DO proc_shift = 1, para_env_sub%num_pe - 1
1705 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1706 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1708 CALL get_group_dist(gd_b_virtual(jspin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
1709 CALL get_group_dist(gd_b_virtual(jspin), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)
1711 external_ab(1:virtual(ispin), 1:rec_b_size) => buffer_1d(1:int(virtual(ispin), int_8)*rec_b_size)
1712 external_ab = 0.0_dp
1714 CALL para_env_sub%sendrecv(local_ab, proc_send, &
1715 external_ab, proc_receive)
1717 IF (.NOT. (alpha_beta))
THEN
1718 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', my_b_size(ispin), rec_b_size, virtual(ispin), 1.0_dp, &
1719 t_ab, virtual(ispin), external_ab, virtual(ispin), &
1720 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array(:, rec_b_virtual_start:rec_b_virtual_end), my_b_size(ispin))
1722 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', my_b_size(jspin), rec_b_size, virtual(ispin), mp2_env%scale_S, &
1723 local_ab, virtual(ispin), external_ab, virtual(ispin), &
1724 1.0_dp, mp2_env%ri_grad%P_ab(jspin)%array(:, rec_b_virtual_start:rec_b_virtual_end), &
1729 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
1730 CALL get_group_dist(gd_b_virtual(ispin), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)
1731 external_ab(1:virtual(jspin), 1:rec_b_size) => buffer_1d(1:int(virtual(jspin), int_8)*rec_b_size)
1732 external_ab = 0.0_dp
1733 CALL para_env_sub%sendrecv(local_ba, proc_send, &
1734 external_ab, proc_receive)
1735 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', my_b_size(ispin), rec_b_size, virtual(jspin), mp2_env%scale_S, &
1736 local_ba, virtual(jspin), external_ab, virtual(jspin), &
1737 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array(:, rec_b_virtual_start:rec_b_virtual_end), my_b_size(ispin))
1740 IF ((my_i /= my_j) .AND. (.NOT. alpha_beta))
THEN
1741 external_ab(1:my_b_size(ispin), 1:virtual(ispin)) => &
1742 buffer_1d(1:int(virtual(ispin), int_8)*my_b_size(ispin))
1743 external_ab = 0.0_dp
1745 offset = int(virtual(ispin), int_8)*my_b_size(ispin)
1747 send_ab(1:send_b_size, 1:virtual(ispin)) => buffer_1d(offset + 1:offset + int(send_b_size, int_8)*virtual(ispin))
1750 CALL mp2_env%local_gemm_ctx%gemm(
'N',
'T', send_b_size, virtual(ispin), my_b_size(ispin), 1.0_dp, &
1751 t_ab(send_b_virtual_start:send_b_virtual_end, :), send_b_size, &
1752 local_ab, virtual(ispin), 0.0_dp, send_ab, send_b_size)
1753 CALL para_env_sub%sendrecv(send_ab, proc_send, &
1754 external_ab, proc_receive)
1756 mp2_env%ri_grad%P_ab(ispin)%array(:, :) = mp2_env%ri_grad%P_ab(ispin)%array + external_ab
1760 IF (.NOT. alpha_beta)
THEN
1761 IF (my_i /= my_j)
THEN
1762 CALL dgemm_counter_stop(dgemm_counter, 2*my_b_size(ispin), virtual(ispin), virtual(ispin))
1764 CALL dgemm_counter_stop(dgemm_counter, my_b_size(ispin), virtual(ispin), virtual(ispin))
1767 CALL dgemm_counter_stop(dgemm_counter, sum(my_b_size), virtual(ispin), virtual(jspin))
1769 CALL timestop(handle)
1773 CALL timeset(routinen//
"_Gamma", handle)
1774 CALL dgemm_counter_start(dgemm_counter)
1775 IF (.NOT. alpha_beta)
THEN
1777 CALL mp2_env%local_gemm_ctx%gemm(
'N',
'T', my_b_size(ispin), dimen_ri, my_b_size(ispin), 1.0_dp, &
1778 t_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
1779 my_local_j_al, dimen_ri, 1.0_dp, y_i_ap, my_b_size(ispin))
1781 CALL mp2_env%local_gemm_ctx%gemm(
'N',
'T', my_b_size(ispin), dimen_ri, my_b_size(jspin), mp2_env%scale_S, &
1782 local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
1783 my_local_j_al, dimen_ri, 1.0_dp, y_i_ap, my_b_size(ispin))
1784 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'T', my_b_size(jspin), dimen_ri, my_b_size(ispin), mp2_env%scale_S, &
1785 local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
1786 my_local_i_al, dimen_ri, 1.0_dp, y_j_ap, my_b_size(jspin))
1789 IF (para_env_sub%num_pe > 1)
THEN
1790 external_ab(1:my_b_size(ispin), 1:dimen_ri) => buffer_1d(1:int(my_b_size(ispin), int_8)*dimen_ri)
1791 external_ab = 0.0_dp
1793 offset = int(my_b_size(ispin), int_8)*dimen_ri
1796 DO proc_shift = 1, para_env_sub%num_pe - 1
1797 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1798 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1800 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
1801 CALL get_group_dist(gd_b_virtual(ispin), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)
1803 send_ab(1:send_b_size, 1:dimen_ri) => buffer_1d(offset + 1:offset + int(dimen_ri, int_8)*send_b_size)
1805 IF (.NOT. alpha_beta)
THEN
1806 CALL mp2_env%local_gemm_ctx%gemm(
'N',
'T', send_b_size, dimen_ri, my_b_size(ispin), 1.0_dp, &
1807 t_ab(send_b_virtual_start:send_b_virtual_end, :), send_b_size, &
1808 my_local_j_al, dimen_ri, 0.0_dp, send_ab, send_b_size)
1809 CALL para_env_sub%sendrecv(send_ab, proc_send, external_ab, proc_receive)
1811 y_i_ap(:, :) = y_i_ap + external_ab
1815 CALL mp2_env%local_gemm_ctx%gemm(
'N',
'T', send_b_size, dimen_ri, my_b_size(jspin), mp2_env%scale_S, &
1816 local_ab(send_b_virtual_start:send_b_virtual_end, :), send_b_size, &
1817 my_local_j_al, dimen_ri, 0.0_dp, send_ab, send_b_size)
1818 CALL para_env_sub%sendrecv(send_ab, proc_send, external_ab, proc_receive)
1819 y_i_ap(:, :) = y_i_ap + external_ab
1823 IF (alpha_beta)
THEN
1825 IF (para_env_sub%num_pe > 1)
THEN
1826 external_ab(1:my_b_size(jspin), 1:dimen_ri) => buffer_1d(1:int(my_b_size(jspin), int_8)*dimen_ri)
1827 external_ab = 0.0_dp
1829 offset = int(my_b_size(jspin), int_8)*dimen_ri
1831 DO proc_shift = 1, para_env_sub%num_pe - 1
1832 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1833 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1835 CALL get_group_dist(gd_b_virtual(jspin), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)
1836 send_ab(1:send_b_size, 1:dimen_ri) => buffer_1d(offset + 1:offset + int(dimen_ri, int_8)*send_b_size)
1838 CALL mp2_env%local_gemm_ctx%gemm(
'N',
'T', send_b_size, dimen_ri, my_b_size(ispin), mp2_env%scale_S, &
1839 local_ba(send_b_virtual_start:send_b_virtual_end, :), send_b_size, &
1840 my_local_i_al, dimen_ri, 0.0_dp, send_ab, send_b_size)
1841 CALL para_env_sub%sendrecv(send_ab, proc_send, external_ab, proc_receive)
1842 y_j_ap(:, :) = y_j_ap + external_ab
1847 CALL dgemm_counter_stop(dgemm_counter, 3*virtual(ispin), dimen_ri, my_b_size(jspin))
1849 CALL dgemm_counter_stop(dgemm_counter, virtual(ispin), dimen_ri, my_b_size(ispin))
1852 IF ((my_i /= my_j) .AND. (.NOT. alpha_beta))
THEN
1854 CALL dgemm_counter_start(dgemm_counter)
1855 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'T', my_b_size(ispin), dimen_ri, my_b_size(ispin), 1.0_dp, &
1856 t_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
1857 my_local_i_al, dimen_ri, 1.0_dp, y_j_ap, my_b_size(ispin))
1858 DO proc_shift = 1, para_env_sub%num_pe - 1
1859 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1860 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1862 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
1864 external_ab(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri, int_8)*rec_b_size)
1865 external_ab = 0.0_dp
1867 CALL para_env_sub%sendrecv(my_local_i_al, proc_send, &
1868 external_ab, proc_receive)
1871 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'T', my_b_size(ispin), dimen_ri, rec_b_size, 1.0_dp, &
1872 t_ab(rec_b_virtual_start:rec_b_virtual_end, :), rec_b_size, &
1873 external_ab, dimen_ri, 1.0_dp, y_j_ap, my_b_size(ispin))
1876 CALL dgemm_counter_stop(dgemm_counter, my_b_size(ispin), dimen_ri, virtual(ispin))
1879 CALL timestop(handle)
1880 END SUBROUTINE mp2_update_p_gamma
1903 SUBROUTINE mp2_redistribute_gamma(Gamma_P_ia, ij_index, my_B_size, &
1904 my_block_size, my_group_L_size, my_i, my_ij_pairs, ngroup, &
1905 num_integ_group, integ_group_pos2color_sub, num_ij_pairs, &
1906 ij_map, ranges_info_array, Y_i_aP, comm_exchange, &
1907 sizes_array, spin, buffer_1D)
1909 REAL(kind=dp),
DIMENSION(:, :, :),
INTENT(INOUT) :: gamma_p_ia
1910 INTEGER,
INTENT(IN) :: ij_index, my_b_size, my_block_size, &
1911 my_group_l_size, my_i, my_ij_pairs, &
1912 ngroup, num_integ_group
1913 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(IN) :: integ_group_pos2color_sub, num_ij_pairs
1914 INTEGER,
ALLOCATABLE,
DIMENSION(:, :),
INTENT(IN) :: ij_map
1915 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :), &
1916 INTENT(IN) :: ranges_info_array
1917 REAL(kind=dp),
DIMENSION(:, :, :),
INTENT(IN) :: y_i_ap
1918 TYPE(mp_comm_type),
INTENT(IN) :: comm_exchange
1919 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(IN) :: sizes_array
1920 INTEGER,
INTENT(IN) :: spin
1921 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:),
TARGET :: buffer_1d
1923 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_redistribute_gamma'
1925 INTEGER :: end_point, handle, handle2, iib, ij_counter_rec, irep, kkk, lll, lstart_pos, &
1926 proc_receive, proc_send, proc_shift, rec_i, rec_ij_index, send_l_size, start_point, tag
1927 INTEGER(KIND=int_8) :: offset
1928 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
1929 POINTER :: bi_c_rec, bi_c_send
1933 CALL timeset(routinen//
"_comm2", handle)
1937 IF (ij_index <= my_ij_pairs)
THEN
1940 CALL timeset(routinen//
"_comm2_w", handle2)
1941 DO irep = 0, num_integ_group - 1
1942 lstart_pos = ranges_info_array(1, irep, comm_exchange%mepos)
1943 start_point = ranges_info_array(3, irep, comm_exchange%mepos)
1944 end_point = ranges_info_array(4, irep, comm_exchange%mepos)
1949 DO kkk = start_point, end_point
1950 lll = kkk - start_point + lstart_pos
1951 DO iib = 1, my_block_size
1952 gamma_p_ia(1:my_b_size, my_i + iib - 1, kkk) = &
1953 gamma_p_ia(1:my_b_size, my_i + iib - 1, kkk) + &
1954 y_i_ap(1:my_b_size, lll, iib)
1959 CALL timestop(handle2)
1963 DO proc_shift = 1, comm_exchange%num_pe - 1
1964 proc_send =
modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
1965 proc_receive =
modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
1967 send_l_size = sizes_array(proc_send)
1968 bi_c_send(1:my_b_size, 1:my_block_size, 1:send_l_size) => &
1969 buffer_1d(1:int(my_b_size, int_8)*my_block_size*send_l_size)
1971 offset = int(my_b_size, int_8)*my_block_size*send_l_size
1973 CALL timeset(routinen//
"_comm2_w", handle2)
1975 DO irep = 0, num_integ_group - 1
1976 lstart_pos = ranges_info_array(1, irep, proc_send)
1977 start_point = ranges_info_array(3, irep, proc_send)
1978 end_point = ranges_info_array(4, irep, proc_send)
1983 DO kkk = start_point, end_point
1984 lll = kkk - start_point + lstart_pos
1985 DO iib = 1, my_block_size
1986 bi_c_send(1:my_b_size, iib, kkk) = y_i_ap(1:my_b_size, lll, iib)
1991 CALL timestop(handle2)
1993 rec_ij_index = num_ij_pairs(proc_receive)
1995 IF (ij_index <= rec_ij_index)
THEN
1998 (ij_index - min(1, integ_group_pos2color_sub(proc_receive)))*ngroup + integ_group_pos2color_sub(proc_receive)
2000 rec_i = ij_map(spin, ij_counter_rec)
2002 bi_c_rec(1:my_b_size, 1:my_block_size, 1:my_group_l_size) => &
2003 buffer_1d(offset + 1:offset + int(my_b_size, int_8)*my_block_size*my_group_l_size)
2006 CALL comm_exchange%sendrecv(bi_c_send, proc_send, &
2007 bi_c_rec, proc_receive, tag)
2009 CALL timeset(routinen//
"_comm2_w", handle2)
2010 DO irep = 0, num_integ_group - 1
2011 start_point = ranges_info_array(3, irep, comm_exchange%mepos)
2012 end_point = ranges_info_array(4, irep, comm_exchange%mepos)
2016 gamma_p_ia(:, rec_i:rec_i + my_block_size - 1, start_point:end_point) = &
2017 gamma_p_ia(:, rec_i:rec_i + my_block_size - 1, start_point:end_point) + &
2018 bi_c_rec(1:my_b_size, :, start_point:end_point)
2021 CALL timestop(handle2)
2025 CALL comm_exchange%send(bi_c_send, proc_send, tag)
2033 DO proc_shift = 1, comm_exchange%num_pe - 1
2034 proc_send =
modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
2035 proc_receive =
modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
2036 rec_ij_index = num_ij_pairs(proc_receive)
2038 IF (ij_index <= rec_ij_index)
THEN
2041 (ij_index - min(1, integ_group_pos2color_sub(proc_receive)))*ngroup + integ_group_pos2color_sub(proc_receive)
2043 rec_i = ij_map(spin, ij_counter_rec)
2045 bi_c_rec(1:my_b_size, 1:my_block_size, 1:my_group_l_size) => &
2046 buffer_1d(1:int(my_b_size, int_8)*my_block_size*my_group_l_size)
2050 CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)
2052 CALL timeset(routinen//
"_comm2_w", handle2)
2053 DO irep = 0, num_integ_group - 1
2054 start_point = ranges_info_array(3, irep, comm_exchange%mepos)
2055 end_point = ranges_info_array(4, irep, comm_exchange%mepos)
2056#if !defined(__INTEL_LLVM_COMPILER) || (20250000 <= __INTEL_LLVM_COMPILER)
2061 gamma_p_ia(:, rec_i:rec_i + my_block_size - 1, start_point:end_point) = &
2062 gamma_p_ia(:, rec_i:rec_i + my_block_size - 1, start_point:end_point) + &
2063 bi_c_rec(1:my_b_size, :, start_point:end_point)
2064#if !defined(__INTEL_LLVM_COMPILER) || (20250000 <= __INTEL_LLVM_COMPILER)
2068 CALL timestop(handle2)
2074 CALL timestop(handle)
2076 END SUBROUTINE mp2_redistribute_gamma
2105 SUBROUTINE quasi_degenerate_p_ij(mp2_env, Eigenval, homo, virtual, open_shell, &
2106 beta_beta, Bib_C, unit_nr, dimen_RI, &
2107 my_B_size, ngroup, my_group_L_size, &
2108 color_sub, ranges_info_array, comm_exchange, para_env_sub, para_env, &
2109 my_B_virtual_start, my_B_virtual_end, sizes_array, gd_B_virtual, &
2110 integ_group_pos2color_sub, dgemm_counter, buffer_1D)
2111 TYPE(mp2_type) :: mp2_env
2112 REAL(kind=dp),
DIMENSION(:, :),
INTENT(IN) :: eigenval
2113 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
2114 LOGICAL,
INTENT(IN) :: open_shell, beta_beta
2115 TYPE(three_dim_real_array),
DIMENSION(:), &
2117 INTEGER,
INTENT(IN) :: unit_nr, dimen_ri
2118 INTEGER,
DIMENSION(:),
INTENT(IN) :: my_b_size
2119 INTEGER,
INTENT(IN) :: ngroup, my_group_l_size, color_sub
2120 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :), &
2121 INTENT(IN) :: ranges_info_array
2122 TYPE(mp_comm_type),
INTENT(IN) :: comm_exchange
2123 TYPE(mp_para_env_type),
INTENT(IN) :: para_env_sub, para_env
2124 INTEGER,
DIMENSION(:),
INTENT(IN) :: my_b_virtual_start, my_b_virtual_end
2125 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(IN) :: sizes_array
2126 TYPE(group_dist_d1_type),
DIMENSION(:),
INTENT(IN) :: gd_b_virtual
2127 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(IN) :: integ_group_pos2color_sub
2128 TYPE(dgemm_counter_type),
INTENT(INOUT) :: dgemm_counter
2129 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:),
TARGET :: buffer_1d
2131 CHARACTER(LEN=*),
PARAMETER :: routinen =
'quasi_degenerate_P_ij'
2133 INTEGER :: a, a_global, b, b_global, block_size, decil, handle, handle2, ijk_counter, &
2134 ijk_counter_send, ijk_index, ispin, kkb, kspin, max_block_size, max_ijk, my_i, my_ijk, &
2135 my_j, my_k, my_last_k(2), my_virtual, nspins, proc_receive, proc_send, proc_shift, &
2136 rec_b_size, rec_b_virtual_end, rec_b_virtual_start, rec_l_size, send_b_size, &
2137 send_b_virtual_end, send_b_virtual_start, send_i, send_ijk_index, send_j, send_k, &
2138 size_b_i, size_b_k, tag, tag2
2139 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: num_ijk
2140 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: ijk_map, send_last_k
2141 LOGICAL :: alpha_beta, do_recv_i, do_recv_j, &
2142 do_recv_k, do_send_i, do_send_j, &
2144 REAL(kind=dp) :: amp_fac, p_ij_elem, t_new, t_start
2145 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :), &
2146 TARGET :: local_ab, local_al_i, local_al_j, t_ab
2147 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: local_al_k
2148 REAL(kind=dp),
DIMENSION(:, :),
POINTER :: bi_c_rec, external_ab, external_al
2149 REAL(kind=dp),
DIMENSION(:, :, :),
POINTER :: bi_c_rec_3d
2151 CALL timeset(routinen//
"_ij_sing", handle)
2156 nspins =
SIZE(bib_c)
2157 alpha_beta = (nspins == 2)
2160 amp_fac = mp2_env%scale_S + mp2_env%scale_T
2161 IF (open_shell) amp_fac = mp2_env%scale_T
2163 ALLOCATE (send_last_k(2, comm_exchange%num_pe - 1))
2166 DO ispin = 1, nspins
2167 size_b_i = my_b_size(ispin)
2168 IF (ispin == 1 .AND. alpha_beta)
THEN
2173 size_b_k = my_b_size(kspin)
2177 CALL find_quasi_degenerate_ij(my_ijk, homo(ispin), homo(kspin), eigenval(:, ispin), mp2_env, ijk_map, unit_nr, ngroup, &
2178 .NOT. beta_beta .AND. ispin /= 2, comm_exchange, num_ijk, max_ijk, color_sub, &
2179 SIZE(buffer_1d), my_group_l_size, size_b_k, para_env, virtual(ispin), size_b_i)
2181 my_virtual = virtual(ispin)
2182 IF (
SIZE(ijk_map, 2) > 0)
THEN
2183 max_block_size = ijk_map(4, 1)
2188 ALLOCATE (local_al_i(dimen_ri, size_b_i))
2189 ALLOCATE (local_al_j(dimen_ri, size_b_i))
2190 ALLOCATE (local_al_k(dimen_ri, size_b_k, max_block_size))
2191 ALLOCATE (t_ab(my_virtual, size_b_k))
2196 t_start = m_walltime()
2197 DO ijk_index = 1, max_ijk
2200 IF (unit_nr > 0 .AND. ijk_index > 1)
THEN
2201 decil = ijk_index*10/max_ijk
2202 IF (decil /= (ijk_index - 1)*10/max_ijk)
THEN
2203 t_new = m_walltime()
2204 t_new = (t_new - t_start)/60.0_dp*(max_ijk - ijk_index + 1)/(ijk_index - 1)
2205 WRITE (unit_nr, fmt=
"(T3,A)")
"Percentage of finished loop: "// &
2206 cp_to_string(decil*10)//
". Minutes left: "//cp_to_string(t_new)
2207 CALL m_flush(unit_nr)
2211 IF (ijk_index <= my_ijk)
THEN
2213 ijk_counter = (ijk_index - min(1, color_sub))*ngroup + color_sub
2214 my_i = ijk_map(1, ijk_counter)
2215 my_j = ijk_map(2, ijk_counter)
2216 my_k = ijk_map(3, ijk_counter)
2217 block_size = ijk_map(4, ijk_counter)
2219 do_recv_i = (ispin /= kspin) .OR. my_i < my_k .OR. my_i > my_k + block_size - 1
2220 do_recv_j = (ispin /= kspin) .OR. my_j < my_k .OR. my_j > my_k + block_size - 1
2221 do_recv_k = my_k /= my_last_k(1) .OR. my_k + block_size - 1 /= my_last_k(2)
2223 my_last_k(2) = my_k + block_size - 1
2227 CALL fill_local_i_al_2d(local_al_i, ranges_info_array(:, :, comm_exchange%mepos), &
2228 bib_c(ispin)%array(:, :, my_i))
2233 CALL fill_local_i_al_2d(local_al_j, ranges_info_array(:, :, comm_exchange%mepos), &
2234 bib_c(ispin)%array(:, :, my_j))
2239 CALL fill_local_i_al(local_al_k(:, :, 1:block_size), ranges_info_array(:, :, comm_exchange%mepos), &
2240 bib_c(kspin)%array(:, :, my_k:my_k + block_size - 1))
2243 CALL timeset(routinen//
"_comm", handle2)
2244 DO proc_shift = 1, comm_exchange%num_pe - 1
2245 proc_send =
modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
2246 proc_receive =
modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
2248 send_ijk_index = num_ijk(proc_send)
2250 rec_l_size = sizes_array(proc_receive)
2251 bi_c_rec(1:rec_l_size, 1:size_b_i) => buffer_1d(1:int(rec_l_size, kind=int_8)*size_b_i)
2256 IF (ijk_index <= send_ijk_index)
THEN
2258 ijk_counter_send = (ijk_index - min(1, integ_group_pos2color_sub(proc_send)))* &
2259 ngroup + integ_group_pos2color_sub(proc_send)
2260 send_i = ijk_map(1, ijk_counter_send)
2261 send_j = ijk_map(2, ijk_counter_send)
2262 send_k = ijk_map(3, ijk_counter_send)
2264 do_send_i = (ispin /= kspin) .OR. send_i < send_k .OR. send_i > send_k + block_size - 1
2265 do_send_j = (ispin /= kspin) .OR. send_j < send_k .OR. send_j > send_k + block_size - 1
2266 do_send_k = send_k /= send_last_k(1, proc_shift) .OR. send_k + block_size - 1 /= send_last_k(2, proc_shift)
2267 send_last_k(1, proc_shift) = send_k
2268 send_last_k(2, proc_shift) = send_k + block_size - 1
2275 CALL comm_exchange%sendrecv(bib_c(ispin)%array(:, :, send_i), proc_send, &
2276 bi_c_rec, proc_receive, tag)
2278 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_i), proc_send, tag)
2280 ELSE IF (do_recv_i)
THEN
2281 CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)
2284 CALL fill_local_i_al_2d(local_al_i, ranges_info_array(:, :, proc_receive), bi_c_rec)
2291 CALL comm_exchange%sendrecv(bib_c(ispin)%array(:, :, send_j), proc_send, &
2292 bi_c_rec, proc_receive, tag)
2294 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_j), proc_send, tag)
2296 ELSE IF (do_recv_j)
THEN
2297 CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)
2300 CALL fill_local_i_al_2d(local_al_j, ranges_info_array(:, :, proc_receive), bi_c_rec)
2304 bi_c_rec_3d(1:rec_l_size, 1:size_b_k, 1:block_size) => &
2305 buffer_1d(1:int(rec_l_size, kind=int_8)*size_b_k*block_size)
2308 CALL comm_exchange%sendrecv(bib_c(kspin)%array(:, :, send_k:send_k + block_size - 1), proc_send, &
2309 bi_c_rec_3d, proc_receive, tag)
2311 CALL comm_exchange%send(bi_c_rec, proc_receive, tag)
2313 ELSE IF (do_recv_k)
THEN
2314 CALL comm_exchange%recv(bi_c_rec_3d, proc_receive, tag)
2317 CALL fill_local_i_al(local_al_k(:, :, 1:block_size), ranges_info_array(:, :, proc_receive), bi_c_rec_3d)
2321 IF (.NOT. do_recv_i) local_al_i(:, :) = local_al_k(:, :, my_i - my_k + 1)
2322 IF (.NOT. do_recv_j) local_al_j(:, :) = local_al_k(:, :, my_j - my_k + 1)
2323 CALL timestop(handle2)
2326 DO kkb = 1, block_size
2327 CALL timeset(routinen//
"_exp_ik", handle2)
2328 CALL dgemm_counter_start(dgemm_counter)
2329 ALLOCATE (local_ab(my_virtual, size_b_k))
2331 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', size_b_i, size_b_k, dimen_ri, 1.0_dp, &
2332 local_al_i, dimen_ri, local_al_k(:, :, kkb), dimen_ri, &
2333 0.0_dp, local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), 1:size_b_k), size_b_i)
2334 DO proc_shift = 1, para_env_sub%num_pe - 1
2335 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2336 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2338 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
2340 external_al(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri, kind=int_8)*rec_b_size)
2342 CALL comm_exchange%sendrecv(local_al_i, proc_send, &
2343 external_al, proc_receive, tag)
2345 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', rec_b_size, size_b_k, dimen_ri, 1.0_dp, &
2346 external_al, dimen_ri, local_al_k(:, :, kkb), dimen_ri, &
2347 0.0_dp, local_ab(rec_b_virtual_start:rec_b_virtual_end, 1:size_b_k), rec_b_size)
2349 CALL dgemm_counter_stop(dgemm_counter, my_virtual, size_b_k, dimen_ri)
2350 CALL timestop(handle2)
2353 CALL timeset(routinen//
"_tab", handle2)
2356 IF (.NOT. alpha_beta)
THEN
2358 b_global = b + my_b_virtual_start(1) - 1
2359 DO a = 1, my_b_size(1)
2360 a_global = a + my_b_virtual_start(1) - 1
2361 t_ab(a_global, b) = (amp_fac*local_ab(a_global, b) - mp2_env%scale_T*local_ab(b_global, a))/ &
2362 (eigenval(my_i, 1) + eigenval(my_k + kkb - 1, 1) &
2363 - eigenval(homo(1) + a_global, 1) - eigenval(homo(1) + b_global, 1))
2368 b_global = b + my_b_virtual_start(kspin) - 1
2369 DO a = 1, my_b_size(ispin)
2370 a_global = a + my_b_virtual_start(ispin) - 1
2371 t_ab(a_global, b) = mp2_env%scale_S*local_ab(a_global, b)/ &
2372 (eigenval(my_i, ispin) + eigenval(my_k + kkb - 1, kspin) &
2373 - eigenval(homo(ispin) + a_global, ispin) - eigenval(homo(kspin) + b_global, kspin))
2378 IF (.NOT. alpha_beta)
THEN
2379 DO proc_shift = 1, para_env_sub%num_pe - 1
2380 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2381 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2382 CALL get_group_dist(gd_b_virtual(1), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
2383 CALL get_group_dist(gd_b_virtual(1), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)
2385 external_ab(1:size_b_i, 1:rec_b_size) => buffer_1d(1:int(size_b_i, kind=int_8)*rec_b_size)
2386 CALL para_env_sub%sendrecv(local_ab(send_b_virtual_start:send_b_virtual_end, 1:size_b_k), proc_send, &
2387 external_ab(1:size_b_i, 1:rec_b_size), proc_receive, tag)
2389 DO b = 1, my_b_size(1)
2390 b_global = b + my_b_virtual_start(1) - 1
2391 DO a = 1, rec_b_size
2392 a_global = a + rec_b_virtual_start - 1
2393 t_ab(a_global, b) = (amp_fac*local_ab(a_global, b) - mp2_env%scale_T*external_ab(b, a))/ &
2394 (eigenval(my_i, 1) + eigenval(my_k + kkb - 1, 1) &
2395 - eigenval(homo(1) + a_global, 1) - eigenval(homo(1) + b_global, 1))
2400 CALL timestop(handle2)
2403 CALL timeset(routinen//
"_exp_jk", handle2)
2405 CALL dgemm_counter_start(dgemm_counter)
2406 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', size_b_i, size_b_k, dimen_ri, 1.0_dp, &
2407 local_al_j, dimen_ri, local_al_k(:, :, kkb), dimen_ri, &
2408 0.0_dp, local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), 1:size_b_k), size_b_i)
2409 DO proc_shift = 1, para_env_sub%num_pe - 1
2410 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2411 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2413 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
2415 external_al(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri, kind=int_8)*rec_b_size)
2417 CALL comm_exchange%sendrecv(local_al_j, proc_send, &
2418 external_al, proc_receive, tag)
2419 CALL mp2_env%local_gemm_ctx%gemm(
'T',
'N', rec_b_size, size_b_k, dimen_ri, 1.0_dp, &
2420 external_al, dimen_ri, local_al_k(:, :, kkb), dimen_ri, &
2421 0.0_dp, local_ab(rec_b_virtual_start:rec_b_virtual_end, 1:size_b_k), rec_b_size)
2423 CALL dgemm_counter_stop(dgemm_counter, my_virtual, size_b_k, dimen_ri)
2424 CALL timestop(handle2)
2426 CALL timeset(routinen//
"_Pij", handle2)
2428 b_global = b + my_b_virtual_start(kspin) - 1
2429 DO a = 1, my_b_size(ispin)
2430 a_global = a + my_b_virtual_start(ispin) - 1
2431 local_ab(a_global, b) = &
2432 local_ab(a_global, b)/(eigenval(my_j, ispin) + eigenval(my_k + kkb - 1, kspin) &
2433 - eigenval(homo(ispin) + a_global, ispin) - eigenval(homo(kspin) + b_global, kspin))
2437 p_ij_elem = sum(local_ab*t_ab)
2438 DEALLOCATE (local_ab)
2439 IF ((.NOT. open_shell) .AND. (.NOT. alpha_beta))
THEN
2440 p_ij_elem = p_ij_elem*2.0_dp
2443 mp2_env%ri_grad%P_ij(2)%array(my_i, my_j) = mp2_env%ri_grad%P_ij(2)%array(my_i, my_j) - p_ij_elem
2444 mp2_env%ri_grad%P_ij(2)%array(my_j, my_i) = mp2_env%ri_grad%P_ij(2)%array(my_j, my_i) - p_ij_elem
2446 mp2_env%ri_grad%P_ij(ispin)%array(my_i, my_j) = mp2_env%ri_grad%P_ij(ispin)%array(my_i, my_j) - p_ij_elem
2447 mp2_env%ri_grad%P_ij(ispin)%array(my_j, my_i) = mp2_env%ri_grad%P_ij(ispin)%array(my_j, my_i) - p_ij_elem
2449 CALL timestop(handle2)
2452 CALL timeset(routinen//
"_comm", handle2)
2454 DO proc_shift = 1, comm_exchange%num_pe - 1
2455 proc_send =
modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
2456 proc_receive =
modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
2458 send_ijk_index = num_ijk(proc_send)
2460 IF (ijk_index <= send_ijk_index)
THEN
2462 ijk_counter_send = (ijk_index - min(1, integ_group_pos2color_sub(proc_send)))*ngroup + &
2463 integ_group_pos2color_sub(proc_send)
2464 send_i = ijk_map(1, ijk_counter_send)
2465 send_j = ijk_map(2, ijk_counter_send)
2466 send_k = ijk_map(3, ijk_counter_send)
2467 block_size = ijk_map(4, ijk_counter_send)
2469 do_send_i = (ispin /= kspin) .OR. send_i < send_k .OR. send_i > send_k + block_size - 1
2470 do_send_j = (ispin /= kspin) .OR. send_j < send_k .OR. send_j > send_k + block_size - 1
2473 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_i), proc_send, tag)
2477 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_j), proc_send, tag)
2480 CALL comm_exchange%send(bib_c(kspin)%array(:, :, send_k:send_k + block_size - 1), proc_send, tag)
2484 CALL timestop(handle2)
2487 DEALLOCATE (local_al_i)
2488 DEALLOCATE (local_al_j)
2489 DEALLOCATE (local_al_k)
2491 DEALLOCATE (ijk_map)
2493 CALL timestop(handle)
2495 END SUBROUTINE quasi_degenerate_p_ij
2519 SUBROUTINE find_quasi_degenerate_ij(my_ijk, homo, homo_beta, Eigenval, mp2_env, ijk_map, unit_nr, ngroup, &
2520 do_print_alpha, comm_exchange, num_ijk, max_ijk, color_sub, &
2521 buffer_size, my_group_L_size, B_size_k, para_env, virtual, B_size_i)
2523 INTEGER,
INTENT(OUT) :: my_ijk
2524 INTEGER,
INTENT(IN) :: homo, homo_beta
2525 REAL(kind=dp),
DIMENSION(:),
INTENT(IN) :: eigenval
2526 TYPE(mp2_type),
INTENT(IN) :: mp2_env
2527 INTEGER,
ALLOCATABLE,
DIMENSION(:, :),
INTENT(OUT) :: ijk_map
2528 INTEGER,
INTENT(IN) :: unit_nr, ngroup
2529 LOGICAL,
INTENT(IN) :: do_print_alpha
2530 TYPE(mp_comm_type),
INTENT(IN) :: comm_exchange
2531 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(OUT) :: num_ijk
2532 INTEGER,
INTENT(OUT) :: max_ijk
2533 INTEGER,
INTENT(IN) :: color_sub, buffer_size, my_group_l_size, &
2535 TYPE(mp_para_env_type),
INTENT(IN) :: para_env
2536 INTEGER,
INTENT(IN) :: virtual, b_size_i
2538 INTEGER :: block_size, communication_steps, communication_volume, iib, ij_counter, &
2539 ijk_counter, jjb, kkb, max_block_size, max_num_k_blocks, min_communication_volume, &
2540 my_steps, num_k_blocks, num_sing_ij, total_ijk
2541 INTEGER(KIND=int_8) :: mem
2542 LOGICAL,
ALLOCATABLE,
DIMENSION(:, :) :: ijk_marker
2544 ALLOCATE (num_ijk(0:comm_exchange%num_pe - 1))
2549 DO jjb = iib + 1, homo
2550 IF (abs(eigenval(jjb) - eigenval(iib)) < mp2_env%ri_grad%eps_canonical) &
2551 num_sing_ij = num_sing_ij + 1
2555 IF (unit_nr > 0)
THEN
2556 IF (do_print_alpha)
THEN
2557 WRITE (unit=unit_nr, fmt=
"(T3,A,T75,i6)") &
2558 "MO_INFO| Number of ij pairs below EPS_CANONICAL:", num_sing_ij
2560 WRITE (unit=unit_nr, fmt=
"(T3,A,T75,i6)") &
2561 "MO_INFO| Number of ij pairs (spin beta) below EPS_CANONICAL:", num_sing_ij
2566 max_block_size = buffer_size/(my_group_l_size*b_size_k)
2573 mem = mem - 2_int_8*(virtual*b_size_k + b_size_i*my_group_l_size)
2574 max_block_size = min(max_block_size, max(1, int(mem/(my_group_l_size*b_size_k), kind(max_block_size))))
2577 CALL para_env%min(max_block_size)
2581 min_communication_volume = 3*homo_beta*num_sing_ij
2582 communication_steps = 3*homo_beta*num_sing_ij
2583 DO iib = max_block_size, 2, -1
2584 max_num_k_blocks = homo_beta/iib*num_sing_ij
2585 num_k_blocks = max_num_k_blocks - mod(max_num_k_blocks, ngroup)
2586 communication_volume = num_k_blocks*(2 + iib) + 3*(homo_beta*num_sing_ij - iib*num_k_blocks)
2587 my_steps = num_k_blocks + homo_beta*num_sing_ij - iib*num_k_blocks
2588 IF (communication_volume < min_communication_volume)
THEN
2590 min_communication_volume = communication_volume
2591 communication_steps = my_steps
2592 ELSE IF (communication_volume == min_communication_volume .AND. my_steps < communication_steps)
THEN
2594 communication_steps = my_steps
2598 IF (unit_nr > 0)
THEN
2599 WRITE (unit=unit_nr, fmt=
"(T3,A,T75,i6)") &
2600 "MO_INFO| Block size:", block_size
2601 CALL m_flush(unit_nr)
2605 max_num_k_blocks = homo_beta/block_size*num_sing_ij
2606 num_k_blocks = max_num_k_blocks - mod(max_num_k_blocks, ngroup)
2608 total_ijk = num_k_blocks + homo_beta*num_sing_ij - num_k_blocks*block_size
2609 ALLOCATE (ijk_map(4, total_ijk))
2611 ALLOCATE (ijk_marker(homo_beta, num_sing_ij))
2619 DO jjb = iib + 1, homo
2620 IF (abs(eigenval(jjb) - eigenval(iib)) >= mp2_env%ri_grad%eps_canonical) cycle
2621 ij_counter = ij_counter + 1
2622 DO kkb = 1, homo_beta - mod(homo_beta, block_size), block_size
2623 IF (ijk_counter + 1 > num_k_blocks)
EXIT
2624 ijk_counter = ijk_counter + 1
2625 ijk_marker(kkb:kkb + block_size - 1, ij_counter) = .false.
2626 ijk_map(1, ijk_counter) = iib
2627 ijk_map(2, ijk_counter) = jjb
2628 ijk_map(3, ijk_counter) = kkb
2629 ijk_map(4, ijk_counter) = block_size
2630 IF (mod(ijk_counter, ngroup) == color_sub) my_ijk = my_ijk + 1
2637 DO jjb = iib + 1, homo
2638 IF (abs(eigenval(jjb) - eigenval(iib)) >= mp2_env%ri_grad%eps_canonical) cycle
2639 ij_counter = ij_counter + 1
2640 DO kkb = 1, homo_beta
2641 IF (ijk_marker(kkb, ij_counter))
THEN
2642 ijk_counter = ijk_counter + 1
2643 ijk_map(1, ijk_counter) = iib
2644 ijk_map(2, ijk_counter) = jjb
2645 ijk_map(3, ijk_counter) = kkb
2646 ijk_map(4, ijk_counter) = 1
2647 IF (mod(ijk_counter, ngroup) == color_sub) my_ijk = my_ijk + 1
2653 DEALLOCATE (ijk_marker)
2655 CALL comm_exchange%allgather(my_ijk, num_ijk)
2656 max_ijk = maxval(num_ijk)
2658 END SUBROUTINE find_quasi_degenerate_ij
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
various routines to log and control the output. The idea is that decisions about where to log should ...
Counters to determine the performance of parallel DGEMMs.
elemental subroutine, public dgemm_counter_init(dgemm_counter, unit_nr, print_info)
Initialize a dgemm_counter.
subroutine, public dgemm_counter_write(dgemm_counter, para_env)
calculate and print flop rates
subroutine, public dgemm_counter_start(dgemm_counter)
start timer of the counter
subroutine, public dgemm_counter_stop(dgemm_counter, size1, size2, size3)
stop timer of the counter and provide matrix sizes
Types to describe group distributions.
elemental integer function, public maxsize(this)
...
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
2- and 3-center electron repulsion integral routines based on libint2 Currently available operators: ...
pure logical function, public compare_potential_types(potential1, potential2)
Helper function to compare libint_potential_types.
integer, parameter, public local_gemm_pu_gpu
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_memory(mem)
Returns the total amount of memory [bytes] in use, if known, zero otherwise.
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Interface to the message passing library MPI.
Routines to calculate RI-GPW-MP2 energy using pw.
subroutine, public mp2_ri_gpw_compute_en(emp2_cou, emp2_ex, emp2_s, emp2_t, bib_c, mp2_env, para_env, para_env_sub, color_sub, gd_array, gd_b_virtual, eigenval, nmo, homo, dimen_ri, unit_nr, calc_forces, calc_ex)
...
Routines for calculating RI-MP2 gradients.
subroutine, public complete_gamma(mp2_env, b_ia_q, dimen_ri, homo, virtual, para_env, para_env_sub, ngroup, my_group_l_size, my_group_l_start, my_group_l_end, my_b_size, my_b_virtual_start, gd_array, gd_b_virtual, kspin)
complete the calculation of the Gamma matrices
Types needed for MP2 calculations.
stores all the informations relevant to an mpi environment