15 USE iso_c_binding,
ONLY: c_null_ptr,&
45#include "./base/base_uses.f90"
51 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'mp2_ri_gpw'
78 SUBROUTINE mp2_ri_gpw_compute_en(Emp2_Cou, Emp2_EX, Emp2_S, Emp2_T, BIb_C, mp2_env, para_env, para_env_sub, color_sub, &
79 gd_array, gd_B_virtual, &
80 Eigenval, nmo, homo, dimen_RI, unit_nr, calc_forces, calc_ex)
81 REAL(kind=
dp),
INTENT(INOUT) :: emp2_cou, emp2_ex, emp2_s, emp2_t
83 INTENT(INOUT) :: bib_c
86 INTEGER,
INTENT(IN) :: color_sub
88 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo
89 INTEGER,
INTENT(IN) :: nmo
90 REAL(kind=
dp),
DIMENSION(:, :),
INTENT(IN) :: eigenval
92 INTENT(INOUT) :: gd_b_virtual
93 INTEGER,
INTENT(IN) :: dimen_ri, unit_nr
94 LOGICAL,
INTENT(IN) :: calc_forces, calc_ex
96 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_gpw_compute_en'
98 INTEGER :: a, a_global, b, b_global, block_size, decil, end_point, handle, handle2, handle3, &
99 iib, ij_counter, ij_counter_send, ij_index, integ_group_size, ispin, jjb, jspin, &
100 max_ij_pairs, my_block_size, my_group_l_end, my_group_l_size, my_group_l_size_orig, &
101 my_group_l_start, my_i, my_ij_pairs, my_j, my_new_group_l_size, ngroup, nspins, &
102 num_integ_group, proc_receive, proc_send, proc_shift, rec_b_size, rec_b_virtual_end, &
103 rec_b_virtual_start, rec_l_size, send_b_size, send_b_virtual_end, send_b_virtual_start, &
104 send_i, send_ij_index, send_j, start_point, tag, total_ij_pairs
105 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: integ_group_pos2color_sub, my_b_size, &
106 my_b_virtual_end, my_b_virtual_start, num_ij_pairs, sizes_array_orig, virtual
107 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: ij_map
108 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :) :: ranges_info_array
109 LOGICAL :: my_alpha_beta_case, my_beta_beta_case, &
111 REAL(kind=
dp) :: amp_fac, my_emp2_cou, my_emp2_ex, &
112 sym_fac, t_new, t_start
113 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:),
TARGET :: buffer_1d
114 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :), &
115 TARGET :: local_ab, local_ba, t_ab
116 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
117 TARGET :: local_i_al, local_j_al, y_i_ap, y_j_ap
118 REAL(kind=
dp),
CONTIGUOUS,
DIMENSION(:, :), &
119 POINTER :: external_ab, external_i_al
120 REAL(kind=
dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
125 DIMENSION(:) :: b_ia_q
127 CALL timeset(routinen, handle)
131 ALLOCATE (virtual(nspins))
132 virtual(:) = nmo - homo(:)
134 ALLOCATE (my_b_size(nspins), my_b_virtual_start(nspins), my_b_virtual_end(nspins))
137 my_b_virtual_start(ispin), my_b_virtual_end(ispin), my_b_size(ispin))
140 CALL get_group_dist(gd_array, color_sub, my_group_l_start, my_group_l_end, my_group_l_size)
146 IF (.NOT. c_associated(mp2_env%local_gemm_ctx))
THEN
151 CALL mp2_ri_get_integ_group_size( &
152 mp2_env, para_env, para_env_sub, gd_array, gd_b_virtual, &
153 homo, dimen_ri, unit_nr, &
154 integ_group_size, ngroup, &
155 num_integ_group, virtual, calc_forces)
159 CALL mp2_ri_create_group( &
160 para_env, para_env_sub, color_sub, &
161 gd_array%sizes, calc_forces, &
162 integ_group_size, my_group_l_end, &
163 my_group_l_size, my_group_l_size_orig, my_group_l_start, my_new_group_l_size, &
164 integ_group_pos2color_sub, sizes_array_orig, &
165 ranges_info_array, comm_exchange, comm_rep, num_integ_group)
172 CALL replicate_iak_2intgroup(bib_c(jspin)%array, comm_exchange, comm_rep, &
173 homo(jspin), gd_array%sizes, my_b_size(jspin), &
174 my_group_l_size, ranges_info_array)
178 IF (unit_nr > 0)
THEN
179 IF (nspins == 1)
THEN
180 WRITE (unit_nr, *)
"Start loop run"
181 ELSE IF (ispin == 1 .AND. jspin == 1)
THEN
182 WRITE (unit_nr, *)
"Start loop run alpha-alpha"
183 ELSE IF (ispin == 1 .AND. jspin == 2)
THEN
184 WRITE (unit_nr, *)
"Start loop run alpha-beta"
185 ELSE IF (ispin == 2 .AND. jspin == 2)
THEN
186 WRITE (unit_nr, *)
"Start loop run beta-beta"
191 my_open_shell_ss = (nspins == 2) .AND. (ispin == jspin)
197 my_beta_beta_case = .false.
198 my_alpha_beta_case = .false.
199 IF (ispin /= jspin)
THEN
200 my_alpha_beta_case = .true.
201 ELSE IF (my_open_shell_ss)
THEN
202 IF (ispin == 2) my_beta_beta_case = .true.
205 amp_fac = mp2_env%scale_S + mp2_env%scale_T
206 IF (my_alpha_beta_case .OR. my_open_shell_ss) amp_fac = mp2_env%scale_T
208 CALL mp2_ri_allocate_no_blk(local_ab, t_ab, mp2_env, homo, virtual, my_b_size, &
209 my_group_l_size, calc_forces, ispin, jspin, local_ba)
211 CALL mp2_ri_get_block_size( &
212 mp2_env, para_env, para_env_sub, gd_array, gd_b_virtual(ispin:jspin), &
213 homo(ispin:jspin), virtual(ispin:jspin), dimen_ri, unit_nr, block_size, &
214 ngroup, num_integ_group, my_open_shell_ss, calc_forces, buffer_1d)
224 CALL mp2_ri_communication(my_alpha_beta_case, total_ij_pairs, homo(ispin), homo(jspin), &
225 block_size, ngroup, ij_map, color_sub, my_ij_pairs, my_open_shell_ss, unit_nr)
227 ALLOCATE (num_ij_pairs(0:comm_exchange%num_pe - 1))
228 CALL comm_exchange%allgather(my_ij_pairs, num_ij_pairs)
230 max_ij_pairs = maxval(num_ij_pairs)
233 CALL mp2_ri_allocate_blk(dimen_ri, my_b_size, block_size, local_i_al, &
234 local_j_al, calc_forces, y_i_ap, y_j_ap, ispin, jspin)
236 CALL timeset(routinen//
"_RI_loop", handle2)
240 DO ij_index = 1, max_ij_pairs
243 IF (unit_nr > 0 .AND. ij_index > 1)
THEN
244 decil = ij_index*10/max_ij_pairs
245 IF (decil /= (ij_index - 1)*10/max_ij_pairs)
THEN
247 t_new = (t_new - t_start)/60.0_dp*(max_ij_pairs - ij_index + 1)/(ij_index - 1)
248 WRITE (unit_nr, fmt=
"(T3,A)")
"Percentage of finished loop: "// &
254 IF (calc_forces)
THEN
259 IF (ij_index <= my_ij_pairs)
THEN
261 ij_counter = (ij_index - min(1, color_sub))*ngroup + color_sub
262 my_i = ij_map(1, ij_counter)
263 my_j = ij_map(2, ij_counter)
264 my_block_size = ij_map(3, ij_counter)
267 CALL fill_local_i_al(local_i_al(:, :, 1:my_block_size), ranges_info_array(:, :, comm_exchange%mepos), &
268 bib_c(ispin)%array(:, :, my_i:my_i + my_block_size - 1))
271 CALL fill_local_i_al(local_j_al(:, :, 1:my_block_size), ranges_info_array(:, :, comm_exchange%mepos), &
272 bib_c(jspin)%array(:, :, my_j:my_j + my_block_size - 1))
275 CALL timeset(routinen//
"_comm", handle3)
276 DO proc_shift = 1, comm_exchange%num_pe - 1
277 proc_send =
modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
278 proc_receive =
modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
280 send_ij_index = num_ij_pairs(proc_send)
284 IF (ij_index <= send_ij_index)
THEN
285 ij_counter_send = (ij_index - min(1, integ_group_pos2color_sub(proc_send)))*ngroup + &
286 integ_group_pos2color_sub(proc_send)
287 send_i = ij_map(1, ij_counter_send)
288 send_j = ij_map(2, ij_counter_send)
291 bi_c_rec(1:rec_l_size, 1:my_b_size(ispin), 1:my_block_size) => &
292 buffer_1d(1:rec_l_size*my_b_size(ispin)*my_block_size)
294 CALL comm_exchange%sendrecv(bib_c(ispin)%array(:, :, send_i:send_i + my_block_size - 1), &
295 proc_send, bi_c_rec, proc_receive, tag)
297 CALL fill_local_i_al(local_i_al(:, :, 1:my_block_size), ranges_info_array(:, :, proc_receive), &
298 bi_c_rec(:, 1:my_b_size(ispin), :))
301 bi_c_rec(1:rec_l_size, 1:my_b_size(jspin), 1:my_block_size) => &
302 buffer_1d(1:int(rec_l_size,
int_8)*my_b_size(jspin)*my_block_size)
304 CALL comm_exchange%sendrecv(bib_c(jspin)%array(:, :, send_j:send_j + my_block_size - 1), &
305 proc_send, bi_c_rec, proc_receive, tag)
307 CALL fill_local_i_al(local_j_al(:, :, 1:my_block_size), ranges_info_array(:, :, proc_receive), &
308 bi_c_rec(:, 1:my_b_size(jspin), :))
314 bi_c_rec(1:rec_l_size, 1:my_b_size(ispin), 1:my_block_size) => &
315 buffer_1d(1:int(rec_l_size,
int_8)*my_b_size(ispin)*my_block_size)
317 CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)
319 CALL fill_local_i_al(local_i_al(:, :, 1:my_block_size), ranges_info_array(:, :, proc_receive), &
320 bi_c_rec(:, 1:my_b_size(ispin), 1:my_block_size))
323 bi_c_rec(1:rec_l_size, 1:my_b_size(jspin), 1:my_block_size) => &
324 buffer_1d(1:int(rec_l_size,
int_8)*my_b_size(jspin)*my_block_size)
326 CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)
328 CALL fill_local_i_al(local_j_al(:, :, 1:my_block_size), ranges_info_array(:, :, proc_receive), &
329 bi_c_rec(:, 1:my_b_size(jspin), 1:my_block_size))
335 CALL timestop(handle3)
338 DO iib = 1, my_block_size
339 DO jjb = 1, my_block_size
340 CALL timeset(routinen//
"_expansion", handle3)
341 associate(my_local_i_al => local_i_al(:, :, iib), my_local_j_al => local_j_al(:, :, jjb))
345 IF ((my_alpha_beta_case) .AND. (calc_forces))
THEN
349 CALL local_gemm(
'T',
'N', my_b_size(ispin), my_b_size(jspin), dimen_ri, 1.0_dp, &
350 my_local_i_al, dimen_ri, my_local_j_al, dimen_ri, &
351 0.0_dp, local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), &
352 my_b_size(ispin), mp2_env%local_gemm_ctx)
354 IF (my_alpha_beta_case .AND. calc_forces)
THEN
355 local_ba(my_b_virtual_start(jspin):my_b_virtual_end(jspin), :) = &
356 transpose(local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :))
359 DO proc_shift = 1, para_env_sub%num_pe - 1
360 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
361 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
363 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, &
364 rec_b_virtual_end, rec_b_size)
366 external_i_al(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri,
int_8)*rec_b_size)
367 external_i_al = 0.0_dp
369 CALL para_env_sub%sendrecv(my_local_i_al, proc_send, &
370 external_i_al, proc_receive, tag)
372 CALL local_gemm(
'T',
'N', rec_b_size, my_b_size(jspin), dimen_ri, 1.0_dp, &
373 external_i_al, dimen_ri, my_local_j_al, dimen_ri, &
374 0.0_dp, local_ab(rec_b_virtual_start:rec_b_virtual_end, 1:my_b_size(jspin)), rec_b_size, &
375 mp2_env%local_gemm_ctx)
378 IF (my_alpha_beta_case .AND. calc_forces)
THEN
380 CALL get_group_dist(gd_b_virtual(jspin), proc_receive, rec_b_virtual_start, &
381 rec_b_virtual_end, rec_b_size)
383 external_i_al(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri,
int_8)*rec_b_size)
384 external_i_al = 0.0_dp
386 CALL para_env_sub%sendrecv(my_local_j_al, proc_send, &
387 external_i_al, proc_receive, tag)
389 CALL local_gemm(
'T',
'N', rec_b_size, my_b_size(ispin), dimen_ri, 1.0_dp, &
390 external_i_al, dimen_ri, my_local_i_al, dimen_ri, &
391 0.0_dp, local_ba(rec_b_virtual_start:rec_b_virtual_end, 1:my_b_size(ispin)), rec_b_size, &
392 mp2_env%local_gemm_ctx)
395 IF (my_alpha_beta_case .AND. calc_forces)
THEN
397 CALL dgemm_counter_stop(dgemm_counter, virtual(ispin), my_b_size(ispin) + my_b_size(jspin), dimen_ri)
401 CALL timestop(handle3)
406 CALL timeset(routinen//
"_ener", handle3)
409 IF (my_i == my_j) sym_fac = 1.0_dp
410 IF (my_alpha_beta_case) sym_fac = 0.5_dp
411 DO b = 1, my_b_size(jspin)
412 b_global = b + my_b_virtual_start(jspin) - 1
413 DO a = 1, virtual(ispin)
414 my_emp2_cou = my_emp2_cou - sym_fac*2.0_dp*local_ab(a, b)**2/ &
415 (eigenval(homo(ispin) + a, ispin) + eigenval(homo(jspin) + b_global, jspin) - &
416 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, jspin))
423 IF (calc_forces .AND. (.NOT. my_alpha_beta_case)) t_ab = 0.0_dp
424 DO b = 1, my_b_size(ispin)
425 b_global = b + my_b_virtual_start(ispin) - 1
426 DO a = 1, my_b_size(ispin)
427 a_global = a + my_b_virtual_start(ispin) - 1
428 my_emp2_ex = my_emp2_ex + sym_fac*local_ab(a_global, b)*local_ab(b_global, a)/ &
429 (eigenval(homo(ispin) + a_global, ispin) + eigenval(homo(ispin) + b_global, ispin) - &
430 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, ispin))
431 IF (calc_forces .AND. (.NOT. my_alpha_beta_case))
THEN
432 t_ab(a_global, b) = -(amp_fac*local_ab(a_global, b) - mp2_env%scale_T*local_ab(b_global, a))/ &
433 (eigenval(homo(ispin) + a_global, ispin) + &
434 eigenval(homo(ispin) + b_global, ispin) - &
435 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, ispin))
440 DO proc_shift = 1, para_env_sub%num_pe - 1
441 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
442 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
445 rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
447 send_b_virtual_start, send_b_virtual_end, send_b_size)
449 external_ab(1:my_b_size(ispin), 1:rec_b_size) => &
450 buffer_1d(1:int(rec_b_size,
int_8)*my_b_size(ispin))
453 CALL para_env_sub%sendrecv(local_ab(send_b_virtual_start:send_b_virtual_end, 1:my_b_size(ispin)), proc_send, &
454 external_ab(1:my_b_size(ispin), 1:rec_b_size), proc_receive, tag)
456 DO b = 1, my_b_size(ispin)
457 b_global = b + my_b_virtual_start(ispin) - 1
459 a_global = a + rec_b_virtual_start - 1
460 my_emp2_ex = my_emp2_ex + sym_fac*local_ab(a_global, b)*external_ab(b, a)/ &
461 (eigenval(homo(ispin) + a_global, ispin) + eigenval(homo(ispin) + b_global, ispin) - &
462 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, ispin))
463 IF (calc_forces .AND. (.NOT. my_alpha_beta_case)) &
464 t_ab(a_global, b) = -(amp_fac*local_ab(a_global, b) - mp2_env%scale_T*external_ab(b, a))/ &
465 (eigenval(homo(ispin) + a_global, ispin) + &
466 eigenval(homo(ispin) + b_global, ispin) - &
467 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, ispin))
472 CALL timestop(handle3)
474 IF (calc_forces)
THEN
476 CALL mp2_update_p_gamma(mp2_env, para_env_sub, gd_b_virtual, &
477 eigenval, homo, dimen_ri, iib, jjb, my_b_size, &
478 my_b_virtual_end, my_b_virtual_start, my_i, my_j, virtual, &
479 local_ab, t_ab, my_local_i_al, my_local_j_al, &
480 my_open_shell_ss, y_i_ap(:, :, iib), y_j_ap(:, :, jjb), local_ba, &
481 ispin, jspin, dgemm_counter, buffer_1d)
494 CALL timeset(routinen//
"_comm", handle3)
497 DO proc_shift = 1, comm_exchange%num_pe - 1
498 proc_send =
modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
499 proc_receive =
modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
501 send_ij_index = num_ij_pairs(proc_send)
503 IF (ij_index <= send_ij_index)
THEN
505 ij_counter_send = (ij_index - min(1, integ_group_pos2color_sub(proc_send)))*ngroup + &
506 integ_group_pos2color_sub(proc_send)
507 send_i = ij_map(1, ij_counter_send)
508 send_j = ij_map(2, ij_counter_send)
511 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_i:send_i + my_block_size - 1), &
514 CALL comm_exchange%send(bib_c(jspin)%array(:, :, send_j:send_j + my_block_size - 1), &
518 CALL timestop(handle3)
522 IF (calc_forces)
THEN
523 CALL mp2_redistribute_gamma(mp2_env%ri_grad%Gamma_P_ia(ispin)%array, ij_index, my_b_size(ispin), &
524 my_block_size, my_group_l_size, my_i, my_ij_pairs, ngroup, &
525 num_integ_group, integ_group_pos2color_sub, num_ij_pairs, &
526 ij_map, ranges_info_array, y_i_ap(:, :, 1:my_block_size), comm_exchange, &
527 gd_array%sizes, 1, buffer_1d)
528 CALL mp2_redistribute_gamma(mp2_env%ri_grad%Gamma_P_ia(jspin)%array, ij_index, my_b_size(jspin), &
529 my_block_size, my_group_l_size, my_j, my_ij_pairs, ngroup, &
530 num_integ_group, integ_group_pos2color_sub, num_ij_pairs, &
531 ij_map, ranges_info_array, y_j_ap(:, :, 1:my_block_size), comm_exchange, &
532 gd_array%sizes, 2, buffer_1d)
536 CALL timestop(handle2)
538 DEALLOCATE (local_i_al)
539 DEALLOCATE (local_j_al)
541 DEALLOCATE (num_ij_pairs)
542 DEALLOCATE (local_ab)
544 IF (calc_forces)
THEN
547 IF (
ALLOCATED(t_ab))
THEN
550 DEALLOCATE (local_ba)
559 CALL quasi_degenerate_p_ij( &
560 mp2_env, eigenval(:, ispin:jspin), homo(ispin:jspin), virtual(ispin:jspin), my_open_shell_ss, &
561 my_beta_beta_case, bib_c(ispin:jspin), unit_nr, dimen_ri, &
562 my_b_size(ispin:jspin), ngroup, my_group_l_size, &
563 color_sub, ranges_info_array, comm_exchange, para_env_sub, para_env, &
564 my_b_virtual_start(ispin:jspin), my_b_virtual_end(ispin:jspin), gd_array%sizes, gd_b_virtual(ispin:jspin), &
565 integ_group_pos2color_sub, dgemm_counter, buffer_1d)
569 DEALLOCATE (buffer_1d)
574 IF (calc_forces .AND. jspin == nspins)
THEN
575 IF (.NOT.
ALLOCATED(b_ia_q))
ALLOCATE (b_ia_q(nspins))
576 ALLOCATE (b_ia_q(ispin)%array(homo(ispin), my_b_size(ispin), my_group_l_size_orig))
577 b_ia_q(ispin)%array = 0.0_dp
578 DO jjb = 1, homo(ispin)
579 DO iib = 1, my_b_size(ispin)
580 b_ia_q(ispin)%array(jjb, iib, 1:my_group_l_size_orig) = &
581 bib_c(ispin)%array(1:my_group_l_size_orig, iib, jjb)
584 DEALLOCATE (bib_c(ispin)%array)
587 ALLOCATE (bib_c(ispin)%array(my_b_size(ispin), homo(ispin), my_group_l_size_orig))
588 DO proc_shift = 1, comm_rep%num_pe - 1
590 proc_send =
modulo(comm_rep%mepos - proc_shift, comm_rep%num_pe)
591 proc_receive =
modulo(comm_rep%mepos + proc_shift, comm_rep%num_pe)
593 start_point = ranges_info_array(3, proc_shift, comm_exchange%mepos)
594 end_point = ranges_info_array(4, proc_shift, comm_exchange%mepos)
596 CALL comm_rep%sendrecv(mp2_env%ri_grad%Gamma_P_ia(ispin)%array(:, :, start_point:end_point), &
597 proc_send, bib_c(ispin)%array, proc_receive, tag)
600 mp2_env%ri_grad%Gamma_P_ia(ispin)%array(:, :, 1:my_group_l_size_orig) = &
601 mp2_env%ri_grad%Gamma_P_ia(ispin)%array(:, :, 1:my_group_l_size_orig) &
602 + bib_c(ispin)%array(:, :, :)
606 bib_c(ispin)%array(:, :, :) = mp2_env%ri_grad%Gamma_P_ia(ispin)%array(:, :, 1:my_group_l_size_orig)
607 DEALLOCATE (mp2_env%ri_grad%Gamma_P_ia(ispin)%array)
608 CALL move_alloc(bib_c(ispin)%array, mp2_env%ri_grad%Gamma_P_ia(ispin)%array)
609 ELSE IF (jspin == nspins)
THEN
610 DEALLOCATE (bib_c(ispin)%array)
613 CALL para_env%sum(my_emp2_cou)
614 CALL para_env%sum(my_emp2_ex)
616 IF (my_open_shell_ss .OR. my_alpha_beta_case)
THEN
617 IF (my_alpha_beta_case)
THEN
618 emp2_s = emp2_s + my_emp2_cou
619 emp2_cou = emp2_cou + my_emp2_cou
621 my_emp2_cou = my_emp2_cou*0.25_dp
622 my_emp2_ex = my_emp2_ex*0.5_dp
623 emp2_t = emp2_t + my_emp2_cou + my_emp2_ex
624 emp2_cou = emp2_cou + my_emp2_cou
625 emp2_ex = emp2_ex + my_emp2_ex
628 emp2_cou = emp2_cou + my_emp2_cou
629 emp2_ex = emp2_ex + my_emp2_ex
635 DEALLOCATE (integ_group_pos2color_sub)
636 DEALLOCATE (ranges_info_array)
638 CALL comm_exchange%free()
641 IF (calc_forces)
THEN
643 DEALLOCATE (gd_array%sizes)
644 iib =
SIZE(sizes_array_orig)
645 ALLOCATE (gd_array%sizes(0:iib - 1))
646 gd_array%sizes(:) = sizes_array_orig
647 DEALLOCATE (sizes_array_orig)
650 my_group_l_size = my_group_l_size_orig
654 CALL complete_gamma(mp2_env, b_ia_q(ispin)%array, dimen_ri, homo(ispin), &
655 virtual(ispin), para_env, para_env_sub, ngroup, &
656 my_group_l_size, my_group_l_start, my_group_l_end, &
657 my_b_size(ispin), my_b_virtual_start(ispin), &
658 gd_array, gd_b_virtual(ispin), &
663 IF (nspins == 1) mp2_env%ri_grad%P_ab(1)%array(:, :) = mp2_env%ri_grad%P_ab(1)%array(:, :)*2.0_dp
666 CALL comm%from_split(para_env, para_env_sub%mepos)
669 CALL comm%sum(mp2_env%ri_grad%P_ab(ispin)%array)
671 CALL para_env%sum(mp2_env%ri_grad%P_ij(ispin)%array)
677 CALL release_group_dist(gd_array)
679 IF (
ALLOCATED(bib_c(ispin)%array))
DEALLOCATE (bib_c(ispin)%array)
680 CALL release_group_dist(gd_b_virtual(ispin))
684 IF (calc_forces)
DEALLOCATE (mp2_env%ri_grad%PQ_half)
685 IF (calc_forces .AND. .NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) &
686 DEALLOCATE (mp2_env%ri_grad%operator_half)
688 CALL dgemm_counter_write(dgemm_counter, para_env)
691 CALL local_gemm_destroy(mp2_env%local_gemm_ctx)
692 mp2_env%local_gemm_ctx = c_null_ptr
693 CALL timestop(handle)
703 SUBROUTINE fill_local_i_al(local_i_aL, ranges_info_array, BIb_C_rec)
704 REAL(kind=dp),
DIMENSION(:, :, :),
INTENT(INOUT) :: local_i_al
705 INTEGER,
DIMENSION(:, :),
INTENT(IN) :: ranges_info_array
706 REAL(kind=dp),
DIMENSION(:, :, :),
INTENT(IN) :: bib_c_rec
708 CHARACTER(LEN=*),
PARAMETER :: routinen =
'fill_local_i_aL'
710 INTEGER :: end_point, handle, irep, lend_pos, &
711 lstart_pos, start_point
713 CALL timeset(routinen, handle)
715 DO irep = 1,
SIZE(ranges_info_array, 2)
716 lstart_pos = ranges_info_array(1, irep)
717 lend_pos = ranges_info_array(2, irep)
718 start_point = ranges_info_array(3, irep)
719 end_point = ranges_info_array(4, irep)
723 local_i_al(lstart_pos:lend_pos, :, :) = bib_c_rec(start_point:end_point, :, :)
727 CALL timestop(handle)
729 END SUBROUTINE fill_local_i_al
737 SUBROUTINE fill_local_i_al_2d(local_i_aL, ranges_info_array, BIb_C_rec)
738 REAL(kind=dp),
DIMENSION(:, :),
INTENT(INOUT) :: local_i_al
739 INTEGER,
DIMENSION(:, :),
INTENT(IN) :: ranges_info_array
740 REAL(kind=dp),
DIMENSION(:, :),
INTENT(IN) :: bib_c_rec
742 CHARACTER(LEN=*),
PARAMETER :: routinen =
'fill_local_i_aL_2D'
744 INTEGER :: end_point, handle, irep, lend_pos, &
745 lstart_pos, start_point
747 CALL timeset(routinen, handle)
749 DO irep = 1,
SIZE(ranges_info_array, 2)
750 lstart_pos = ranges_info_array(1, irep)
751 lend_pos = ranges_info_array(2, irep)
752 start_point = ranges_info_array(3, irep)
753 end_point = ranges_info_array(4, irep)
757 local_i_al(lstart_pos:lend_pos, :) = bib_c_rec(start_point:end_point, :)
761 CALL timestop(handle)
763 END SUBROUTINE fill_local_i_al_2d
776 SUBROUTINE replicate_iak_2intgroup(BIb_C, comm_exchange, comm_rep, homo, sizes_array, my_B_size, &
777 my_group_L_size, ranges_info_array)
778 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
779 INTENT(INOUT) :: bib_c
780 TYPE(mp_comm_type),
INTENT(IN) :: comm_exchange, comm_rep
781 INTEGER,
INTENT(IN) :: homo
782 INTEGER,
DIMENSION(:),
INTENT(IN) :: sizes_array
783 INTEGER,
INTENT(IN) :: my_b_size, my_group_l_size
784 INTEGER,
DIMENSION(:, 0:, 0:),
INTENT(IN) :: ranges_info_array
786 CHARACTER(LEN=*),
PARAMETER :: routinen =
'replicate_iaK_2intgroup'
788 INTEGER :: end_point, handle, max_l_size, &
789 proc_receive, proc_shift, start_point
790 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: bib_c_copy
791 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :, :) :: bib_c_gather
793 CALL timeset(routinen, handle)
797 max_l_size = maxval(sizes_array)
799 ALLOCATE (bib_c_copy(max_l_size, my_b_size, homo))
801 bib_c_copy(1:
SIZE(bib_c, 1), 1:my_b_size, 1:homo) = bib_c
805 ALLOCATE (bib_c_gather(max_l_size, my_b_size, homo, 0:comm_rep%num_pe - 1))
806 bib_c_gather = 0.0_dp
808 CALL comm_rep%allgather(bib_c_copy, bib_c_gather)
810 DEALLOCATE (bib_c_copy)
812 ALLOCATE (bib_c(my_group_l_size, my_b_size, homo))
816 DO proc_shift = 0, comm_rep%num_pe - 1
817 proc_receive =
modulo(comm_rep%mepos - proc_shift, comm_rep%num_pe)
819 start_point = ranges_info_array(3, proc_shift, comm_exchange%mepos)
820 end_point = ranges_info_array(4, proc_shift, comm_exchange%mepos)
822 bib_c(start_point:end_point, 1:my_b_size, 1:homo) = &
823 bib_c_gather(1:end_point - start_point + 1, 1:my_b_size, 1:homo, proc_receive)
827 DEALLOCATE (bib_c_gather)
829 CALL timestop(handle)
831 END SUBROUTINE replicate_iak_2intgroup
847 SUBROUTINE mp2_ri_allocate_no_blk(local_ab, t_ab, mp2_env, homo, virtual, my_B_size, &
848 my_group_L_size, calc_forces, ispin, jspin, local_ba)
849 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :), &
850 INTENT(OUT) :: local_ab, t_ab
851 TYPE(mp2_type) :: mp2_env
852 INTEGER,
INTENT(IN) :: homo(2), virtual(2), my_b_size(2), &
854 LOGICAL,
INTENT(IN) :: calc_forces
855 INTEGER,
INTENT(IN) :: ispin, jspin
856 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :), &
857 INTENT(OUT) :: local_ba
859 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_allocate_no_blk'
863 CALL timeset(routinen, handle)
865 ALLOCATE (local_ab(virtual(ispin), my_b_size(jspin)))
868 IF (calc_forces)
THEN
869 IF (.NOT.
ALLOCATED(mp2_env%ri_grad%P_ij(jspin)%array))
THEN
870 ALLOCATE (mp2_env%ri_grad%P_ij(jspin)%array(homo(ispin), homo(ispin)))
871 mp2_env%ri_grad%P_ij(jspin)%array = 0.0_dp
873 IF (.NOT.
ALLOCATED(mp2_env%ri_grad%P_ab(jspin)%array))
THEN
874 ALLOCATE (mp2_env%ri_grad%P_ab(jspin)%array(my_b_size(jspin), virtual(jspin)))
875 mp2_env%ri_grad%P_ab(jspin)%array = 0.0_dp
877 IF (.NOT.
ALLOCATED(mp2_env%ri_grad%Gamma_P_ia(jspin)%array))
THEN
878 ALLOCATE (mp2_env%ri_grad%Gamma_P_ia(jspin)%array(my_b_size(jspin), homo(jspin), my_group_l_size))
879 mp2_env%ri_grad%Gamma_P_ia(jspin)%array = 0.0_dp
882 IF (ispin == jspin)
THEN
884 ALLOCATE (t_ab(virtual(ispin), my_b_size(jspin)))
887 ALLOCATE (local_ba(1, 1))
890 ALLOCATE (local_ba(virtual(jspin), my_b_size(ispin)))
895 CALL timestop(handle)
897 END SUBROUTINE mp2_ri_allocate_no_blk
912 SUBROUTINE mp2_ri_allocate_blk(dimen_RI, my_B_size, block_size, &
913 local_i_aL, local_j_aL, calc_forces, &
914 Y_i_aP, Y_j_aP, ispin, jspin)
915 INTEGER,
INTENT(IN) :: dimen_ri, my_b_size(2), block_size
916 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
917 INTENT(OUT) :: local_i_al, local_j_al
918 LOGICAL,
INTENT(IN) :: calc_forces
919 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
920 INTENT(OUT) :: y_i_ap, y_j_ap
921 INTEGER,
INTENT(IN) :: ispin, jspin
923 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_allocate_blk'
927 CALL timeset(routinen, handle)
929 ALLOCATE (local_i_al(dimen_ri, my_b_size(ispin), block_size))
931 ALLOCATE (local_j_al(dimen_ri, my_b_size(jspin), block_size))
934 IF (calc_forces)
THEN
935 ALLOCATE (y_i_ap(my_b_size(ispin), dimen_ri, block_size))
939 ALLOCATE (y_j_ap(my_b_size(jspin), dimen_ri, block_size))
944 CALL timestop(handle)
946 END SUBROUTINE mp2_ri_allocate_blk
962 SUBROUTINE mp2_ri_communication(my_alpha_beta_case, total_ij_pairs, homo, homo_beta, &
963 block_size, ngroup, ij_map, color_sub, my_ij_pairs, my_open_shell_SS, unit_nr)
964 LOGICAL,
INTENT(IN) :: my_alpha_beta_case
965 INTEGER,
INTENT(OUT) :: total_ij_pairs
966 INTEGER,
INTENT(IN) :: homo, homo_beta, block_size, ngroup
967 INTEGER,
ALLOCATABLE,
DIMENSION(:, :),
INTENT(OUT) :: ij_map
968 INTEGER,
INTENT(IN) :: color_sub
969 INTEGER,
INTENT(OUT) :: my_ij_pairs
970 LOGICAL,
INTENT(IN) :: my_open_shell_ss
971 INTEGER,
INTENT(IN) :: unit_nr
973 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_communication'
975 INTEGER :: assigned_blocks, first_i_block, first_j_block, handle, iib, ij_block_counter, &
976 ij_counter, jjb, last_i_block, last_j_block, num_block_per_group, num_ij_blocks, &
977 num_ij_blocks_beta, total_ij_block, total_ij_pairs_blocks
978 LOGICAL,
ALLOCATABLE,
DIMENSION(:, :) :: ij_marker
983 CALL timeset(routinen, handle)
985 IF (.NOT. my_open_shell_ss .AND. .NOT. my_alpha_beta_case)
THEN
986 total_ij_pairs = homo*(1 + homo)/2
987 num_ij_blocks = homo/block_size - 1
990 last_i_block = block_size*(num_ij_blocks - 1)
992 first_j_block = block_size + 1
993 last_j_block = block_size*(num_ij_blocks + 1)
996 DO iib = first_i_block, last_i_block, block_size
997 DO jjb = iib + block_size, last_j_block, block_size
998 ij_block_counter = ij_block_counter + 1
1002 total_ij_block = ij_block_counter
1003 num_block_per_group = total_ij_block/ngroup
1004 assigned_blocks = num_block_per_group*ngroup
1006 total_ij_pairs_blocks = assigned_blocks + (total_ij_pairs - assigned_blocks*(block_size**2))
1008 ALLOCATE (ij_marker(homo, homo))
1010 ALLOCATE (ij_map(3, total_ij_pairs_blocks))
1014 DO iib = first_i_block, last_i_block, block_size
1015 DO jjb = iib + block_size, last_j_block, block_size
1016 IF (ij_counter + 1 > assigned_blocks)
EXIT
1017 ij_counter = ij_counter + 1
1018 ij_marker(iib:iib + block_size - 1, jjb:jjb + block_size - 1) = .false.
1019 ij_map(1, ij_counter) = iib
1020 ij_map(2, ij_counter) = jjb
1021 ij_map(3, ij_counter) = block_size
1022 IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1027 IF (ij_marker(iib, jjb))
THEN
1028 ij_counter = ij_counter + 1
1029 ij_map(1, ij_counter) = iib
1030 ij_map(2, ij_counter) = jjb
1031 ij_map(3, ij_counter) = 1
1032 IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1036 DEALLOCATE (ij_marker)
1038 ELSE IF (.NOT. my_alpha_beta_case)
THEN
1041 total_ij_pairs = homo*(homo - 1)/2
1042 num_ij_blocks = (homo - 1)/block_size - 1
1045 last_i_block = block_size*(num_ij_blocks - 1)
1048 first_j_block = block_size + 2
1049 last_j_block = block_size*(num_ij_blocks + 1) + 1
1051 ij_block_counter = 0
1052 DO iib = first_i_block, last_i_block, block_size
1053 DO jjb = iib + block_size + 1, last_j_block, block_size
1054 ij_block_counter = ij_block_counter + 1
1058 total_ij_block = ij_block_counter
1059 num_block_per_group = total_ij_block/ngroup
1060 assigned_blocks = num_block_per_group*ngroup
1062 total_ij_pairs_blocks = assigned_blocks + (total_ij_pairs - assigned_blocks*(block_size**2))
1064 ALLOCATE (ij_marker(homo, homo))
1066 ALLOCATE (ij_map(3, total_ij_pairs_blocks))
1070 DO iib = first_i_block, last_i_block, block_size
1071 DO jjb = iib + block_size + 1, last_j_block, block_size
1072 IF (ij_counter + 1 > assigned_blocks)
EXIT
1073 ij_counter = ij_counter + 1
1074 ij_marker(iib:iib + block_size - 1, jjb:jjb + block_size - 1) = .false.
1075 ij_map(1, ij_counter) = iib
1076 ij_map(2, ij_counter) = jjb
1077 ij_map(3, ij_counter) = block_size
1078 IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1082 DO jjb = iib + 1, homo
1083 IF (ij_marker(iib, jjb))
THEN
1084 ij_counter = ij_counter + 1
1085 ij_map(1, ij_counter) = iib
1086 ij_map(2, ij_counter) = jjb
1087 ij_map(3, ij_counter) = 1
1088 IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1092 DEALLOCATE (ij_marker)
1095 total_ij_pairs = homo*homo_beta
1096 num_ij_blocks = homo/block_size
1097 num_ij_blocks_beta = homo_beta/block_size
1100 last_i_block = block_size*(num_ij_blocks - 1)
1103 last_j_block = block_size*(num_ij_blocks_beta - 1)
1105 ij_block_counter = 0
1106 DO iib = first_i_block, last_i_block, block_size
1107 DO jjb = first_j_block, last_j_block, block_size
1108 ij_block_counter = ij_block_counter + 1
1112 total_ij_block = ij_block_counter
1113 num_block_per_group = total_ij_block/ngroup
1114 assigned_blocks = num_block_per_group*ngroup
1116 total_ij_pairs_blocks = assigned_blocks + (total_ij_pairs - assigned_blocks*(block_size**2))
1118 ALLOCATE (ij_marker(homo, homo_beta))
1120 ALLOCATE (ij_map(3, total_ij_pairs_blocks))
1124 DO iib = first_i_block, last_i_block, block_size
1125 DO jjb = first_j_block, last_j_block, block_size
1126 IF (ij_counter + 1 > assigned_blocks)
EXIT
1127 ij_counter = ij_counter + 1
1128 ij_marker(iib:iib + block_size - 1, jjb:jjb + block_size - 1) = .false.
1129 ij_map(1, ij_counter) = iib
1130 ij_map(2, ij_counter) = jjb
1131 ij_map(3, ij_counter) = block_size
1132 IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1136 DO jjb = 1, homo_beta
1137 IF (ij_marker(iib, jjb))
THEN
1138 ij_counter = ij_counter + 1
1139 ij_map(1, ij_counter) = iib
1140 ij_map(2, ij_counter) = jjb
1141 ij_map(3, ij_counter) = 1
1142 IF (mod(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1146 DEALLOCATE (ij_marker)
1149 IF (unit_nr > 0)
THEN
1150 IF (block_size == 1)
THEN
1151 WRITE (unit=unit_nr, fmt=
"(T3,A,T66,F15.1)") &
1152 "RI_INFO| Percentage of ij pairs communicated with block size 1:", 100.0_dp
1154 WRITE (unit=unit_nr, fmt=
"(T3,A,T66,F15.1)") &
1155 "RI_INFO| Percentage of ij pairs communicated with block size 1:", &
1156 100.0_dp*real((total_ij_pairs - assigned_blocks*(block_size**2)), kind=dp)/real(total_ij_pairs, kind=dp)
1158 CALL m_flush(unit_nr)
1161 CALL timestop(handle)
1163 END SUBROUTINE mp2_ri_communication
1185 SUBROUTINE mp2_ri_create_group(para_env, para_env_sub, color_sub, &
1186 sizes_array, calc_forces, &
1187 integ_group_size, my_group_L_end, &
1188 my_group_L_size, my_group_L_size_orig, my_group_L_start, my_new_group_L_size, &
1189 integ_group_pos2color_sub, &
1190 sizes_array_orig, ranges_info_array, comm_exchange, comm_rep, num_integ_group)
1191 TYPE(mp_para_env_type),
INTENT(IN) :: para_env, para_env_sub
1192 INTEGER,
INTENT(IN) :: color_sub
1193 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(INOUT) :: sizes_array
1194 LOGICAL,
INTENT(IN) :: calc_forces
1195 INTEGER,
INTENT(IN) :: integ_group_size, my_group_l_end
1196 INTEGER,
INTENT(INOUT) :: my_group_l_size
1197 INTEGER,
INTENT(OUT) :: my_group_l_size_orig
1198 INTEGER,
INTENT(IN) :: my_group_l_start
1199 INTEGER,
INTENT(INOUT) :: my_new_group_l_size
1200 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(OUT) :: integ_group_pos2color_sub, &
1202 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :), &
1203 INTENT(OUT) :: ranges_info_array
1204 TYPE(mp_comm_type),
INTENT(OUT) :: comm_exchange, comm_rep
1205 INTEGER,
INTENT(IN) :: num_integ_group
1207 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_create_group'
1209 INTEGER :: handle, iib, proc_receive, proc_shift, &
1211 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: new_sizes_array, rep_ends_array, &
1212 rep_sizes_array, rep_starts_array
1213 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: my_info
1215 CALL timeset(routinen, handle)
1217 sub_sub_color = para_env_sub%mepos*num_integ_group + color_sub/integ_group_size
1218 CALL comm_exchange%from_split(para_env, sub_sub_color)
1221 sub_sub_color = para_env_sub%mepos*comm_exchange%num_pe + comm_exchange%mepos
1222 CALL comm_rep%from_split(para_env, sub_sub_color)
1228 ALLOCATE (rep_ends_array(0:comm_rep%num_pe - 1))
1229 ALLOCATE (rep_starts_array(0:comm_rep%num_pe - 1))
1230 ALLOCATE (rep_sizes_array(0:comm_rep%num_pe - 1))
1232 CALL comm_rep%allgather(my_group_l_size, rep_sizes_array)
1233 CALL comm_rep%allgather(my_group_l_start, rep_starts_array)
1234 CALL comm_rep%allgather(my_group_l_end, rep_ends_array)
1237 my_new_group_l_size = my_group_l_size
1240 ALLOCATE (my_info(4, 0:comm_rep%num_pe - 1))
1241 my_info(1, 0) = my_group_l_start
1242 my_info(2, 0) = my_group_l_end
1244 my_info(4, 0) = my_group_l_size
1246 DO proc_shift = 1, comm_rep%num_pe - 1
1247 proc_receive =
modulo(comm_rep%mepos - proc_shift, comm_rep%num_pe)
1249 my_new_group_l_size = my_new_group_l_size + rep_sizes_array(proc_receive)
1251 my_info(1, proc_shift) = rep_starts_array(proc_receive)
1252 my_info(2, proc_shift) = rep_ends_array(proc_receive)
1253 my_info(3, proc_shift) = my_info(4, proc_shift - 1) + 1
1254 my_info(4, proc_shift) = my_new_group_l_size
1258 ALLOCATE (new_sizes_array(0:comm_exchange%num_pe - 1))
1259 ALLOCATE (ranges_info_array(4, 0:comm_rep%num_pe - 1, 0:comm_exchange%num_pe - 1))
1260 CALL comm_exchange%allgather(my_new_group_l_size, new_sizes_array)
1261 CALL comm_exchange%allgather(my_info, ranges_info_array)
1263 DEALLOCATE (rep_sizes_array)
1264 DEALLOCATE (rep_starts_array)
1265 DEALLOCATE (rep_ends_array)
1267 ALLOCATE (integ_group_pos2color_sub(0:comm_exchange%num_pe - 1))
1268 CALL comm_exchange%allgather(color_sub, integ_group_pos2color_sub)
1270 IF (calc_forces)
THEN
1271 iib =
SIZE(sizes_array)
1272 ALLOCATE (sizes_array_orig(0:iib - 1))
1273 sizes_array_orig(:) = sizes_array
1276 my_group_l_size_orig = my_group_l_size
1277 my_group_l_size = my_new_group_l_size
1278 DEALLOCATE (sizes_array)
1280 ALLOCATE (sizes_array(0:integ_group_size - 1))
1281 sizes_array(:) = new_sizes_array
1283 DEALLOCATE (new_sizes_array)
1285 CALL timestop(handle)
1287 END SUBROUTINE mp2_ri_create_group
1305 SUBROUTINE mp2_ri_get_integ_group_size(mp2_env, para_env, para_env_sub, gd_array, gd_B_virtual, &
1306 homo, dimen_RI, unit_nr, &
1308 ngroup, num_integ_group, &
1309 virtual, calc_forces)
1310 TYPE(mp2_type) :: mp2_env
1311 TYPE(mp_para_env_type),
INTENT(IN) :: para_env, para_env_sub
1312 TYPE(group_dist_d1_type),
INTENT(IN) :: gd_array
1313 TYPE(group_dist_d1_type),
DIMENSION(:),
INTENT(IN) :: gd_b_virtual
1314 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo
1315 INTEGER,
INTENT(IN) :: dimen_ri, unit_nr
1316 INTEGER,
INTENT(OUT) :: integ_group_size, ngroup, num_integ_group
1317 INTEGER,
DIMENSION(:),
INTENT(IN) :: virtual
1318 LOGICAL,
INTENT(IN) :: calc_forces
1320 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_get_integ_group_size'
1322 INTEGER :: block_size, handle, iib, &
1323 max_repl_group_size, &
1324 min_integ_group_size
1325 INTEGER(KIND=int_8) :: mem
1326 LOGICAL :: calc_group_size
1327 REAL(kind=dp) :: factor, mem_base, mem_min, mem_per_blk, &
1328 mem_per_repl, mem_per_repl_blk, &
1331 CALL timeset(routinen, handle)
1333 ngroup = para_env%num_pe/para_env_sub%num_pe
1335 calc_group_size = mp2_env%ri_mp2%number_integration_groups <= 0
1336 IF (.NOT. calc_group_size)
THEN
1337 IF (mod(ngroup, mp2_env%ri_mp2%number_integration_groups) /= 0) calc_group_size = .true.
1340 IF (calc_group_size)
THEN
1342 mem_real = (mem + 1024*1024 - 1)/(1024*1024)
1343 CALL para_env%min(mem_real)
1346 mem_per_blk = 0.0_dp
1347 mem_per_repl = 0.0_dp
1348 mem_per_repl_blk = 0.0_dp
1351 mem_per_repl = mem_per_repl + maxval(max(real(homo, kind=dp)*maxsize(gd_array), real(dimen_ri, kind=dp))* &
1352 maxsize(gd_b_virtual))*8.0_dp/(1024**2)
1354 mem_per_repl = mem_per_repl + sum(real(homo, kind=dp)*maxsize(gd_b_virtual))*maxsize(gd_array)*8.0_dp/(1024**2)
1356 mem_per_repl_blk = mem_per_repl_blk + real(maxval(maxsize(gd_b_virtual)), kind=dp)*maxsize(gd_array)*8.0_dp/(1024**2)
1358 mem_per_blk = mem_per_blk + 2.0_dp*maxval(maxsize(gd_b_virtual))*real(dimen_ri, kind=dp)*8.0_dp/(1024**2)
1360 mem_base = mem_base + maxval(real(virtual, kind=dp)*maxsize(gd_b_virtual))*8.0_dp/(1024**2)
1362 mem_base = mem_base + real(max(dimen_ri, maxval(virtual)), kind=dp)*maxval(maxsize(gd_b_virtual))*8.0_dp/(1024**2)
1364 IF (calc_forces)
THEN
1366 mem_per_repl = mem_per_repl + sum(real(homo, kind=dp)*maxsize(gd_array)* &
1367 maxsize(gd_b_virtual))*8.0_dp/(1024**2)
1369 mem_per_blk = mem_per_blk + 2.0_dp*maxval(maxsize(gd_b_virtual))*dimen_ri*8.0_dp/(1024**2)
1371 mem_base = mem_base + real(maxval(maxsize(gd_b_virtual)), kind=dp)*max(dimen_ri, maxval(virtual))*8.0_dp/(1024**2)
1373 mem_base = mem_base + sum(real(homo, kind=dp)*homo)*8.0_dp/(1024**2)
1375 mem_base = mem_base + sum(real(virtual, kind=dp)*maxsize(gd_b_virtual))*8.0_dp/(1024**2)
1377 mem_base = mem_base + real(max(dimen_ri, maxval(virtual)), kind=dp)*maxval(maxsize(gd_b_virtual))*8.0_dp/(1024**2)
1381 block_size = max(1, min(floor(sqrt(real(minval(homo), kind=dp))), floor(minval(homo)/sqrt(2.0_dp*ngroup))))
1382 IF (mp2_env%ri_mp2%block_size > 0) block_size = mp2_env%ri_mp2%block_size
1384 mem_min = mem_base + mem_per_repl + (mem_per_blk + mem_per_repl_blk)*block_size
1386 IF (unit_nr > 0)
WRITE (unit_nr,
'(T3,A,T68,F9.2,A4)')
'RI_INFO| Minimum available memory per MPI process:', &
1388 IF (unit_nr > 0)
WRITE (unit_nr,
'(T3,A,T68,F9.2,A4)')
'RI_INFO| Minimum required memory per MPI process:', &
1409 integ_group_size = ngroup
1412 factor = real(sum(homo*virtual), kind=dp) &
1413 - sum((real(maxval(homo), kind=dp)/block_size + block_size - 2.0_dp)*homo*virtual)/ngroup
1414 IF (
SIZE(homo) == 2) factor = factor - 2.0_dp*product(homo)/block_size/ngroup*sum(homo*virtual)
1416 IF (factor <= 0.0_dp)
THEN
1418 max_repl_group_size = min(max(floor((mem_real - mem_base - mem_per_blk*block_size)/ &
1419 (mem_per_repl + mem_per_repl_blk*block_size)), 1), ngroup)
1421 min_integ_group_size = ngroup/max_repl_group_size
1424 DO iib = max(min(min_integ_group_size, ngroup), 1), ngroup
1426 IF (mod(ngroup, iib) == 0)
THEN
1427 integ_group_size = iib
1430 integ_group_size = integ_group_size + 1
1434 integ_group_size = ngroup/mp2_env%ri_mp2%number_integration_groups
1437 IF (unit_nr > 0)
THEN
1438 WRITE (unit=unit_nr, fmt=
"(T3,A,T75,i6)") &
1439 "RI_INFO| Group size for integral replication:", integ_group_size*para_env_sub%num_pe
1440 CALL m_flush(unit_nr)
1443 num_integ_group = ngroup/integ_group_size
1445 CALL timestop(handle)
1447 END SUBROUTINE mp2_ri_get_integ_group_size
1467 SUBROUTINE mp2_ri_get_block_size(mp2_env, para_env, para_env_sub, gd_array, gd_B_virtual, &
1468 homo, virtual, dimen_RI, unit_nr, &
1469 block_size, ngroup, num_integ_group, &
1470 my_open_shell_ss, calc_forces, buffer_1D)
1471 TYPE(mp2_type) :: mp2_env
1472 TYPE(mp_para_env_type),
INTENT(IN) :: para_env, para_env_sub
1473 TYPE(group_dist_d1_type),
INTENT(IN) :: gd_array
1474 TYPE(group_dist_d1_type),
DIMENSION(:),
INTENT(IN) :: gd_b_virtual
1475 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
1476 INTEGER,
INTENT(IN) :: dimen_ri, unit_nr
1477 INTEGER,
INTENT(OUT) :: block_size, ngroup
1478 INTEGER,
INTENT(IN) :: num_integ_group
1479 LOGICAL,
INTENT(IN) :: my_open_shell_ss, calc_forces
1480 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:), &
1481 INTENT(OUT) :: buffer_1d
1483 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_ri_get_block_size'
1485 INTEGER :: best_block_size, handle, num_ij_blocks
1486 INTEGER(KIND=int_8) :: buffer_size, mem
1487 REAL(kind=dp) :: mem_base, mem_per_blk, mem_per_repl_blk, &
1490 CALL timeset(routinen, handle)
1492 ngroup = para_env%num_pe/para_env_sub%num_pe
1495 mem_real = (mem + 1024*1024 - 1)/(1024*1024)
1496 CALL para_env%min(mem_real)
1499 mem_per_blk = 0.0_dp
1500 mem_per_repl_blk = 0.0_dp
1503 mem_base = mem_base + maxval(maxsize(gd_b_virtual))*max(dimen_ri, maxval(virtual))*8.0_dp/(1024**2)
1505 mem_per_repl_blk = mem_per_repl_blk + real(maxval(maxsize(gd_b_virtual)), kind=dp)*maxsize(gd_array)*8.0_dp/(1024**2)
1507 mem_per_blk = mem_per_blk + 2.0_dp*maxval(maxsize(gd_b_virtual))*real(dimen_ri, kind=dp)*8.0_dp/(1024**2)
1509 mem_base = mem_base + maxval(maxsize(gd_b_virtual))*max(dimen_ri, maxval(virtual))*8.0_dp/(1024**2)
1511 IF (calc_forces)
THEN
1513 mem_per_blk = mem_per_blk + 3.0_dp*maxval(maxsize(gd_b_virtual))*dimen_ri*8.0_dp/(1024**2)
1515 mem_base = mem_base + maxval(maxsize(gd_b_virtual))*max(dimen_ri, maxval(virtual))*8.0_dp/(1024**2)
1521 IF (mp2_env%ri_mp2%block_size > 0)
THEN
1522 best_block_size = mp2_env%ri_mp2%block_size
1524 best_block_size = max(floor((mem_real - mem_base)/(mem_per_blk + mem_per_repl_blk*ngroup/num_integ_group)), 1)
1527 IF (
SIZE(homo) == 1)
THEN
1528 IF (.NOT. my_open_shell_ss)
THEN
1529 num_ij_blocks = (homo(1)/best_block_size)
1530 num_ij_blocks = (num_ij_blocks*num_ij_blocks - num_ij_blocks)/2
1532 num_ij_blocks = ((homo(1) - 1)/best_block_size)
1533 num_ij_blocks = (num_ij_blocks*num_ij_blocks - num_ij_blocks)/2
1536 num_ij_blocks = product(homo/best_block_size)
1539 IF ((num_ij_blocks >= ngroup .AND. num_ij_blocks > 0) .OR. best_block_size == 1)
THEN
1542 best_block_size = best_block_size - 1
1546 IF (
SIZE(homo) == 1)
THEN
1547 IF (my_open_shell_ss)
THEN
1550 best_block_size = min(floor(sqrt(real(homo(1) - 1, kind=dp))), best_block_size)
1553 best_block_size = min(floor(sqrt(real(homo(1), kind=dp))), best_block_size)
1557 block_size = max(1, best_block_size)
1559 IF (unit_nr > 0)
THEN
1560 WRITE (unit=unit_nr, fmt=
"(T3,A,T75,i6)") &
1561 "RI_INFO| Block size:", block_size
1562 CALL m_flush(unit_nr)
1566 buffer_size = max(int(maxsize(gd_array), kind=int_8)*block_size, int(max(dimen_ri, maxval(virtual)), kind=int_8)) &
1567 *maxval(maxsize(gd_b_virtual))
1569 IF (calc_forces) buffer_size = buffer_size*2
1570 ALLOCATE (buffer_1d(buffer_size))
1572 CALL timestop(handle)
1574 END SUBROUTINE mp2_ri_get_block_size
1605 SUBROUTINE mp2_update_p_gamma(mp2_env, para_env_sub, gd_B_virtual, &
1606 Eigenval, homo, dimen_RI, iiB, jjB, my_B_size, &
1607 my_B_virtual_end, my_B_virtual_start, my_i, my_j, virtual, local_ab, &
1608 t_ab, my_local_i_aL, my_local_j_aL, open_ss, Y_i_aP, Y_j_aP, &
1609 local_ba, ispin, jspin, dgemm_counter, buffer_1D)
1610 TYPE(mp2_type) :: mp2_env
1611 TYPE(mp_para_env_type),
INTENT(IN) :: para_env_sub
1612 TYPE(group_dist_d1_type),
DIMENSION(:),
INTENT(IN) :: gd_b_virtual
1613 REAL(kind=dp),
DIMENSION(:, :),
INTENT(IN) :: eigenval
1614 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo
1615 INTEGER,
INTENT(IN) :: dimen_ri, iib, jjb
1616 INTEGER,
DIMENSION(:),
INTENT(IN) :: my_b_size, my_b_virtual_end, &
1618 INTEGER,
INTENT(IN) :: my_i, my_j
1619 INTEGER,
DIMENSION(:),
INTENT(IN) :: virtual
1620 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1621 INTENT(INOUT),
TARGET :: local_ab
1622 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1623 INTENT(IN),
TARGET :: t_ab, my_local_i_al, my_local_j_al
1624 LOGICAL,
INTENT(IN) :: open_ss
1625 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1626 INTENT(INOUT),
TARGET :: y_i_ap, y_j_ap, local_ba
1627 INTEGER,
INTENT(IN) :: ispin, jspin
1628 TYPE(dgemm_counter_type),
INTENT(INOUT) :: dgemm_counter
1629 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:),
TARGET :: buffer_1d
1631 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_update_P_gamma'
1633 INTEGER :: a, b, b_global, handle, proc_receive, proc_send, proc_shift, rec_b_size, &
1634 rec_b_virtual_end, rec_b_virtual_start, send_b_size, send_b_virtual_end, &
1635 send_b_virtual_start
1636 INTEGER(KIND=int_8) :: offset
1637 LOGICAL :: alpha_beta
1638 REAL(kind=dp) :: factor, p_ij_diag
1639 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:, :), &
1640 POINTER :: external_ab, send_ab
1642 CALL timeset(routinen//
"_Pia", handle)
1644 alpha_beta = .NOT. (ispin == jspin)
1651 DO b = 1, my_b_size(jspin)
1652 b_global = b + my_b_virtual_start(jspin) - 1
1653 DO a = 1, virtual(ispin)
1654 local_ab(a, b) = -local_ab(a, b)/ &
1655 (eigenval(homo(ispin) + a, ispin) + eigenval(homo(jspin) + b_global, jspin) - &
1656 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, jspin))
1659 IF (.NOT. (alpha_beta))
THEN
1660 p_ij_diag = -sum(local_ab*t_ab)*factor
1663 p_ij_diag = -sum(local_ab*local_ab)*mp2_env%scale_S
1665 DO b = 1, my_b_size(ispin)
1666 b_global = b + my_b_virtual_start(ispin) - 1
1667 DO a = 1, virtual(jspin)
1668 local_ba(a, b) = -local_ba(a, b)/ &
1669 (eigenval(homo(jspin) + a, jspin) + eigenval(homo(ispin) + b_global, ispin) - &
1670 eigenval(my_i + iib - 1, ispin) - eigenval(my_j + jjb - 1, jspin))
1677 CALL dgemm_counter_start(dgemm_counter)
1678 IF (.NOT. (alpha_beta))
THEN
1679 CALL local_gemm(
'T',
'N', my_b_size(ispin), my_b_size(ispin), virtual(ispin), 1.0_dp, &
1680 t_ab, virtual(ispin), local_ab, virtual(ispin), &
1681 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array(:, &
1682 my_b_virtual_start(ispin):my_b_virtual_end(ispin)), my_b_size(ispin), mp2_env%local_gemm_ctx)
1683 mp2_env%ri_grad%P_ij(ispin)%array(my_i + iib - 1, my_i + iib - 1) = &
1684 mp2_env%ri_grad%P_ij(ispin)%array(my_i + iib - 1, my_i + iib - 1) + p_ij_diag
1686 CALL local_gemm(
'T',
'N', my_b_size(ispin), my_b_size(ispin), virtual(jspin), mp2_env%scale_S, &
1687 local_ba, virtual(jspin), local_ba, virtual(jspin), 1.0_dp, &
1688 mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_virtual_start(ispin):my_b_virtual_end(ispin)), my_b_size(ispin), &
1689 mp2_env%local_gemm_ctx)
1691 mp2_env%ri_grad%P_ij(ispin)%array(my_i + iib - 1, my_i + iib - 1) = &
1692 mp2_env%ri_grad%P_ij(ispin)%array(my_i + iib - 1, my_i + iib - 1) + p_ij_diag
1694 CALL local_gemm(
'T',
'N', my_b_size(jspin), my_b_size(jspin), virtual(ispin), mp2_env%scale_S, &
1695 local_ab, virtual(ispin), local_ab, virtual(ispin), 1.0_dp, &
1696 mp2_env%ri_grad%P_ab(jspin)%array(:, my_b_virtual_start(jspin):my_b_virtual_end(jspin)), my_b_size(jspin), &
1697 mp2_env%local_gemm_ctx)
1699 mp2_env%ri_grad%P_ij(jspin)%array(my_j + jjb - 1, my_j + jjb - 1) = &
1700 mp2_env%ri_grad%P_ij(jspin)%array(my_j + jjb - 1, my_j + jjb - 1) + p_ij_diag
1705 IF ((my_i /= my_j) .AND. (.NOT. alpha_beta))
THEN
1707 CALL local_gemm(
'N',
'T', my_b_size(ispin), virtual(ispin), my_b_size(ispin), 1.0_dp, &
1708 t_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
1709 local_ab, virtual(ispin), &
1710 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array, my_b_size(ispin), &
1711 mp2_env%local_gemm_ctx)
1713 mp2_env%ri_grad%P_ij(ispin)%array(my_j + jjb - 1, my_j + jjb - 1) = &
1714 mp2_env%ri_grad%P_ij(ispin)%array(my_j + jjb - 1, my_j + jjb - 1) + p_ij_diag
1716 DO proc_shift = 1, para_env_sub%num_pe - 1
1717 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1718 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1720 CALL get_group_dist(gd_b_virtual(jspin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
1721 CALL get_group_dist(gd_b_virtual(jspin), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)
1723 external_ab(1:virtual(ispin), 1:rec_b_size) => buffer_1d(1:int(virtual(ispin), int_8)*rec_b_size)
1724 external_ab = 0.0_dp
1726 CALL para_env_sub%sendrecv(local_ab, proc_send, &
1727 external_ab, proc_receive)
1729 IF (.NOT. (alpha_beta))
THEN
1730 CALL local_gemm(
'T',
'N', my_b_size(ispin), rec_b_size, virtual(ispin), 1.0_dp, &
1731 t_ab, virtual(ispin), external_ab, virtual(ispin), &
1732 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array(:, rec_b_virtual_start:rec_b_virtual_end), my_b_size(ispin), &
1733 mp2_env%local_gemm_ctx)
1735 CALL local_gemm(
'T',
'N', my_b_size(jspin), rec_b_size, virtual(ispin), mp2_env%scale_S, &
1736 local_ab, virtual(ispin), external_ab, virtual(ispin), &
1737 1.0_dp, mp2_env%ri_grad%P_ab(jspin)%array(:, rec_b_virtual_start:rec_b_virtual_end), &
1738 my_b_size(jspin), mp2_env%local_gemm_ctx)
1742 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
1743 CALL get_group_dist(gd_b_virtual(ispin), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)
1744 external_ab(1:virtual(jspin), 1:rec_b_size) => buffer_1d(1:int(virtual(jspin), int_8)*rec_b_size)
1745 external_ab = 0.0_dp
1746 CALL para_env_sub%sendrecv(local_ba, proc_send, &
1747 external_ab, proc_receive)
1748 CALL local_gemm(
'T',
'N', my_b_size(ispin), rec_b_size, virtual(jspin), mp2_env%scale_S, &
1749 local_ba, virtual(jspin), external_ab, virtual(jspin), &
1750 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array(:, rec_b_virtual_start:rec_b_virtual_end), my_b_size(ispin), &
1751 mp2_env%local_gemm_ctx)
1754 IF ((my_i /= my_j) .AND. (.NOT. alpha_beta))
THEN
1755 external_ab(1:my_b_size(ispin), 1:virtual(ispin)) => &
1756 buffer_1d(1:int(virtual(ispin), int_8)*my_b_size(ispin))
1757 external_ab = 0.0_dp
1759 offset = int(virtual(ispin), int_8)*my_b_size(ispin)
1761 send_ab(1:send_b_size, 1:virtual(ispin)) => buffer_1d(offset + 1:offset + int(send_b_size, int_8)*virtual(ispin))
1764 CALL local_gemm(
'N',
'T', send_b_size, virtual(ispin), my_b_size(ispin), 1.0_dp, &
1765 t_ab(send_b_virtual_start:send_b_virtual_end, :), send_b_size, &
1766 local_ab, virtual(ispin), &
1767 0.0_dp, send_ab, send_b_size, &
1768 mp2_env%local_gemm_ctx)
1769 CALL para_env_sub%sendrecv(send_ab, proc_send, &
1770 external_ab, proc_receive)
1772 mp2_env%ri_grad%P_ab(ispin)%array(:, :) = mp2_env%ri_grad%P_ab(ispin)%array + external_ab
1776 IF (.NOT. alpha_beta)
THEN
1777 IF (my_i /= my_j)
THEN
1778 CALL dgemm_counter_stop(dgemm_counter, 2*my_b_size(ispin), virtual(ispin), virtual(ispin))
1780 CALL dgemm_counter_stop(dgemm_counter, my_b_size(ispin), virtual(ispin), virtual(ispin))
1783 CALL dgemm_counter_stop(dgemm_counter, sum(my_b_size), virtual(ispin), virtual(jspin))
1785 CALL timestop(handle)
1789 CALL timeset(routinen//
"_Gamma", handle)
1790 CALL dgemm_counter_start(dgemm_counter)
1791 IF (.NOT. alpha_beta)
THEN
1793 CALL local_gemm(
'N',
'T', my_b_size(ispin), dimen_ri, my_b_size(ispin), 1.0_dp, &
1794 t_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
1795 my_local_j_al, dimen_ri, 1.0_dp, y_i_ap, my_b_size(ispin), &
1796 mp2_env%local_gemm_ctx)
1798 CALL local_gemm(
'N',
'T', my_b_size(ispin), dimen_ri, my_b_size(jspin), mp2_env%scale_S, &
1799 local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
1800 my_local_j_al, dimen_ri, 1.0_dp, y_i_ap, my_b_size(ispin), mp2_env%local_gemm_ctx)
1801 CALL local_gemm(
'T',
'T', my_b_size(jspin), dimen_ri, my_b_size(ispin), mp2_env%scale_S, &
1802 local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
1803 my_local_i_al, dimen_ri, 1.0_dp, y_j_ap, my_b_size(jspin), mp2_env%local_gemm_ctx)
1806 IF (para_env_sub%num_pe > 1)
THEN
1807 external_ab(1:my_b_size(ispin), 1:dimen_ri) => buffer_1d(1:int(my_b_size(ispin), int_8)*dimen_ri)
1808 external_ab = 0.0_dp
1810 offset = int(my_b_size(ispin), int_8)*dimen_ri
1813 DO proc_shift = 1, para_env_sub%num_pe - 1
1814 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1815 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1817 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
1818 CALL get_group_dist(gd_b_virtual(ispin), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)
1820 send_ab(1:send_b_size, 1:dimen_ri) => buffer_1d(offset + 1:offset + int(dimen_ri, int_8)*send_b_size)
1822 IF (.NOT. alpha_beta)
THEN
1823 CALL local_gemm(
'N',
'T', send_b_size, dimen_ri, my_b_size(ispin), 1.0_dp, &
1824 t_ab(send_b_virtual_start:send_b_virtual_end, :), send_b_size, &
1825 my_local_j_al, dimen_ri, 0.0_dp, send_ab, send_b_size, &
1826 mp2_env%local_gemm_ctx)
1827 CALL para_env_sub%sendrecv(send_ab, proc_send, external_ab, proc_receive)
1829 y_i_ap(:, :) = y_i_ap + external_ab
1833 CALL local_gemm(
'N',
'T', send_b_size, dimen_ri, my_b_size(jspin), mp2_env%scale_S, &
1834 local_ab(send_b_virtual_start:send_b_virtual_end, :), send_b_size, &
1835 my_local_j_al, dimen_ri, 0.0_dp, send_ab, send_b_size, &
1836 mp2_env%local_gemm_ctx)
1837 CALL para_env_sub%sendrecv(send_ab, proc_send, external_ab, proc_receive)
1838 y_i_ap(:, :) = y_i_ap + external_ab
1842 IF (alpha_beta)
THEN
1844 IF (para_env_sub%num_pe > 1)
THEN
1845 external_ab(1:my_b_size(jspin), 1:dimen_ri) => buffer_1d(1:int(my_b_size(jspin), int_8)*dimen_ri)
1846 external_ab = 0.0_dp
1848 offset = int(my_b_size(jspin), int_8)*dimen_ri
1850 DO proc_shift = 1, para_env_sub%num_pe - 1
1851 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1852 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1854 CALL get_group_dist(gd_b_virtual(jspin), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)
1855 send_ab(1:send_b_size, 1:dimen_ri) => buffer_1d(offset + 1:offset + int(dimen_ri, int_8)*send_b_size)
1857 CALL local_gemm(
'N',
'T', send_b_size, dimen_ri, my_b_size(ispin), mp2_env%scale_S, &
1858 local_ba(send_b_virtual_start:send_b_virtual_end, :), send_b_size, &
1859 my_local_i_al, dimen_ri, 0.0_dp, send_ab, send_b_size, &
1860 mp2_env%local_gemm_ctx)
1861 CALL para_env_sub%sendrecv(send_ab, proc_send, external_ab, proc_receive)
1862 y_j_ap(:, :) = y_j_ap + external_ab
1867 CALL dgemm_counter_stop(dgemm_counter, 3*virtual(ispin), dimen_ri, my_b_size(jspin))
1869 CALL dgemm_counter_stop(dgemm_counter, virtual(ispin), dimen_ri, my_b_size(ispin))
1872 IF ((my_i /= my_j) .AND. (.NOT. alpha_beta))
THEN
1874 CALL dgemm_counter_start(dgemm_counter)
1875 CALL local_gemm(
'T',
'T', my_b_size(ispin), dimen_ri, my_b_size(ispin), 1.0_dp, &
1876 t_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), :), my_b_size(ispin), &
1877 my_local_i_al, dimen_ri, 1.0_dp, y_j_ap, my_b_size(ispin), &
1878 mp2_env%local_gemm_ctx)
1879 DO proc_shift = 1, para_env_sub%num_pe - 1
1880 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1881 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1883 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
1885 external_ab(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri, int_8)*rec_b_size)
1886 external_ab = 0.0_dp
1888 CALL para_env_sub%sendrecv(my_local_i_al, proc_send, &
1889 external_ab, proc_receive)
1892 CALL local_gemm(
'T',
'T', my_b_size(ispin), dimen_ri, rec_b_size, 1.0_dp, &
1893 t_ab(rec_b_virtual_start:rec_b_virtual_end, :), rec_b_size, &
1894 external_ab, dimen_ri, 1.0_dp, y_j_ap, my_b_size(ispin), mp2_env%local_gemm_ctx)
1897 CALL dgemm_counter_stop(dgemm_counter, my_b_size(ispin), dimen_ri, virtual(ispin))
1900 CALL timestop(handle)
1901 END SUBROUTINE mp2_update_p_gamma
1924 SUBROUTINE mp2_redistribute_gamma(Gamma_P_ia, ij_index, my_B_size, &
1925 my_block_size, my_group_L_size, my_i, my_ij_pairs, ngroup, &
1926 num_integ_group, integ_group_pos2color_sub, num_ij_pairs, &
1927 ij_map, ranges_info_array, Y_i_aP, comm_exchange, &
1928 sizes_array, spin, buffer_1D)
1930 REAL(kind=dp),
DIMENSION(:, :, :),
INTENT(INOUT) :: gamma_p_ia
1931 INTEGER,
INTENT(IN) :: ij_index, my_b_size, my_block_size, &
1932 my_group_l_size, my_i, my_ij_pairs, &
1933 ngroup, num_integ_group
1934 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(IN) :: integ_group_pos2color_sub, num_ij_pairs
1935 INTEGER,
ALLOCATABLE,
DIMENSION(:, :),
INTENT(IN) :: ij_map
1936 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :), &
1937 INTENT(IN) :: ranges_info_array
1938 REAL(kind=dp),
DIMENSION(:, :, :),
INTENT(IN) :: y_i_ap
1939 TYPE(mp_comm_type),
INTENT(IN) :: comm_exchange
1940 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(IN) :: sizes_array
1941 INTEGER,
INTENT(IN) :: spin
1942 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:),
TARGET :: buffer_1d
1944 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_redistribute_gamma'
1946 INTEGER :: end_point, handle, handle2, iib, ij_counter_rec, irep, kkk, lll, lstart_pos, &
1947 proc_receive, proc_send, proc_shift, rec_i, rec_ij_index, send_l_size, start_point, tag
1948 INTEGER(KIND=int_8) :: offset
1949 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:, :, :), &
1950 POINTER :: bi_c_rec, bi_c_send
1954 CALL timeset(routinen//
"_comm2", handle)
1958 IF (ij_index <= my_ij_pairs)
THEN
1961 CALL timeset(routinen//
"_comm2_w", handle2)
1962 DO irep = 0, num_integ_group - 1
1963 lstart_pos = ranges_info_array(1, irep, comm_exchange%mepos)
1964 start_point = ranges_info_array(3, irep, comm_exchange%mepos)
1965 end_point = ranges_info_array(4, irep, comm_exchange%mepos)
1969 DO kkk = start_point, end_point
1970 lll = kkk - start_point + lstart_pos
1971 DO iib = 1, my_block_size
1972 gamma_p_ia(1:my_b_size, my_i + iib - 1, kkk) = &
1973 gamma_p_ia(1:my_b_size, my_i + iib - 1, kkk) + &
1974 y_i_ap(1:my_b_size, lll, iib)
1979 CALL timestop(handle2)
1983 DO proc_shift = 1, comm_exchange%num_pe - 1
1984 proc_send =
modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
1985 proc_receive =
modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
1987 send_l_size = sizes_array(proc_send)
1988 bi_c_send(1:my_b_size, 1:my_block_size, 1:send_l_size) => &
1989 buffer_1d(1:int(my_b_size, int_8)*my_block_size*send_l_size)
1991 offset = int(my_b_size, int_8)*my_block_size*send_l_size
1993 CALL timeset(routinen//
"_comm2_w", handle2)
1995 DO irep = 0, num_integ_group - 1
1996 lstart_pos = ranges_info_array(1, irep, proc_send)
1997 start_point = ranges_info_array(3, irep, proc_send)
1998 end_point = ranges_info_array(4, irep, proc_send)
2002 DO kkk = start_point, end_point
2003 lll = kkk - start_point + lstart_pos
2004 DO iib = 1, my_block_size
2005 bi_c_send(1:my_b_size, iib, kkk) = y_i_ap(1:my_b_size, lll, iib)
2010 CALL timestop(handle2)
2012 rec_ij_index = num_ij_pairs(proc_receive)
2014 IF (ij_index <= rec_ij_index)
THEN
2017 (ij_index - min(1, integ_group_pos2color_sub(proc_receive)))*ngroup + integ_group_pos2color_sub(proc_receive)
2019 rec_i = ij_map(spin, ij_counter_rec)
2021 bi_c_rec(1:my_b_size, 1:my_block_size, 1:my_group_l_size) => &
2022 buffer_1d(offset + 1:offset + int(my_b_size, int_8)*my_block_size*my_group_l_size)
2025 CALL comm_exchange%sendrecv(bi_c_send, proc_send, &
2026 bi_c_rec, proc_receive, tag)
2028 CALL timeset(routinen//
"_comm2_w", handle2)
2029 DO irep = 0, num_integ_group - 1
2030 start_point = ranges_info_array(3, irep, comm_exchange%mepos)
2031 end_point = ranges_info_array(4, irep, comm_exchange%mepos)
2034 gamma_p_ia(:, rec_i:rec_i + my_block_size - 1, start_point:end_point) = &
2035 gamma_p_ia(:, rec_i:rec_i + my_block_size - 1, start_point:end_point) + &
2036 bi_c_rec(1:my_b_size, :, start_point:end_point)
2039 CALL timestop(handle2)
2043 CALL comm_exchange%send(bi_c_send, proc_send, tag)
2051 DO proc_shift = 1, comm_exchange%num_pe - 1
2052 proc_send =
modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
2053 proc_receive =
modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
2054 rec_ij_index = num_ij_pairs(proc_receive)
2056 IF (ij_index <= rec_ij_index)
THEN
2059 (ij_index - min(1, integ_group_pos2color_sub(proc_receive)))*ngroup + integ_group_pos2color_sub(proc_receive)
2061 rec_i = ij_map(spin, ij_counter_rec)
2063 bi_c_rec(1:my_b_size, 1:my_block_size, 1:my_group_l_size) => &
2064 buffer_1d(1:int(my_b_size, int_8)*my_block_size*my_group_l_size)
2068 CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)
2070 CALL timeset(routinen//
"_comm2_w", handle2)
2071 DO irep = 0, num_integ_group - 1
2072 start_point = ranges_info_array(3, irep, comm_exchange%mepos)
2073 end_point = ranges_info_array(4, irep, comm_exchange%mepos)
2076 gamma_p_ia(:, rec_i:rec_i + my_block_size - 1, start_point:end_point) = &
2077 gamma_p_ia(:, rec_i:rec_i + my_block_size - 1, start_point:end_point) + &
2078 bi_c_rec(1:my_b_size, :, start_point:end_point)
2081 CALL timestop(handle2)
2087 CALL timestop(handle)
2089 END SUBROUTINE mp2_redistribute_gamma
2118 SUBROUTINE quasi_degenerate_p_ij(mp2_env, Eigenval, homo, virtual, open_shell, &
2119 beta_beta, Bib_C, unit_nr, dimen_RI, &
2120 my_B_size, ngroup, my_group_L_size, &
2121 color_sub, ranges_info_array, comm_exchange, para_env_sub, para_env, &
2122 my_B_virtual_start, my_B_virtual_end, sizes_array, gd_B_virtual, &
2123 integ_group_pos2color_sub, dgemm_counter, buffer_1D)
2124 TYPE(mp2_type) :: mp2_env
2125 REAL(kind=dp),
DIMENSION(:, :),
INTENT(IN) :: eigenval
2126 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
2127 LOGICAL,
INTENT(IN) :: open_shell, beta_beta
2128 TYPE(three_dim_real_array),
DIMENSION(:), &
2130 INTEGER,
INTENT(IN) :: unit_nr, dimen_ri
2131 INTEGER,
DIMENSION(:),
INTENT(IN) :: my_b_size
2132 INTEGER,
INTENT(IN) :: ngroup, my_group_l_size, color_sub
2133 INTEGER,
ALLOCATABLE,
DIMENSION(:, :, :), &
2134 INTENT(IN) :: ranges_info_array
2135 TYPE(mp_comm_type),
INTENT(IN) :: comm_exchange
2136 TYPE(mp_para_env_type),
INTENT(IN) :: para_env_sub, para_env
2137 INTEGER,
DIMENSION(:),
INTENT(IN) :: my_b_virtual_start, my_b_virtual_end
2138 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(IN) :: sizes_array
2139 TYPE(group_dist_d1_type),
DIMENSION(:),
INTENT(IN) :: gd_b_virtual
2140 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(IN) :: integ_group_pos2color_sub
2141 TYPE(dgemm_counter_type),
INTENT(INOUT) :: dgemm_counter
2142 REAL(kind=dp),
CONTIGUOUS,
DIMENSION(:),
TARGET :: buffer_1d
2144 CHARACTER(LEN=*),
PARAMETER :: routinen =
'quasi_degenerate_P_ij'
2146 INTEGER :: a, a_global, b, b_global, block_size, decil, handle, handle2, ijk_counter, &
2147 ijk_counter_send, ijk_index, ispin, kkb, kspin, max_block_size, max_ijk, my_i, my_ijk, &
2148 my_j, my_k, my_last_k(2), my_virtual, nspins, proc_receive, proc_send, proc_shift, &
2149 rec_b_size, rec_b_virtual_end, rec_b_virtual_start, rec_l_size, send_b_size, &
2150 send_b_virtual_end, send_b_virtual_start, send_i, send_ijk_index, send_j, send_k, &
2151 size_b_i, size_b_k, tag, tag2
2152 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: num_ijk
2153 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: ijk_map, send_last_k
2154 LOGICAL :: alpha_beta, do_recv_i, do_recv_j, &
2155 do_recv_k, do_send_i, do_send_j, &
2157 REAL(kind=dp) :: amp_fac, p_ij_elem, t_new, t_start
2158 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :), &
2159 TARGET :: local_ab, local_al_i, local_al_j, t_ab
2160 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: local_al_k
2161 REAL(kind=dp),
DIMENSION(:, :),
POINTER :: bi_c_rec, external_ab, external_al
2162 REAL(kind=dp),
DIMENSION(:, :, :),
POINTER :: bi_c_rec_3d
2164 CALL timeset(routinen//
"_ij_sing", handle)
2169 nspins =
SIZE(bib_c)
2170 alpha_beta = (nspins == 2)
2173 amp_fac = mp2_env%scale_S + mp2_env%scale_T
2174 IF (open_shell) amp_fac = mp2_env%scale_T
2176 ALLOCATE (send_last_k(2, comm_exchange%num_pe - 1))
2179 DO ispin = 1, nspins
2180 size_b_i = my_b_size(ispin)
2181 IF (ispin == 1 .AND. alpha_beta)
THEN
2186 size_b_k = my_b_size(kspin)
2190 CALL find_quasi_degenerate_ij(my_ijk, homo(ispin), homo(kspin), eigenval(:, ispin), mp2_env, ijk_map, unit_nr, ngroup, &
2191 .NOT. beta_beta .AND. ispin /= 2, comm_exchange, num_ijk, max_ijk, color_sub, &
2192 SIZE(buffer_1d), my_group_l_size, size_b_k, para_env, virtual(ispin), size_b_i)
2194 my_virtual = virtual(ispin)
2195 IF (
SIZE(ijk_map, 2) > 0)
THEN
2196 max_block_size = ijk_map(4, 1)
2201 ALLOCATE (local_al_i(dimen_ri, size_b_i))
2202 ALLOCATE (local_al_j(dimen_ri, size_b_i))
2203 ALLOCATE (local_al_k(dimen_ri, size_b_k, max_block_size))
2204 ALLOCATE (t_ab(my_virtual, size_b_k))
2209 t_start = m_walltime()
2210 DO ijk_index = 1, max_ijk
2213 IF (unit_nr > 0 .AND. ijk_index > 1)
THEN
2214 decil = ijk_index*10/max_ijk
2215 IF (decil /= (ijk_index - 1)*10/max_ijk)
THEN
2216 t_new = m_walltime()
2217 t_new = (t_new - t_start)/60.0_dp*(max_ijk - ijk_index + 1)/(ijk_index - 1)
2218 WRITE (unit_nr, fmt=
"(T3,A)")
"Percentage of finished loop: "// &
2219 cp_to_string(decil*10)//
". Minutes left: "//cp_to_string(t_new)
2220 CALL m_flush(unit_nr)
2224 IF (ijk_index <= my_ijk)
THEN
2226 ijk_counter = (ijk_index - min(1, color_sub))*ngroup + color_sub
2227 my_i = ijk_map(1, ijk_counter)
2228 my_j = ijk_map(2, ijk_counter)
2229 my_k = ijk_map(3, ijk_counter)
2230 block_size = ijk_map(4, ijk_counter)
2232 do_recv_i = (ispin /= kspin) .OR. my_i < my_k .OR. my_i > my_k + block_size - 1
2233 do_recv_j = (ispin /= kspin) .OR. my_j < my_k .OR. my_j > my_k + block_size - 1
2234 do_recv_k = my_k /= my_last_k(1) .OR. my_k + block_size - 1 /= my_last_k(2)
2236 my_last_k(2) = my_k + block_size - 1
2240 CALL fill_local_i_al_2d(local_al_i, ranges_info_array(:, :, comm_exchange%mepos), &
2241 bib_c(ispin)%array(:, :, my_i))
2246 CALL fill_local_i_al_2d(local_al_j, ranges_info_array(:, :, comm_exchange%mepos), &
2247 bib_c(ispin)%array(:, :, my_j))
2252 CALL fill_local_i_al(local_al_k(:, :, 1:block_size), ranges_info_array(:, :, comm_exchange%mepos), &
2253 bib_c(kspin)%array(:, :, my_k:my_k + block_size - 1))
2256 CALL timeset(routinen//
"_comm", handle2)
2257 DO proc_shift = 1, comm_exchange%num_pe - 1
2258 proc_send =
modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
2259 proc_receive =
modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
2261 send_ijk_index = num_ijk(proc_send)
2263 rec_l_size = sizes_array(proc_receive)
2264 bi_c_rec(1:rec_l_size, 1:size_b_i) => buffer_1d(1:int(rec_l_size, kind=int_8)*size_b_i)
2269 IF (ijk_index <= send_ijk_index)
THEN
2271 ijk_counter_send = (ijk_index - min(1, integ_group_pos2color_sub(proc_send)))* &
2272 ngroup + integ_group_pos2color_sub(proc_send)
2273 send_i = ijk_map(1, ijk_counter_send)
2274 send_j = ijk_map(2, ijk_counter_send)
2275 send_k = ijk_map(3, ijk_counter_send)
2277 do_send_i = (ispin /= kspin) .OR. send_i < send_k .OR. send_i > send_k + block_size - 1
2278 do_send_j = (ispin /= kspin) .OR. send_j < send_k .OR. send_j > send_k + block_size - 1
2279 do_send_k = send_k /= send_last_k(1, proc_shift) .OR. send_k + block_size - 1 /= send_last_k(2, proc_shift)
2280 send_last_k(1, proc_shift) = send_k
2281 send_last_k(2, proc_shift) = send_k + block_size - 1
2288 CALL comm_exchange%sendrecv(bib_c(ispin)%array(:, :, send_i), proc_send, &
2289 bi_c_rec, proc_receive, tag)
2291 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_i), proc_send, tag)
2293 ELSE IF (do_recv_i)
THEN
2294 CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)
2297 CALL fill_local_i_al_2d(local_al_i, ranges_info_array(:, :, proc_receive), bi_c_rec)
2304 CALL comm_exchange%sendrecv(bib_c(ispin)%array(:, :, send_j), proc_send, &
2305 bi_c_rec, proc_receive, tag)
2307 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_j), proc_send, tag)
2309 ELSE IF (do_recv_j)
THEN
2310 CALL comm_exchange%recv(bi_c_rec, proc_receive, tag)
2313 CALL fill_local_i_al_2d(local_al_j, ranges_info_array(:, :, proc_receive), bi_c_rec)
2317 bi_c_rec_3d(1:rec_l_size, 1:size_b_k, 1:block_size) => &
2318 buffer_1d(1:int(rec_l_size, kind=int_8)*size_b_k*block_size)
2321 CALL comm_exchange%sendrecv(bib_c(kspin)%array(:, :, send_k:send_k + block_size - 1), proc_send, &
2322 bi_c_rec_3d, proc_receive, tag)
2324 CALL comm_exchange%send(bi_c_rec, proc_receive, tag)
2326 ELSE IF (do_recv_k)
THEN
2327 CALL comm_exchange%recv(bi_c_rec_3d, proc_receive, tag)
2330 CALL fill_local_i_al(local_al_k(:, :, 1:block_size), ranges_info_array(:, :, proc_receive), bi_c_rec_3d)
2334 IF (.NOT. do_recv_i) local_al_i(:, :) = local_al_k(:, :, my_i - my_k + 1)
2335 IF (.NOT. do_recv_j) local_al_j(:, :) = local_al_k(:, :, my_j - my_k + 1)
2336 CALL timestop(handle2)
2339 DO kkb = 1, block_size
2340 CALL timeset(routinen//
"_exp_ik", handle2)
2341 CALL dgemm_counter_start(dgemm_counter)
2342 ALLOCATE (local_ab(my_virtual, size_b_k))
2344 CALL local_gemm(
'T',
'N', size_b_i, size_b_k, dimen_ri, 1.0_dp, &
2345 local_al_i, dimen_ri, local_al_k(:, :, kkb), dimen_ri, &
2346 0.0_dp, local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), 1:size_b_k), size_b_i, &
2347 mp2_env%local_gemm_ctx)
2348 DO proc_shift = 1, para_env_sub%num_pe - 1
2349 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2350 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2352 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
2354 external_al(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri, kind=int_8)*rec_b_size)
2356 CALL comm_exchange%sendrecv(local_al_i, proc_send, &
2357 external_al, proc_receive, tag)
2359 CALL local_gemm(
'T',
'N', rec_b_size, size_b_k, dimen_ri, 1.0_dp, &
2360 external_al, dimen_ri, local_al_k(:, :, kkb), dimen_ri, &
2361 0.0_dp, local_ab(rec_b_virtual_start:rec_b_virtual_end, 1:size_b_k), rec_b_size, &
2362 mp2_env%local_gemm_ctx)
2364 CALL dgemm_counter_stop(dgemm_counter, my_virtual, size_b_k, dimen_ri)
2365 CALL timestop(handle2)
2368 CALL timeset(routinen//
"_tab", handle2)
2371 IF (.NOT. alpha_beta)
THEN
2373 b_global = b + my_b_virtual_start(1) - 1
2374 DO a = 1, my_b_size(1)
2375 a_global = a + my_b_virtual_start(1) - 1
2376 t_ab(a_global, b) = (amp_fac*local_ab(a_global, b) - mp2_env%scale_T*local_ab(b_global, a))/ &
2377 (eigenval(my_i, 1) + eigenval(my_k + kkb - 1, 1) &
2378 - eigenval(homo(1) + a_global, 1) - eigenval(homo(1) + b_global, 1))
2383 b_global = b + my_b_virtual_start(kspin) - 1
2384 DO a = 1, my_b_size(ispin)
2385 a_global = a + my_b_virtual_start(ispin) - 1
2386 t_ab(a_global, b) = mp2_env%scale_S*local_ab(a_global, b)/ &
2387 (eigenval(my_i, ispin) + eigenval(my_k + kkb - 1, kspin) &
2388 - eigenval(homo(ispin) + a_global, ispin) - eigenval(homo(kspin) + b_global, kspin))
2393 IF (.NOT. alpha_beta)
THEN
2394 DO proc_shift = 1, para_env_sub%num_pe - 1
2395 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2396 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2397 CALL get_group_dist(gd_b_virtual(1), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
2398 CALL get_group_dist(gd_b_virtual(1), proc_send, send_b_virtual_start, send_b_virtual_end, send_b_size)
2400 external_ab(1:size_b_i, 1:rec_b_size) => buffer_1d(1:int(size_b_i, kind=int_8)*rec_b_size)
2401 CALL para_env_sub%sendrecv(local_ab(send_b_virtual_start:send_b_virtual_end, 1:size_b_k), proc_send, &
2402 external_ab(1:size_b_i, 1:rec_b_size), proc_receive, tag)
2404 DO b = 1, my_b_size(1)
2405 b_global = b + my_b_virtual_start(1) - 1
2406 DO a = 1, rec_b_size
2407 a_global = a + rec_b_virtual_start - 1
2408 t_ab(a_global, b) = (amp_fac*local_ab(a_global, b) - mp2_env%scale_T*external_ab(b, a))/ &
2409 (eigenval(my_i, 1) + eigenval(my_k + kkb - 1, 1) &
2410 - eigenval(homo(1) + a_global, 1) - eigenval(homo(1) + b_global, 1))
2415 CALL timestop(handle2)
2418 CALL timeset(routinen//
"_exp_jk", handle2)
2420 CALL dgemm_counter_start(dgemm_counter)
2421 CALL local_gemm(
'T',
'N', size_b_i, size_b_k, dimen_ri, 1.0_dp, &
2422 local_al_j, dimen_ri, local_al_k(:, :, kkb), dimen_ri, &
2423 0.0_dp, local_ab(my_b_virtual_start(ispin):my_b_virtual_end(ispin), 1:size_b_k), size_b_i, &
2424 mp2_env%local_gemm_ctx)
2425 DO proc_shift = 1, para_env_sub%num_pe - 1
2426 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2427 proc_receive =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2429 CALL get_group_dist(gd_b_virtual(ispin), proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
2431 external_al(1:dimen_ri, 1:rec_b_size) => buffer_1d(1:int(dimen_ri, kind=int_8)*rec_b_size)
2433 CALL comm_exchange%sendrecv(local_al_j, proc_send, &
2434 external_al, proc_receive, tag)
2435 CALL local_gemm(
'T',
'N', rec_b_size, size_b_k, dimen_ri, 1.0_dp, &
2436 external_al, dimen_ri, local_al_k(:, :, kkb), dimen_ri, &
2437 0.0_dp, local_ab(rec_b_virtual_start:rec_b_virtual_end, 1:size_b_k), rec_b_size, &
2438 mp2_env%local_gemm_ctx)
2440 CALL dgemm_counter_stop(dgemm_counter, my_virtual, size_b_k, dimen_ri)
2441 CALL timestop(handle2)
2443 CALL timeset(routinen//
"_Pij", handle2)
2445 b_global = b + my_b_virtual_start(kspin) - 1
2446 DO a = 1, my_b_size(ispin)
2447 a_global = a + my_b_virtual_start(ispin) - 1
2448 local_ab(a_global, b) = &
2449 local_ab(a_global, b)/(eigenval(my_j, ispin) + eigenval(my_k + kkb - 1, kspin) &
2450 - eigenval(homo(ispin) + a_global, ispin) - eigenval(homo(kspin) + b_global, kspin))
2454 p_ij_elem = sum(local_ab*t_ab)
2455 DEALLOCATE (local_ab)
2456 IF ((.NOT. open_shell) .AND. (.NOT. alpha_beta))
THEN
2457 p_ij_elem = p_ij_elem*2.0_dp
2460 mp2_env%ri_grad%P_ij(2)%array(my_i, my_j) = mp2_env%ri_grad%P_ij(2)%array(my_i, my_j) - p_ij_elem
2461 mp2_env%ri_grad%P_ij(2)%array(my_j, my_i) = mp2_env%ri_grad%P_ij(2)%array(my_j, my_i) - p_ij_elem
2463 mp2_env%ri_grad%P_ij(ispin)%array(my_i, my_j) = mp2_env%ri_grad%P_ij(ispin)%array(my_i, my_j) - p_ij_elem
2464 mp2_env%ri_grad%P_ij(ispin)%array(my_j, my_i) = mp2_env%ri_grad%P_ij(ispin)%array(my_j, my_i) - p_ij_elem
2466 CALL timestop(handle2)
2469 CALL timeset(routinen//
"_comm", handle2)
2471 DO proc_shift = 1, comm_exchange%num_pe - 1
2472 proc_send =
modulo(comm_exchange%mepos + proc_shift, comm_exchange%num_pe)
2473 proc_receive =
modulo(comm_exchange%mepos - proc_shift, comm_exchange%num_pe)
2475 send_ijk_index = num_ijk(proc_send)
2477 IF (ijk_index <= send_ijk_index)
THEN
2479 ijk_counter_send = (ijk_index - min(1, integ_group_pos2color_sub(proc_send)))*ngroup + &
2480 integ_group_pos2color_sub(proc_send)
2481 send_i = ijk_map(1, ijk_counter_send)
2482 send_j = ijk_map(2, ijk_counter_send)
2483 send_k = ijk_map(3, ijk_counter_send)
2484 block_size = ijk_map(4, ijk_counter_send)
2486 do_send_i = (ispin /= kspin) .OR. send_i < send_k .OR. send_i > send_k + block_size - 1
2487 do_send_j = (ispin /= kspin) .OR. send_j < send_k .OR. send_j > send_k + block_size - 1
2490 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_i), proc_send, tag)
2494 CALL comm_exchange%send(bib_c(ispin)%array(:, :, send_j), proc_send, tag)
2497 CALL comm_exchange%send(bib_c(kspin)%array(:, :, send_k:send_k + block_size - 1), proc_send, tag)
2501 CALL timestop(handle2)
2504 DEALLOCATE (local_al_i)
2505 DEALLOCATE (local_al_j)
2506 DEALLOCATE (local_al_k)
2508 DEALLOCATE (ijk_map)
2510 CALL timestop(handle)
2512 END SUBROUTINE quasi_degenerate_p_ij
2536 SUBROUTINE find_quasi_degenerate_ij(my_ijk, homo, homo_beta, Eigenval, mp2_env, ijk_map, unit_nr, ngroup, &
2537 do_print_alpha, comm_exchange, num_ijk, max_ijk, color_sub, &
2538 buffer_size, my_group_L_size, B_size_k, para_env, virtual, B_size_i)
2540 INTEGER,
INTENT(OUT) :: my_ijk
2541 INTEGER,
INTENT(IN) :: homo, homo_beta
2542 REAL(kind=dp),
DIMENSION(:),
INTENT(IN) :: eigenval
2543 TYPE(mp2_type),
INTENT(IN) :: mp2_env
2544 INTEGER,
ALLOCATABLE,
DIMENSION(:, :),
INTENT(OUT) :: ijk_map
2545 INTEGER,
INTENT(IN) :: unit_nr, ngroup
2546 LOGICAL,
INTENT(IN) :: do_print_alpha
2547 TYPE(mp_comm_type),
INTENT(IN) :: comm_exchange
2548 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(OUT) :: num_ijk
2549 INTEGER,
INTENT(OUT) :: max_ijk
2550 INTEGER,
INTENT(IN) :: color_sub, buffer_size, my_group_l_size, &
2552 TYPE(mp_para_env_type),
INTENT(IN) :: para_env
2553 INTEGER,
INTENT(IN) :: virtual, b_size_i
2555 INTEGER :: block_size, communication_steps, communication_volume, iib, ij_counter, &
2556 ijk_counter, jjb, kkb, max_block_size, max_num_k_blocks, min_communication_volume, &
2557 my_steps, num_k_blocks, num_sing_ij, total_ijk
2558 INTEGER(KIND=int_8) :: mem
2559 LOGICAL,
ALLOCATABLE,
DIMENSION(:, :) :: ijk_marker
2561 ALLOCATE (num_ijk(0:comm_exchange%num_pe - 1))
2566 DO jjb = iib + 1, homo
2567 IF (abs(eigenval(jjb) - eigenval(iib)) < mp2_env%ri_grad%eps_canonical) &
2568 num_sing_ij = num_sing_ij + 1
2572 IF (unit_nr > 0)
THEN
2573 IF (do_print_alpha)
THEN
2574 WRITE (unit=unit_nr, fmt=
"(T3,A,T75,i6)") &
2575 "MO_INFO| Number of ij pairs below EPS_CANONICAL:", num_sing_ij
2577 WRITE (unit=unit_nr, fmt=
"(T3,A,T75,i6)") &
2578 "MO_INFO| Number of ij pairs (spin beta) below EPS_CANONICAL:", num_sing_ij
2583 max_block_size = buffer_size/(my_group_l_size*b_size_k)
2590 mem = mem - 2_int_8*(virtual*b_size_k + b_size_i*my_group_l_size)
2591 max_block_size = min(max_block_size, max(1, int(mem/(my_group_l_size*b_size_k), kind(max_block_size))))
2594 CALL para_env%min(max_block_size)
2598 min_communication_volume = 3*homo_beta*num_sing_ij
2599 communication_steps = 3*homo_beta*num_sing_ij
2600 DO iib = max_block_size, 2, -1
2601 max_num_k_blocks = homo_beta/iib*num_sing_ij
2602 num_k_blocks = max_num_k_blocks - mod(max_num_k_blocks, ngroup)
2603 communication_volume = num_k_blocks*(2 + iib) + 3*(homo_beta*num_sing_ij - iib*num_k_blocks)
2604 my_steps = num_k_blocks + homo_beta*num_sing_ij - iib*num_k_blocks
2605 IF (communication_volume < min_communication_volume)
THEN
2607 min_communication_volume = communication_volume
2608 communication_steps = my_steps
2609 ELSE IF (communication_volume == min_communication_volume .AND. my_steps < communication_steps)
THEN
2611 communication_steps = my_steps
2615 IF (unit_nr > 0)
THEN
2616 WRITE (unit=unit_nr, fmt=
"(T3,A,T75,i6)") &
2617 "MO_INFO| Block size:", block_size
2618 CALL m_flush(unit_nr)
2622 max_num_k_blocks = homo_beta/block_size*num_sing_ij
2623 num_k_blocks = max_num_k_blocks - mod(max_num_k_blocks, ngroup)
2625 total_ijk = num_k_blocks + homo_beta*num_sing_ij - num_k_blocks*block_size
2626 ALLOCATE (ijk_map(4, total_ijk))
2628 ALLOCATE (ijk_marker(homo_beta, num_sing_ij))
2636 DO jjb = iib + 1, homo
2637 IF (abs(eigenval(jjb) - eigenval(iib)) >= mp2_env%ri_grad%eps_canonical) cycle
2638 ij_counter = ij_counter + 1
2639 DO kkb = 1, homo_beta - mod(homo_beta, block_size), block_size
2640 IF (ijk_counter + 1 > num_k_blocks)
EXIT
2641 ijk_counter = ijk_counter + 1
2642 ijk_marker(kkb:kkb + block_size - 1, ij_counter) = .false.
2643 ijk_map(1, ijk_counter) = iib
2644 ijk_map(2, ijk_counter) = jjb
2645 ijk_map(3, ijk_counter) = kkb
2646 ijk_map(4, ijk_counter) = block_size
2647 IF (mod(ijk_counter, ngroup) == color_sub) my_ijk = my_ijk + 1
2654 DO jjb = iib + 1, homo
2655 IF (abs(eigenval(jjb) - eigenval(iib)) >= mp2_env%ri_grad%eps_canonical) cycle
2656 ij_counter = ij_counter + 1
2657 DO kkb = 1, homo_beta
2658 IF (ijk_marker(kkb, ij_counter))
THEN
2659 ijk_counter = ijk_counter + 1
2660 ijk_map(1, ijk_counter) = iib
2661 ijk_map(2, ijk_counter) = jjb
2662 ijk_map(3, ijk_counter) = kkb
2663 ijk_map(4, ijk_counter) = 1
2664 IF (mod(ijk_counter, ngroup) == color_sub) my_ijk = my_ijk + 1
2670 DEALLOCATE (ijk_marker)
2672 CALL comm_exchange%allgather(my_ijk, num_ijk)
2673 max_ijk = maxval(num_ijk)
2675 END SUBROUTINE find_quasi_degenerate_ij
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
various routines to log and control the output. The idea is that decisions about where to log should ...
Counters to determine the performance of parallel DGEMMs.
elemental subroutine, public dgemm_counter_init(dgemm_counter, unit_nr, print_info)
Initialize a dgemm_counter.
subroutine, public dgemm_counter_write(dgemm_counter, para_env)
calculate and print flop rates
subroutine, public dgemm_counter_start(dgemm_counter)
start timer of the counter
subroutine, public dgemm_counter_stop(dgemm_counter, size1, size2, size3)
stop timer of the counter and provide matrix sizes
Types to describe group distributions.
elemental integer function, public maxsize(this)
...
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
2- and 3-center electron repulsion integral routines based on libint2 Currently available operators: ...
pure logical function, public compare_potential_types(potential1, potential2)
Helper function to compare libint_potential_types.
subroutine, public local_gemm(opa, opb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, ctx)
...
integer, parameter, public local_gemm_pu_gpu
subroutine, public local_gemm_set_op_threshold_gpu(ctx, opthresholdgpu)
...
subroutine, public local_gemm_destroy(ctx)
release resources associated to a gemm context
subroutine, public local_gemm_create(ctx, pu)
create a context for handling gemm offloading
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_memory(mem)
Returns the total amount of memory [bytes] in use, if known, zero otherwise.
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Interface to the message passing library MPI.
Routines to calculate RI-GPW-MP2 energy using pw.
subroutine, public mp2_ri_gpw_compute_en(emp2_cou, emp2_ex, emp2_s, emp2_t, bib_c, mp2_env, para_env, para_env_sub, color_sub, gd_array, gd_b_virtual, eigenval, nmo, homo, dimen_ri, unit_nr, calc_forces, calc_ex)
...
Routines for calculating RI-MP2 gradients.
subroutine, public complete_gamma(mp2_env, b_ia_q, dimen_ri, homo, virtual, para_env, para_env_sub, ngroup, my_group_l_size, my_group_l_start, my_group_l_end, my_b_size, my_b_virtual_start, gd_array, gd_b_virtual, kspin)
complete the calculation of the Gamma matrices
Types needed for MP2 calculations.
stores all the informations relevant to an mpi environment