97 SUBROUTINE mp2_gpw_compute(Emp2, Emp2_Cou, Emp2_EX, qs_env, para_env, para_env_sub, color_sub, &
98 cell, particle_set, atomic_kind_set, qs_kind_set, Eigenval, nmo, homo, &
99 mat_munu, sab_orb_sub, mo_coeff_o, mo_coeff_v, eps_filter, unit_nr, &
100 mp2_memory, calc_ex, blacs_env_sub, Emp2_AB)
102 REAL(kind=
dp),
INTENT(OUT) :: emp2, emp2_cou, emp2_ex
105 INTEGER,
INTENT(IN) :: color_sub
109 TYPE(
qs_kind_type),
DIMENSION(:),
POINTER :: qs_kind_set
110 REAL(kind=
dp),
DIMENSION(:, :),
INTENT(IN) :: eigenval
111 INTEGER,
INTENT(IN) :: nmo
112 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo
115 POINTER :: sab_orb_sub
116 TYPE(
dbcsr_p_type),
DIMENSION(:),
INTENT(IN) :: mo_coeff_o, mo_coeff_v
117 REAL(kind=
dp),
INTENT(IN) :: eps_filter
118 INTEGER,
INTENT(IN) :: unit_nr
119 REAL(kind=
dp),
INTENT(IN) :: mp2_memory
120 LOGICAL,
INTENT(IN) :: calc_ex
122 REAL(kind=
dp),
INTENT(OUT),
OPTIONAL :: emp2_ab
124 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_gpw_compute'
126 INTEGER :: a, a_group_counter, b, b_global, b_group_counter, col, col_offset, col_size, &
127 color_counter, ex_end, ex_end_send, ex_start, ex_start_send, group_counter, handle, &
128 handle2, handle3, i, i_counter, i_group_counter, index_proc_shift, ispin, j, max_b_size, &
129 max_batch_size_a, max_batch_size_i, max_row_col_local, my_a_batch_size, my_a_virtual_end, &
130 my_a_virtual_start, my_b_size, my_b_virtual_end, my_b_virtual_start, my_i_batch_size, &
131 my_i_occupied_end, my_i_occupied_start, my_q_position, ncol_local, nfullcols_total, &
132 nfullrows_total, ngroup, nrow_local, nspins, p, p_best, proc_receive
133 INTEGER :: proc_send, q, q_best, row, row_offset, row_size, size_ex, size_ex_send, &
134 sub_sub_color, wfn_calc, wfn_calc_best
135 INTEGER(KIND=int_8) :: mem
136 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: vector_b_sizes, &
137 vector_batch_a_size_group, &
138 vector_batch_i_size_group
139 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: color_array, local_col_row_info
140 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
141 INTEGER,
DIMENSION(SIZE(homo)) :: virtual
142 LOGICAL :: do_alpha_beta
143 REAL(kind=
dp) :: cutoff_old, mem_min, mem_real, mem_try, &
144 relative_cutoff_old, wfn_size
145 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: e_cutoff_old
146 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: my_cocc, my_cvirt
147 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: bib_c, bib_ex, bib_send
148 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: data_block
152 TYPE(
dbcsr_type),
DIMENSION(SIZE(homo)) :: matrix_ia_jb, matrix_ia_jnu
164 CALL timeset(routinen, handle)
166 do_alpha_beta = .false.
167 IF (
PRESENT(emp2_ab)) do_alpha_beta = .true.
174 CALL dbcsr_create(matrix_ia_jnu(ispin), template=mo_coeff_o(ispin)%matrix)
178 sym=dbcsr_type_no_symmetry)
181 CALL dbcsr_set(matrix_ia_jnu(ispin), 0.0_dp)
182 CALL dbcsr_set(matrix_ia_jb(ispin), 0.0_dp)
189 CALL dbcsr_get_info(matrix_ia_jb(1), nfullrows_total=nfullrows_total, nfullcols_total=nfullcols_total)
191 ncol_global=nfullcols_total, para_env=para_env_sub)
192 CALL cp_fm_create(fm_bib_jb, fm_struct, name=
"fm_BIb_jb")
198 nrow_local=nrow_local, &
199 ncol_local=ncol_local, &
200 row_indices=row_indices, &
201 col_indices=col_indices)
203 max_row_col_local = max(nrow_local, ncol_local)
204 CALL para_env_sub%max(max_row_col_local)
206 ALLOCATE (local_col_row_info(0:max_row_col_local, 2))
207 local_col_row_info = 0
209 local_col_row_info(0, 1) = nrow_local
210 local_col_row_info(1:nrow_local, 1) = row_indices(1:nrow_local)
212 local_col_row_info(0, 2) = ncol_local
213 local_col_row_info(1:ncol_local, 2) = col_indices(1:ncol_local)
217 CALL prepare_gpw(qs_env, dft_control, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
218 auxbas_pw_pool, poisson_env, task_list_sub, rho_r, rho_g, pot_g, psi_a, sab_orb_sub)
220 wfn_size = real(
SIZE(rho_r%array), kind=
dp)
221 CALL para_env%max(wfn_size)
223 ngroup = para_env%num_pe/para_env_sub%num_pe
231 IF (p*q .NE. ngroup) cycle
233 CALL estimate_memory_usage(wfn_size, p, q, para_env_sub%num_pe, nmo, virtual(1), homo(1), calc_ex, mem_try)
235 IF (mem_try <= mem_min)
THEN
241 IF (unit_nr > 0)
WRITE (unit_nr,
'(T3,A,T69,F9.2,A3)')
'Minimum required memory per MPI process for MP2:', &
245 mem_real = (mem + 1024*1024 - 1)/(1024*1024)
246 CALL para_env%min(mem_real)
248 mem_real = mp2_memory - mem_real
249 mem_real = max(mem_real, mem_min)
250 IF (unit_nr > 0)
WRITE (unit_nr,
'(T3,A,T69,F9.2,A3)')
'Available memory per MPI process for MP2:', &
253 wfn_calc_best = huge(wfn_calc_best)
256 IF (p*q .NE. ngroup) cycle
258 CALL estimate_memory_usage(wfn_size, p, q, para_env_sub%num_pe, nmo, virtual(1), homo(1), calc_ex, mem_try)
260 IF (mem_try > mem_real) cycle
261 wfn_calc = ((homo(1) + p - 1)/p) + ((virtual(1) + q - 1)/q)
262 IF (wfn_calc < wfn_calc_best)
THEN
263 wfn_calc_best = wfn_calc
269 max_batch_size_i = (homo(1) + p_best - 1)/p_best
270 max_batch_size_a = (virtual(1) + q_best - 1)/q_best
272 IF (unit_nr > 0)
THEN
273 WRITE (unit=unit_nr, fmt=
"(T3,A,T77,i4)") &
274 "MP2_GPW| max. batch size for the occupied states:", max_batch_size_i
275 WRITE (unit=unit_nr, fmt=
"(T3,A,T77,i4)") &
276 "MP2_GPW| max. batch size for the virtual states:", max_batch_size_a
279 CALL get_vector_batch(vector_batch_i_size_group, p_best, max_batch_size_i, homo(1))
280 CALL get_vector_batch(vector_batch_a_size_group, q_best, max_batch_size_a, virtual(1))
285 my_a_virtual_start = 1
287 my_i_occupied_start = 1
290 group_counter = group_counter + 1
291 IF (color_sub == group_counter - 1)
EXIT
292 my_i_occupied_start = my_i_occupied_start + vector_batch_i_size_group(i)
293 i_group_counter = i_group_counter + 1
296 IF (color_sub == group_counter - 1)
EXIT
297 my_a_virtual_start = my_a_virtual_start + vector_batch_a_size_group(j)
298 a_group_counter = a_group_counter + 1
302 my_i_occupied_end = my_i_occupied_start + vector_batch_i_size_group(i_group_counter) - 1
303 my_i_batch_size = vector_batch_i_size_group(i_group_counter)
304 my_a_virtual_end = my_a_virtual_start + vector_batch_a_size_group(a_group_counter) - 1
305 my_a_batch_size = vector_batch_a_size_group(a_group_counter)
307 DEALLOCATE (vector_batch_i_size_group)
308 DEALLOCATE (vector_batch_a_size_group)
313 CALL grep_occ_virt_wavefunc(para_env_sub, nmo, &
314 my_i_occupied_start, my_i_occupied_end, my_i_batch_size, &
315 my_a_virtual_start, my_a_virtual_end, my_a_batch_size, &
316 mo_coeff_o(1)%matrix, mo_coeff_v(1)%matrix, my_cocc, my_cvirt)
320 max_b_size = (virtual(1) + para_env_sub%num_pe - 1)/para_env_sub%num_pe
321 CALL get_vector_batch(vector_b_sizes, para_env_sub%num_pe, max_b_size, virtual(1))
325 my_b_virtual_start = 1
326 DO j = 0, para_env_sub%num_pe - 1
327 b_group_counter = b_group_counter + 1
328 IF (b_group_counter - 1 == para_env_sub%mepos)
EXIT
329 my_b_virtual_start = my_b_virtual_start + vector_b_sizes(j)
331 my_b_virtual_end = my_b_virtual_start + vector_b_sizes(para_env_sub%mepos) - 1
332 my_b_size = vector_b_sizes(para_env_sub%mepos)
334 DEALLOCATE (vector_b_sizes)
339 ALLOCATE (color_array(0:para_env_sub%num_pe - 1, 0:q_best - 1))
343 DO i = 0, para_env_sub%num_pe - 1
344 color_counter = color_counter + 1
345 color_array(i, j) = color_counter
348 sub_sub_color = color_array(para_env_sub%mepos, my_q_position)
350 DEALLOCATE (color_array)
354 CALL comm_exchange%from_split(para_env, sub_sub_color)
357 CALL create_group_dist(gd_exchange, my_i_occupied_start, my_i_occupied_end, my_i_batch_size, comm_exchange)
359 ALLOCATE (psi_i(my_i_occupied_start:my_i_occupied_end))
360 DO i = my_i_occupied_start, my_i_occupied_end
361 CALL auxbas_pw_pool%create_pw(psi_i(i))
363 psi_i(i), rho_g, atomic_kind_set, qs_kind_set, cell, particle_set, &
364 pw_env_sub, dft_control%qs_control%eps_rho_rspace)
370 IF (do_alpha_beta) emp2_ab = 0.0_dp
372 ALLOCATE (bib_c(my_b_size, homo(1), my_i_batch_size))
375 CALL timeset(routinen//
"_loop", handle2)
376 DO a = homo(1) + my_a_virtual_start, homo(1) + my_a_virtual_end
378 IF (calc_ex) bib_c = 0.0_dp
382 psi_a, rho_g, atomic_kind_set, qs_kind_set, cell, particle_set, &
383 pw_env_sub, dft_control%qs_control%eps_rho_rspace)
385 DO i = my_i_occupied_start, my_i_occupied_end
386 i_counter = i_counter + 1
392 CALL calc_potential_gpw(rho_r, rho_g, poisson_env, pot_g, qs_env%mp2_env%potential_parameter)
395 CALL timeset(routinen//
"_int", handle3)
397 CALL integrate_v_rspace(rho_r, hmat=mat_munu, qs_env=qs_env, &
398 calculate_forces=.false., compute_tau=.false., gapw=.false., &
399 pw_env_external=pw_env_sub, task_list_external=task_list_sub)
400 CALL timestop(handle3)
403 CALL timeset(routinen//
"_mult_o", handle3)
405 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, mat_munu%matrix, mo_coeff_o(ispin)%matrix, &
406 0.0_dp, matrix_ia_jnu(ispin), filter_eps=eps_filter)
408 CALL timestop(handle3)
409 CALL timeset(routinen//
"_mult_v", handle3)
411 CALL dbcsr_multiply(
"T",
"N", 1.0_dp, matrix_ia_jnu(ispin), mo_coeff_v(ispin)%matrix, &
412 0.0_dp, matrix_ia_jb(ispin), filter_eps=eps_filter)
414 CALL timestop(handle3)
416 CALL timeset(routinen//
"_E_Cou", handle3)
420 row_size=row_size, col_size=col_size, &
421 row_offset=row_offset, col_offset=col_offset)
425 emp2_cou = emp2_cou - 2.0_dp*data_block(j, b)**2/ &
426 (eigenval(a, 1) + eigenval(homo(1) + col_offset + b - 1, 1) - eigenval(i, 1) - eigenval(row_offset + j - 1, 1))
431 IF (do_alpha_beta)
THEN
436 row_size=row_size, col_size=col_size, &
437 row_offset=row_offset, col_offset=col_offset)
441 emp2_ab = emp2_ab - data_block(j, b)**2/ &
442 (eigenval(a, 1) + eigenval(homo(2) + col_offset + b - 1, 2) - eigenval(i, 1) - eigenval(row_offset + j - 1, 2))
448 CALL timestop(handle3)
453 CALL timeset(routinen//
"_E_Ex_1", handle3)
455 CALL grep_my_integrals(para_env_sub, fm_bib_jb, bib_c(1:my_b_size, 1:homo(1), i_counter), max_row_col_local, &
456 local_col_row_info, my_b_virtual_end, my_b_virtual_start)
457 CALL timestop(handle3)
464 associate(mepos_in_ex_group => comm_exchange%mepos, size_of_exchange_group => comm_exchange%num_pe)
465 CALL timeset(routinen//
"_E_Ex_2", handle3)
467 DO i = 1, my_i_batch_size
468 DO j = my_i_occupied_start, my_i_occupied_end
470 b_global = b - 1 + my_b_virtual_start
471 emp2_ex = emp2_ex + bib_c(b, j, i)*bib_c(b, i + my_i_occupied_start - 1, j - my_i_occupied_start + 1) &
472 /(eigenval(a, 1) + eigenval(homo(1) + b_global, 1) - eigenval(i + my_i_occupied_start - 1, 1) - eigenval(j, 1))
479 DO index_proc_shift = 1, size_of_exchange_group - 1
480 proc_send =
modulo(mepos_in_ex_group + index_proc_shift, size_of_exchange_group)
481 proc_receive =
modulo(mepos_in_ex_group - index_proc_shift, size_of_exchange_group)
483 CALL get_group_dist(gd_exchange, proc_receive, ex_start, ex_end, size_ex)
485 ALLOCATE (bib_ex(my_b_size, my_i_batch_size, size_ex))
488 CALL get_group_dist(gd_exchange, proc_send, ex_start_send, ex_end_send, size_ex_send)
490 ALLOCATE (bib_send(my_b_size, size_ex_send, my_i_batch_size))
491 bib_send(1:my_b_size, 1:size_ex_send, 1:my_i_batch_size) = &
492 bib_c(1:my_b_size, ex_start_send:ex_end_send, 1:my_i_batch_size)
495 CALL comm_exchange%sendrecv(bib_send, proc_send, bib_ex, proc_receive)
497 DO i = 1, my_i_batch_size
500 b_global = b - 1 + my_b_virtual_start
501 emp2_ex = emp2_ex + bib_c(b, j + ex_start - 1, i)*bib_ex(b, i, j) &
502 /(eigenval(a, 1) + eigenval(homo(1) + b_global, 1) - eigenval(i + my_i_occupied_start - 1, 1) &
503 - eigenval(j + ex_start - 1, 1))
509 DEALLOCATE (bib_send)
512 CALL timestop(handle3)
517 CALL timestop(handle2)
519 CALL para_env%sum(emp2_cou)
520 CALL para_env%sum(emp2_ex)
521 emp2 = emp2_cou + emp2_ex
522 IF (do_alpha_beta)
CALL para_env%sum(emp2_ab)
525 DEALLOCATE (my_cvirt)
529 DEALLOCATE (local_col_row_info)
534 CALL comm_exchange%free()
541 DO i = my_i_occupied_start, my_i_occupied_end
542 CALL psi_i(i)%release()
546 CALL cleanup_gpw(qs_env, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
547 task_list_sub, auxbas_pw_pool, rho_r, rho_g, pot_g, psi_a)
549 CALL timestop(handle)