98 SUBROUTINE mp2_gpw_compute(Emp2, Emp2_Cou, Emp2_EX, qs_env, para_env, para_env_sub, color_sub, &
99 cell, particle_set, atomic_kind_set, qs_kind_set, mo_coeff, Eigenval, nmo, homo, &
100 mat_munu, sab_orb_sub, mo_coeff_o, mo_coeff_v, eps_filter, unit_nr, &
101 mp2_memory, calc_ex, blacs_env_sub, Emp2_AB)
103 REAL(kind=
dp),
INTENT(OUT) :: emp2, emp2_cou, emp2_ex
106 INTEGER,
INTENT(IN) :: color_sub
110 TYPE(
qs_kind_type),
DIMENSION(:),
POINTER :: qs_kind_set
112 REAL(kind=
dp),
DIMENSION(:, :),
INTENT(IN) :: eigenval
113 INTEGER,
INTENT(IN) :: nmo
114 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo
115 TYPE(dbcsr_p_type),
INTENT(INOUT) :: mat_munu
117 POINTER :: sab_orb_sub
118 TYPE(dbcsr_p_type),
DIMENSION(:),
INTENT(IN) :: mo_coeff_o, mo_coeff_v
119 REAL(kind=
dp),
INTENT(IN) :: eps_filter
120 INTEGER,
INTENT(IN) :: unit_nr
121 REAL(kind=
dp),
INTENT(IN) :: mp2_memory
122 LOGICAL,
INTENT(IN) :: calc_ex
124 REAL(kind=
dp),
INTENT(OUT),
OPTIONAL :: emp2_ab
126 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mp2_gpw_compute'
128 INTEGER :: a, a_group_counter, b, b_global, b_group_counter, blk, col, col_offset, col_size, &
129 color_counter, ex_end, ex_end_send, ex_start, ex_start_send, group_counter, handle, &
130 handle2, handle3, i, i_counter, i_group_counter, index_proc_shift, ispin, j, max_b_size, &
131 max_batch_size_a, max_batch_size_i, max_row_col_local, my_a_batch_size, my_a_virtual_end, &
132 my_a_virtual_start, my_b_size, my_b_virtual_end, my_b_virtual_start, my_i_batch_size, &
133 my_i_occupied_end, my_i_occupied_start, my_q_position, ncol_local, nfullcols_total, &
134 nfullrows_total, ngroup, nrow_local, nspins, p, p_best, proc_receive
135 INTEGER :: proc_send, q, q_best, row, row_offset, row_size, size_ex, size_ex_send, &
136 sub_sub_color, wfn_calc, wfn_calc_best
137 INTEGER(KIND=int_8) :: mem
138 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: vector_b_sizes, &
139 vector_batch_a_size_group, &
140 vector_batch_i_size_group
141 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: color_array, local_col_row_info
142 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
143 INTEGER,
DIMENSION(SIZE(homo)) :: virtual
144 LOGICAL :: do_alpha_beta
145 REAL(kind=
dp) :: cutoff_old, mem_min, mem_real, mem_try, &
146 relative_cutoff_old, wfn_size
147 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: e_cutoff_old
148 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: my_cocc, my_cvirt
149 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: bib_c, bib_ex, bib_send
150 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: data_block
153 TYPE(dbcsr_iterator_type) :: iter
154 TYPE(dbcsr_type),
DIMENSION(SIZE(homo)) :: matrix_ia_jb, matrix_ia_jnu
166 CALL timeset(routinen, handle)
168 do_alpha_beta = .false.
169 IF (
PRESENT(emp2_ab)) do_alpha_beta = .true.
176 CALL dbcsr_create(matrix_ia_jnu(ispin), template=mo_coeff_o(ispin)%matrix)
180 sym=dbcsr_type_no_symmetry)
183 CALL dbcsr_set(matrix_ia_jnu(ispin), 0.0_dp)
184 CALL dbcsr_set(matrix_ia_jb(ispin), 0.0_dp)
186 CALL dbcsr_set(mat_munu%matrix, 0.0_dp)
191 CALL dbcsr_get_info(matrix_ia_jb(1), nfullrows_total=nfullrows_total, nfullcols_total=nfullcols_total)
193 ncol_global=nfullcols_total, para_env=para_env_sub)
194 CALL cp_fm_create(fm_bib_jb, fm_struct, name=
"fm_BIb_jb")
200 nrow_local=nrow_local, &
201 ncol_local=ncol_local, &
202 row_indices=row_indices, &
203 col_indices=col_indices)
205 max_row_col_local = max(nrow_local, ncol_local)
206 CALL para_env_sub%max(max_row_col_local)
208 ALLOCATE (local_col_row_info(0:max_row_col_local, 2))
209 local_col_row_info = 0
211 local_col_row_info(0, 1) = nrow_local
212 local_col_row_info(1:nrow_local, 1) = row_indices(1:nrow_local)
214 local_col_row_info(0, 2) = ncol_local
215 local_col_row_info(1:ncol_local, 2) = col_indices(1:ncol_local)
219 CALL prepare_gpw(qs_env, dft_control, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
220 auxbas_pw_pool, poisson_env, task_list_sub, rho_r, rho_g, pot_g, psi_a, sab_orb_sub)
222 wfn_size = real(
SIZE(rho_r%array), kind=
dp)
223 CALL para_env%max(wfn_size)
225 ngroup = para_env%num_pe/para_env_sub%num_pe
233 IF (p*q .NE. ngroup) cycle
235 CALL estimate_memory_usage(wfn_size, p, q, para_env_sub%num_pe, nmo, virtual(1), homo(1), calc_ex, mem_try)
237 IF (mem_try <= mem_min)
THEN
243 IF (unit_nr > 0)
WRITE (unit_nr,
'(T3,A,T69,F9.2,A3)')
'Minimum required memory per MPI process for MP2:', &
247 mem_real = (mem + 1024*1024 - 1)/(1024*1024)
248 CALL para_env%min(mem_real)
250 mem_real = mp2_memory - mem_real
251 mem_real = max(mem_real, mem_min)
252 IF (unit_nr > 0)
WRITE (unit_nr,
'(T3,A,T69,F9.2,A3)')
'Available memory per MPI process for MP2:', &
255 wfn_calc_best = huge(wfn_calc_best)
258 IF (p*q .NE. ngroup) cycle
260 CALL estimate_memory_usage(wfn_size, p, q, para_env_sub%num_pe, nmo, virtual(1), homo(1), calc_ex, mem_try)
262 IF (mem_try > mem_real) cycle
263 wfn_calc = ((homo(1) + p - 1)/p) + ((virtual(1) + q - 1)/q)
264 IF (wfn_calc < wfn_calc_best)
THEN
265 wfn_calc_best = wfn_calc
271 max_batch_size_i = (homo(1) + p_best - 1)/p_best
272 max_batch_size_a = (virtual(1) + q_best - 1)/q_best
274 IF (unit_nr > 0)
THEN
275 WRITE (unit=unit_nr, fmt=
"(T3,A,T77,i4)") &
276 "MP2_GPW| max. batch size for the occupied states:", max_batch_size_i
277 WRITE (unit=unit_nr, fmt=
"(T3,A,T77,i4)") &
278 "MP2_GPW| max. batch size for the virtual states:", max_batch_size_a
281 CALL get_vector_batch(vector_batch_i_size_group, p_best, max_batch_size_i, homo(1))
282 CALL get_vector_batch(vector_batch_a_size_group, q_best, max_batch_size_a, virtual(1))
287 my_a_virtual_start = 1
289 my_i_occupied_start = 1
292 group_counter = group_counter + 1
293 IF (color_sub == group_counter - 1)
EXIT
294 my_i_occupied_start = my_i_occupied_start + vector_batch_i_size_group(i)
295 i_group_counter = i_group_counter + 1
298 IF (color_sub == group_counter - 1)
EXIT
299 my_a_virtual_start = my_a_virtual_start + vector_batch_a_size_group(j)
300 a_group_counter = a_group_counter + 1
304 my_i_occupied_end = my_i_occupied_start + vector_batch_i_size_group(i_group_counter) - 1
305 my_i_batch_size = vector_batch_i_size_group(i_group_counter)
306 my_a_virtual_end = my_a_virtual_start + vector_batch_a_size_group(a_group_counter) - 1
307 my_a_batch_size = vector_batch_a_size_group(a_group_counter)
309 DEALLOCATE (vector_batch_i_size_group)
310 DEALLOCATE (vector_batch_a_size_group)
315 CALL grep_occ_virt_wavefunc(para_env_sub, nmo, &
316 my_i_occupied_start, my_i_occupied_end, my_i_batch_size, &
317 my_a_virtual_start, my_a_virtual_end, my_a_batch_size, &
318 mo_coeff_o(1)%matrix, mo_coeff_v(1)%matrix, my_cocc, my_cvirt)
322 max_b_size = (virtual(1) + para_env_sub%num_pe - 1)/para_env_sub%num_pe
323 CALL get_vector_batch(vector_b_sizes, para_env_sub%num_pe, max_b_size, virtual(1))
327 my_b_virtual_start = 1
328 DO j = 0, para_env_sub%num_pe - 1
329 b_group_counter = b_group_counter + 1
330 IF (b_group_counter - 1 == para_env_sub%mepos)
EXIT
331 my_b_virtual_start = my_b_virtual_start + vector_b_sizes(j)
333 my_b_virtual_end = my_b_virtual_start + vector_b_sizes(para_env_sub%mepos) - 1
334 my_b_size = vector_b_sizes(para_env_sub%mepos)
336 DEALLOCATE (vector_b_sizes)
341 ALLOCATE (color_array(0:para_env_sub%num_pe - 1, 0:q_best - 1))
345 DO i = 0, para_env_sub%num_pe - 1
346 color_counter = color_counter + 1
347 color_array(i, j) = color_counter
350 sub_sub_color = color_array(para_env_sub%mepos, my_q_position)
352 DEALLOCATE (color_array)
356 CALL comm_exchange%from_split(para_env, sub_sub_color)
359 CALL create_group_dist(gd_exchange, my_i_occupied_start, my_i_occupied_end, my_i_batch_size, comm_exchange)
361 ALLOCATE (psi_i(my_i_occupied_start:my_i_occupied_end))
362 DO i = my_i_occupied_start, my_i_occupied_end
363 CALL auxbas_pw_pool%create_pw(psi_i(i))
365 qs_kind_set, cell, dft_control, particle_set, &
366 pw_env_sub, external_vector=my_cocc(:, i - my_i_occupied_start + 1))
372 IF (do_alpha_beta) emp2_ab = 0.0_dp
374 ALLOCATE (bib_c(my_b_size, homo(1), my_i_batch_size))
377 CALL timeset(routinen//
"_loop", handle2)
378 DO a = homo(1) + my_a_virtual_start, homo(1) + my_a_virtual_end
380 IF (calc_ex) bib_c = 0.0_dp
384 qs_kind_set, cell, dft_control, particle_set, &
385 pw_env_sub, external_vector=my_cvirt(:, a - (homo(1) + my_a_virtual_start) + 1))
387 DO i = my_i_occupied_start, my_i_occupied_end
388 i_counter = i_counter + 1
394 CALL calc_potential_gpw(rho_r, rho_g, poisson_env, pot_g, qs_env%mp2_env%potential_parameter)
397 CALL timeset(routinen//
"_int", handle3)
398 CALL dbcsr_set(mat_munu%matrix, 0.0_dp)
399 CALL integrate_v_rspace(rho_r, hmat=mat_munu, qs_env=qs_env, &
400 calculate_forces=.false., compute_tau=.false., gapw=.false., &
401 pw_env_external=pw_env_sub, task_list_external=task_list_sub)
402 CALL timestop(handle3)
405 CALL timeset(routinen//
"_mult_o", handle3)
407 CALL dbcsr_multiply(
"N",
"N", 1.0_dp, mat_munu%matrix, mo_coeff_o(ispin)%matrix, &
408 0.0_dp, matrix_ia_jnu(ispin), filter_eps=eps_filter)
410 CALL timestop(handle3)
411 CALL timeset(routinen//
"_mult_v", handle3)
413 CALL dbcsr_multiply(
"T",
"N", 1.0_dp, matrix_ia_jnu(ispin), mo_coeff_v(ispin)%matrix, &
414 0.0_dp, matrix_ia_jb(ispin), filter_eps=eps_filter)
416 CALL timestop(handle3)
418 CALL timeset(routinen//
"_E_Cou", handle3)
419 CALL dbcsr_iterator_start(iter, matrix_ia_jb(1))
420 DO WHILE (dbcsr_iterator_blocks_left(iter))
421 CALL dbcsr_iterator_next_block(iter, row, col, data_block, blk, &
422 row_size=row_size, col_size=col_size, &
423 row_offset=row_offset, col_offset=col_offset)
427 emp2_cou = emp2_cou - 2.0_dp*data_block(j, b)**2/ &
428 (eigenval(a, 1) + eigenval(homo(1) + col_offset + b - 1, 1) - eigenval(i, 1) - eigenval(row_offset + j - 1, 1))
432 CALL dbcsr_iterator_stop(iter)
433 IF (do_alpha_beta)
THEN
435 CALL dbcsr_iterator_start(iter, matrix_ia_jb(2))
436 DO WHILE (dbcsr_iterator_blocks_left(iter))
437 CALL dbcsr_iterator_next_block(iter, row, col, data_block, blk, &
438 row_size=row_size, col_size=col_size, &
439 row_offset=row_offset, col_offset=col_offset)
443 emp2_ab = emp2_ab - data_block(j, b)**2/ &
444 (eigenval(a, 1) + eigenval(homo(2) + col_offset + b - 1, 2) - eigenval(i, 1) - eigenval(row_offset + j - 1, 2))
448 CALL dbcsr_iterator_stop(iter)
450 CALL timestop(handle3)
455 CALL timeset(routinen//
"_E_Ex_1", handle3)
457 CALL grep_my_integrals(para_env_sub, fm_bib_jb, bib_c(1:my_b_size, 1:homo(1), i_counter), max_row_col_local, &
458 local_col_row_info, my_b_virtual_end, my_b_virtual_start)
459 CALL timestop(handle3)
466 associate(mepos_in_ex_group => comm_exchange%mepos, size_of_exchange_group => comm_exchange%num_pe)
467 CALL timeset(routinen//
"_E_Ex_2", handle3)
469 DO i = 1, my_i_batch_size
470 DO j = my_i_occupied_start, my_i_occupied_end
472 b_global = b - 1 + my_b_virtual_start
473 emp2_ex = emp2_ex + bib_c(b, j, i)*bib_c(b, i + my_i_occupied_start - 1, j - my_i_occupied_start + 1) &
474 /(eigenval(a, 1) + eigenval(homo(1) + b_global, 1) - eigenval(i + my_i_occupied_start - 1, 1) - eigenval(j, 1))
481 DO index_proc_shift = 1, size_of_exchange_group - 1
482 proc_send =
modulo(mepos_in_ex_group + index_proc_shift, size_of_exchange_group)
483 proc_receive =
modulo(mepos_in_ex_group - index_proc_shift, size_of_exchange_group)
485 CALL get_group_dist(gd_exchange, proc_receive, ex_start, ex_end, size_ex)
487 ALLOCATE (bib_ex(my_b_size, my_i_batch_size, size_ex))
490 CALL get_group_dist(gd_exchange, proc_send, ex_start_send, ex_end_send, size_ex_send)
492 ALLOCATE (bib_send(my_b_size, size_ex_send, my_i_batch_size))
493 bib_send(1:my_b_size, 1:size_ex_send, 1:my_i_batch_size) = &
494 bib_c(1:my_b_size, ex_start_send:ex_end_send, 1:my_i_batch_size)
497 CALL comm_exchange%sendrecv(bib_send, proc_send, bib_ex, proc_receive)
499 DO i = 1, my_i_batch_size
502 b_global = b - 1 + my_b_virtual_start
503 emp2_ex = emp2_ex + bib_c(b, j + ex_start - 1, i)*bib_ex(b, i, j) &
504 /(eigenval(a, 1) + eigenval(homo(1) + b_global, 1) - eigenval(i + my_i_occupied_start - 1, 1) &
505 - eigenval(j + ex_start - 1, 1))
511 DEALLOCATE (bib_send)
514 CALL timestop(handle3)
519 CALL timestop(handle2)
521 CALL para_env%sum(emp2_cou)
522 CALL para_env%sum(emp2_ex)
523 emp2 = emp2_cou + emp2_ex
524 IF (do_alpha_beta)
CALL para_env%sum(emp2_ab)
527 DEALLOCATE (my_cvirt)
531 DEALLOCATE (local_col_row_info)
536 CALL comm_exchange%free()
539 CALL dbcsr_release(matrix_ia_jnu(ispin))
540 CALL dbcsr_release(matrix_ia_jb(ispin))
543 DO i = my_i_occupied_start, my_i_occupied_end
544 CALL psi_i(i)%release()
548 CALL cleanup_gpw(qs_env, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
549 task_list_sub, auxbas_pw_pool, rho_r, rho_g, pot_g, psi_a)
551 CALL timestop(handle)