(git:374b731)
Loading...
Searching...
No Matches
hfx_ri_kp.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief RI-methods for HFX and K-points.
10!> \auhtor Augustin Bussy (01.2023)
11! **************************************************************************************************
12
14 USE admm_types, ONLY: get_admm_env
19 USE cell_types, ONLY: cell_type,&
20 pbc,&
35 USE dbcsr_api, ONLY: &
36 dbcsr_add, dbcsr_clear, dbcsr_copy, dbcsr_create, dbcsr_distribution_get, &
37 dbcsr_distribution_new, dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_dot, &
38 dbcsr_filter, dbcsr_finalize, dbcsr_get_block_p, dbcsr_get_info, &
39 dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, dbcsr_iterator_start, &
40 dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_p_type, dbcsr_put_block, dbcsr_release, &
41 dbcsr_type, dbcsr_type_no_symmetry, dbcsr_type_symmetric
42 USE dbt_api, ONLY: &
43 dbt_batched_contract_finalize, dbt_batched_contract_init, dbt_clear, dbt_contract, &
44 dbt_copy, dbt_copy_matrix_to_tensor, dbt_copy_tensor_to_matrix, dbt_create, dbt_destroy, &
45 dbt_distribution_destroy, dbt_distribution_new, dbt_distribution_type, dbt_filter, &
46 dbt_finalize, dbt_get_block, dbt_get_info, dbt_get_stored_coordinates, &
47 dbt_iterator_blocks_left, dbt_iterator_next_block, dbt_iterator_start, dbt_iterator_stop, &
48 dbt_iterator_type, dbt_mp_environ_pgrid, dbt_pgrid_create, dbt_pgrid_destroy, &
49 dbt_pgrid_type, dbt_put_block, dbt_scale, dbt_type
52 USE hfx_ri, ONLY: get_idx_to_atom,&
54 USE hfx_types, ONLY: hfx_ri_type
59 USE input_cp2k_hfx, ONLY: ri_pmat
65 USE kinds, ONLY: dp,&
66 int_8
67 USE kpoint_types, ONLY: get_kpoint_info,&
70 USE machine, ONLY: m_flush,&
72 USE mathlib, ONLY: erfc_cutoff
73 USE message_passing, ONLY: mp_cart_type,&
79 USE physcon, ONLY: angstrom
94 USE qs_tensors, ONLY: &
107 USE util, ONLY: get_limit
108 USE virial_types, ONLY: virial_type
109#include "./base/base_uses.f90"
110
111!$ USE OMP_LIB, ONLY: omp_get_num_threads
112
113 IMPLICIT NONE
114 PRIVATE
115
117
118 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_ri_kp'
119CONTAINS
120
121! NOTES: for a start, we do not seek performance, but accuracy. So in this first implementation,
122! we give little consideration to batching, load balance and such.
123! We also put everything here, even if there is some code replication with the original RI_HFX
124! We will only work in the RHO flavor
125! For now, we will also always assume that there is a single para_env, and that there is no
126! K-point subgroup. This might change in the future
127
128! **************************************************************************************************
129!> \brief I_1nitialize the ri_data for K-point. For now, we take the normal, usual existing ri_data
130!> and we adapt it to our needs
131!> \param dbcsr_template ...
132!> \param ri_data ...
133!> \param qs_env ...
134! **************************************************************************************************
135 SUBROUTINE adapt_ri_data_to_kp(dbcsr_template, ri_data, qs_env)
136 TYPE(dbcsr_type), INTENT(INOUT) :: dbcsr_template
137 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
138 TYPE(qs_environment_type), POINTER :: qs_env
139
140 INTEGER :: i_img, i_RI, i_spin, iatom, natom, &
141 nblks_RI, nimg, nkind, nspins
142 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_RI_ext, dist1, dist2, dist3
143 TYPE(dft_control_type), POINTER :: dft_control
144 TYPE(mp_para_env_type), POINTER :: para_env
145
146 NULLIFY (dft_control, para_env)
147
148 !The main thing that we need to do is to allocate more space for the integrals, such that there
149 !is room for each periodic image. Note that we only go in 1D, i.e. we store (mu^0 sigma^a|P^0),
150 !and (P^0|Q^a) => the RI basis is always in the main cell.
151
152 !Get kpoint info
153 CALL get_qs_env(qs_env, dft_control=dft_control, natom=natom, para_env=para_env, nkind=nkind)
154 nimg = ri_data%nimg
155
156 !Along the RI direction we have basis elements spread accross ncell_RI images.
157 nblks_ri = SIZE(ri_data%bsizes_RI_split)
158 ALLOCATE (bsizes_ri_ext(nblks_ri*ri_data%ncell_RI))
159 DO i_ri = 1, ri_data%ncell_RI
160 bsizes_ri_ext((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = ri_data%bsizes_RI_split(:)
161 END DO
162
163 ALLOCATE (ri_data%t_3c_int_ctr_1(1, nimg))
164 CALL create_3c_tensor(ri_data%t_3c_int_ctr_1(1, 1), dist1, dist2, dist3, &
165 ri_data%pgrid_1, ri_data%bsizes_AO_split, bsizes_ri_ext, &
166 ri_data%bsizes_AO_split, [1, 2], [3], name="(AO RI | AO)")
167
168 DO i_img = 2, nimg
169 CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), ri_data%t_3c_int_ctr_1(1, i_img))
170 END DO
171 DEALLOCATE (dist1, dist2, dist3)
172
173 ALLOCATE (ri_data%t_3c_int_ctr_2(1, 1))
174 CALL create_3c_tensor(ri_data%t_3c_int_ctr_2(1, 1), dist1, dist2, dist3, &
175 ri_data%pgrid_1, ri_data%bsizes_AO_split, bsizes_ri_ext, &
176 ri_data%bsizes_AO_split, [1], [2, 3], name="(AO RI | AO)")
177 DEALLOCATE (dist1, dist2, dist3)
178
179 !We use full block sizes for the 2c quantities
180 DEALLOCATE (bsizes_ri_ext)
181 nblks_ri = SIZE(ri_data%bsizes_RI)
182 ALLOCATE (bsizes_ri_ext(nblks_ri*ri_data%ncell_RI))
183 DO i_ri = 1, ri_data%ncell_RI
184 bsizes_ri_ext((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = ri_data%bsizes_RI(:)
185 END DO
186
187 ALLOCATE (ri_data%t_2c_inv(1, natom), ri_data%t_2c_int(1, natom), ri_data%t_2c_pot(1, natom))
188 CALL create_2c_tensor(ri_data%t_2c_inv(1, 1), dist1, dist2, ri_data%pgrid_2d, &
189 bsizes_ri_ext, bsizes_ri_ext, &
190 name="(RI | RI)")
191 DEALLOCATE (dist1, dist2)
192 CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_int(1, 1))
193 CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1))
194 DO iatom = 2, natom
195 CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_inv(1, iatom))
196 CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_int(1, iatom))
197 CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, iatom))
198 END DO
199
200 ALLOCATE (ri_data%kp_cost(natom, natom, nimg))
201 ri_data%kp_cost = 0.0_dp
202
203 !We store the density and KS matrix in tensor format
204 nspins = dft_control%nspins
205 ALLOCATE (ri_data%rho_ao_t(nspins, nimg), ri_data%ks_t(nspins, nimg))
206 CALL create_2c_tensor(ri_data%rho_ao_t(1, 1), dist1, dist2, ri_data%pgrid_2d, &
207 ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
208 name="(AO | AO)")
209 DEALLOCATE (dist1, dist2)
210
211 CALL dbt_create(dbcsr_template, ri_data%ks_t(1, 1))
212
213 IF (nspins == 2) THEN
214 CALL dbt_create(ri_data%rho_ao_t(1, 1), ri_data%rho_ao_t(2, 1))
215 CALL dbt_create(ri_data%ks_t(1, 1), ri_data%ks_t(2, 1))
216 END IF
217 DO i_img = 2, nimg
218 DO i_spin = 1, nspins
219 CALL dbt_create(ri_data%rho_ao_t(1, 1), ri_data%rho_ao_t(i_spin, i_img))
220 CALL dbt_create(ri_data%ks_t(1, 1), ri_data%ks_t(i_spin, i_img))
221 END DO
222 END DO
223
224 END SUBROUTINE adapt_ri_data_to_kp
225
226! **************************************************************************************************
227!> \brief The pre-scf steps for RI-HFX k-points calculation. Namely the calculation of the integrals
228!> \param dbcsr_template ...
229!> \param ri_data ...
230!> \param qs_env ...
231! **************************************************************************************************
232 SUBROUTINE hfx_ri_pre_scf_kp(dbcsr_template, ri_data, qs_env)
233 TYPE(dbcsr_type), INTENT(INOUT) :: dbcsr_template
234 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
235 TYPE(qs_environment_type), POINTER :: qs_env
236
237 CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_kp'
238
239 INTEGER :: handle, i_img, iatom, natom, nimg, nkind
240 TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: t_2c_op_pot, t_2c_op_RI
241 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int
242 TYPE(dft_control_type), POINTER :: dft_control
243
244 NULLIFY (dft_control)
245
246 CALL timeset(routinen, handle)
247
248 CALL get_qs_env(qs_env, dft_control=dft_control, natom=natom, nkind=nkind)
249
250 CALL cleanup_kp(ri_data)
251
252 !We do all the checks on what we allow in this initial implementation
253 IF (ri_data%flavor .NE. ri_pmat) cpabort("K-points RI-HFX only with RHO flavor")
254 IF (ri_data%same_op) ri_data%same_op = .false. !force the full calculation with RI metric
255 IF (abs(ri_data%eps_pgf_orb - dft_control%qs_control%eps_pgf_orb) > 1.0e-16_dp) &
256 cpabort("RI%EPS_PGF_ORB and QS%EPS_PGF_ORB must be identical for RI-HFX k-points")
257
258 CALL get_kp_and_ri_images(ri_data, qs_env)
259 nimg = ri_data%nimg
260
261 !Calculate the integrals
262 ALLOCATE (t_2c_op_pot(nimg), t_2c_op_ri(nimg))
263 ALLOCATE (t_3c_int(1, nimg))
264 CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_ri, t_2c_op_pot, t_3c_int, do_kpoints=.true.)
265
266 !Make sure the internals have the k-point format
267 CALL adapt_ri_data_to_kp(dbcsr_template, ri_data, qs_env)
268
269 !For each atom i, we calculate the inverse RI metric (P^0 | Q^0)^-1 without external bumping yet
270 !Also store the off-diagonal integrals of the RI metric in case of forces, bumped from the left
271 DO iatom = 1, natom
272 CALL get_ext_2c_int(ri_data%t_2c_inv(1, iatom), t_2c_op_ri, iatom, iatom, 1, ri_data, qs_env, &
273 do_inverse=.true.)
274 !for the forces:
275 !off-diagonl RI metric bumped from the left
276 CALL get_ext_2c_int(ri_data%t_2c_int(1, iatom), t_2c_op_ri, iatom, iatom, 1, ri_data, &
277 qs_env, off_diagonal=.true.)
278 CALL apply_bump(ri_data%t_2c_int(1, iatom), iatom, ri_data, qs_env, from_left=.true., from_right=.false.)
279
280 !RI metric with bumped off-diagonal blocks (but not inverted), depumed from left and right
281 CALL get_ext_2c_int(ri_data%t_2c_pot(1, iatom), t_2c_op_ri, iatom, iatom, 1, ri_data, qs_env, &
282 do_inverse=.true., skip_inverse=.true.)
283 CALL apply_bump(ri_data%t_2c_pot(1, iatom), iatom, ri_data, qs_env, from_left=.true., &
284 from_right=.true., debump=.true.)
285
286 END DO
287
288 DO i_img = 1, nimg
289 CALL dbcsr_release(t_2c_op_ri(i_img))
290 END DO
291
292 ALLOCATE (ri_data%kp_mat_2c_pot(1, nimg))
293 DO i_img = 1, nimg
294 CALL dbcsr_create(ri_data%kp_mat_2c_pot(1, i_img), template=t_2c_op_pot(i_img))
295 CALL dbcsr_copy(ri_data%kp_mat_2c_pot(1, i_img), t_2c_op_pot(i_img))
296 CALL dbcsr_release(t_2c_op_pot(i_img))
297 END DO
298
299 !Pre-contract all 3c integrals with the bumped inverse RI metric (P^0|Q^0)^-1,
300 !and store in ri_data%t_3c_int_ctr_1
301 CALL precontract_3c_ints(t_3c_int, ri_data, qs_env)
302
303 !reorder the 3c integrals such that empty images are bunched up together
304 CALL reorder_3c_ints(ri_data%t_3c_int_ctr_1(1, :), ri_data)
305
306 CALL timestop(handle)
307
308 END SUBROUTINE hfx_ri_pre_scf_kp
309
310! **************************************************************************************************
311!> \brief clean-up the KP specific data from ri_data
312!> \param ri_data ...
313! **************************************************************************************************
314 SUBROUTINE cleanup_kp(ri_data)
315 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
316
317 INTEGER :: i, j
318
319 IF (ALLOCATED(ri_data%kp_cost)) DEALLOCATE (ri_data%kp_cost)
320 IF (ALLOCATED(ri_data%idx_to_img)) DEALLOCATE (ri_data%idx_to_img)
321 IF (ALLOCATED(ri_data%img_to_idx)) DEALLOCATE (ri_data%img_to_idx)
322 IF (ALLOCATED(ri_data%present_images)) DEALLOCATE (ri_data%present_images)
323 IF (ALLOCATED(ri_data%img_to_RI_cell)) DEALLOCATE (ri_data%img_to_RI_cell)
324 IF (ALLOCATED(ri_data%RI_cell_to_img)) DEALLOCATE (ri_data%RI_cell_to_img)
325
326 IF (ALLOCATED(ri_data%kp_mat_2c_pot)) THEN
327 DO j = 1, SIZE(ri_data%kp_mat_2c_pot, 2)
328 DO i = 1, SIZE(ri_data%kp_mat_2c_pot, 1)
329 CALL dbcsr_release(ri_data%kp_mat_2c_pot(i, j))
330 END DO
331 END DO
332 DEALLOCATE (ri_data%kp_mat_2c_pot)
333 END IF
334
335 IF (ALLOCATED(ri_data%kp_t_3c_int)) THEN
336 DO i = 1, SIZE(ri_data%kp_t_3c_int)
337 CALL dbt_destroy(ri_data%kp_t_3c_int(i))
338 END DO
339 DEALLOCATE (ri_data%kp_t_3c_int)
340 END IF
341
342 IF (ALLOCATED(ri_data%t_2c_inv)) THEN
343 DO j = 1, SIZE(ri_data%t_2c_inv, 2)
344 DO i = 1, SIZE(ri_data%t_2c_inv, 1)
345 CALL dbt_destroy(ri_data%t_2c_inv(i, j))
346 END DO
347 END DO
348 DEALLOCATE (ri_data%t_2c_inv)
349 END IF
350
351 IF (ALLOCATED(ri_data%t_2c_int)) THEN
352 DO j = 1, SIZE(ri_data%t_2c_int, 2)
353 DO i = 1, SIZE(ri_data%t_2c_int, 1)
354 CALL dbt_destroy(ri_data%t_2c_int(i, j))
355 END DO
356 END DO
357 DEALLOCATE (ri_data%t_2c_int)
358 END IF
359
360 IF (ALLOCATED(ri_data%t_2c_pot)) THEN
361 DO j = 1, SIZE(ri_data%t_2c_pot, 2)
362 DO i = 1, SIZE(ri_data%t_2c_pot, 1)
363 CALL dbt_destroy(ri_data%t_2c_pot(i, j))
364 END DO
365 END DO
366 DEALLOCATE (ri_data%t_2c_pot)
367 END IF
368
369 IF (ALLOCATED(ri_data%t_3c_int_ctr_1)) THEN
370 DO j = 1, SIZE(ri_data%t_3c_int_ctr_1, 2)
371 DO i = 1, SIZE(ri_data%t_3c_int_ctr_1, 1)
372 CALL dbt_destroy(ri_data%t_3c_int_ctr_1(i, j))
373 END DO
374 END DO
375 DEALLOCATE (ri_data%t_3c_int_ctr_1)
376 END IF
377
378 IF (ALLOCATED(ri_data%t_3c_int_ctr_2)) THEN
379 DO j = 1, SIZE(ri_data%t_3c_int_ctr_2, 2)
380 DO i = 1, SIZE(ri_data%t_3c_int_ctr_2, 1)
381 CALL dbt_destroy(ri_data%t_3c_int_ctr_2(i, j))
382 END DO
383 END DO
384 DEALLOCATE (ri_data%t_3c_int_ctr_2)
385 END IF
386
387 IF (ALLOCATED(ri_data%rho_ao_t)) THEN
388 DO j = 1, SIZE(ri_data%rho_ao_t, 2)
389 DO i = 1, SIZE(ri_data%rho_ao_t, 1)
390 CALL dbt_destroy(ri_data%rho_ao_t(i, j))
391 END DO
392 END DO
393 DEALLOCATE (ri_data%rho_ao_t)
394 END IF
395
396 IF (ALLOCATED(ri_data%ks_t)) THEN
397 DO j = 1, SIZE(ri_data%ks_t, 2)
398 DO i = 1, SIZE(ri_data%ks_t, 1)
399 CALL dbt_destroy(ri_data%ks_t(i, j))
400 END DO
401 END DO
402 DEALLOCATE (ri_data%ks_t)
403 END IF
404
405 END SUBROUTINE cleanup_kp
406
407! **************************************************************************************************
408!> \brief Update the KS matrices for each real-space image
409!> \param qs_env ...
410!> \param ri_data ...
411!> \param ks_matrix ...
412!> \param ehfx ...
413!> \param rho_ao ...
414!> \param geometry_did_change ...
415!> \param nspins ...
416!> \param hf_fraction ...
417! **************************************************************************************************
418 SUBROUTINE hfx_ri_update_ks_kp(qs_env, ri_data, ks_matrix, ehfx, rho_ao, &
419 geometry_did_change, nspins, hf_fraction)
420
421 TYPE(qs_environment_type), POINTER :: qs_env
422 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
423 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: ks_matrix
424 REAL(kind=dp), INTENT(OUT) :: ehfx
425 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
426 LOGICAL, INTENT(IN) :: geometry_did_change
427 INTEGER, INTENT(IN) :: nspins
428 REAL(kind=dp), INTENT(IN) :: hf_fraction
429
430 CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_update_ks_kp'
431
432 INTEGER :: b_img, batch_size, group_size, handle, handle2, i_batch, i_img, i_spin, iatom, &
433 iblk, igroup, jatom, mb_img, n_batch_nze, natom, ngroups, nimg, nimg_nze
434 INTEGER(int_8) :: nflop, nze
435 INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_at, batch_ranges_nze, &
436 idx_to_at_ao
437 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: iapc_pairs
438 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: sparsity_pattern
439 LOGICAL :: use_delta_p
440 REAL(dp) :: etmp, fac, occ, pfac, pref, t1, t2, t3, &
441 t4
442 TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
443 TYPE(dbcsr_type) :: ks_desymm, rho_desymm, tmp
444 TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: mat_2c_pot
445 TYPE(dbcsr_type), POINTER :: dbcsr_template
446 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: ks_t_split, t_2c_ao_tmp, t_2c_work, &
447 t_3c_int, t_3c_work_2, t_3c_work_3
448 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: ks_t, ks_t_sub, t_3c_apc, t_3c_apc_sub
449 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
450 TYPE(section_vals_type), POINTER :: hfx_section
451
452 NULLIFY (para_env, para_env_sub, blacs_env_sub, hfx_section, dbcsr_template)
453
454 CALL timeset(routinen, handle)
455
456 CALL get_qs_env(qs_env, para_env=para_env, natom=natom)
457
458 IF (nspins == 1) THEN
459 fac = 0.5_dp*hf_fraction
460 ELSE
461 fac = 1.0_dp*hf_fraction
462 END IF
463
464 IF (geometry_did_change) THEN
465 CALL hfx_ri_pre_scf_kp(ks_matrix(1, 1)%matrix, ri_data, qs_env)
466 END IF
467 nimg = ri_data%nimg
468 nimg_nze = ri_data%nimg_nze
469
470 !We need to calculate the KS matrix for each periodic cell with index b: F_mu^0,nu^b
471 !F_mu^0,nu^b = -0.5 sum_a,c P_sigma^0,lambda^c (mu^0, sigma^a| P^0) V_P^0,Q^b (Q^b| nu^b lambda^a+c)
472 !with V_P^0,Q^b = (P^0|R^0)^-1 * (R^0|S^b) * (S^b|Q^b)^-1
473
474 !We use a local RI basis set for each atom in the system, which inlcudes RI basis elements for
475 !each neighboring atom standing within the KIND radius (decay of Gaussian with smallest exponent)
476
477 !We also limit the number of periodic images we consider accorrding to the HFX potentail in the
478 !RI basis, because if V_P^0,Q^b is zero everywhere, then image b can be ignored (RI basis less diffuse)
479
480 !We manage to calculate each KS matrix doing a double loop on iamges, and a double loop on atoms
481 !First, we pre-contract and store P_sigma^0,lambda^c (mu^0, sigma^a| P^0) (P^0|R^0)^-1 into T_mu^0,lambda^a+c,P^0
482 !Then, we loop over b_img, iatom, jatom to get (R^0|S^b)
483 !Finally, we do an additional loop over a+c images where we do (R^0|S^b) (S^b|Q^b)^-1 (Q^b| nu^b lambda^a+c)
484 !and the final contraction with T_mu^0,lambda^a+c,P^0
485
486 !Note that the 3-center integrals are pre-contracted with the RI metric, and that the same tensor can be used
487 !(mu^0, sigma^a| P^0) (P^0|R^0) <===> (S^b|Q^b)^-1 (Q^b| nu^b lambda^a+c) by relabelling the images
488
489 hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
490 CALL section_vals_val_get(hfx_section, "KP_USE_DELTA_P", l_val=use_delta_p)
491
492 !By default, build the density tensor based on the difference of this SCF P and that of the prev. SCF
493 pfac = -1.0_dp
494 IF (.NOT. use_delta_p) pfac = 0.0_dp
495 CALL get_pmat_images(ri_data%rho_ao_t, rho_ao, pfac, ri_data, qs_env)
496
497 ALLOCATE (ks_t(nspins, nimg))
498 DO i_img = 1, nimg
499 DO i_spin = 1, nspins
500 CALL dbt_create(ri_data%ks_t(1, 1), ks_t(i_spin, i_img))
501 END DO
502 END DO
503
504 ALLOCATE (idx_to_at_ao(SIZE(ri_data%bsizes_AO_split)))
505 CALL get_idx_to_atom(idx_to_at_ao, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
506
507 !First we calculate and store T^1_mu^0,lambda^a+c,P = P_mu^0,lambda^c * (mu_0 sigma^a | P^0) (P^0|R^0)^-1
508 !To avoid doing nimg**2 tiny contractions that do not scale well with a large number of CPUs,
509 !we instead do a single loop over the a+c image index. For each a+c, we get a list of allowed
510 !combination of a,c indices. Then we build TAS tensors P_mu^0,lambda^c with all concerned c's
511 !and (mu^0 sigma^a | P^0)*(P^0|R^0)^-1 with all a's. Then we perform a single contraction with larger tensors,
512 !were the sum over a,c is automatically taken care of
513 ALLOCATE (t_3c_apc(nspins, nimg))
514 DO i_img = 1, nimg
515 DO i_spin = 1, nspins
516 CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_apc(i_spin, i_img))
517 END DO
518 END DO
519 CALL contract_pmat_3c(t_3c_apc, ri_data%rho_ao_t, ri_data, qs_env)
520
521 hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
522 CALL section_vals_val_get(hfx_section, "KP_NGROUPS", i_val=ngroups)
523 CALL section_vals_val_get(hfx_section, "KP_STACK_SIZE", i_val=batch_size)
524 ri_data%kp_stack_size = batch_size
525
526 IF (mod(para_env%num_pe, ngroups) .NE. 0) THEN
527 cpwarn("KP_NGROUPS must be an integer divisor of the total number of MPI ranks. It was set to 1.")
528 ngroups = 1
529 CALL section_vals_val_set(hfx_section, "KP_NGROUPS", i_val=ngroups)
530 END IF
531 IF ((mod(ngroups, natom) .NE. 0) .AND. (mod(natom, ngroups) .NE. 0) .AND. geometry_did_change) THEN
532 IF (ngroups > 1) &
533 cpwarn("Better load balancing is reached if NGROUPS is a multiple/divisor of the number of atoms")
534 END IF
535 group_size = para_env%num_pe/ngroups
536 igroup = para_env%mepos/group_size
537
538 ALLOCATE (para_env_sub)
539 CALL para_env_sub%from_split(para_env, igroup)
540 CALL cp_blacs_env_create(blacs_env_sub, para_env_sub)
541
542 ! The sparsity pattern of each iatom, jatom pair, on each b_img, and on which subgroup
543 ALLOCATE (sparsity_pattern(natom, natom, nimg))
544 CALL get_sparsity_pattern(sparsity_pattern, ri_data, qs_env)
545 CALL get_sub_dist(sparsity_pattern, ngroups, ri_data)
546
547 !Get all the required tensors in the subgroups
548 ALLOCATE (mat_2c_pot(nimg), ks_t_sub(nspins, nimg), t_2c_ao_tmp(1), ks_t_split(2), t_2c_work(3))
549 CALL get_subgroup_2c_tensors(mat_2c_pot, t_2c_work, t_2c_ao_tmp, ks_t_split, ks_t_sub, &
550 group_size, ngroups, para_env, para_env_sub, ri_data)
551
552 ALLOCATE (t_3c_int(nimg), t_3c_apc_sub(nspins, nimg), t_3c_work_2(3), t_3c_work_3(3))
553 CALL get_subgroup_3c_tensors(t_3c_int, t_3c_work_2, t_3c_work_3, t_3c_apc, t_3c_apc_sub, &
554 group_size, ngroups, para_env, para_env_sub, ri_data)
555
556 !We go atom by atom, therefore there is an automatic batching along that direction
557 !Also, because we stack the 3c tensors nimg times, we naturally do some batching there too
558 ALLOCATE (batch_ranges_at(natom + 1))
559 batch_ranges_at(natom + 1) = SIZE(ri_data%bsizes_AO_split) + 1
560 iatom = 0
561 DO iblk = 1, SIZE(ri_data%bsizes_AO_split)
562 IF (idx_to_at_ao(iblk) == iatom + 1) THEN
563 iatom = iatom + 1
564 batch_ranges_at(iatom) = iblk
565 END IF
566 END DO
567
568 n_batch_nze = nimg_nze/batch_size
569 IF (modulo(nimg_nze, batch_size) .NE. 0) n_batch_nze = n_batch_nze + 1
570 ALLOCATE (batch_ranges_nze(n_batch_nze + 1))
571 DO i_batch = 1, n_batch_nze
572 batch_ranges_nze(i_batch) = (i_batch - 1)*batch_size + 1
573 END DO
574 batch_ranges_nze(n_batch_nze + 1) = nimg_nze + 1
575
576 CALL dbt_batched_contract_init(t_3c_work_3(1), batch_range_2=batch_ranges_at)
577 CALL dbt_batched_contract_init(t_3c_work_3(2), batch_range_2=batch_ranges_at)
578 CALL dbt_batched_contract_init(t_3c_work_2(1), batch_range_1=batch_ranges_at)
579 CALL dbt_batched_contract_init(t_3c_work_2(2), batch_range_1=batch_ranges_at)
580
581 t1 = m_walltime()
582 ri_data%kp_cost(:, :, :) = 0.0_dp
583 ALLOCATE (iapc_pairs(nimg, 2))
584 DO b_img = 1, nimg
585 CALL dbt_batched_contract_init(ks_t_split(1))
586 CALL dbt_batched_contract_init(ks_t_split(2))
587 DO jatom = 1, natom
588 DO iatom = 1, natom
589 IF (.NOT. sparsity_pattern(iatom, jatom, b_img) == igroup) cycle
590 pref = 1.0_dp
591 IF (iatom == jatom .AND. b_img == 1) pref = 0.5_dp
592
593 !measure the cost of the given i, j, b configuration
594 t3 = m_walltime()
595
596 !Get the proper HFX potential 2c integrals (R_i^0|S_j^b)
597 CALL timeset(routinen//"_2c", handle2)
598 CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, iatom, jatom, b_img, ri_data, qs_env, &
599 blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
600 dbcsr_template=dbcsr_template)
601 CALL dbt_copy(t_2c_work(1), t_2c_work(2), move_data=.true.) !move to split blocks
602 CALL dbt_filter(t_2c_work(2), ri_data%filter_eps)
603 CALL timestop(handle2)
604
605 CALL dbt_batched_contract_init(t_2c_work(2))
606 CALL get_iapc_pairs(iapc_pairs, b_img, ri_data, qs_env)
607 CALL timeset(routinen//"_3c", handle2)
608
609 !Stack the (S^b|Q^b)^-1 * (Q^b| nu^b lambda^a+c) integrals over a+c and multiply by (R_i^0|S_j^b)
610 DO i_batch = 1, n_batch_nze
611 CALL fill_3c_stack(t_3c_work_3(3), t_3c_int, iapc_pairs(:, 1), 3, ri_data, &
612 filter_at=jatom, filter_dim=2, idx_to_at=idx_to_at_ao, &
613 img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
614 CALL dbt_copy(t_3c_work_3(3), t_3c_work_3(1), move_data=.true.)
615
616 CALL dbt_contract(1.0_dp, t_2c_work(2), t_3c_work_3(1), &
617 0.0_dp, t_3c_work_3(2), map_1=[1], map_2=[2, 3], &
618 contract_1=[2], notcontract_1=[1], &
619 contract_2=[1], notcontract_2=[2, 3], &
620 filter_eps=ri_data%filter_eps, flop=nflop)
621 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
622 CALL dbt_copy(t_3c_work_3(2), t_3c_work_2(2), order=[2, 1, 3], move_data=.true.)
623 CALL dbt_copy(t_3c_work_3(3), t_3c_work_3(1))
624
625 !Stack the P_sigma^a,lambda^a+c * (mu^0 sigma^a | P^0)*(P^0|R^0)^-1 integrals over a+c and contract
626 !to get the final block of the KS matrix
627 DO i_spin = 1, nspins
628 CALL fill_3c_stack(t_3c_work_2(3), t_3c_apc_sub(i_spin, :), iapc_pairs(:, 2), 3, &
629 ri_data, filter_at=iatom, filter_dim=1, idx_to_at=idx_to_at_ao, &
630 img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
631 CALL get_tensor_occupancy(t_3c_work_2(3), nze, occ)
632 IF (nze == 0) cycle
633 CALL dbt_copy(t_3c_work_2(3), t_3c_work_2(1), move_data=.true.)
634 CALL dbt_contract(-pref*fac, t_3c_work_2(1), t_3c_work_2(2), &
635 1.0_dp, ks_t_split(i_spin), map_1=[1], map_2=[2], &
636 contract_1=[2, 3], notcontract_1=[1], &
637 contract_2=[2, 3], notcontract_2=[1], &
638 filter_eps=ri_data%filter_eps, &
639 move_data=i_spin == nspins, flop=nflop)
640 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
641 END DO
642 END DO !i_batch
643 CALL timestop(handle2)
644 CALL dbt_batched_contract_finalize(t_2c_work(2))
645
646 t4 = m_walltime()
647 ri_data%kp_cost(iatom, jatom, b_img) = t4 - t3
648 END DO !iatom
649 END DO !jatom
650 CALL dbt_batched_contract_finalize(ks_t_split(1))
651 CALL dbt_batched_contract_finalize(ks_t_split(2))
652
653 DO i_spin = 1, nspins
654 CALL dbt_copy(ks_t_split(i_spin), t_2c_ao_tmp(1), move_data=.true.)
655 CALL dbt_copy(t_2c_ao_tmp(1), ks_t_sub(i_spin, b_img), summation=.true.)
656 END DO
657 END DO !b_img
658 CALL dbt_batched_contract_finalize(t_3c_work_3(1))
659 CALL dbt_batched_contract_finalize(t_3c_work_3(2))
660 CALL dbt_batched_contract_finalize(t_3c_work_2(1))
661 CALL dbt_batched_contract_finalize(t_3c_work_2(2))
662 CALL para_env%sync()
663 CALL para_env%sum(ri_data%dbcsr_nflop)
664 CALL para_env%sum(ri_data%kp_cost)
665 t2 = m_walltime()
666 ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
667
668 !transfer KS tensor from subgroup to main group
669 CALL gather_ks_matrix(ks_t, ks_t_sub, group_size, sparsity_pattern, para_env, ri_data)
670
671 !Keep the 3c integrals on the subgroups to avoid communication at next SCF step
672 DO i_img = 1, nimg
673 CALL dbt_copy(t_3c_int(i_img), ri_data%kp_t_3c_int(i_img), move_data=.true.)
674 END DO
675
676 !clean-up subgroup tensors
677 CALL dbt_destroy(t_2c_ao_tmp(1))
678 CALL dbt_destroy(ks_t_split(1))
679 CALL dbt_destroy(ks_t_split(2))
680 CALL dbt_destroy(t_2c_work(1))
681 CALL dbt_destroy(t_2c_work(2))
682 CALL dbt_destroy(t_3c_work_2(1))
683 CALL dbt_destroy(t_3c_work_2(2))
684 CALL dbt_destroy(t_3c_work_2(3))
685 CALL dbt_destroy(t_3c_work_3(1))
686 CALL dbt_destroy(t_3c_work_3(2))
687 CALL dbt_destroy(t_3c_work_3(3))
688 DO i_img = 1, nimg
689 CALL dbt_destroy(t_3c_int(i_img))
690 CALL dbcsr_release(mat_2c_pot(i_img))
691 DO i_spin = 1, nspins
692 CALL dbt_destroy(t_3c_apc_sub(i_spin, i_img))
693 CALL dbt_destroy(ks_t_sub(i_spin, i_img))
694 END DO
695 END DO
696 IF (ASSOCIATED(dbcsr_template)) THEN
697 CALL dbcsr_release(dbcsr_template)
698 DEALLOCATE (dbcsr_template)
699 END IF
700
701 !End of subgroup parallelization
702 CALL cp_blacs_env_release(blacs_env_sub)
703 CALL para_env_sub%free()
704 DEALLOCATE (para_env_sub)
705
706 !Currently, rho_ao_t holds the density difference (wrt to pref SCF step).
707 !ks_t also hold that diff, while only having half the blocks => need to add to prev ks_t and symmetrize
708 !We need the full thing for the energy, on the next SCF step
709 CALL get_pmat_images(ri_data%rho_ao_t, rho_ao, 0.0_dp, ri_data, qs_env)
710 DO i_spin = 1, nspins
711 DO b_img = 1, nimg
712 CALL dbt_copy(ks_t(i_spin, b_img), ri_data%ks_t(i_spin, b_img), summation=.true.)
713
714 !desymmetrize
715 mb_img = get_opp_index(b_img, qs_env)
716 IF (mb_img > 0 .AND. mb_img .LE. nimg) THEN
717 CALL dbt_copy(ks_t(i_spin, mb_img), ri_data%ks_t(i_spin, b_img), order=[2, 1], summation=.true.)
718 END IF
719 END DO
720 END DO
721 DO b_img = 1, nimg
722 DO i_spin = 1, nspins
723 CALL dbt_destroy(ks_t(i_spin, b_img))
724 END DO
725 END DO
726
727 !calculate the energy
728 CALL dbt_create(ri_data%ks_t(1, 1), t_2c_ao_tmp(1))
729 CALL dbcsr_create(tmp, template=ks_matrix(1, 1)%matrix, matrix_type=dbcsr_type_symmetric)
730 CALL dbcsr_create(ks_desymm, template=ks_matrix(1, 1)%matrix, matrix_type=dbcsr_type_no_symmetry)
731 CALL dbcsr_create(rho_desymm, template=ks_matrix(1, 1)%matrix, matrix_type=dbcsr_type_no_symmetry)
732 ehfx = 0.0_dp
733 DO i_img = 1, nimg
734 DO i_spin = 1, nspins
735 CALL dbt_filter(ri_data%ks_t(i_spin, i_img), ri_data%filter_eps)
736 CALL dbt_copy(ri_data%ks_t(i_spin, i_img), t_2c_ao_tmp(1))
737 CALL dbt_copy_tensor_to_matrix(t_2c_ao_tmp(1), ks_desymm)
738 CALL dbt_copy_tensor_to_matrix(t_2c_ao_tmp(1), tmp)
739 CALL dbcsr_add(ks_matrix(i_spin, i_img)%matrix, tmp, 1.0_dp, 1.0_dp)
740
741 CALL dbt_copy(ri_data%rho_ao_t(i_spin, i_img), t_2c_ao_tmp(1))
742 CALL dbt_copy_tensor_to_matrix(t_2c_ao_tmp(1), rho_desymm)
743
744 CALL dbcsr_dot(ks_desymm, rho_desymm, etmp)
745 ehfx = ehfx + 0.5_dp*etmp
746
747 IF (.NOT. use_delta_p) CALL dbt_clear(ri_data%ks_t(i_spin, i_img))
748 END DO
749 END DO
750 CALL dbcsr_release(rho_desymm)
751 CALL dbcsr_release(ks_desymm)
752 CALL dbcsr_release(tmp)
753 CALL dbt_destroy(t_2c_ao_tmp(1))
754
755 CALL timestop(handle)
756
757 END SUBROUTINE hfx_ri_update_ks_kp
758
759! **************************************************************************************************
760!> \brief Update the K-points RI-HFX forces
761!> \param qs_env ...
762!> \param ri_data ...
763!> \param nspins ...
764!> \param hf_fraction ...
765!> \param rho_ao ...
766!> \param use_virial ...
767!> \note Because this routine uses stored quantities calculated in the energy calculation, they should
768!> always be called by pairs, and with the same input densities
769! **************************************************************************************************
770 SUBROUTINE hfx_ri_update_forces_kp(qs_env, ri_data, nspins, hf_fraction, rho_ao, use_virial)
771
772 TYPE(qs_environment_type), POINTER :: qs_env
773 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
774 INTEGER, INTENT(IN) :: nspins
775 REAL(kind=dp), INTENT(IN) :: hf_fraction
776 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
777 LOGICAL, INTENT(IN), OPTIONAL :: use_virial
778
779 CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_update_forces_kp'
780
781 INTEGER :: b_img, batch_size, group_size, handle, handle2, i_batch, i_img, i_loop, i_spin, &
782 i_xyz, iatom, iblk, igroup, j_xyz, jatom, k_xyz, n_batch, natom, ngroups, nimg, nimg_nze
783 INTEGER(int_8) :: nflop, nze
784 INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_ranges_at, &
785 batch_ranges_nze, dist1, dist2, &
786 i_images, idx_to_at_ao, idx_to_at_ri, &
787 kind_of
788 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: iapc_pairs
789 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: force_pattern, sparsity_pattern
790 INTEGER, DIMENSION(2, 1) :: bounds_iat, bounds_jat
791 LOGICAL :: use_virial_prv
792 REAL(dp) :: fac, occ, pref, t1, t2
793 REAL(dp), DIMENSION(3, 3) :: work_virial
794 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
795 TYPE(cell_type), POINTER :: cell
796 TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
797 TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: mat_2c_pot
798 TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:, :) :: mat_der_pot, mat_der_pot_sub
799 TYPE(dbcsr_type), POINTER :: dbcsr_template
800 TYPE(dbt_type) :: t_2c_r, t_2c_r_split
801 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_bint, t_2c_binv, t_2c_der_pot, &
802 t_2c_inv, t_2c_metric, t_2c_work, &
803 t_3c_der_stack, t_3c_work_2, &
804 t_3c_work_3
805 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: rho_ao_t, rho_ao_t_sub, t_2c_der_metric, &
806 t_2c_der_metric_sub, t_3c_apc, t_3c_apc_sub, t_3c_der_ao, t_3c_der_ao_sub, t_3c_der_ri, &
807 t_3c_der_ri_sub
808 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
809 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
810 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
811 TYPE(section_vals_type), POINTER :: hfx_section
812 TYPE(virial_type), POINTER :: virial
813
814 NULLIFY (para_env, para_env_sub, hfx_section, blacs_env_sub, dbcsr_template, force, atomic_kind_set, &
815 virial, particle_set, cell)
816
817 CALL timeset(routinen, handle)
818
819 use_virial_prv = .false.
820 IF (PRESENT(use_virial)) use_virial_prv = use_virial
821
822 IF (nspins == 1) THEN
823 fac = 0.5_dp*hf_fraction
824 ELSE
825 fac = 1.0_dp*hf_fraction
826 END IF
827
828 CALL get_qs_env(qs_env, natom=natom, para_env=para_env, force=force, cell=cell, virial=virial, &
829 atomic_kind_set=atomic_kind_set, particle_set=particle_set)
830 CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
831
832 ALLOCATE (idx_to_at_ao(SIZE(ri_data%bsizes_AO_split)))
833 CALL get_idx_to_atom(idx_to_at_ao, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
834
835 ALLOCATE (idx_to_at_ri(SIZE(ri_data%bsizes_RI_split)))
836 CALL get_idx_to_atom(idx_to_at_ri, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
837
838 nimg = ri_data%nimg
839 ALLOCATE (t_3c_der_ri(nimg, 3), t_3c_der_ao(nimg, 3), mat_der_pot(nimg, 3), t_2c_der_metric(natom, 3))
840
841 !We assume that the integrals are available from the SCF
842 !pre-calculate the derivs. 3c tensors as (P^0| sigma^a mu^0), with t_3c_der_AO holding deriv wrt mu^0
843 CALL precalc_derivatives(t_3c_der_ri, t_3c_der_ao, mat_der_pot, t_2c_der_metric, ri_data, qs_env)
844
845 !Calculate the density matrix at each image
846 ALLOCATE (rho_ao_t(nspins, nimg))
847 CALL create_2c_tensor(rho_ao_t(1, 1), dist1, dist2, ri_data%pgrid_2d, &
848 ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
849 name="(AO | AO)")
850 DEALLOCATE (dist1, dist2)
851 IF (nspins == 2) CALL dbt_create(rho_ao_t(1, 1), rho_ao_t(2, 1))
852 DO i_img = 2, nimg
853 DO i_spin = 1, nspins
854 CALL dbt_create(rho_ao_t(1, 1), rho_ao_t(i_spin, i_img))
855 END DO
856 END DO
857 CALL get_pmat_images(rho_ao_t, rho_ao, 0.0_dp, ri_data, qs_env)
858
859 !Contract integrals with the density matrix
860 ALLOCATE (t_3c_apc(nspins, nimg))
861 DO i_img = 1, nimg
862 DO i_spin = 1, nspins
863 CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_apc(i_spin, i_img))
864 END DO
865 END DO
866 CALL contract_pmat_3c(t_3c_apc, rho_ao_t, ri_data, qs_env)
867
868 !Setup the subgroups
869 hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
870 CALL section_vals_val_get(hfx_section, "KP_NGROUPS", i_val=ngroups)
871 group_size = para_env%num_pe/ngroups
872 igroup = para_env%mepos/group_size
873
874 ALLOCATE (para_env_sub)
875 CALL para_env_sub%from_split(para_env, igroup)
876 CALL cp_blacs_env_create(blacs_env_sub, para_env_sub)
877
878 !Get the ususal sparsity pattern
879 ALLOCATE (sparsity_pattern(natom, natom, nimg))
880 CALL get_sparsity_pattern(sparsity_pattern, ri_data, qs_env)
881 CALL get_sub_dist(sparsity_pattern, ngroups, ri_data)
882
883 !Get the 2-center quantities in the subgroups (note: main group derivs are deleted wihtin)
884 ALLOCATE (t_2c_inv(natom), mat_2c_pot(nimg), rho_ao_t_sub(nspins, nimg), t_2c_work(5), &
885 t_2c_der_metric_sub(natom, 3), mat_der_pot_sub(nimg, 3), t_2c_bint(natom), &
886 t_2c_metric(natom), t_2c_binv(natom))
887 CALL get_subgroup_2c_derivs(t_2c_inv, t_2c_bint, t_2c_metric, mat_2c_pot, t_2c_work, rho_ao_t, &
888 rho_ao_t_sub, t_2c_der_metric, t_2c_der_metric_sub, mat_der_pot, &
889 mat_der_pot_sub, group_size, ngroups, para_env, para_env_sub, ri_data)
890 CALL dbt_create(t_2c_work(1), t_2c_r) !nRI x nRI
891 CALL dbt_create(t_2c_work(5), t_2c_r_split) !nRI x nRI with split blocks
892
893 ALLOCATE (t_2c_der_pot(3))
894 DO i_xyz = 1, 3
895 CALL dbt_create(t_2c_r, t_2c_der_pot(i_xyz))
896 END DO
897
898 !Get the 3-center quantities in the subgroups. The integrals and t_3c_apc already there
899 ALLOCATE (t_3c_work_2(3), t_3c_work_3(4), t_3c_der_stack(6), t_3c_der_ao_sub(nimg, 3), &
900 t_3c_der_ri_sub(nimg, 3), t_3c_apc_sub(nspins, nimg))
901 CALL get_subgroup_3c_derivs(t_3c_work_2, t_3c_work_3, t_3c_der_ao, t_3c_der_ao_sub, &
902 t_3c_der_ri, t_3c_der_ri_sub, t_3c_apc, t_3c_apc_sub, t_3c_der_stack, &
903 group_size, ngroups, para_env, para_env_sub, ri_data)
904
905 !Set up batched contraction (go atom by atom)
906 ALLOCATE (batch_ranges_at(natom + 1))
907 batch_ranges_at(natom + 1) = SIZE(ri_data%bsizes_AO_split) + 1
908 iatom = 0
909 DO iblk = 1, SIZE(ri_data%bsizes_AO_split)
910 IF (idx_to_at_ao(iblk) == iatom + 1) THEN
911 iatom = iatom + 1
912 batch_ranges_at(iatom) = iblk
913 END IF
914 END DO
915
916 CALL dbt_batched_contract_init(t_3c_work_3(1), batch_range_2=batch_ranges_at)
917 CALL dbt_batched_contract_init(t_3c_work_3(2), batch_range_2=batch_ranges_at)
918 CALL dbt_batched_contract_init(t_3c_work_3(3), batch_range_2=batch_ranges_at)
919 CALL dbt_batched_contract_init(t_3c_work_2(1), batch_range_1=batch_ranges_at)
920 CALL dbt_batched_contract_init(t_3c_work_2(2), batch_range_1=batch_ranges_at)
921
922 !Preparing for the stacking of 3c tensors
923 nimg_nze = ri_data%nimg_nze
924 batch_size = ri_data%kp_stack_size
925 n_batch = nimg_nze/batch_size
926 IF (modulo(nimg_nze, batch_size) .NE. 0) n_batch = n_batch + 1
927 ALLOCATE (batch_ranges_nze(n_batch + 1))
928 DO i_batch = 1, n_batch
929 batch_ranges_nze(i_batch) = (i_batch - 1)*batch_size + 1
930 END DO
931 batch_ranges_nze(n_batch + 1) = nimg_nze + 1
932
933 !Applying the external bump to ((P|Q)_D + B*(P|Q)_OD*B)^-1 from left and right
934 !And keep the bump on LHS only version as well, with B*M^-1 = (M^-1*B)^T
935 DO iatom = 1, natom
936 CALL dbt_create(t_2c_inv(iatom), t_2c_binv(iatom))
937 CALL dbt_copy(t_2c_inv(iatom), t_2c_binv(iatom))
938 CALL apply_bump(t_2c_binv(iatom), iatom, ri_data, qs_env, from_left=.true., from_right=.false.)
939 CALL apply_bump(t_2c_inv(iatom), iatom, ri_data, qs_env, from_left=.true., from_right=.true.)
940 END DO
941
942 t1 = m_walltime()
943 work_virial = 0.0_dp
944 ALLOCATE (iapc_pairs(nimg, 2), i_images(nimg))
945 ALLOCATE (force_pattern(natom, natom, nimg))
946 force_pattern(:, :, :) = -1
947 !We proceed with 2 loops: one over the sparsity pattern from the SCF, one over the rest
948 !We use the SCF cost model for the first loop, while we calculate the cost of the upcoming loop
949 DO i_loop = 1, 2
950 DO b_img = 1, nimg
951 DO jatom = 1, natom
952 DO iatom = 1, natom
953
954 pref = -0.5_dp*fac
955 IF (i_loop == 1 .AND. (.NOT. sparsity_pattern(iatom, jatom, b_img) == igroup)) cycle
956 IF (i_loop == 2 .AND. (.NOT. force_pattern(iatom, jatom, b_img) == igroup)) cycle
957
958 !Get the proper HFX potential 2c integrals (R_i^0|S_j^b), times (S_j^b|Q_j^b)^-1
959 CALL timeset(routinen//"_2c_1", handle2)
960 CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, iatom, jatom, b_img, ri_data, qs_env, &
961 blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
962 dbcsr_template=dbcsr_template)
963 CALL dbt_contract(1.0_dp, t_2c_work(1), t_2c_inv(jatom), &
964 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
965 contract_1=[2], notcontract_1=[1], &
966 contract_2=[1], notcontract_2=[2], &
967 filter_eps=ri_data%filter_eps, flop=nflop)
968 CALL dbt_copy(t_2c_work(2), t_2c_work(5), move_data=.true.) !move to split blocks
969 CALL dbt_filter(t_2c_work(5), ri_data%filter_eps)
970 CALL timestop(handle2)
971
972 CALL timeset(routinen//"_3c", handle2)
973 bounds_iat(:, 1) = [sum(ri_data%bsizes_AO(1:iatom - 1)) + 1, sum(ri_data%bsizes_AO(1:iatom))]
974 bounds_jat(:, 1) = [sum(ri_data%bsizes_AO(1:jatom - 1)) + 1, sum(ri_data%bsizes_AO(1:jatom))]
975 CALL dbt_clear(t_2c_r_split)
976
977 DO i_spin = 1, nspins
978 CALL dbt_batched_contract_init(rho_ao_t_sub(i_spin, b_img))
979 END DO
980
981 CALL get_iapc_pairs(iapc_pairs, b_img, ri_data, qs_env, i_images) !i = a+c-b
982 DO i_batch = 1, n_batch
983
984 !Stack the 3c derivatives to take the trace later on
985 DO i_xyz = 1, 3
986 CALL dbt_clear(t_3c_der_stack(i_xyz))
987 CALL fill_3c_stack(t_3c_der_stack(i_xyz), t_3c_der_ri_sub(:, i_xyz), &
988 iapc_pairs(:, 1), 3, ri_data, filter_at=jatom, &
989 filter_dim=2, idx_to_at=idx_to_at_ao, &
990 img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
991
992 CALL dbt_clear(t_3c_der_stack(3 + i_xyz))
993 CALL fill_3c_stack(t_3c_der_stack(3 + i_xyz), t_3c_der_ao_sub(:, i_xyz), &
994 iapc_pairs(:, 1), 3, ri_data, filter_at=jatom, &
995 filter_dim=2, idx_to_at=idx_to_at_ao, &
996 img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
997 END DO
998
999 DO i_spin = 1, nspins
1000 !stack the t_3c_apc tensors
1001 CALL dbt_clear(t_3c_work_2(3))
1002 CALL fill_3c_stack(t_3c_work_2(3), t_3c_apc_sub(i_spin, :), iapc_pairs(:, 2), 3, &
1003 ri_data, filter_at=iatom, filter_dim=1, idx_to_at=idx_to_at_ao, &
1004 img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
1005 CALL get_tensor_occupancy(t_3c_work_2(3), nze, occ)
1006 IF (nze == 0) cycle
1007 CALL dbt_copy(t_3c_work_2(3), t_3c_work_2(1), move_data=.true.)
1008
1009 !Contract with the second density matrix: P_mu^0,nu^b * t_3c_apc,
1010 !where t_3c_apc = P_sigma^a,lambda^a+c (mu^0 P^0 sigma^a) *(P^0|R^0)^-1 (stacked along a+c)
1011 CALL dbt_contract(1.0_dp, rho_ao_t_sub(i_spin, b_img), t_3c_work_2(1), &
1012 0.0_dp, t_3c_work_2(2), map_1=[1], map_2=[2, 3], &
1013 contract_1=[1], notcontract_1=[2], &
1014 contract_2=[1], notcontract_2=[2, 3], &
1015 bounds_1=bounds_iat, bounds_2=bounds_jat, &
1016 filter_eps=ri_data%filter_eps, flop=nflop)
1017 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1018
1019 CALL get_tensor_occupancy(t_3c_work_2(2), nze, occ)
1020 IF (nze == 0) cycle
1021
1022 !Contract with V_PQ so that we can take the trace with (Q^b|nu^b lmabda^a+c)^(x)
1023 CALL dbt_copy(t_3c_work_2(2), t_3c_work_3(1), order=[2, 1, 3], move_data=.true.)
1024 CALL dbt_batched_contract_init(t_2c_work(5))
1025 CALL dbt_contract(1.0_dp, t_2c_work(5), t_3c_work_3(1), &
1026 0.0_dp, t_3c_work_3(2), map_1=[1], map_2=[2, 3], &
1027 contract_1=[1], notcontract_1=[2], &
1028 contract_2=[1], notcontract_2=[2, 3], &
1029 filter_eps=ri_data%filter_eps, flop=nflop)
1030 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1031 CALL dbt_batched_contract_finalize(t_2c_work(5))
1032
1033 !Contract with the 3c derivatives to get the force/virial
1034 CALL dbt_copy(t_3c_work_3(2), t_3c_work_3(4), move_data=.true.)
1035 IF (use_virial_prv) THEN
1036 CALL get_force_from_3c_trace(force, t_3c_work_3(4), t_3c_der_stack(1:3), &
1037 t_3c_der_stack(4:6), atom_of_kind, kind_of, &
1038 idx_to_at_ri, idx_to_at_ao, i_images, &
1039 batch_ranges_nze(i_batch), 2.0_dp*pref, &
1040 ri_data, qs_env, work_virial, cell, particle_set)
1041 ELSE
1042 CALL get_force_from_3c_trace(force, t_3c_work_3(4), t_3c_der_stack(1:3), &
1043 t_3c_der_stack(4:6), atom_of_kind, kind_of, &
1044 idx_to_at_ri, idx_to_at_ao, i_images, &
1045 batch_ranges_nze(i_batch), 2.0_dp*pref, &
1046 ri_data, qs_env)
1047 END IF
1048 CALL dbt_clear(t_3c_work_3(4))
1049
1050 !Contract with the 3-center integrals in order to have a matrix R_PQ such that
1051 !we can take the trace sum_PQ R_PQ (P^0|Q^b)^(x)
1052 IF (i_loop == 2) cycle
1053
1054 !Stack the 3c integrals
1055 CALL fill_3c_stack(t_3c_work_3(4), ri_data%kp_t_3c_int, iapc_pairs(:, 1), 3, ri_data, &
1056 filter_at=jatom, filter_dim=2, idx_to_at=idx_to_at_ao, &
1057 img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
1058 CALL dbt_copy(t_3c_work_3(4), t_3c_work_3(3), move_data=.true.)
1059
1060 CALL dbt_batched_contract_init(t_2c_r_split)
1061 CALL dbt_contract(1.0_dp, t_3c_work_3(1), t_3c_work_3(3), &
1062 1.0_dp, t_2c_r_split, map_1=[1], map_2=[2], &
1063 contract_1=[2, 3], notcontract_1=[1], &
1064 contract_2=[2, 3], notcontract_2=[1], &
1065 filter_eps=ri_data%filter_eps, flop=nflop)
1066 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1067 CALL dbt_batched_contract_finalize(t_2c_r_split)
1068 CALL dbt_copy(t_3c_work_3(4), t_3c_work_3(1))
1069 END DO
1070 END DO
1071 DO i_spin = 1, nspins
1072 CALL dbt_batched_contract_finalize(rho_ao_t_sub(i_spin, b_img))
1073 END DO
1074 CALL timestop(handle2)
1075
1076 IF (i_loop == 2) cycle
1077 pref = 2.0_dp*pref
1078 IF (iatom == jatom .AND. b_img == 1) pref = 0.5_dp*pref
1079
1080 CALL timeset(routinen//"_2c_2", handle2)
1081 !Note that the derivatives are in atomic block format (not split)
1082 CALL dbt_copy(t_2c_r_split, t_2c_r, move_data=.true.)
1083
1084 CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, iatom, jatom, b_img, ri_data, qs_env, &
1085 blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
1086 dbcsr_template=dbcsr_template)
1087
1088 !We have to calculate: S^-1(iat) * R_PQ * S^-1(jat) to trace with HFX pot der
1089 ! + R_PQ * S^-1(jat) * pot^T to trace with S^(x) (iat)
1090 ! + pot^T * S^-1(iat) *R_PQ to trace with S^(x) (jat)
1091
1092 !Because 3c tensors are all precontracted with the inverse RI metric,
1093 !t_2c_R is currently implicitely multiplied by S^-1(iat) from the left
1094 !and S^-1(jat) from the right, directly in the proper format for the trace
1095 !with the HFX potential derivative
1096
1097 !Trace with HFX pot deriv, that we need to build first
1098 DO i_xyz = 1, 3
1099 CALL get_ext_2c_int(t_2c_der_pot(i_xyz), mat_der_pot_sub(:, i_xyz), iatom, jatom, &
1100 b_img, ri_data, qs_env, blacs_env_ext=blacs_env_sub, &
1101 para_env_ext=para_env_sub, dbcsr_template=dbcsr_template)
1102 END DO
1103
1104 IF (use_virial_prv) THEN
1105 CALL get_2c_der_force(force, t_2c_r, t_2c_der_pot, atom_of_kind, kind_of, &
1106 b_img, pref, ri_data, qs_env, work_virial, cell, particle_set)
1107 ELSE
1108 CALL get_2c_der_force(force, t_2c_r, t_2c_der_pot, atom_of_kind, kind_of, &
1109 b_img, pref, ri_data, qs_env)
1110 END IF
1111
1112 DO i_xyz = 1, 3
1113 CALL dbt_clear(t_2c_der_pot(i_xyz))
1114 END DO
1115
1116 !R_PQ * S^-1(jat) * pot^T (=A)
1117 CALL dbt_contract(1.0_dp, t_2c_metric(iatom), t_2c_r, & !get rid of implicit S^-1(iat)
1118 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1119 contract_1=[2], notcontract_1=[1], &
1120 contract_2=[1], notcontract_2=[2], &
1121 filter_eps=ri_data%filter_eps, flop=nflop)
1122 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1123 CALL dbt_contract(1.0_dp, t_2c_work(2), t_2c_work(1), &
1124 0.0_dp, t_2c_work(3), map_1=[1], map_2=[2], &
1125 contract_1=[2], notcontract_1=[1], &
1126 contract_2=[2], notcontract_2=[1], &
1127 filter_eps=ri_data%filter_eps, flop=nflop)
1128 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1129
1130 !With the RI bump function, things get more complex. M = (S|P)_D + B*(S|P)_OD*B
1131 !Calculate M^-1*B*A + A*B*M^-1 to contract with B^x. A is in t_2c_work(3)
1132 CALL dbt_contract(1.0_dp, t_2c_work(3), t_2c_binv(iatom), &
1133 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1134 contract_1=[2], notcontract_1=[1], &
1135 contract_2=[1], notcontract_2=[2], &
1136 filter_eps=ri_data%filter_eps, flop=nflop)
1137 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1138
1139 CALL dbt_contract(1.0_dp, t_2c_binv(iatom), t_2c_work(3), & !use transpose of B*M^-1 = M^-1*B
1140 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1141 contract_1=[1], notcontract_1=[2], &
1142 contract_2=[1], notcontract_2=[2], &
1143 filter_eps=ri_data%filter_eps, flop=nflop)
1144 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1145
1146 CALL dbt_copy(t_2c_work(2), t_2c_work(4), summation=.true.)
1147 CALL get_2c_bump_forces(force, t_2c_work(4), iatom, atom_of_kind, kind_of, pref, &
1148 ri_data, qs_env, work_virial)
1149
1150 !Calculate -M^-1*B*A*B*M^-1 to contracte with diagonal RI metric deriv. t_2c_work(2) holds A*B*M^-1
1151 CALL dbt_contract(1.0_dp, t_2c_binv(iatom), t_2c_work(2), &
1152 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1153 contract_1=[1], notcontract_1=[2], &
1154 contract_2=[1], notcontract_2=[2], &
1155 filter_eps=ri_data%filter_eps, flop=nflop)
1156 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1157
1158 IF (use_virial_prv) THEN
1159 CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1160 kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1161 diag=.true., offdiag=.false.)
1162 ELSE
1163 CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1164 kind_of, 1, -pref, ri_data, qs_env, diag=.true., offdiag=.false.)
1165 END IF
1166
1167 !Calculate -B*M^-1*B*A*B*M^-1*B to contract with off-diagonal RI metric derivs
1168 CALL dbt_copy(t_2c_work(4), t_2c_work(2))
1169 CALL apply_bump(t_2c_work(2), iatom, ri_data, qs_env, from_left=.true., from_right=.true.)
1170
1171 IF (use_virial_prv) THEN
1172 CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1173 kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1174 diag=.false., offdiag=.true.)
1175 ELSE
1176 CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1177 kind_of, 1, -pref, ri_data, qs_env, diag=.false., offdiag=.true.)
1178 END IF
1179
1180 !Calculate -O*B*M^-1*B*A*B*M^-1 - M^-1*B*A*B*M^-1*B*O, where O is off-diagonal integrals
1181 !t_2c_work(4) holds M^-1*B*A*B*M^-1, and exploit transpose of B*O (stored in t_2c_bint)
1182 CALL dbt_contract(1.0_dp, t_2c_work(4), t_2c_bint(iatom), &
1183 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1184 contract_1=[2], notcontract_1=[1], &
1185 contract_2=[1], notcontract_2=[2], &
1186 filter_eps=ri_data%filter_eps, flop=nflop)
1187 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1188
1189 CALL dbt_contract(1.0_dp, t_2c_bint(iatom), t_2c_work(4), &
1190 1.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1191 contract_1=[1], notcontract_1=[2], &
1192 contract_2=[1], notcontract_2=[2], &
1193 filter_eps=ri_data%filter_eps, flop=nflop)
1194 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1195
1196 CALL get_2c_bump_forces(force, t_2c_work(2), iatom, atom_of_kind, kind_of, -pref, &
1197 ri_data, qs_env, work_virial)
1198
1199 ! pot^T * S^-1(iat) * R_PQ (=A)
1200 CALL dbt_contract(1.0_dp, t_2c_work(1), t_2c_r, &
1201 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1202 contract_1=[1], notcontract_1=[2], &
1203 contract_2=[1], notcontract_2=[2], &
1204 filter_eps=ri_data%filter_eps, flop=nflop)
1205 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1206
1207 CALL dbt_contract(1.0_dp, t_2c_work(2), t_2c_metric(jatom), & !get rid of implicit S^-1(jat)
1208 0.0_dp, t_2c_work(3), map_1=[1], map_2=[2], &
1209 contract_1=[2], notcontract_1=[1], &
1210 contract_2=[1], notcontract_2=[2], &
1211 filter_eps=ri_data%filter_eps, flop=nflop)
1212 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1213
1214 !Do the same shenanigans with the S^(x) (jatom)
1215 !Calculate M^-1*B*A + A*B*M^-1 to contract with B^x. A is in t_2c_work(3)
1216 CALL dbt_contract(1.0_dp, t_2c_work(3), t_2c_binv(jatom), &
1217 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1218 contract_1=[2], notcontract_1=[1], &
1219 contract_2=[1], notcontract_2=[2], &
1220 filter_eps=ri_data%filter_eps, flop=nflop)
1221 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1222
1223 CALL dbt_contract(1.0_dp, t_2c_binv(jatom), t_2c_work(3), & !use transpose of B*M^-1 = M^-1*B
1224 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1225 contract_1=[1], notcontract_1=[2], &
1226 contract_2=[1], notcontract_2=[2], &
1227 filter_eps=ri_data%filter_eps, flop=nflop)
1228 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1229
1230 CALL dbt_copy(t_2c_work(2), t_2c_work(4), summation=.true.)
1231 CALL get_2c_bump_forces(force, t_2c_work(4), jatom, atom_of_kind, kind_of, pref, &
1232 ri_data, qs_env, work_virial)
1233
1234 !Calculate -M^-1*B*A*B*M^-1 to contracte with diagonal RI metric deriv. t_2c_work(2) holds A*B*M^-1
1235 CALL dbt_contract(1.0_dp, t_2c_binv(jatom), t_2c_work(2), &
1236 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1237 contract_1=[1], notcontract_1=[2], &
1238 contract_2=[1], notcontract_2=[2], &
1239 filter_eps=ri_data%filter_eps, flop=nflop)
1240 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1241
1242 IF (use_virial_prv) THEN
1243 CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1244 kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1245 diag=.true., offdiag=.false.)
1246 ELSE
1247 CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1248 kind_of, 1, -pref, ri_data, qs_env, diag=.true., offdiag=.false.)
1249 END IF
1250
1251 !Calculate -B*M^-1*B*A*B*M^-1*B to contract with off-diagonal RI metric derivs
1252 CALL dbt_copy(t_2c_work(4), t_2c_work(2))
1253 CALL apply_bump(t_2c_work(2), jatom, ri_data, qs_env, from_left=.true., from_right=.true.)
1254
1255 IF (use_virial_prv) THEN
1256 CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1257 kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1258 diag=.false., offdiag=.true.)
1259 ELSE
1260 CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1261 kind_of, 1, -pref, ri_data, qs_env, diag=.false., offdiag=.true.)
1262 END IF
1263
1264 !Calculate -O*B*M^-1*B*A*B*M^-1 - M^-1*B*A*B*M^-1*B*O, where O is off-diagonal integrals
1265 !t_2c_work(4) holds M^-1*B*A*B*M^-1, and exploit transpose of B*O (stored in t_2c_bint)
1266 CALL dbt_contract(1.0_dp, t_2c_work(4), t_2c_bint(jatom), &
1267 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1268 contract_1=[2], notcontract_1=[1], &
1269 contract_2=[1], notcontract_2=[2], &
1270 filter_eps=ri_data%filter_eps, flop=nflop)
1271 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1272
1273 CALL dbt_contract(1.0_dp, t_2c_bint(jatom), t_2c_work(4), &
1274 1.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1275 contract_1=[1], notcontract_1=[2], &
1276 contract_2=[1], notcontract_2=[2], &
1277 filter_eps=ri_data%filter_eps, flop=nflop)
1278 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1279
1280 CALL get_2c_bump_forces(force, t_2c_work(2), jatom, atom_of_kind, kind_of, -pref, &
1281 ri_data, qs_env, work_virial)
1282
1283 CALL timestop(handle2)
1284 END DO !iatom
1285 END DO !jatom
1286 END DO !b_img
1287
1288 IF (i_loop == 1) THEN
1289 CALL update_pattern_to_forces(force_pattern, sparsity_pattern, ngroups, ri_data, qs_env)
1290 END IF
1291 END DO !i_loop
1292
1293 CALL dbt_batched_contract_finalize(t_3c_work_3(1))
1294 CALL dbt_batched_contract_finalize(t_3c_work_3(2))
1295 CALL dbt_batched_contract_finalize(t_3c_work_3(3))
1296 CALL dbt_batched_contract_finalize(t_3c_work_2(1))
1297 CALL dbt_batched_contract_finalize(t_3c_work_2(2))
1298
1299 IF (use_virial_prv) THEN
1300 DO k_xyz = 1, 3
1301 DO j_xyz = 1, 3
1302 DO i_xyz = 1, 3
1303 virial%pv_fock_4c(i_xyz, j_xyz) = virial%pv_fock_4c(i_xyz, j_xyz) &
1304 + work_virial(i_xyz, k_xyz)*cell%hmat(j_xyz, k_xyz)
1305 END DO
1306 END DO
1307 END DO
1308 END IF
1309
1310 !End of subgroup parallelization
1311 CALL cp_blacs_env_release(blacs_env_sub)
1312 CALL para_env_sub%free()
1313 DEALLOCATE (para_env_sub)
1314
1315 CALL para_env%sync()
1316 t2 = m_walltime()
1317 ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
1318
1319 !clean-up
1320 IF (ASSOCIATED(dbcsr_template)) THEN
1321 CALL dbcsr_release(dbcsr_template)
1322 DEALLOCATE (dbcsr_template)
1323 END IF
1324 CALL dbt_destroy(t_2c_r)
1325 CALL dbt_destroy(t_2c_r_split)
1326 CALL dbt_destroy(t_2c_work(1))
1327 CALL dbt_destroy(t_2c_work(2))
1328 CALL dbt_destroy(t_2c_work(3))
1329 CALL dbt_destroy(t_2c_work(4))
1330 CALL dbt_destroy(t_2c_work(5))
1331 CALL dbt_destroy(t_3c_work_2(1))
1332 CALL dbt_destroy(t_3c_work_2(2))
1333 CALL dbt_destroy(t_3c_work_2(3))
1334 CALL dbt_destroy(t_3c_work_3(1))
1335 CALL dbt_destroy(t_3c_work_3(2))
1336 CALL dbt_destroy(t_3c_work_3(3))
1337 CALL dbt_destroy(t_3c_work_3(4))
1338 CALL dbt_destroy(t_3c_der_stack(1))
1339 CALL dbt_destroy(t_3c_der_stack(2))
1340 CALL dbt_destroy(t_3c_der_stack(3))
1341 CALL dbt_destroy(t_3c_der_stack(4))
1342 CALL dbt_destroy(t_3c_der_stack(5))
1343 CALL dbt_destroy(t_3c_der_stack(6))
1344 DO i_xyz = 1, 3
1345 CALL dbt_destroy(t_2c_der_pot(i_xyz))
1346 END DO
1347 DO iatom = 1, natom
1348 CALL dbt_destroy(t_2c_inv(iatom))
1349 CALL dbt_destroy(t_2c_binv(iatom))
1350 CALL dbt_destroy(t_2c_bint(iatom))
1351 CALL dbt_destroy(t_2c_metric(iatom))
1352 DO i_xyz = 1, 3
1353 CALL dbt_destroy(t_2c_der_metric_sub(iatom, i_xyz))
1354 END DO
1355 END DO
1356 DO i_img = 1, nimg
1357 CALL dbcsr_release(mat_2c_pot(i_img))
1358 DO i_spin = 1, nspins
1359 CALL dbt_destroy(rho_ao_t_sub(i_spin, i_img))
1360 CALL dbt_destroy(t_3c_apc_sub(i_spin, i_img))
1361 END DO
1362 END DO
1363 DO i_xyz = 1, 3
1364 DO i_img = 1, nimg
1365 CALL dbt_destroy(t_3c_der_ri_sub(i_img, i_xyz))
1366 CALL dbt_destroy(t_3c_der_ao_sub(i_img, i_xyz))
1367 CALL dbcsr_release(mat_der_pot_sub(i_img, i_xyz))
1368 END DO
1369 END DO
1370
1371 CALL timestop(handle)
1372
1373 END SUBROUTINE hfx_ri_update_forces_kp
1374
1375! **************************************************************************************************
1376!> \brief A routine the applies the RI bump matrix from the left and/or the right, given an input
1377!> matrix and the central RI atom. We assume atomic block sizes
1378!> \param t_2c_inout ...
1379!> \param atom_i ...
1380!> \param ri_data ...
1381!> \param qs_env ...
1382!> \param from_left ...
1383!> \param from_right ...
1384!> \param debump ...
1385! **************************************************************************************************
1386 SUBROUTINE apply_bump(t_2c_inout, atom_i, ri_data, qs_env, from_left, from_right, debump)
1387 TYPE(dbt_type), INTENT(INOUT) :: t_2c_inout
1388 INTEGER, INTENT(IN) :: atom_i
1389 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1390 TYPE(qs_environment_type), POINTER :: qs_env
1391 LOGICAL, INTENT(IN), OPTIONAL :: from_left, from_right, debump
1392
1393 INTEGER :: i_img, i_ri, iatom, ind(2), j_img, j_ri, &
1394 jatom, natom, nblks(2), nimg, nkind
1395 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1396 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1397 LOGICAL :: found, my_debump, my_left, my_right
1398 REAL(dp) :: bval, r0, r1, ri(3), rj(3), rref(3), &
1399 scoord(3)
1400 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
1401 TYPE(cell_type), POINTER :: cell
1402 TYPE(dbt_iterator_type) :: iter
1403 TYPE(kpoint_type), POINTER :: kpoints
1404 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1405 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1406
1407 NULLIFY (qs_kind_set, particle_set, kpoints, index_to_cell, cell_to_index, cell)
1408
1409 CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, cell=cell, &
1410 kpoints=kpoints, particle_set=particle_set)
1411 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1412
1413 my_debump = .false.
1414 IF (PRESENT(debump)) my_debump = debump
1415
1416 my_left = .false.
1417 IF (PRESENT(from_left)) my_left = from_left
1418
1419 my_right = .false.
1420 IF (PRESENT(from_right)) my_right = from_right
1421 cpassert(my_left .OR. my_right)
1422
1423 CALL dbt_get_info(t_2c_inout, nblks_total=nblks)
1424 cpassert(nblks(1) == ri_data%ncell_RI*natom)
1425 cpassert(nblks(2) == ri_data%ncell_RI*natom)
1426
1427 nimg = ri_data%nimg
1428
1429 !Loop over the RI cells and atoms, and apply bump accordingly
1430 r1 = ri_data%kp_RI_range
1431 r0 = ri_data%kp_bump_rad
1432 rref = pbc(particle_set(atom_i)%r, cell)
1433
1434!$OMP PARALLEL DEFAULT(NONE) SHARED(t_2c_inout,natom,ri_data,cell,particle_set,index_to_cell,my_left, &
1435!$OMP my_right,r0,r1,rref,my_debump) &
1436!$OMP PRIVATE(iter,ind,blk,found,i_RI,i_img,iatom,j_RI,j_img,jatom,scoord,ri,rj,bval)
1437 CALL dbt_iterator_start(iter, t_2c_inout)
1438 DO WHILE (dbt_iterator_blocks_left(iter))
1439 CALL dbt_iterator_next_block(iter, ind)
1440 CALL dbt_get_block(t_2c_inout, ind, blk, found)
1441 IF (.NOT. found) cycle
1442
1443 i_ri = (ind(1) - 1)/natom + 1
1444 i_img = ri_data%RI_cell_to_img(i_ri)
1445 iatom = ind(1) - (i_ri - 1)*natom
1446
1447 CALL real_to_scaled(scoord, pbc(particle_set(iatom)%r, cell), cell)
1448 CALL scaled_to_real(ri, scoord(:) + index_to_cell(:, i_img), cell)
1449
1450 j_ri = (ind(2) - 1)/natom + 1
1451 j_img = ri_data%RI_cell_to_img(j_ri)
1452 jatom = ind(2) - (j_ri - 1)*natom
1453
1454 CALL real_to_scaled(scoord, pbc(particle_set(jatom)%r, cell), cell)
1455 CALL scaled_to_real(rj, scoord(:) + index_to_cell(:, j_img), cell)
1456
1457 IF (.NOT. my_debump) THEN
1458 IF (my_left) blk(:, :) = blk(:, :)*bump(norm2(ri - rref), r0, r1)
1459 IF (my_right) blk(:, :) = blk(:, :)*bump(norm2(rj - rref), r0, r1)
1460 ELSE
1461 !Note: by construction, the bump function is never quite zero, as its range is the same
1462 ! as that of the extended RI basis (but we are safe)
1463 bval = bump(norm2(ri - rref), r0, r1)
1464 IF (my_left .AND. bval > epsilon(1.0_dp)) blk(:, :) = blk(:, :)/bval
1465 bval = bump(norm2(rj - rref), r0, r1)
1466 IF (my_right .AND. bval > epsilon(1.0_dp)) blk(:, :) = blk(:, :)/bval
1467 END IF
1468
1469 CALL dbt_put_block(t_2c_inout, ind, shape(blk), blk)
1470
1471 DEALLOCATE (blk)
1472 END DO
1473 CALL dbt_iterator_stop(iter)
1474!$OMP END PARALLEL
1475 CALL dbt_filter(t_2c_inout, ri_data%filter_eps)
1476
1477 END SUBROUTINE apply_bump
1478
1479! **************************************************************************************************
1480!> \brief A routine that calculates the forces due to the derivative of the bump function
1481!> \param force ...
1482!> \param t_2c_in ...
1483!> \param atom_i ...
1484!> \param atom_of_kind ...
1485!> \param kind_of ...
1486!> \param pref ...
1487!> \param ri_data ...
1488!> \param qs_env ...
1489!> \param work_virial ...
1490! **************************************************************************************************
1491 SUBROUTINE get_2c_bump_forces(force, t_2c_in, atom_i, atom_of_kind, kind_of, pref, ri_data, &
1492 qs_env, work_virial)
1493 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
1494 TYPE(dbt_type), INTENT(INOUT) :: t_2c_in
1495 INTEGER, INTENT(IN) :: atom_i
1496 INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of
1497 REAL(dp), INTENT(IN) :: pref
1498 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1499 TYPE(qs_environment_type), POINTER :: qs_env
1500 REAL(dp), DIMENSION(3, 3), INTENT(INOUT) :: work_virial
1501
1502 INTEGER :: i, i_img, i_ri, i_xyz, iat_of_kind, iatom, ikind, ind(2), j_img, j_ri, j_xyz, &
1503 jat_of_kind, jatom, jkind, natom, nblks(2), nimg, nkind
1504 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1505 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1506 LOGICAL :: found
1507 REAL(dp) :: new_force, r0, r1, ri(3), rj(3), &
1508 rref(3), scoord(3), x
1509 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
1510 TYPE(cell_type), POINTER :: cell
1511 TYPE(dbt_iterator_type) :: iter
1512 TYPE(kpoint_type), POINTER :: kpoints
1513 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1514 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1515
1516 NULLIFY (qs_kind_set, particle_set, kpoints, index_to_cell, cell_to_index, cell)
1517
1518 CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, cell=cell, &
1519 kpoints=kpoints, particle_set=particle_set)
1520 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1521
1522 CALL dbt_get_info(t_2c_in, nblks_total=nblks)
1523 cpassert(nblks(1) == ri_data%ncell_RI*natom)
1524 cpassert(nblks(2) == ri_data%ncell_RI*natom)
1525
1526 nimg = ri_data%nimg
1527
1528 !Loop over the RI cells and atoms, and apply bump accordingly
1529 r1 = ri_data%kp_RI_range
1530 r0 = ri_data%kp_bump_rad
1531 rref = pbc(particle_set(atom_i)%r, cell)
1532
1533 iat_of_kind = atom_of_kind(atom_i)
1534 ikind = kind_of(atom_i)
1535
1536!$OMP PARALLEL DEFAULT(NONE) SHARED(t_2c_in,natom,ri_data,cell,particle_set,index_to_cell,pref, &
1537!$OMP force,r0,r1,rref,atom_of_kind,kind_of,iat_of_kind,ikind,work_virial) &
1538!$OMP PRIVATE(iter,ind,blk,found,i_RI,i_img,iatom,j_RI,j_img,jatom,scoord,ri,rj,jkind,jat_of_kind, &
1539!$OMP new_force,i_xyz,i,x,j_xyz)
1540 CALL dbt_iterator_start(iter, t_2c_in)
1541 DO WHILE (dbt_iterator_blocks_left(iter))
1542 CALL dbt_iterator_next_block(iter, ind)
1543 IF (ind(1) .NE. ind(2)) cycle !bump matrix is diagonal
1544
1545 CALL dbt_get_block(t_2c_in, ind, blk, found)
1546 IF (.NOT. found) cycle
1547
1548 !bump is a function of x = SQRT((R - Rref)^2). We refer to R as jatom, and Rref as atom_i
1549 j_ri = (ind(2) - 1)/natom + 1
1550 j_img = ri_data%RI_cell_to_img(j_ri)
1551 jatom = ind(2) - (j_ri - 1)*natom
1552 jat_of_kind = atom_of_kind(jatom)
1553 jkind = kind_of(jatom)
1554
1555 CALL real_to_scaled(scoord, pbc(particle_set(jatom)%r, cell), cell)
1556 CALL scaled_to_real(rj, scoord(:) + index_to_cell(:, j_img), cell)
1557 x = norm2(rj - rref)
1558 IF (x < r0 .OR. x > r1) cycle
1559
1560 new_force = 0.0_dp
1561 DO i = 1, SIZE(blk, 1)
1562 new_force = new_force + blk(i, i)
1563 END DO
1564 new_force = pref*new_force*dbump(x, r0, r1)
1565
1566 !x = SQRT((R - Rref)^2), so we multiply by dx/dR and dx/dRref
1567 DO i_xyz = 1, 3
1568 !Force acting on second atom
1569!$OMP ATOMIC
1570 force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) + &
1571 new_force*(rj(i_xyz) - rref(i_xyz))/x
1572
1573 !virial acting on second atom
1574 CALL real_to_scaled(scoord, rj, cell)
1575 DO j_xyz = 1, 3
1576!$OMP ATOMIC
1577 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) &
1578 + new_force*scoord(j_xyz)*(rj(i_xyz) - rref(i_xyz))/x
1579 END DO
1580
1581 !Force acting on reference atom, defining the RI basis
1582!$OMP ATOMIC
1583 force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) - &
1584 new_force*(rj(i_xyz) - rref(i_xyz))/x
1585
1586 !virial of ref atom
1587 CALL real_to_scaled(scoord, rref, cell)
1588 DO j_xyz = 1, 3
1589!$OMP ATOMIC
1590 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) &
1591 - new_force*scoord(j_xyz)*(rj(i_xyz) - rref(i_xyz))/x
1592 END DO
1593 END DO !i_xyz
1594
1595 DEALLOCATE (blk)
1596 END DO
1597 CALL dbt_iterator_stop(iter)
1598!$OMP END PARALLEL
1599
1600 END SUBROUTINE get_2c_bump_forces
1601
1602! **************************************************************************************************
1603!> \brief The bumb function as defined by Juerg
1604!> \param x ...
1605!> \param r0 ...
1606!> \param r1 ...
1607!> \return ...
1608! **************************************************************************************************
1609 FUNCTION bump(x, r0, r1) RESULT(b)
1610 REAL(dp), INTENT(IN) :: x, r0, r1
1611 REAL(dp) :: b
1612
1613 REAL(dp) :: r
1614
1615 !Head-Gordon
1616 !b = 1.0_dp/(1.0_dp+EXP((r1-r0)/(r1-x)-(r1-r0)/(x-r0)))
1617 !Juerg
1618 r = (x - r0)/(r1 - r0)
1619 b = -6.0_dp*r**5 + 15.0_dp*r**4 - 10.0_dp*r**3 + 1.0_dp
1620 IF (x .GE. r1) b = 0.0_dp
1621 IF (x .LE. r0) b = 1.0_dp
1622
1623 END FUNCTION bump
1624
1625! **************************************************************************************************
1626!> \brief The derivative of the bump function
1627!> \param x ...
1628!> \param r0 ...
1629!> \param r1 ...
1630!> \return ...
1631! **************************************************************************************************
1632 FUNCTION dbump(x, r0, r1) RESULT(b)
1633 REAL(dp), INTENT(IN) :: x, r0, r1
1634 REAL(dp) :: b
1635
1636 REAL(dp) :: r
1637
1638 r = (x - r0)/(r1 - r0)
1639 b = (-30.0_dp*r**4 + 60.0_dp*r**3 - 30.0_dp*r**2)/(r1 - r0)
1640 IF (x .GE. r1) b = 0.0_dp
1641 IF (x .LE. r0) b = 0.0_dp
1642
1643 END FUNCTION dbump
1644
1645! **************************************************************************************************
1646!> \brief return the cell index a+c corresponding to given cell index i and b, with i = a+c-b
1647!> \param i_index ...
1648!> \param b_index ...
1649!> \param qs_env ...
1650!> \return ...
1651! **************************************************************************************************
1652 FUNCTION get_apc_index_from_ib(i_index, b_index, qs_env) RESULT(apc_index)
1653 INTEGER, INTENT(IN) :: i_index, b_index
1654 TYPE(qs_environment_type), POINTER :: qs_env
1655 INTEGER :: apc_index
1656
1657 INTEGER, DIMENSION(3) :: cell_apc
1658 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1659 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1660 TYPE(kpoint_type), POINTER :: kpoints
1661
1662 CALL get_qs_env(qs_env, kpoints=kpoints)
1663 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1664
1665 !i = a+c-b => a+c = i+b
1666 cell_apc(:) = index_to_cell(:, i_index) + index_to_cell(:, b_index)
1667
1668 IF (any([cell_apc(1), cell_apc(2), cell_apc(3)] < lbound(cell_to_index)) .OR. &
1669 any([cell_apc(1), cell_apc(2), cell_apc(3)] > ubound(cell_to_index))) THEN
1670
1671 apc_index = 0
1672 ELSE
1673 apc_index = cell_to_index(cell_apc(1), cell_apc(2), cell_apc(3))
1674 END IF
1675
1676 END FUNCTION get_apc_index_from_ib
1677
1678! **************************************************************************************************
1679!> \brief return the cell index i corresponding to the summ of cell_a and cell_c
1680!> \param a_index ...
1681!> \param c_index ...
1682!> \param qs_env ...
1683!> \return ...
1684! **************************************************************************************************
1685 FUNCTION get_apc_index(a_index, c_index, qs_env) RESULT(i_index)
1686 INTEGER, INTENT(IN) :: a_index, c_index
1687 TYPE(qs_environment_type), POINTER :: qs_env
1688 INTEGER :: i_index
1689
1690 INTEGER, DIMENSION(3) :: cell_i
1691 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1692 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1693 TYPE(kpoint_type), POINTER :: kpoints
1694
1695 CALL get_qs_env(qs_env, kpoints=kpoints)
1696 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1697
1698 cell_i(:) = index_to_cell(:, a_index) + index_to_cell(:, c_index)
1699
1700 IF (any([cell_i(1), cell_i(2), cell_i(3)] < lbound(cell_to_index)) .OR. &
1701 any([cell_i(1), cell_i(2), cell_i(3)] > ubound(cell_to_index))) THEN
1702
1703 i_index = 0
1704 ELSE
1705 i_index = cell_to_index(cell_i(1), cell_i(2), cell_i(3))
1706 END IF
1707
1708 END FUNCTION get_apc_index
1709
1710! **************************************************************************************************
1711!> \brief return the cell index i corresponding to the summ of cell_a + cell_c - cell_b
1712!> \param apc_index ...
1713!> \param b_index ...
1714!> \param qs_env ...
1715!> \return ...
1716! **************************************************************************************************
1717 FUNCTION get_i_index(apc_index, b_index, qs_env) RESULT(i_index)
1718 INTEGER, INTENT(IN) :: apc_index, b_index
1719 TYPE(qs_environment_type), POINTER :: qs_env
1720 INTEGER :: i_index
1721
1722 INTEGER, DIMENSION(3) :: cell_i
1723 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1724 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1725 TYPE(kpoint_type), POINTER :: kpoints
1726
1727 CALL get_qs_env(qs_env, kpoints=kpoints)
1728 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1729
1730 cell_i(:) = index_to_cell(:, apc_index) - index_to_cell(:, b_index)
1731
1732 IF (any([cell_i(1), cell_i(2), cell_i(3)] < lbound(cell_to_index)) .OR. &
1733 any([cell_i(1), cell_i(2), cell_i(3)] > ubound(cell_to_index))) THEN
1734
1735 i_index = 0
1736 ELSE
1737 i_index = cell_to_index(cell_i(1), cell_i(2), cell_i(3))
1738 END IF
1739
1740 END FUNCTION get_i_index
1741
1742! **************************************************************************************************
1743!> \brief A routine that returns all allowed a,c pairs such that a+c images corresponds to the value
1744!> of the apc_index input. Takes into account that image a corresponds to 3c integrals, which
1745!> are ordered in their own way
1746!> \param ac_pairs ...
1747!> \param apc_index ...
1748!> \param ri_data ...
1749!> \param qs_env ...
1750! **************************************************************************************************
1751 SUBROUTINE get_ac_pairs(ac_pairs, apc_index, ri_data, qs_env)
1752 INTEGER, DIMENSION(:, :), INTENT(INOUT) :: ac_pairs
1753 INTEGER, INTENT(IN) :: apc_index
1754 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1755 TYPE(qs_environment_type), POINTER :: qs_env
1756
1757 INTEGER :: a_index, actual_img, c_index, nimg
1758
1759 nimg = SIZE(ac_pairs, 1)
1760
1761 ac_pairs(:, :) = 0
1762!$OMP PARALLEL DO DEFAULT(NONE) SHARED(ac_pairs,nimg,ri_data,qs_env,apc_index) &
1763!$OMP PRIVATE(a_index,actual_img,c_index)
1764 DO a_index = 1, nimg
1765 actual_img = ri_data%idx_to_img(a_index)
1766 !c = a+c - a
1767 c_index = get_i_index(apc_index, actual_img, qs_env)
1768 ac_pairs(a_index, 1) = a_index
1769 ac_pairs(a_index, 2) = c_index
1770 END DO
1771!$OMP END PARALLEL DO
1772
1773 END SUBROUTINE get_ac_pairs
1774
1775! **************************************************************************************************
1776!> \brief A routine that returns all allowed i,a+c pairs such that, for the given value of b, we have
1777!> i = a+c-b. Takes into account that image i corrsponds to the 3c ints, which are ordered in
1778!> their own way
1779!> \param iapc_pairs ...
1780!> \param b_index ...
1781!> \param ri_data ...
1782!> \param qs_env ...
1783!> \param actual_i_img ...
1784! **************************************************************************************************
1785 SUBROUTINE get_iapc_pairs(iapc_pairs, b_index, ri_data, qs_env, actual_i_img)
1786 INTEGER, DIMENSION(:, :), INTENT(INOUT) :: iapc_pairs
1787 INTEGER, INTENT(IN) :: b_index
1788 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1789 TYPE(qs_environment_type), POINTER :: qs_env
1790 INTEGER, DIMENSION(:), INTENT(INOUT), OPTIONAL :: actual_i_img
1791
1792 INTEGER :: actual_img, apc_index, i_index, nimg
1793
1794 nimg = SIZE(iapc_pairs, 1)
1795 IF (PRESENT(actual_i_img)) actual_i_img(:) = 0
1796
1797 iapc_pairs(:, :) = 0
1798!$OMP PARALLEL DO DEFAULT(NONE) SHARED(iapc_pairs,nimg,ri_data,qs_env,b_index,actual_i_img) &
1799!$OMP PRIVATE(i_index,actual_img,apc_index)
1800 DO i_index = 1, nimg
1801 actual_img = ri_data%idx_to_img(i_index)
1802 apc_index = get_apc_index_from_ib(actual_img, b_index, qs_env)
1803 IF (apc_index == 0) cycle
1804 iapc_pairs(i_index, 1) = i_index
1805 iapc_pairs(i_index, 2) = apc_index
1806 IF (PRESENT(actual_i_img)) actual_i_img(i_index) = actual_img
1807 END DO
1808
1809 END SUBROUTINE get_iapc_pairs
1810
1811! **************************************************************************************************
1812!> \brief A function that, given a cell index a, returun the index corresponding to -a, and zero if
1813!> if out of bounds
1814!> \param a_index ...
1815!> \param qs_env ...
1816!> \return ...
1817! **************************************************************************************************
1818 FUNCTION get_opp_index(a_index, qs_env) RESULT(opp_index)
1819 INTEGER, INTENT(IN) :: a_index
1820 TYPE(qs_environment_type), POINTER :: qs_env
1821 INTEGER :: opp_index
1822
1823 INTEGER, DIMENSION(3) :: opp_cell
1824 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1825 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1826 TYPE(kpoint_type), POINTER :: kpoints
1827
1828 NULLIFY (kpoints, cell_to_index, index_to_cell)
1829
1830 CALL get_qs_env(qs_env, kpoints=kpoints)
1831 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1832
1833 opp_cell(:) = -index_to_cell(:, a_index)
1834
1835 IF (any([opp_cell(1), opp_cell(2), opp_cell(3)] < lbound(cell_to_index)) .OR. &
1836 any([opp_cell(1), opp_cell(2), opp_cell(3)] > ubound(cell_to_index))) THEN
1837
1838 opp_index = 0
1839 ELSE
1840 opp_index = cell_to_index(opp_cell(1), opp_cell(2), opp_cell(3))
1841 END IF
1842
1843 END FUNCTION get_opp_index
1844
1845! **************************************************************************************************
1846!> \brief A routine that returns the actual non-symemtric density matrix for each image, by Fourier
1847!> transforming the kpoint density matrix
1848!> \param rho_ao_t ...
1849!> \param rho_ao ...
1850!> \param scale_prev_p ...
1851!> \param ri_data ...
1852!> \param qs_env ...
1853! **************************************************************************************************
1854 SUBROUTINE get_pmat_images(rho_ao_t, rho_ao, scale_prev_p, ri_data, qs_env)
1855 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: rho_ao_t
1856 TYPE(dbcsr_p_type), DIMENSION(:, :), INTENT(INOUT) :: rho_ao
1857 REAL(dp), INTENT(IN) :: scale_prev_p
1858 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1859 TYPE(qs_environment_type), POINTER :: qs_env
1860
1861 INTEGER :: cell_j(3), i_img, i_spin, iatom, icol, &
1862 irow, j_img, jatom, mi_img, mj_img, &
1863 nimg, nspins
1864 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1865 LOGICAL :: found
1866 REAL(dp) :: fac
1867 REAL(dp), DIMENSION(:, :), POINTER :: pblock, pblock_desymm
1868 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: matrix_ks, rho_desymm
1869 TYPE(dbt_type) :: tmp
1870 TYPE(dft_control_type), POINTER :: dft_control
1871 TYPE(kpoint_type), POINTER :: kpoints
1873 DIMENSION(:), POINTER :: nl_iterator
1874 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
1875 POINTER :: sab_nl, sab_nl_nosym
1876 TYPE(qs_scf_env_type), POINTER :: scf_env
1877
1878 NULLIFY (rho_desymm, kpoints, sab_nl_nosym, scf_env, matrix_ks, dft_control, &
1879 sab_nl, nl_iterator, cell_to_index, pblock, pblock_desymm)
1880
1881 CALL get_qs_env(qs_env, kpoints=kpoints, scf_env=scf_env, matrix_ks_kp=matrix_ks, dft_control=dft_control)
1882 CALL get_kpoint_info(kpoints, sab_nl_nosym=sab_nl_nosym, cell_to_index=cell_to_index, sab_nl=sab_nl)
1883
1884 IF (dft_control%do_admm) THEN
1885 CALL get_admm_env(qs_env%admm_env, matrix_ks_aux_fit_kp=matrix_ks)
1886 END IF
1887
1888 nspins = SIZE(matrix_ks, 1)
1889 nimg = ri_data%nimg
1890
1891 ALLOCATE (rho_desymm(nspins, nimg))
1892 DO i_img = 1, nimg
1893 DO i_spin = 1, nspins
1894 ALLOCATE (rho_desymm(i_spin, i_img)%matrix)
1895 CALL dbcsr_create(rho_desymm(i_spin, i_img)%matrix, template=matrix_ks(i_spin, i_img)%matrix, &
1896 matrix_type=dbcsr_type_no_symmetry)
1897 CALL cp_dbcsr_alloc_block_from_nbl(rho_desymm(i_spin, i_img)%matrix, sab_nl_nosym)
1898 END DO
1899 END DO
1900 CALL dbt_create(rho_desymm(1, 1)%matrix, tmp)
1901
1902 !We transfor the symmtric typed (but not actually symmetric: P_ab^i = P_ba^-i) real-spaced density
1903 !matrix into proper non-symemtric ones (using the same nl for consistency)
1904 CALL neighbor_list_iterator_create(nl_iterator, sab_nl)
1905 DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
1906 CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, cell=cell_j)
1907 j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
1908 IF (j_img > nimg .OR. j_img < 1) cycle
1909
1910 fac = 1.0_dp
1911 IF (iatom == jatom) fac = 0.5_dp
1912 mj_img = get_opp_index(j_img, qs_env)
1913 !if no opposite image, then no sum of P^j + P^-j => need full diag
1914 IF (mj_img == 0) fac = 1.0_dp
1915
1916 irow = iatom
1917 icol = jatom
1918 IF (iatom > jatom) THEN
1919 !because symmetric nl. Value for atom pair i,j is actually stored in j,i if i > j
1920 irow = jatom
1921 icol = iatom
1922 END IF
1923
1924 DO i_spin = 1, nspins
1925 CALL dbcsr_get_block_p(rho_ao(i_spin, j_img)%matrix, irow, icol, pblock, found)
1926 IF (.NOT. found) cycle
1927
1928 !distribution of symm and non-symm matrix match in that way
1929 CALL dbcsr_get_block_p(rho_desymm(i_spin, j_img)%matrix, iatom, jatom, pblock_desymm, found)
1930 IF (.NOT. found) cycle
1931
1932 IF (iatom > jatom) THEN
1933 pblock_desymm(:, :) = fac*transpose(pblock(:, :))
1934 ELSE
1935 pblock_desymm(:, :) = fac*pblock(:, :)
1936 END IF
1937 END DO
1938 END DO
1939 CALL neighbor_list_iterator_release(nl_iterator)
1940
1941 DO i_img = 1, nimg
1942 DO i_spin = 1, nspins
1943 CALL dbt_scale(rho_ao_t(i_spin, i_img), scale_prev_p)
1944
1945 CALL dbt_copy_matrix_to_tensor(rho_desymm(i_spin, i_img)%matrix, tmp)
1946 CALL dbt_copy(tmp, rho_ao_t(i_spin, i_img), summation=.true., move_data=.true.)
1947
1948 !symmetrize by addin transpose of opp img
1949 mi_img = get_opp_index(i_img, qs_env)
1950 IF (mi_img > 0 .AND. mi_img .LE. nimg) THEN
1951 CALL dbt_copy_matrix_to_tensor(rho_desymm(i_spin, mi_img)%matrix, tmp)
1952 CALL dbt_copy(tmp, rho_ao_t(i_spin, i_img), order=[2, 1], summation=.true., move_data=.true.)
1953 END IF
1954 CALL dbt_filter(rho_ao_t(i_spin, i_img), ri_data%filter_eps)
1955 END DO
1956 END DO
1957
1958 DO i_img = 1, nimg
1959 DO i_spin = 1, nspins
1960 CALL dbcsr_release(rho_desymm(i_spin, i_img)%matrix)
1961 DEALLOCATE (rho_desymm(i_spin, i_img)%matrix)
1962 END DO
1963 END DO
1964
1965 CALL dbt_destroy(tmp)
1966 DEALLOCATE (rho_desymm)
1967
1968 END SUBROUTINE get_pmat_images
1969
1970! **************************************************************************************************
1971!> \brief A routine that, given a cell index b and atom indices ij, returns a 2c tensor with the HFX
1972!> potential (P_i^0|Q_j^b), within the extended RI basis
1973!> \param t_2c_pot ...
1974!> \param mat_orig ...
1975!> \param atom_i ...
1976!> \param atom_j ...
1977!> \param img_b ...
1978!> \param ri_data ...
1979!> \param qs_env ...
1980!> \param do_inverse ...
1981!> \param para_env_ext ...
1982!> \param blacs_env_ext ...
1983!> \param dbcsr_template ...
1984!> \param off_diagonal ...
1985!> \param skip_inverse ...
1986! **************************************************************************************************
1987 SUBROUTINE get_ext_2c_int(t_2c_pot, mat_orig, atom_i, atom_j, img_b, ri_data, qs_env, do_inverse, &
1988 para_env_ext, blacs_env_ext, dbcsr_template, off_diagonal, skip_inverse)
1989 TYPE(dbt_type), INTENT(INOUT) :: t_2c_pot
1990 TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: mat_orig
1991 INTEGER, INTENT(IN) :: atom_i, atom_j, img_b
1992 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1993 TYPE(qs_environment_type), POINTER :: qs_env
1994 LOGICAL, INTENT(IN), OPTIONAL :: do_inverse
1995 TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env_ext
1996 TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: blacs_env_ext
1997 TYPE(dbcsr_type), OPTIONAL, POINTER :: dbcsr_template
1998 LOGICAL, INTENT(IN), OPTIONAL :: off_diagonal, skip_inverse
1999
2000 CHARACTER(LEN=*), PARAMETER :: routinen = 'get_ext_2c_int'
2001
2002 INTEGER :: blk, group, handle, handle2, i_img, i_ri, iatom, iblk, ikind, img_tot, j_img, &
2003 j_ri, jatom, jblk, jkind, n_dependent, natom, nblks_ri, nimg, nkind
2004 INTEGER, ALLOCATABLE, DIMENSION(:) :: dist1, dist2
2005 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: present_atoms_i, present_atoms_j
2006 INTEGER, DIMENSION(3) :: cell_b, cell_i, cell_j, cell_tot
2007 INTEGER, DIMENSION(:), POINTER :: col_dist, col_dist_ext, ri_blk_size_ext, &
2008 row_dist, row_dist_ext
2009 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell, pgrid
2010 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
2011 LOGICAL :: do_inverse_prv, found, my_offd, &
2012 skip_inverse_prv, use_template
2013 REAL(dp) :: bfac, dij, r0, r1, threshold
2014 REAL(dp), DIMENSION(3) :: ri, rij, rj, rref, scoord
2015 REAL(dp), DIMENSION(:, :), POINTER :: pblock
2016 TYPE(cell_type), POINTER :: cell
2017 TYPE(cp_blacs_env_type), POINTER :: blacs_env
2018 TYPE(dbcsr_distribution_type) :: dbcsr_dist, dbcsr_dist_ext
2019 TYPE(dbcsr_iterator_type) :: dbcsr_iter
2020 TYPE(dbcsr_type) :: work, work_tight, work_tight_inv
2021 TYPE(dbt_type) :: t_2c_tmp
2022 TYPE(distribution_2d_type), POINTER :: dist_2d
2023 TYPE(gto_basis_set_p_type), ALLOCATABLE, &
2024 DIMENSION(:), TARGET :: basis_set_ri
2025 TYPE(kpoint_type), POINTER :: kpoints
2026 TYPE(mp_para_env_type), POINTER :: para_env
2028 DIMENSION(:), POINTER :: nl_iterator
2029 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
2030 POINTER :: nl_2c
2031 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
2032 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
2033
2034 NULLIFY (qs_kind_set, nl_2c, nl_iterator, cell, kpoints, cell_to_index, index_to_cell, dist_2d, &
2035 para_env, pblock, blacs_env, particle_set, col_dist, row_dist, pgrid, &
2036 col_dist_ext, row_dist_ext)
2037
2038 CALL timeset(routinen, handle)
2039
2040 !Idea: run over the neighbor list once for i and once for j, and record in which cell the MIC
2041 ! atoms are. Then loop over the atoms and only take the pairs the we need
2042
2043 CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, cell=cell, &
2044 kpoints=kpoints, para_env=para_env, blacs_env=blacs_env, particle_set=particle_set)
2045 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
2046
2047 do_inverse_prv = .false.
2048 IF (PRESENT(do_inverse)) do_inverse_prv = do_inverse
2049 IF (do_inverse_prv) THEN
2050 cpassert(atom_i == atom_j)
2051 END IF
2052
2053 skip_inverse_prv = .false.
2054 IF (PRESENT(skip_inverse)) skip_inverse_prv = skip_inverse
2055
2056 my_offd = .false.
2057 IF (PRESENT(off_diagonal)) my_offd = off_diagonal
2058
2059 IF (PRESENT(para_env_ext)) para_env => para_env_ext
2060 IF (PRESENT(blacs_env_ext)) blacs_env => blacs_env_ext
2061
2062 nimg = SIZE(mat_orig)
2063
2064 CALL timeset(routinen//"_nl_iter", handle2)
2065
2066 !create our own dist_2d in the subgroup
2067 ALLOCATE (dist1(natom), dist2(natom))
2068 DO iatom = 1, natom
2069 dist1(iatom) = mod(iatom, blacs_env%num_pe(1))
2070 dist2(iatom) = mod(iatom, blacs_env%num_pe(2))
2071 END DO
2072 CALL distribution_2d_create(dist_2d, dist1, dist2, nkind, particle_set, blacs_env_ext=blacs_env)
2073
2074 ALLOCATE (basis_set_ri(nkind))
2075 CALL basis_set_list_setup(basis_set_ri, ri_data%ri_basis_type, qs_kind_set)
2076
2077 CALL build_2c_neighbor_lists(nl_2c, basis_set_ri, basis_set_ri, ri_data%ri_metric, &
2078 "HFX_2c_nl_RI", qs_env, sym_ij=.false., dist_2d=dist_2d)
2079
2080 ALLOCATE (present_atoms_i(natom, nimg), present_atoms_j(natom, nimg))
2081 present_atoms_i = 0
2082 present_atoms_j = 0
2083
2084 CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
2085 DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
2086 CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, r=rij, cell=cell_j, &
2087 ikind=ikind, jkind=jkind)
2088
2089 dij = norm2(rij)
2090
2091 j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
2092 IF (j_img > nimg .OR. j_img < 1) cycle
2093
2094 IF (iatom == atom_i .AND. dij .LE. ri_data%kp_RI_range) present_atoms_i(jatom, j_img) = 1
2095 IF (iatom == atom_j .AND. dij .LE. ri_data%kp_RI_range) present_atoms_j(jatom, j_img) = 1
2096 END DO
2097 CALL neighbor_list_iterator_release(nl_iterator)
2098 CALL release_neighbor_list_sets(nl_2c)
2099 CALL distribution_2d_release(dist_2d)
2100 CALL timestop(handle2)
2101
2102 CALL para_env%sum(present_atoms_i)
2103 CALL para_env%sum(present_atoms_j)
2104
2105 !Need to build a work matrix with matching distribution to mat_orig
2106 !If template is provided, use it. If not, we create it.
2107 use_template = .false.
2108 IF (PRESENT(dbcsr_template)) THEN
2109 IF (ASSOCIATED(dbcsr_template)) use_template = .true.
2110 END IF
2111
2112 IF (use_template) THEN
2113 CALL dbcsr_create(work, template=dbcsr_template)
2114 ELSE
2115 CALL dbcsr_get_info(mat_orig(1), distribution=dbcsr_dist)
2116 CALL dbcsr_distribution_get(dbcsr_dist, row_dist=row_dist, col_dist=col_dist, group=group, pgrid=pgrid)
2117 ALLOCATE (row_dist_ext(ri_data%ncell_RI*natom), col_dist_ext(ri_data%ncell_RI*natom))
2118 ALLOCATE (ri_blk_size_ext(ri_data%ncell_RI*natom))
2119 DO i_ri = 1, ri_data%ncell_RI
2120 row_dist_ext((i_ri - 1)*natom + 1:i_ri*natom) = row_dist(:)
2121 col_dist_ext((i_ri - 1)*natom + 1:i_ri*natom) = col_dist(:)
2122 ri_blk_size_ext((i_ri - 1)*natom + 1:i_ri*natom) = ri_data%bsizes_RI(:)
2123 END DO
2124
2125 CALL dbcsr_distribution_new(dbcsr_dist_ext, group=group, pgrid=pgrid, &
2126 row_dist=row_dist_ext, col_dist=col_dist_ext)
2127 CALL dbcsr_create(work, dist=dbcsr_dist_ext, name="RI_ext", matrix_type=dbcsr_type_no_symmetry, &
2128 row_blk_size=ri_blk_size_ext, col_blk_size=ri_blk_size_ext)
2129 CALL dbcsr_distribution_release(dbcsr_dist_ext)
2130 DEALLOCATE (col_dist_ext, row_dist_ext, ri_blk_size_ext)
2131
2132 IF (PRESENT(dbcsr_template)) THEN
2133 ALLOCATE (dbcsr_template)
2134 CALL dbcsr_create(dbcsr_template, template=work)
2135 END IF
2136 END IF !use_template
2137
2138 cell_b(:) = index_to_cell(:, img_b)
2139 DO i_img = 1, nimg
2140 i_ri = ri_data%img_to_RI_cell(i_img)
2141 IF (i_ri == 0) cycle
2142 cell_i(:) = index_to_cell(:, i_img)
2143 DO j_img = 1, nimg
2144 j_ri = ri_data%img_to_RI_cell(j_img)
2145 IF (j_ri == 0) cycle
2146 cell_j(:) = index_to_cell(:, j_img)
2147 cell_tot = cell_j - cell_i + cell_b
2148
2149 IF (any([cell_tot(1), cell_tot(2), cell_tot(3)] < lbound(cell_to_index)) .OR. &
2150 any([cell_tot(1), cell_tot(2), cell_tot(3)] > ubound(cell_to_index))) cycle
2151 img_tot = cell_to_index(cell_tot(1), cell_tot(2), cell_tot(3))
2152 IF (img_tot > nimg .OR. img_tot < 1) cycle
2153
2154 CALL dbcsr_iterator_start(dbcsr_iter, mat_orig(img_tot))
2155 DO WHILE (dbcsr_iterator_blocks_left(dbcsr_iter))
2156 CALL dbcsr_iterator_next_block(dbcsr_iter, row=iatom, column=jatom, blk=blk)
2157 IF (present_atoms_i(iatom, i_img) == 0) cycle
2158 IF (present_atoms_j(jatom, j_img) == 0) cycle
2159 IF (my_offd .AND. (i_ri - 1)*natom + iatom == (j_ri - 1)*natom + jatom) cycle
2160
2161 CALL dbcsr_get_block_p(mat_orig(img_tot), iatom, jatom, pblock, found)
2162 IF (.NOT. found) cycle
2163
2164 CALL dbcsr_put_block(work, (i_ri - 1)*natom + iatom, (j_ri - 1)*natom + jatom, pblock)
2165
2166 END DO
2167 CALL dbcsr_iterator_stop(dbcsr_iter)
2168
2169 END DO !j_img
2170 END DO !i_img
2171 CALL dbcsr_finalize(work)
2172
2173 IF (do_inverse_prv) THEN
2174
2175 r1 = ri_data%kp_RI_range
2176 r0 = ri_data%kp_bump_rad
2177
2178 !Because there are a lot of empty rows/cols in work, we need to get rid of them for inversion
2179 nblks_ri = sum(present_atoms_i)
2180 ALLOCATE (col_dist_ext(nblks_ri), row_dist_ext(nblks_ri), ri_blk_size_ext(nblks_ri))
2181 iblk = 0
2182 DO i_img = 1, nimg
2183 i_ri = ri_data%img_to_RI_cell(i_img)
2184 IF (i_ri == 0) cycle
2185 DO iatom = 1, natom
2186 IF (present_atoms_i(iatom, i_img) == 0) cycle
2187 iblk = iblk + 1
2188 col_dist_ext(iblk) = col_dist(iatom)
2189 row_dist_ext(iblk) = row_dist(iatom)
2190 ri_blk_size_ext(iblk) = ri_data%bsizes_RI(iatom)
2191 END DO
2192 END DO
2193
2194 CALL dbcsr_distribution_new(dbcsr_dist_ext, group=group, pgrid=pgrid, &
2195 row_dist=row_dist_ext, col_dist=col_dist_ext)
2196 CALL dbcsr_create(work_tight, dist=dbcsr_dist_ext, name="RI_ext", matrix_type=dbcsr_type_no_symmetry, &
2197 row_blk_size=ri_blk_size_ext, col_blk_size=ri_blk_size_ext)
2198 CALL dbcsr_create(work_tight_inv, dist=dbcsr_dist_ext, name="RI_ext", matrix_type=dbcsr_type_no_symmetry, &
2199 row_blk_size=ri_blk_size_ext, col_blk_size=ri_blk_size_ext)
2200 CALL dbcsr_distribution_release(dbcsr_dist_ext)
2201 DEALLOCATE (col_dist_ext, row_dist_ext, ri_blk_size_ext)
2202
2203 !We apply a bump function to the RI metric inverse for smooth RI basis extension:
2204 ! S^-1 = B * ((P|Q)_D + B*(P|Q)_OD*B)^-1 * B, with D block-diagonal blocks and OD off-diagonal
2205 rref = pbc(particle_set(atom_i)%r, cell)
2206
2207 iblk = 0
2208 DO i_img = 1, nimg
2209 i_ri = ri_data%img_to_RI_cell(i_img)
2210 IF (i_ri == 0) cycle
2211 DO iatom = 1, natom
2212 IF (present_atoms_i(iatom, i_img) == 0) cycle
2213 iblk = iblk + 1
2214
2215 CALL real_to_scaled(scoord, pbc(particle_set(iatom)%r, cell), cell)
2216 CALL scaled_to_real(ri, scoord(:) + index_to_cell(:, i_img), cell)
2217
2218 jblk = 0
2219 DO j_img = 1, nimg
2220 j_ri = ri_data%img_to_RI_cell(j_img)
2221 IF (j_ri == 0) cycle
2222 DO jatom = 1, natom
2223 IF (present_atoms_j(jatom, j_img) == 0) cycle
2224 jblk = jblk + 1
2225
2226 CALL real_to_scaled(scoord, pbc(particle_set(jatom)%r, cell), cell)
2227 CALL scaled_to_real(rj, scoord(:) + index_to_cell(:, j_img), cell)
2228
2229 CALL dbcsr_get_block_p(work, (i_ri - 1)*natom + iatom, (j_ri - 1)*natom + jatom, pblock, found)
2230 IF (.NOT. found) cycle
2231
2232 bfac = 1.0_dp
2233 IF (iblk .NE. jblk) bfac = bump(norm2(ri - rref), r0, r1)*bump(norm2(rj - rref), r0, r1)
2234 CALL dbcsr_put_block(work_tight, iblk, jblk, bfac*pblock(:, :))
2235 END DO
2236 END DO
2237 END DO
2238 END DO
2239 CALL dbcsr_finalize(work_tight)
2240 CALL dbcsr_clear(work)
2241
2242 IF (.NOT. skip_inverse_prv) THEN
2243 SELECT CASE (ri_data%t2c_method)
2244 CASE (hfx_ri_do_2c_iter)
2245 threshold = max(ri_data%filter_eps, 1.0e-12_dp)
2246 CALL invert_hotelling(work_tight_inv, work_tight, threshold=threshold, silent=.false.)
2248 CALL dbcsr_copy(work_tight_inv, work_tight)
2249 CALL cp_dbcsr_cholesky_decompose(work_tight_inv, para_env=para_env, blacs_env=blacs_env)
2250 CALL cp_dbcsr_cholesky_invert(work_tight_inv, para_env=para_env, blacs_env=blacs_env, &
2251 upper_to_full=.true.)
2252 CASE (hfx_ri_do_2c_diag)
2253 CALL dbcsr_copy(work_tight_inv, work_tight)
2254 CALL cp_dbcsr_power(work_tight_inv, -1.0_dp, ri_data%eps_eigval, n_dependent, &
2255 para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
2256 END SELECT
2257 ELSE
2258 CALL dbcsr_copy(work_tight_inv, work_tight)
2259 END IF
2260
2261 !move back data to standard extended RI pattern
2262 !Note: we apply the external bump to ((P|Q)_D + B*(P|Q)_OD*B)^-1 later, because this matrix
2263 ! is required for forces
2264 iblk = 0
2265 DO i_img = 1, nimg
2266 i_ri = ri_data%img_to_RI_cell(i_img)
2267 IF (i_ri == 0) cycle
2268 DO iatom = 1, natom
2269 IF (present_atoms_i(iatom, i_img) == 0) cycle
2270 iblk = iblk + 1
2271
2272 jblk = 0
2273 DO j_img = 1, nimg
2274 j_ri = ri_data%img_to_RI_cell(j_img)
2275 IF (j_ri == 0) cycle
2276 DO jatom = 1, natom
2277 IF (present_atoms_j(jatom, j_img) == 0) cycle
2278 jblk = jblk + 1
2279
2280 CALL dbcsr_get_block_p(work_tight_inv, iblk, jblk, pblock, found)
2281 IF (.NOT. found) cycle
2282
2283 CALL dbcsr_put_block(work, (i_ri - 1)*natom + iatom, (j_ri - 1)*natom + jatom, pblock)
2284 END DO
2285 END DO
2286 END DO
2287 END DO
2288 CALL dbcsr_finalize(work)
2289
2290 CALL dbcsr_release(work_tight)
2291 CALL dbcsr_release(work_tight_inv)
2292 END IF
2293
2294 CALL dbt_create(work, t_2c_tmp)
2295 CALL dbt_copy_matrix_to_tensor(work, t_2c_tmp)
2296 CALL dbt_copy(t_2c_tmp, t_2c_pot, move_data=.true.)
2297 CALL dbt_filter(t_2c_pot, ri_data%filter_eps)
2298
2299 CALL dbt_destroy(t_2c_tmp)
2300 CALL dbcsr_release(work)
2301
2302 CALL timestop(handle)
2303
2304 END SUBROUTINE get_ext_2c_int
2305
2306! **************************************************************************************************
2307!> \brief Pre-contract the density matrices with the 3-center integrals:
2308!> P_sigma^a,lambda^a+c (mu^0 sigma^a| P^0)
2309!> \param t_3c_apc ...
2310!> \param rho_ao_t ...
2311!> \param ri_data ...
2312!> \param qs_env ...
2313! **************************************************************************************************
2314 SUBROUTINE contract_pmat_3c(t_3c_apc, rho_ao_t, ri_data, qs_env)
2315 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_apc, rho_ao_t
2316 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2317 TYPE(qs_environment_type), POINTER :: qs_env
2318
2319 CHARACTER(len=*), PARAMETER :: routinen = 'contract_pmat_3c'
2320
2321 INTEGER :: apc_img, batch_size, handle, i_batch, &
2322 i_img, i_spin, j_batch, n_batch_img, &
2323 n_batch_nze, nimg, nimg_nze, nspins
2324 INTEGER(int_8) :: nflop, nze
2325 INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_img, batch_ranges_nze, &
2326 int_indices
2327 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ac_pairs
2328 REAL(dp) :: occ, t1, t2
2329 TYPE(dbt_type) :: t_3c_tmp
2330 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: ints_stack, res_stack, rho_stack
2331 TYPE(dft_control_type), POINTER :: dft_control
2332
2333 CALL timeset(routinen, handle)
2334
2335 CALL get_qs_env(qs_env, dft_control=dft_control)
2336
2337 nimg = ri_data%nimg
2338 nimg_nze = ri_data%nimg_nze
2339 nspins = dft_control%nspins
2340
2341 CALL dbt_create(t_3c_apc(1, 1), t_3c_tmp)
2342
2343 batch_size = nimg/ri_data%n_mem
2344
2345 !batching over all images
2346 n_batch_img = nimg/batch_size
2347 IF (modulo(nimg, batch_size) .NE. 0) n_batch_img = n_batch_img + 1
2348 ALLOCATE (batch_ranges_img(n_batch_img + 1))
2349 DO i_batch = 1, n_batch_img
2350 batch_ranges_img(i_batch) = (i_batch - 1)*batch_size + 1
2351 END DO
2352 batch_ranges_img(n_batch_img + 1) = nimg + 1
2353
2354 !batching over images with non-zero 3c integrals
2355 n_batch_nze = nimg_nze/batch_size
2356 IF (modulo(nimg_nze, batch_size) .NE. 0) n_batch_nze = n_batch_nze + 1
2357 ALLOCATE (batch_ranges_nze(n_batch_nze + 1))
2358 DO i_batch = 1, n_batch_nze
2359 batch_ranges_nze(i_batch) = (i_batch - 1)*batch_size + 1
2360 END DO
2361 batch_ranges_nze(n_batch_nze + 1) = nimg_nze + 1
2362
2363 !Create the stack tensors in the approriate distribution
2364 ALLOCATE (rho_stack(2), ints_stack(2), res_stack(2))
2365 CALL get_stack_tensors(res_stack, rho_stack, ints_stack, rho_ao_t(1, 1), &
2366 ri_data%t_3c_int_ctr_1(1, 1), batch_size, ri_data, qs_env)
2367
2368 ALLOCATE (ac_pairs(nimg, 2), int_indices(nimg_nze))
2369 DO i_img = 1, nimg_nze
2370 int_indices(i_img) = i_img
2371 END DO
2372
2373 t1 = m_walltime()
2374 DO j_batch = 1, n_batch_nze
2375 !First batch is over the integrals. They are always in the same order, consistent with get_ac_pairs
2376 CALL fill_3c_stack(ints_stack(1), ri_data%t_3c_int_ctr_1(1, :), int_indices, 3, ri_data, &
2377 img_bounds=[batch_ranges_nze(j_batch), batch_ranges_nze(j_batch + 1)])
2378 CALL dbt_copy(ints_stack(1), ints_stack(2), move_data=.true.)
2379
2380 DO i_spin = 1, nspins
2381 DO i_batch = 1, n_batch_img
2382 !Second batch is over the P matrix. Here we fill the stacked rho tensors col by col
2383 DO apc_img = batch_ranges_img(i_batch), batch_ranges_img(i_batch + 1) - 1
2384 CALL get_ac_pairs(ac_pairs, apc_img, ri_data, qs_env)
2385 CALL fill_2c_stack(rho_stack(1), rho_ao_t(i_spin, :), ac_pairs(:, 2), 1, ri_data, &
2386 img_bounds=[batch_ranges_nze(j_batch), batch_ranges_nze(j_batch + 1)], &
2387 shift=apc_img - batch_ranges_img(i_batch) + 1)
2388
2389 END DO !apc_img
2390 CALL get_tensor_occupancy(rho_stack(1), nze, occ)
2391 IF (nze == 0) cycle
2392 CALL dbt_copy(rho_stack(1), rho_stack(2), move_data=.true.)
2393
2394 !The actual contraction
2395 CALL dbt_batched_contract_init(rho_stack(2))
2396 CALL dbt_contract(1.0_dp, ints_stack(2), rho_stack(2), &
2397 0.0_dp, res_stack(2), map_1=[1, 2], map_2=[3], &
2398 contract_1=[3], notcontract_1=[1, 2], &
2399 contract_2=[1], notcontract_2=[2], &
2400 filter_eps=ri_data%filter_eps, flop=nflop)
2401 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2402 CALL dbt_batched_contract_finalize(rho_stack(2))
2403 CALL dbt_copy(res_stack(2), res_stack(1), move_data=.true.)
2404
2405 DO apc_img = batch_ranges_img(i_batch), batch_ranges_img(i_batch + 1) - 1
2406 !Destack the resulting tensor and put it in t_3c_apc with correct apc_img
2407 CALL unstack_t_3c_apc(t_3c_tmp, res_stack(1), apc_img - batch_ranges_img(i_batch) + 1)
2408 CALL dbt_copy(t_3c_tmp, t_3c_apc(i_spin, apc_img), summation=.true., move_data=.true.)
2409 END DO
2410
2411 END DO !i_batch
2412 END DO !i_spin
2413 END DO !j_batch
2414 DEALLOCATE (batch_ranges_img)
2415 DEALLOCATE (batch_ranges_nze)
2416 t2 = m_walltime()
2417 ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
2418
2419 CALL dbt_destroy(rho_stack(1))
2420 CALL dbt_destroy(rho_stack(2))
2421 CALL dbt_destroy(ints_stack(1))
2422 CALL dbt_destroy(ints_stack(2))
2423 CALL dbt_destroy(res_stack(1))
2424 CALL dbt_destroy(res_stack(2))
2425 CALL dbt_destroy(t_3c_tmp)
2426
2427 CALL timestop(handle)
2428
2429 END SUBROUTINE contract_pmat_3c
2430
2431! **************************************************************************************************
2432!> \brief Pre-contract 3-center integrals with the bumped invrse RI metric, for each atom
2433!> \param t_3c_int ...
2434!> \param ri_data ...
2435!> \param qs_env ...
2436! **************************************************************************************************
2437 SUBROUTINE precontract_3c_ints(t_3c_int, ri_data, qs_env)
2438 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_int
2439 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2440 TYPE(qs_environment_type), POINTER :: qs_env
2441
2442 CHARACTER(len=*), PARAMETER :: routinen = 'precontract_3c_ints'
2443
2444 INTEGER :: batch_size, handle, i_batch, i_img, &
2445 i_ri, iatom, is, n_batch, natom, &
2446 nblks, nblks_3c(3), nimg
2447 INTEGER(int_8) :: nflop
2448 INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges, bsizes_ri_ext, bsizes_ri_ext_split, &
2449 bsizes_stack, dist1, dist2, dist3, dist_stack3, idx_to_at_ao, int_indices
2450 TYPE(dbt_distribution_type) :: t_dist
2451 TYPE(dbt_type) :: t_2c_ri_tmp(2), t_3c_tmp(3)
2452
2453 CALL timeset(routinen, handle)
2454
2455 CALL get_qs_env(qs_env, natom=natom)
2456
2457 nimg = ri_data%nimg
2458 ALLOCATE (int_indices(nimg))
2459 DO i_img = 1, nimg
2460 int_indices(i_img) = i_img
2461 END DO
2462
2463 ALLOCATE (idx_to_at_ao(SIZE(ri_data%bsizes_AO_split)))
2464 CALL get_idx_to_atom(idx_to_at_ao, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
2465
2466 nblks = SIZE(ri_data%bsizes_RI_split)
2467 ALLOCATE (bsizes_ri_ext(ri_data%ncell_RI*natom))
2468 ALLOCATE (bsizes_ri_ext_split(ri_data%ncell_RI*nblks))
2469 DO i_ri = 1, ri_data%ncell_RI
2470 bsizes_ri_ext((i_ri - 1)*natom + 1:i_ri*natom) = ri_data%bsizes_RI(:)
2471 bsizes_ri_ext_split((i_ri - 1)*nblks + 1:i_ri*nblks) = ri_data%bsizes_RI_split(:)
2472 END DO
2473 CALL create_2c_tensor(t_2c_ri_tmp(1), dist1, dist2, ri_data%pgrid_2d, &
2474 bsizes_ri_ext, bsizes_ri_ext, &
2475 name="(RI | RI)")
2476 DEALLOCATE (dist1, dist2)
2477 CALL create_2c_tensor(t_2c_ri_tmp(2), dist1, dist2, ri_data%pgrid_2d, &
2478 bsizes_ri_ext_split, bsizes_ri_ext_split, &
2479 name="(RI | RI)")
2480 DEALLOCATE (dist1, dist2)
2481
2482 !For more efficiency, we stack multiple images of the 3-center integrals into a single tensor
2483 batch_size = nimg/ri_data%n_mem
2484 n_batch = nimg/batch_size
2485 IF (modulo(nimg, batch_size) .NE. 0) n_batch = n_batch + 1
2486 ALLOCATE (batch_ranges(n_batch + 1))
2487 DO i_batch = 1, n_batch
2488 batch_ranges(i_batch) = (i_batch - 1)*batch_size + 1
2489 END DO
2490 batch_ranges(n_batch + 1) = nimg + 1
2491
2492 nblks = SIZE(ri_data%bsizes_AO_split)
2493 ALLOCATE (bsizes_stack(batch_size*nblks))
2494 DO is = 1, batch_size
2495 bsizes_stack((is - 1)*nblks + 1:is*nblks) = ri_data%bsizes_AO_split(:)
2496 END DO
2497
2498 CALL dbt_get_info(t_3c_int(1, 1), nblks_total=nblks_3c)
2499 ALLOCATE (dist1(nblks_3c(1)), dist2(nblks_3c(2)), dist3(nblks_3c(3)), dist_stack3(batch_size*nblks_3c(3)))
2500 CALL dbt_get_info(t_3c_int(1, 1), proc_dist_1=dist1, proc_dist_2=dist2, proc_dist_3=dist3)
2501 DO is = 1, batch_size
2502 dist_stack3((is - 1)*nblks_3c(3) + 1:is*nblks_3c(3)) = dist3(:)
2503 END DO
2504
2505 CALL dbt_distribution_new(t_dist, ri_data%pgrid, dist1, dist2, dist_stack3)
2506 CALL dbt_create(t_3c_tmp(1), "ints_stack", t_dist, [1], [2, 3], bsizes_ri_ext_split, &
2507 ri_data%bsizes_AO_split, bsizes_stack)
2508 CALL dbt_distribution_destroy(t_dist)
2509 DEALLOCATE (dist1, dist2, dist3, dist_stack3)
2510
2511 CALL dbt_create(t_3c_tmp(1), t_3c_tmp(2))
2512 CALL dbt_create(t_3c_int(1, 1), t_3c_tmp(3))
2513
2514 DO iatom = 1, natom
2515 CALL dbt_copy(ri_data%t_2c_inv(1, iatom), t_2c_ri_tmp(1))
2516 CALL apply_bump(t_2c_ri_tmp(1), iatom, ri_data, qs_env, from_left=.true., from_right=.true.)
2517 CALL dbt_copy(t_2c_ri_tmp(1), t_2c_ri_tmp(2), move_data=.true.)
2518
2519 CALL dbt_batched_contract_init(t_2c_ri_tmp(2))
2520 DO i_batch = 1, n_batch
2521
2522 CALL fill_3c_stack(t_3c_tmp(1), t_3c_int(1, :), int_indices, 3, ri_data, &
2523 img_bounds=[batch_ranges(i_batch), batch_ranges(i_batch + 1)], &
2524 filter_at=iatom, filter_dim=2, idx_to_at=idx_to_at_ao)
2525
2526 CALL dbt_contract(1.0_dp, t_2c_ri_tmp(2), t_3c_tmp(1), &
2527 0.0_dp, t_3c_tmp(2), map_1=[1], map_2=[2, 3], &
2528 contract_1=[2], notcontract_1=[1], &
2529 contract_2=[1], notcontract_2=[2, 3], &
2530 filter_eps=ri_data%filter_eps, flop=nflop)
2531 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2532
2533 DO i_img = batch_ranges(i_batch), batch_ranges(i_batch + 1) - 1
2534 CALL unstack_t_3c_apc(t_3c_tmp(3), t_3c_tmp(2), i_img - batch_ranges(i_batch) + 1)
2535 CALL dbt_copy(t_3c_tmp(3), ri_data%t_3c_int_ctr_1(1, i_img), summation=.true., &
2536 order=[2, 1, 3], move_data=.true.)
2537 END DO
2538 CALL dbt_clear(t_3c_tmp(1))
2539 END DO
2540 CALL dbt_batched_contract_finalize(t_2c_ri_tmp(2))
2541
2542 END DO
2543 CALL dbt_destroy(t_2c_ri_tmp(1))
2544 CALL dbt_destroy(t_2c_ri_tmp(2))
2545 CALL dbt_destroy(t_3c_tmp(1))
2546 CALL dbt_destroy(t_3c_tmp(2))
2547 CALL dbt_destroy(t_3c_tmp(3))
2548
2549 DO i_img = 1, nimg
2550 CALL dbt_destroy(t_3c_int(1, i_img))
2551 END DO
2552
2553 CALL timestop(handle)
2554
2555 END SUBROUTINE precontract_3c_ints
2556
2557! **************************************************************************************************
2558!> \brief Copy the data of a 2D tensor living in the main MPI group to a sub-group, given the proc
2559!> mapping from one to the other (e.g. for a proc idx in the subgroup, we get the idx in the main)
2560!> \param t2c_sub ...
2561!> \param t2c_main ...
2562!> \param group_size ...
2563!> \param ngroups ...
2564!> \param para_env ...
2565! **************************************************************************************************
2566 SUBROUTINE copy_2c_to_subgroup(t2c_sub, t2c_main, group_size, ngroups, para_env)
2567 TYPE(dbt_type), INTENT(INOUT) :: t2c_sub, t2c_main
2568 INTEGER, INTENT(IN) :: group_size, ngroups
2569 TYPE(mp_para_env_type), POINTER :: para_env
2570
2571 INTEGER :: batch_size, i, i_batch, i_msg, iblk, &
2572 igroup, iproc, ir, is, jblk, n_batch, &
2573 nocc, tag
2574 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes1, bsizes2
2575 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: block_dest, block_source
2576 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: current_dest
2577 INTEGER, DIMENSION(2) :: ind, nblks
2578 LOGICAL :: found
2579 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
2580 TYPE(cp_2d_r_p_type), ALLOCATABLE, DIMENSION(:) :: recv_buff, send_buff
2581 TYPE(dbt_iterator_type) :: iter
2582 TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_req, send_req
2583
2584 !Stategy: we loop over the main tensor, and send all the data. Then we loop over the sub tensor
2585 ! and receive it. We do all of it with async MPI communication. The sub tensor needs
2586 ! to have blocks pre-reserved though
2587
2588 CALL dbt_get_info(t2c_main, nblks_total=nblks)
2589
2590 !Loop over the main tensor, count how many blocks are there, which ones, and on which proc
2591 ALLOCATE (block_source(nblks(1), nblks(2)))
2592 block_source = -1
2593 nocc = 0
2594!$OMP PARALLEL DEFAULT(NONE) SHARED(t2c_main,para_env,nocc,block_source) PRIVATE(iter,ind,blk,found)
2595 CALL dbt_iterator_start(iter, t2c_main)
2596 DO WHILE (dbt_iterator_blocks_left(iter))
2597 CALL dbt_iterator_next_block(iter, ind)
2598 CALL dbt_get_block(t2c_main, ind, blk, found)
2599 IF (.NOT. found) cycle
2600
2601 block_source(ind(1), ind(2)) = para_env%mepos
2602!$OMP ATOMIC
2603 nocc = nocc + 1
2604 DEALLOCATE (blk)
2605 END DO
2606 CALL dbt_iterator_stop(iter)
2607!$OMP END PARALLEL
2608
2609 CALL para_env%sum(nocc)
2610 CALL para_env%sum(block_source)
2611 block_source = block_source + para_env%num_pe - 1
2612 IF (nocc == 0) RETURN
2613
2614 !Loop over the sub tensor, get the block destination
2615 igroup = para_env%mepos/group_size
2616 ALLOCATE (block_dest(nblks(1), nblks(2)))
2617 block_dest = -1
2618 DO jblk = 1, nblks(2)
2619 DO iblk = 1, nblks(1)
2620 IF (block_source(iblk, jblk) == -1) cycle
2621
2622 CALL dbt_get_stored_coordinates(t2c_sub, [iblk, jblk], iproc)
2623 block_dest(iblk, jblk) = igroup*group_size + iproc !mapping of iproc in subgroup to main group idx
2624 END DO
2625 END DO
2626
2627 ALLOCATE (bsizes1(nblks(1)), bsizes2(nblks(2)))
2628 CALL dbt_get_info(t2c_main, blk_size_1=bsizes1, blk_size_2=bsizes2)
2629
2630 ALLOCATE (current_dest(nblks(1), nblks(2), 0:ngroups - 1))
2631 DO igroup = 0, ngroups - 1
2632 !for a given subgroup, need to make the destination available to everyone in the main group
2633 current_dest(:, :, igroup) = block_dest(:, :)
2634 CALL para_env%bcast(current_dest(:, :, igroup), source=igroup*group_size) !bcast from first proc in sub-group
2635 END DO
2636
2637 !We go by batches, which cannot be larger than the maximum MPI tag value
2638 batch_size = min(para_env%get_tag_ub(), 128000, nocc*ngroups)
2639 n_batch = (nocc*ngroups)/batch_size
2640 IF (modulo(nocc*ngroups, batch_size) .NE. 0) n_batch = n_batch + 1
2641
2642 DO i_batch = 1, n_batch
2643 !Loop over groups, blocks and send/receive
2644 ALLOCATE (send_buff(batch_size), recv_buff(batch_size))
2645 ALLOCATE (send_req(batch_size), recv_req(batch_size))
2646 ir = 0
2647 is = 0
2648 i_msg = 0
2649 DO jblk = 1, nblks(2)
2650 DO iblk = 1, nblks(1)
2651 DO igroup = 0, ngroups - 1
2652 IF (block_source(iblk, jblk) == -1) cycle
2653
2654 i_msg = i_msg + 1
2655 IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) cycle
2656
2657 !a unique tag per block, within this batch
2658 tag = i_msg - (i_batch - 1)*batch_size
2659
2660 found = .false.
2661 IF (para_env%mepos == block_source(iblk, jblk)) THEN
2662 CALL dbt_get_block(t2c_main, [iblk, jblk], blk, found)
2663 END IF
2664
2665 !If blocks live on same proc, simply copy. Else MPI send/recv
2666 IF (block_source(iblk, jblk) == current_dest(iblk, jblk, igroup)) THEN
2667 IF (found) CALL dbt_put_block(t2c_sub, [iblk, jblk], shape(blk), blk)
2668 ELSE
2669 IF (para_env%mepos == block_source(iblk, jblk) .AND. found) THEN
2670 ALLOCATE (send_buff(tag)%array(bsizes1(iblk), bsizes2(jblk)))
2671 send_buff(tag)%array(:, :) = blk(:, :)
2672 is = is + 1
2673 CALL para_env%isend(msgin=send_buff(tag)%array, dest=current_dest(iblk, jblk, igroup), &
2674 request=send_req(is), tag=tag)
2675 END IF
2676
2677 IF (para_env%mepos == current_dest(iblk, jblk, igroup)) THEN
2678 ALLOCATE (recv_buff(tag)%array(bsizes1(iblk), bsizes2(jblk)))
2679 ir = ir + 1
2680 CALL para_env%irecv(msgout=recv_buff(tag)%array, source=block_source(iblk, jblk), &
2681 request=recv_req(ir), tag=tag)
2682 END IF
2683 END IF
2684
2685 IF (found) DEALLOCATE (blk)
2686 END DO
2687 END DO
2688 END DO
2689
2690 CALL mp_waitall(send_req(1:is))
2691 CALL mp_waitall(recv_req(1:ir))
2692 !clean-up
2693 DO i = 1, batch_size
2694 IF (ASSOCIATED(send_buff(i)%array)) DEALLOCATE (send_buff(i)%array)
2695 END DO
2696
2697 !Finally copy the data from the buffer to the sub-tensor
2698 i_msg = 0
2699 DO jblk = 1, nblks(2)
2700 DO iblk = 1, nblks(1)
2701 DO igroup = 0, ngroups - 1
2702 IF (block_source(iblk, jblk) == -1) cycle
2703
2704 i_msg = i_msg + 1
2705 IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) cycle
2706
2707 !a unique tag per block, within this batch
2708 tag = i_msg - (i_batch - 1)*batch_size
2709
2710 IF (para_env%mepos == current_dest(iblk, jblk, igroup) .AND. &
2711 block_source(iblk, jblk) .NE. current_dest(iblk, jblk, igroup)) THEN
2712
2713 ALLOCATE (blk(bsizes1(iblk), bsizes2(jblk)))
2714 blk(:, :) = recv_buff(tag)%array(:, :)
2715 CALL dbt_put_block(t2c_sub, [iblk, jblk], shape(blk), blk)
2716 DEALLOCATE (blk)
2717 END IF
2718 END DO
2719 END DO
2720 END DO
2721
2722 !clean-up
2723 DO i = 1, batch_size
2724 IF (ASSOCIATED(recv_buff(i)%array)) DEALLOCATE (recv_buff(i)%array)
2725 END DO
2726 DEALLOCATE (send_buff, recv_buff, send_req, recv_req)
2727 END DO !i_batch
2728 CALL dbt_finalize(t2c_sub)
2729
2730 END SUBROUTINE copy_2c_to_subgroup
2731
2732! **************************************************************************************************
2733!> \brief Copy the data of a 3D tensor living in the main MPI group to a sub-group, given the proc
2734!> mapping from one to the other (e.g. for a proc idx in the subgroup, we get the idx in the main)
2735!> \param t3c_sub ...
2736!> \param t3c_main ...
2737!> \param group_size ...
2738!> \param ngroups ...
2739!> \param para_env ...
2740!> \param iatom_to_subgroup ...
2741!> \param dim_at ...
2742!> \param idx_to_at ...
2743! **************************************************************************************************
2744 SUBROUTINE copy_3c_to_subgroup(t3c_sub, t3c_main, group_size, ngroups, para_env, iatom_to_subgroup, &
2745 dim_at, idx_to_at)
2746 TYPE(dbt_type), INTENT(INOUT) :: t3c_sub, t3c_main
2747 INTEGER, INTENT(IN) :: group_size, ngroups
2748 TYPE(mp_para_env_type), POINTER :: para_env
2749 TYPE(cp_1d_logical_p_type), DIMENSION(:), &
2750 INTENT(INOUT), OPTIONAL :: iatom_to_subgroup
2751 INTEGER, INTENT(IN), OPTIONAL :: dim_at
2752 INTEGER, DIMENSION(:), OPTIONAL :: idx_to_at
2753
2754 INTEGER :: batch_size, i, i_batch, i_msg, iatom, &
2755 iblk, igroup, iproc, ir, is, jblk, &
2756 kblk, n_batch, nocc, tag
2757 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes1, bsizes2, bsizes3
2758 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: block_dest, block_source
2759 INTEGER, ALLOCATABLE, DIMENSION(:, :, :, :) :: current_dest
2760 INTEGER, DIMENSION(3) :: ind, nblks
2761 LOGICAL :: filter_at, found
2762 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
2763 TYPE(cp_3d_r_p_type), ALLOCATABLE, DIMENSION(:) :: recv_buff, send_buff
2764 TYPE(dbt_iterator_type) :: iter
2765 TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_req, send_req
2766
2767 !Stategy: we loop over the main tensor, and send all the data. Then we loop over the sub tensor
2768 ! and receive it. We do all of it with async MPI communication. The sub tensor needs
2769 ! to have blocks pre-reserved though
2770
2771 CALL dbt_get_info(t3c_main, nblks_total=nblks)
2772
2773 !in some cases, only copy a fraction of the 3c tensor to a given subgroup (corresponding to some atoms)
2774 filter_at = .false.
2775 IF (PRESENT(iatom_to_subgroup) .AND. PRESENT(dim_at) .AND. PRESENT(idx_to_at)) THEN
2776 filter_at = .true.
2777 cpassert(nblks(dim_at) == SIZE(idx_to_at))
2778 END IF
2779
2780 !Loop over the main tensor, count how many blocks are there, which ones, and on which proc
2781 ALLOCATE (block_source(nblks(1), nblks(2), nblks(3)))
2782 block_source = -1
2783 nocc = 0
2784!$OMP PARALLEL DEFAULT(NONE) SHARED(t3c_main,para_env,nocc,block_source) PRIVATE(iter,ind,blk,found)
2785 CALL dbt_iterator_start(iter, t3c_main)
2786 DO WHILE (dbt_iterator_blocks_left(iter))
2787 CALL dbt_iterator_next_block(iter, ind)
2788 CALL dbt_get_block(t3c_main, ind, blk, found)
2789 IF (.NOT. found) cycle
2790
2791 block_source(ind(1), ind(2), ind(3)) = para_env%mepos
2792!$OMP ATOMIC
2793 nocc = nocc + 1
2794 DEALLOCATE (blk)
2795 END DO
2796 CALL dbt_iterator_stop(iter)
2797!$OMP END PARALLEL
2798
2799 CALL para_env%sum(nocc)
2800 CALL para_env%sum(block_source)
2801 block_source = block_source + para_env%num_pe - 1
2802 IF (nocc == 0) RETURN
2803
2804 !Loop over the sub tensor, get the block destination
2805 igroup = para_env%mepos/group_size
2806 ALLOCATE (block_dest(nblks(1), nblks(2), nblks(3)))
2807 block_dest = -1
2808 DO kblk = 1, nblks(3)
2809 DO jblk = 1, nblks(2)
2810 DO iblk = 1, nblks(1)
2811 IF (block_source(iblk, jblk, kblk) == -1) cycle
2812
2813 CALL dbt_get_stored_coordinates(t3c_sub, [iblk, jblk, kblk], iproc)
2814 block_dest(iblk, jblk, kblk) = igroup*group_size + iproc !mapping of iproc in subgroup to main group idx
2815 END DO
2816 END DO
2817 END DO
2818
2819 ALLOCATE (bsizes1(nblks(1)), bsizes2(nblks(2)), bsizes3(nblks(3)))
2820 CALL dbt_get_info(t3c_main, blk_size_1=bsizes1, blk_size_2=bsizes2, blk_size_3=bsizes3)
2821
2822 ALLOCATE (current_dest(nblks(1), nblks(2), nblks(3), 0:ngroups - 1))
2823 DO igroup = 0, ngroups - 1
2824 !for a given subgroup, need to make the destination available to everyone in the main group
2825 current_dest(:, :, :, igroup) = block_dest(:, :, :)
2826 CALL para_env%bcast(current_dest(:, :, :, igroup), source=igroup*group_size) !bcast from first proc in subgroup
2827 END DO
2828
2829 !We go by batches, which cannot be larger than the maximum MPI tag value
2830 batch_size = min(para_env%get_tag_ub(), 128000, nocc*ngroups)
2831 n_batch = (nocc*ngroups)/batch_size
2832 IF (modulo(nocc*ngroups, batch_size) .NE. 0) n_batch = n_batch + 1
2833
2834 DO i_batch = 1, n_batch
2835 !Loop over groups, blocks and send/receive
2836 ALLOCATE (send_buff(batch_size), recv_buff(batch_size))
2837 ALLOCATE (send_req(batch_size), recv_req(batch_size))
2838 ir = 0
2839 is = 0
2840 i_msg = 0
2841 DO kblk = 1, nblks(3)
2842 DO jblk = 1, nblks(2)
2843 DO iblk = 1, nblks(1)
2844 DO igroup = 0, ngroups - 1
2845 IF (block_source(iblk, jblk, kblk) == -1) cycle
2846
2847 i_msg = i_msg + 1
2848 IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) cycle
2849
2850 !a unique tag per block, within this batch
2851 tag = i_msg - (i_batch - 1)*batch_size
2852
2853 IF (filter_at) THEN
2854 ind(:) = [iblk, jblk, kblk]
2855 iatom = idx_to_at(ind(dim_at))
2856 IF (.NOT. iatom_to_subgroup(iatom)%array(igroup + 1)) cycle
2857 END IF
2858
2859 found = .false.
2860 IF (para_env%mepos == block_source(iblk, jblk, kblk)) THEN
2861 CALL dbt_get_block(t3c_main, [iblk, jblk, kblk], blk, found)
2862 END IF
2863
2864 !If blocks live on same proc, simply copy. Else MPI send/recv
2865 IF (block_source(iblk, jblk, kblk) == current_dest(iblk, jblk, kblk, igroup)) THEN
2866 IF (found) CALL dbt_put_block(t3c_sub, [iblk, jblk, kblk], shape(blk), blk)
2867 ELSE
2868 IF (para_env%mepos == block_source(iblk, jblk, kblk) .AND. found) THEN
2869 ALLOCATE (send_buff(tag)%array(bsizes1(iblk), bsizes2(jblk), bsizes3(kblk)))
2870 send_buff(tag)%array(:, :, :) = blk(:, :, :)
2871 is = is + 1
2872 CALL para_env%isend(msgin=send_buff(tag)%array, &
2873 dest=current_dest(iblk, jblk, kblk, igroup), &
2874 request=send_req(is), tag=tag)
2875 END IF
2876
2877 IF (para_env%mepos == current_dest(iblk, jblk, kblk, igroup)) THEN
2878 ALLOCATE (recv_buff(tag)%array(bsizes1(iblk), bsizes2(jblk), bsizes3(kblk)))
2879 ir = ir + 1
2880 CALL para_env%irecv(msgout=recv_buff(tag)%array, source=block_source(iblk, jblk, kblk), &
2881 request=recv_req(ir), tag=tag)
2882 END IF
2883 END IF
2884
2885 IF (found) DEALLOCATE (blk)
2886 END DO
2887 END DO
2888 END DO
2889 END DO
2890
2891 CALL mp_waitall(send_req(1:is))
2892 CALL mp_waitall(recv_req(1:ir))
2893 !clean-up
2894 DO i = 1, batch_size
2895 IF (ASSOCIATED(send_buff(i)%array)) DEALLOCATE (send_buff(i)%array)
2896 END DO
2897
2898 !Finally copy the data from the buffer to the sub-tensor
2899 i_msg = 0
2900 DO kblk = 1, nblks(3)
2901 DO jblk = 1, nblks(2)
2902 DO iblk = 1, nblks(1)
2903 DO igroup = 0, ngroups - 1
2904 IF (block_source(iblk, jblk, kblk) == -1) cycle
2905
2906 i_msg = i_msg + 1
2907 IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) cycle
2908
2909 !a unique tag per block, within this batch
2910 tag = i_msg - (i_batch - 1)*batch_size
2911
2912 IF (filter_at) THEN
2913 ind(:) = [iblk, jblk, kblk]
2914 iatom = idx_to_at(ind(dim_at))
2915 IF (.NOT. iatom_to_subgroup(iatom)%array(igroup + 1)) cycle
2916 END IF
2917
2918 IF (para_env%mepos == current_dest(iblk, jblk, kblk, igroup) .AND. &
2919 block_source(iblk, jblk, kblk) .NE. current_dest(iblk, jblk, kblk, igroup)) THEN
2920
2921 ALLOCATE (blk(bsizes1(iblk), bsizes2(jblk), bsizes3(kblk)))
2922 blk(:, :, :) = recv_buff(tag)%array(:, :, :)
2923 CALL dbt_put_block(t3c_sub, [iblk, jblk, kblk], shape(blk), blk)
2924 DEALLOCATE (blk)
2925 END IF
2926 END DO
2927 END DO
2928 END DO
2929 END DO
2930
2931 !clean-up
2932 DO i = 1, batch_size
2933 IF (ASSOCIATED(recv_buff(i)%array)) DEALLOCATE (recv_buff(i)%array)
2934 END DO
2935 DEALLOCATE (send_buff, recv_buff, send_req, recv_req)
2936 END DO !i_batch
2937 CALL dbt_finalize(t3c_sub)
2938
2939 END SUBROUTINE copy_3c_to_subgroup
2940
2941! **************************************************************************************************
2942!> \brief A routine that gather the pieces of the KS matrix accross the subgroup and puts it in the
2943!> main group. Each b_img, iatom, jatom tuple is one a single CPU
2944!> \param ks_t ...
2945!> \param ks_t_sub ...
2946!> \param group_size ...
2947!> \param sparsity_pattern ...
2948!> \param para_env ...
2949!> \param ri_data ...
2950! **************************************************************************************************
2951 SUBROUTINE gather_ks_matrix(ks_t, ks_t_sub, group_size, sparsity_pattern, para_env, ri_data)
2952 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: ks_t, ks_t_sub
2953 INTEGER, INTENT(IN) :: group_size
2954 INTEGER, DIMENSION(:, :, :), INTENT(IN) :: sparsity_pattern
2955 TYPE(mp_para_env_type), POINTER :: para_env
2956 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2957
2958 CHARACTER(len=*), PARAMETER :: routinen = 'gather_ks_matrix'
2959
2960 INTEGER :: b_img, dest, handle, i, i_spin, iatom, &
2961 igroup, ir, is, jatom, n_mess, natom, &
2962 nimg, nspins, source, tag
2963 LOGICAL :: found
2964 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
2965 TYPE(cp_2d_r_p_type), ALLOCATABLE, DIMENSION(:) :: recv_buff, send_buff
2966 TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_req, send_req
2967
2968 CALL timeset(routinen, handle)
2969
2970 nimg = SIZE(sparsity_pattern, 3)
2971 natom = SIZE(sparsity_pattern, 2)
2972 nspins = SIZE(ks_t, 1)
2973
2974 DO b_img = 1, nimg
2975 n_mess = 0
2976 DO i_spin = 1, nspins
2977 DO jatom = 1, natom
2978 DO iatom = 1, natom
2979 IF (sparsity_pattern(iatom, jatom, b_img) > -1) n_mess = n_mess + 1
2980 END DO
2981 END DO
2982 END DO
2983
2984 ALLOCATE (send_buff(n_mess), recv_buff(n_mess))
2985 ALLOCATE (send_req(n_mess), recv_req(n_mess))
2986 ir = 0
2987 is = 0
2988 n_mess = 0
2989 tag = 0
2990
2991 DO i_spin = 1, nspins
2992 DO jatom = 1, natom
2993 DO iatom = 1, natom
2994 IF (sparsity_pattern(iatom, jatom, b_img) < 0) cycle
2995 n_mess = n_mess + 1
2996 tag = tag + 1
2997
2998 !sending the message
2999 CALL dbt_get_stored_coordinates(ks_t(i_spin, b_img), [iatom, jatom], dest)
3000 CALL dbt_get_stored_coordinates(ks_t_sub(i_spin, b_img), [iatom, jatom], source) !source within sub
3001 igroup = sparsity_pattern(iatom, jatom, b_img)
3002 source = source + igroup*group_size
3003 IF (para_env%mepos == source) THEN
3004 CALL dbt_get_block(ks_t_sub(i_spin, b_img), [iatom, jatom], blk, found)
3005 IF (source == dest) THEN
3006 IF (found) CALL dbt_put_block(ks_t(i_spin, b_img), [iatom, jatom], shape(blk), blk)
3007 ELSE
3008 ALLOCATE (send_buff(n_mess)%array(ri_data%bsizes_AO(iatom), ri_data%bsizes_AO(jatom)))
3009 send_buff(n_mess)%array(:, :) = 0.0_dp
3010 IF (found) THEN
3011 send_buff(n_mess)%array(:, :) = blk(:, :)
3012 END IF
3013 is = is + 1
3014 CALL para_env%isend(msgin=send_buff(n_mess)%array, dest=dest, &
3015 request=send_req(is), tag=tag)
3016 END IF
3017 DEALLOCATE (blk)
3018 END IF
3019
3020 !receiving the message
3021 IF (para_env%mepos == dest .AND. source .NE. dest) THEN
3022 ALLOCATE (recv_buff(n_mess)%array(ri_data%bsizes_AO(iatom), ri_data%bsizes_AO(jatom)))
3023 ir = ir + 1
3024 CALL para_env%irecv(msgout=recv_buff(n_mess)%array, source=source, &
3025 request=recv_req(ir), tag=tag)
3026 END IF
3027 END DO !iatom
3028 END DO !jatom
3029 END DO !ispin
3030
3031 CALL mp_waitall(send_req(1:is))
3032 CALL mp_waitall(recv_req(1:ir))
3033
3034 !Copy the messages received into the KS matrix
3035 n_mess = 0
3036 DO i_spin = 1, nspins
3037 DO jatom = 1, natom
3038 DO iatom = 1, natom
3039 IF (sparsity_pattern(iatom, jatom, b_img) < 0) cycle
3040 n_mess = n_mess + 1
3041
3042 CALL dbt_get_stored_coordinates(ks_t(i_spin, b_img), [iatom, jatom], dest)
3043 IF (para_env%mepos == dest) THEN
3044 IF (.NOT. ASSOCIATED(recv_buff(n_mess)%array)) cycle
3045 ALLOCATE (blk(ri_data%bsizes_AO(iatom), ri_data%bsizes_AO(jatom)))
3046 blk(:, :) = recv_buff(n_mess)%array(:, :)
3047 CALL dbt_put_block(ks_t(i_spin, b_img), [iatom, jatom], shape(blk), blk)
3048 DEALLOCATE (blk)
3049 END IF
3050 END DO
3051 END DO
3052 END DO
3053
3054 !clean-up
3055 DO i = 1, n_mess
3056 IF (ASSOCIATED(send_buff(i)%array)) DEALLOCATE (send_buff(i)%array)
3057 IF (ASSOCIATED(recv_buff(i)%array)) DEALLOCATE (recv_buff(i)%array)
3058 END DO
3059 DEALLOCATE (send_buff, recv_buff, send_req, recv_req)
3060 END DO !b_img
3061
3062 CALL timestop(handle)
3063
3064 END SUBROUTINE gather_ks_matrix
3065
3066! **************************************************************************************************
3067!> \brief copy all required 2c tensors from the main MPI group to the subgroups
3068!> \param mat_2c_pot ...
3069!> \param t_2c_work ...
3070!> \param t_2c_ao_tmp ...
3071!> \param ks_t_split ...
3072!> \param ks_t_sub ...
3073!> \param group_size ...
3074!> \param ngroups ...
3075!> \param para_env ...
3076!> \param para_env_sub ...
3077!> \param ri_data ...
3078! **************************************************************************************************
3079 SUBROUTINE get_subgroup_2c_tensors(mat_2c_pot, t_2c_work, t_2c_ao_tmp, ks_t_split, ks_t_sub, &
3080 group_size, ngroups, para_env, para_env_sub, ri_data)
3081 TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: mat_2c_pot
3082 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_work, t_2c_ao_tmp, ks_t_split
3083 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: ks_t_sub
3084 INTEGER, INTENT(IN) :: group_size, ngroups
3085 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3086 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3087
3088 CHARACTER(len=*), PARAMETER :: routinen = 'get_subgroup_2c_tensors'
3089
3090 INTEGER :: handle, i, i_img, i_ri, i_spin, iproc, &
3091 j, natom, nblks, nimg, nspins
3092 INTEGER(int_8) :: nze
3093 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_ri_ext, bsizes_ri_ext_split, &
3094 dist1, dist2
3095 INTEGER, DIMENSION(2) :: pdims_2d
3096 INTEGER, DIMENSION(:), POINTER :: col_dist, ri_blk_size, row_dist
3097 INTEGER, DIMENSION(:, :), POINTER :: dbcsr_pgrid
3098 REAL(dp) :: occ
3099 TYPE(dbcsr_distribution_type) :: dbcsr_dist_sub
3100 TYPE(dbt_pgrid_type) :: pgrid_2d
3101 TYPE(dbt_type) :: work, work_sub
3102
3103 CALL timeset(routinen, handle)
3104
3105 !Create the 2d pgrid
3106 pdims_2d = 0
3107 CALL dbt_pgrid_create(para_env_sub, pdims_2d, pgrid_2d)
3108
3109 natom = SIZE(ri_data%bsizes_RI)
3110 nblks = SIZE(ri_data%bsizes_RI_split)
3111 ALLOCATE (bsizes_ri_ext(ri_data%ncell_RI*natom))
3112 ALLOCATE (bsizes_ri_ext_split(ri_data%ncell_RI*nblks))
3113 DO i_ri = 1, ri_data%ncell_RI
3114 bsizes_ri_ext((i_ri - 1)*natom + 1:i_ri*natom) = ri_data%bsizes_RI(:)
3115 bsizes_ri_ext_split((i_ri - 1)*nblks + 1:i_ri*nblks) = ri_data%bsizes_RI_split(:)
3116 END DO
3117
3118 !nRI x nRI 2c tensors
3119 CALL create_2c_tensor(t_2c_work(1), dist1, dist2, pgrid_2d, &
3120 bsizes_ri_ext, bsizes_ri_ext, &
3121 name="(RI | RI)")
3122 DEALLOCATE (dist1, dist2)
3123
3124 CALL create_2c_tensor(t_2c_work(2), dist1, dist2, pgrid_2d, &
3125 bsizes_ri_ext_split, bsizes_ri_ext_split, &
3126 name="(RI | RI)")
3127 DEALLOCATE (dist1, dist2)
3128
3129 !the AO based tensors
3130 CALL create_2c_tensor(ks_t_split(1), dist1, dist2, pgrid_2d, &
3131 ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
3132 name="(AO | AO)")
3133 DEALLOCATE (dist1, dist2)
3134 CALL dbt_create(ks_t_split(1), ks_t_split(2))
3135
3136 CALL create_2c_tensor(t_2c_ao_tmp(1), dist1, dist2, pgrid_2d, &
3137 ri_data%bsizes_AO, ri_data%bsizes_AO, &
3138 name="(AO | AO)")
3139 DEALLOCATE (dist1, dist2)
3140
3141 nspins = SIZE(ks_t_sub, 1)
3142 nimg = SIZE(ks_t_sub, 2)
3143 DO i_img = 1, nimg
3144 DO i_spin = 1, nspins
3145 CALL dbt_create(t_2c_ao_tmp(1), ks_t_sub(i_spin, i_img))
3146 END DO
3147 END DO
3148
3149 !Finally the HFX potential matrices
3150 !For now, we do a convoluted things where we go to tensors first, then back to matrices.
3151 CALL create_2c_tensor(work_sub, dist1, dist2, pgrid_2d, &
3152 ri_data%bsizes_RI, ri_data%bsizes_RI, &
3153 name="(RI | RI)")
3154 CALL dbt_create(ri_data%kp_mat_2c_pot(1, 1), work)
3155
3156 ALLOCATE (dbcsr_pgrid(0:pdims_2d(1) - 1, 0:pdims_2d(2) - 1))
3157 iproc = 0
3158 DO i = 0, pdims_2d(1) - 1
3159 DO j = 0, pdims_2d(2) - 1
3160 dbcsr_pgrid(i, j) = iproc
3161 iproc = iproc + 1
3162 END DO
3163 END DO
3164
3165 !We need to have the same exact 2d block dist as the tensors
3166 ALLOCATE (col_dist(natom), row_dist(natom))
3167 row_dist(:) = dist1(:)
3168 col_dist(:) = dist2(:)
3169
3170 ALLOCATE (ri_blk_size(natom))
3171 ri_blk_size(:) = ri_data%bsizes_RI(:)
3172
3173 CALL dbcsr_distribution_new(dbcsr_dist_sub, group=para_env_sub%get_handle(), pgrid=dbcsr_pgrid, &
3174 row_dist=row_dist, col_dist=col_dist)
3175 CALL dbcsr_create(mat_2c_pot(1), dist=dbcsr_dist_sub, name="sub", matrix_type=dbcsr_type_no_symmetry, &
3176 row_blk_size=ri_blk_size, col_blk_size=ri_blk_size)
3177
3178 DO i_img = 1, nimg
3179 IF (i_img > 1) CALL dbcsr_create(mat_2c_pot(i_img), template=mat_2c_pot(1))
3180 CALL dbt_copy_matrix_to_tensor(ri_data%kp_mat_2c_pot(1, i_img), work)
3181 CALL get_tensor_occupancy(work, nze, occ)
3182 IF (nze == 0) cycle
3183
3184 CALL copy_2c_to_subgroup(work_sub, work, group_size, ngroups, para_env)
3185 CALL dbt_copy_tensor_to_matrix(work_sub, mat_2c_pot(i_img))
3186 CALL dbcsr_filter(mat_2c_pot(i_img), ri_data%filter_eps)
3187 CALL dbt_clear(work_sub)
3188 END DO
3189
3190 CALL dbt_destroy(work)
3191 CALL dbt_destroy(work_sub)
3192 CALL dbt_pgrid_destroy(pgrid_2d)
3193 CALL dbcsr_distribution_release(dbcsr_dist_sub)
3194 DEALLOCATE (col_dist, row_dist, ri_blk_size, dbcsr_pgrid)
3195 CALL timestop(handle)
3196
3197 END SUBROUTINE get_subgroup_2c_tensors
3198
3199! **************************************************************************************************
3200!> \brief copy all required 3c tensors from the main MPI group to the subgroups
3201!> \param t_3c_int ...
3202!> \param t_3c_work_2 ...
3203!> \param t_3c_work_3 ...
3204!> \param t_3c_apc ...
3205!> \param t_3c_apc_sub ...
3206!> \param group_size ...
3207!> \param ngroups ...
3208!> \param para_env ...
3209!> \param para_env_sub ...
3210!> \param ri_data ...
3211! **************************************************************************************************
3212 SUBROUTINE get_subgroup_3c_tensors(t_3c_int, t_3c_work_2, t_3c_work_3, t_3c_apc, t_3c_apc_sub, &
3213 group_size, ngroups, para_env, para_env_sub, ri_data)
3214 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_int, t_3c_work_2, t_3c_work_3
3215 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_apc, t_3c_apc_sub
3216 INTEGER, INTENT(IN) :: group_size, ngroups
3217 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3218 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3219
3220 CHARACTER(len=*), PARAMETER :: routinen = 'get_subgroup_3c_tensors'
3221
3222 INTEGER :: batch_size, bfac, bo(2), handle, &
3223 handle2, i_blk, i_img, i_ri, i_spin, &
3224 ib, natom, nblks_ao, nblks_ri, nimg, &
3225 nspins
3226 INTEGER(int_8) :: nze
3227 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_ri_ext, bsizes_ri_ext_split, &
3228 bsizes_stack, bsizes_tmp, dist1, &
3229 dist2, dist3, dist_stack, idx_to_at
3230 INTEGER, DIMENSION(3) :: pdims
3231 REAL(dp) :: occ
3232 TYPE(dbt_distribution_type) :: t_dist
3233 TYPE(dbt_pgrid_type) :: pgrid
3234 TYPE(dbt_type) :: tmp, work_atom_block, work_atom_block_sub
3235
3236 CALL timeset(routinen, handle)
3237
3238 nblks_ri = SIZE(ri_data%bsizes_RI_split)
3239 ALLOCATE (bsizes_ri_ext_split(ri_data%ncell_RI*nblks_ri))
3240 DO i_ri = 1, ri_data%ncell_RI
3241 bsizes_ri_ext_split((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = ri_data%bsizes_RI_split(:)
3242 END DO
3243
3244 !Preparing larger block sizes for efficient communication (less, bigger messages)
3245 !we put 2 atoms per RI block
3246 bfac = 2
3247 natom = SIZE(ri_data%bsizes_RI)
3248 nblks_ri = max(1, natom/bfac)
3249 ALLOCATE (bsizes_tmp(nblks_ri))
3250 DO i_blk = 1, nblks_ri
3251 bo = get_limit(natom, nblks_ri, i_blk - 1)
3252 bsizes_tmp(i_blk) = sum(ri_data%bsizes_RI(bo(1):bo(2)))
3253 END DO
3254 ALLOCATE (bsizes_ri_ext(ri_data%ncell_RI*nblks_ri))
3255 DO i_ri = 1, ri_data%ncell_RI
3256 bsizes_ri_ext((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = bsizes_tmp(:)
3257 END DO
3258
3259 batch_size = ri_data%kp_stack_size
3260 nblks_ao = SIZE(ri_data%bsizes_AO_split)
3261 ALLOCATE (bsizes_stack(batch_size*nblks_ao))
3262 DO ib = 1, batch_size
3263 bsizes_stack((ib - 1)*nblks_ao + 1:ib*nblks_ao) = ri_data%bsizes_AO_split(:)
3264 END DO
3265
3266 !Create the pgrid for the configuration correspoinding to ri_data%t_3c_int_ctr_3
3267 natom = SIZE(ri_data%bsizes_RI)
3268 pdims = 0
3269 CALL dbt_pgrid_create(para_env_sub, pdims, pgrid, &
3270 tensor_dims=[SIZE(bsizes_ri_ext_split), 1, batch_size*SIZE(ri_data%bsizes_AO_split)])
3271
3272 !Create all required 3c tensors in that configuration
3273 CALL create_3c_tensor(t_3c_int(1), dist1, dist2, dist3, &
3274 pgrid, bsizes_ri_ext_split, ri_data%bsizes_AO_split, &
3275 ri_data%bsizes_AO_split, [1], [2, 3], name="(RI | AO AO)")
3276 nimg = SIZE(t_3c_int)
3277 DO i_img = 2, nimg
3278 CALL dbt_create(t_3c_int(1), t_3c_int(i_img))
3279 END DO
3280
3281 !The stacked work tensors, in a distribution that matches that of t_3c_int
3282 ALLOCATE (dist_stack(batch_size*nblks_ao))
3283 DO ib = 1, batch_size
3284 dist_stack((ib - 1)*nblks_ao + 1:ib*nblks_ao) = dist3(:)
3285 END DO
3286
3287 CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3288 CALL dbt_create(t_3c_work_3(1), "work_3_stack", t_dist, [1], [2, 3], &
3289 bsizes_ri_ext_split, ri_data%bsizes_AO_split, bsizes_stack)
3290 CALL dbt_create(t_3c_work_3(1), t_3c_work_3(2))
3291 CALL dbt_create(t_3c_work_3(1), t_3c_work_3(3))
3292 CALL dbt_distribution_destroy(t_dist)
3293 DEALLOCATE (dist1, dist2, dist3, dist_stack)
3294
3295 !For more efficient communication, we use intermediate tensors with larger block size
3296 CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3297 pgrid, bsizes_ri_ext, ri_data%bsizes_AO, &
3298 ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3299 DEALLOCATE (dist1, dist2, dist3)
3300
3301 CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3302 ri_data%pgrid, bsizes_ri_ext, ri_data%bsizes_AO, &
3303 ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3304 DEALLOCATE (dist1, dist2, dist3)
3305
3306 !Finally copy the integrals into the subgroups (if not there already)
3307 CALL timeset(routinen//"_ints", handle2)
3308 IF (ALLOCATED(ri_data%kp_t_3c_int)) THEN
3309 DO i_img = 1, nimg
3310 CALL dbt_copy(ri_data%kp_t_3c_int(i_img), t_3c_int(i_img), move_data=.true.)
3311 END DO
3312 ELSE
3313 ALLOCATE (ri_data%kp_t_3c_int(nimg))
3314 DO i_img = 1, nimg
3315 CALL dbt_create(t_3c_int(i_img), ri_data%kp_t_3c_int(i_img))
3316 CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, i_img), nze, occ)
3317 IF (nze == 0) cycle
3318 CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, i_img), work_atom_block, order=[2, 1, 3])
3319 CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, group_size, ngroups, para_env)
3320 CALL dbt_copy(work_atom_block_sub, t_3c_int(i_img), move_data=.true.)
3321 END DO
3322 END IF
3323 CALL timestop(handle2)
3324 CALL dbt_pgrid_destroy(pgrid)
3325 CALL dbt_destroy(work_atom_block)
3326 CALL dbt_destroy(work_atom_block_sub)
3327
3328 !Do the same for the t_3c_ctr_2 configuration
3329 pdims = 0
3330 CALL dbt_pgrid_create(para_env_sub, pdims, pgrid, &
3331 tensor_dims=[1, SIZE(bsizes_ri_ext_split), batch_size*SIZE(ri_data%bsizes_AO_split)])
3332
3333 !For more efficient communication, we use intermediate tensors with larger block size
3334 CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3335 pgrid, ri_data%bsizes_AO, bsizes_ri_ext, &
3336 ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3337 DEALLOCATE (dist1, dist2, dist3)
3338
3339 CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3340 ri_data%pgrid_1, ri_data%bsizes_AO, bsizes_ri_ext, &
3341 ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3342 DEALLOCATE (dist1, dist2, dist3)
3343
3344 !template for t_3c_apc_sub
3345 CALL create_3c_tensor(tmp, dist1, dist2, dist3, &
3346 pgrid, ri_data%bsizes_AO_split, bsizes_ri_ext_split, &
3347 ri_data%bsizes_AO_split, [1], [2, 3], name="(AO RI | AO)")
3348
3349 !create t_3c_work_2 tensors in a distribution that matches the above
3350 ALLOCATE (dist_stack(batch_size*nblks_ao))
3351 DO ib = 1, batch_size
3352 dist_stack((ib - 1)*nblks_ao + 1:ib*nblks_ao) = dist3(:)
3353 END DO
3354
3355 CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3356 CALL dbt_create(t_3c_work_2(1), "work_2_stack", t_dist, [1], [2, 3], &
3357 ri_data%bsizes_AO_split, bsizes_ri_ext_split, bsizes_stack)
3358 CALL dbt_create(t_3c_work_2(1), t_3c_work_2(2))
3359 CALL dbt_create(t_3c_work_2(1), t_3c_work_2(3))
3360 CALL dbt_distribution_destroy(t_dist)
3361 DEALLOCATE (dist1, dist2, dist3, dist_stack)
3362
3363 !Finally copy data from t_3c_apc to the subgroups
3364 ALLOCATE (idx_to_at(SIZE(ri_data%bsizes_AO)))
3365 CALL get_idx_to_atom(idx_to_at, ri_data%bsizes_AO, ri_data%bsizes_AO)
3366 nspins = SIZE(t_3c_apc, 1)
3367 CALL timeset(routinen//"_apc", handle2)
3368 DO i_img = 1, nimg
3369 DO i_spin = 1, nspins
3370 CALL dbt_create(tmp, t_3c_apc_sub(i_spin, i_img))
3371 CALL get_tensor_occupancy(t_3c_apc(i_spin, i_img), nze, occ)
3372 IF (nze == 0) cycle
3373 CALL dbt_copy(t_3c_apc(i_spin, i_img), work_atom_block, move_data=.true.)
3374 CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, group_size, &
3375 ngroups, para_env, ri_data%iatom_to_subgroup, 1, idx_to_at)
3376 CALL dbt_copy(work_atom_block_sub, t_3c_apc_sub(i_spin, i_img), move_data=.true.)
3377 END DO
3378 DO i_spin = 1, nspins
3379 CALL dbt_destroy(t_3c_apc(i_spin, i_img))
3380 END DO
3381 END DO
3382 CALL timestop(handle2)
3383 CALL dbt_pgrid_destroy(pgrid)
3384 CALL dbt_destroy(tmp)
3385 CALL dbt_destroy(work_atom_block)
3386 CALL dbt_destroy(work_atom_block_sub)
3387
3388 CALL timestop(handle)
3389
3390 END SUBROUTINE get_subgroup_3c_tensors
3391
3392! **************************************************************************************************
3393!> \brief copy all required 2c force tensors from the main MPI group to the subgroups
3394!> \param t_2c_inv ...
3395!> \param t_2c_bint ...
3396!> \param t_2c_metric ...
3397!> \param mat_2c_pot ...
3398!> \param t_2c_work ...
3399!> \param rho_ao_t ...
3400!> \param rho_ao_t_sub ...
3401!> \param t_2c_der_metric ...
3402!> \param t_2c_der_metric_sub ...
3403!> \param mat_der_pot ...
3404!> \param mat_der_pot_sub ...
3405!> \param group_size ...
3406!> \param ngroups ...
3407!> \param para_env ...
3408!> \param para_env_sub ...
3409!> \param ri_data ...
3410!> \note Main MPI group tensors are deleted within this routine, for memory optimization
3411! **************************************************************************************************
3412 SUBROUTINE get_subgroup_2c_derivs(t_2c_inv, t_2c_bint, t_2c_metric, mat_2c_pot, t_2c_work, rho_ao_t, &
3413 rho_ao_t_sub, t_2c_der_metric, t_2c_der_metric_sub, mat_der_pot, &
3414 mat_der_pot_sub, group_size, ngroups, para_env, para_env_sub, ri_data)
3415 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_inv, t_2c_bint, t_2c_metric
3416 TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: mat_2c_pot
3417 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_work
3418 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: rho_ao_t, rho_ao_t_sub, t_2c_der_metric, &
3419 t_2c_der_metric_sub
3420 TYPE(dbcsr_type), DIMENSION(:, :), INTENT(INOUT) :: mat_der_pot, mat_der_pot_sub
3421 INTEGER, INTENT(IN) :: group_size, ngroups
3422 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3423 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3424
3425 CHARACTER(len=*), PARAMETER :: routinen = 'get_subgroup_2c_derivs'
3426
3427 INTEGER :: handle, i, i_img, i_ri, i_spin, i_xyz, &
3428 iatom, iproc, j, natom, nblks, nimg, &
3429 nspins
3430 INTEGER(int_8) :: nze
3431 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_ri_ext, bsizes_ri_ext_split, &
3432 dist1, dist2
3433 INTEGER, DIMENSION(2) :: pdims_2d
3434 INTEGER, DIMENSION(:), POINTER :: col_dist, ri_blk_size, row_dist
3435 INTEGER, DIMENSION(:, :), POINTER :: dbcsr_pgrid
3436 REAL(dp) :: occ
3437 TYPE(dbcsr_distribution_type) :: dbcsr_dist_sub
3438 TYPE(dbt_pgrid_type) :: pgrid_2d
3439 TYPE(dbt_type) :: work, work_sub
3440
3441 CALL timeset(routinen, handle)
3442
3443 !Note: a fair portion of this routine is copied from the energy version of it
3444 !Create the 2d pgrid
3445 pdims_2d = 0
3446 CALL dbt_pgrid_create(para_env_sub, pdims_2d, pgrid_2d)
3447
3448 natom = SIZE(ri_data%bsizes_RI)
3449 nblks = SIZE(ri_data%bsizes_RI_split)
3450 ALLOCATE (bsizes_ri_ext(ri_data%ncell_RI*natom))
3451 ALLOCATE (bsizes_ri_ext_split(ri_data%ncell_RI*nblks))
3452 DO i_ri = 1, ri_data%ncell_RI
3453 bsizes_ri_ext((i_ri - 1)*natom + 1:i_ri*natom) = ri_data%bsizes_RI(:)
3454 bsizes_ri_ext_split((i_ri - 1)*nblks + 1:i_ri*nblks) = ri_data%bsizes_RI_split(:)
3455 END DO
3456
3457 !nRI x nRI 2c tensors
3458 CALL create_2c_tensor(t_2c_inv(1), dist1, dist2, pgrid_2d, &
3459 bsizes_ri_ext, bsizes_ri_ext, &
3460 name="(RI | RI)")
3461 DEALLOCATE (dist1, dist2)
3462
3463 CALL dbt_create(t_2c_inv(1), t_2c_bint(1))
3464 CALL dbt_create(t_2c_inv(1), t_2c_metric(1))
3465 DO iatom = 2, natom
3466 CALL dbt_create(t_2c_inv(1), t_2c_inv(iatom))
3467 CALL dbt_create(t_2c_inv(1), t_2c_bint(iatom))
3468 CALL dbt_create(t_2c_inv(1), t_2c_metric(iatom))
3469 END DO
3470 CALL dbt_create(t_2c_inv(1), t_2c_work(1))
3471 CALL dbt_create(t_2c_inv(1), t_2c_work(2))
3472 CALL dbt_create(t_2c_inv(1), t_2c_work(3))
3473 CALL dbt_create(t_2c_inv(1), t_2c_work(4))
3474
3475 CALL create_2c_tensor(t_2c_work(5), dist1, dist2, pgrid_2d, &
3476 bsizes_ri_ext_split, bsizes_ri_ext_split, &
3477 name="(RI | RI)")
3478 DEALLOCATE (dist1, dist2)
3479
3480 !copy the data from the main group.
3481 DO iatom = 1, natom
3482 CALL copy_2c_to_subgroup(t_2c_inv(iatom), ri_data%t_2c_inv(1, iatom), group_size, ngroups, para_env)
3483 CALL copy_2c_to_subgroup(t_2c_bint(iatom), ri_data%t_2c_int(1, iatom), group_size, ngroups, para_env)
3484 CALL copy_2c_to_subgroup(t_2c_metric(iatom), ri_data%t_2c_pot(1, iatom), group_size, ngroups, para_env)
3485 END DO
3486
3487 !This includes the derivatives of the RI metric, for which there is one per atom
3488 DO i_xyz = 1, 3
3489 DO iatom = 1, natom
3490 CALL dbt_create(t_2c_inv(1), t_2c_der_metric_sub(iatom, i_xyz))
3491 CALL copy_2c_to_subgroup(t_2c_der_metric_sub(iatom, i_xyz), t_2c_der_metric(iatom, i_xyz), &
3492 group_size, ngroups, para_env)
3493 CALL dbt_destroy(t_2c_der_metric(iatom, i_xyz))
3494 END DO
3495 END DO
3496
3497 !AO x AO 2c tensors
3498 CALL create_2c_tensor(rho_ao_t_sub(1, 1), dist1, dist2, pgrid_2d, &
3499 ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
3500 name="(AO | AO)")
3501 DEALLOCATE (dist1, dist2)
3502 nspins = SIZE(rho_ao_t, 1)
3503 nimg = SIZE(rho_ao_t, 2)
3504
3505 DO i_img = 1, nimg
3506 DO i_spin = 1, nspins
3507 IF (.NOT. (i_img == 1 .AND. i_spin == 1)) &
3508 CALL dbt_create(rho_ao_t_sub(1, 1), rho_ao_t_sub(i_spin, i_img))
3509 CALL copy_2c_to_subgroup(rho_ao_t_sub(i_spin, i_img), rho_ao_t(i_spin, i_img), &
3510 group_size, ngroups, para_env)
3511 CALL dbt_destroy(rho_ao_t(i_spin, i_img))
3512 END DO
3513 END DO
3514
3515 !The RIxRI matrices, going through tensors
3516 CALL create_2c_tensor(work_sub, dist1, dist2, pgrid_2d, &
3517 ri_data%bsizes_RI, ri_data%bsizes_RI, &
3518 name="(RI | RI)")
3519 CALL dbt_create(ri_data%kp_mat_2c_pot(1, 1), work)
3520
3521 ALLOCATE (dbcsr_pgrid(0:pdims_2d(1) - 1, 0:pdims_2d(2) - 1))
3522 iproc = 0
3523 DO i = 0, pdims_2d(1) - 1
3524 DO j = 0, pdims_2d(2) - 1
3525 dbcsr_pgrid(i, j) = iproc
3526 iproc = iproc + 1
3527 END DO
3528 END DO
3529
3530 !We need to have the same exact 2d block dist as the tensors
3531 ALLOCATE (col_dist(natom), row_dist(natom))
3532 row_dist(:) = dist1(:)
3533 col_dist(:) = dist2(:)
3534
3535 ALLOCATE (ri_blk_size(natom))
3536 ri_blk_size(:) = ri_data%bsizes_RI(:)
3537
3538 CALL dbcsr_distribution_new(dbcsr_dist_sub, group=para_env_sub%get_handle(), pgrid=dbcsr_pgrid, &
3539 row_dist=row_dist, col_dist=col_dist)
3540 CALL dbcsr_create(mat_2c_pot(1), dist=dbcsr_dist_sub, name="sub", matrix_type=dbcsr_type_no_symmetry, &
3541 row_blk_size=ri_blk_size, col_blk_size=ri_blk_size)
3542
3543 !The HFX potential
3544 DO i_img = 1, nimg
3545 IF (i_img > 1) CALL dbcsr_create(mat_2c_pot(i_img), template=mat_2c_pot(1))
3546 CALL dbt_copy_matrix_to_tensor(ri_data%kp_mat_2c_pot(1, i_img), work)
3547 CALL get_tensor_occupancy(work, nze, occ)
3548 IF (nze == 0) cycle
3549
3550 CALL copy_2c_to_subgroup(work_sub, work, group_size, ngroups, para_env)
3551 CALL dbt_copy_tensor_to_matrix(work_sub, mat_2c_pot(i_img))
3552 CALL dbcsr_filter(mat_2c_pot(i_img), ri_data%filter_eps)
3553 CALL dbt_clear(work_sub)
3554 END DO
3555
3556 !The derivatives of the HFX potential
3557 DO i_xyz = 1, 3
3558 DO i_img = 1, nimg
3559 CALL dbcsr_create(mat_der_pot_sub(i_img, i_xyz), template=mat_2c_pot(1))
3560 CALL dbt_copy_matrix_to_tensor(mat_der_pot(i_img, i_xyz), work)
3561 CALL dbcsr_release(mat_der_pot(i_img, i_xyz))
3562 CALL get_tensor_occupancy(work, nze, occ)
3563 IF (nze == 0) cycle
3564
3565 CALL copy_2c_to_subgroup(work_sub, work, group_size, ngroups, para_env)
3566 CALL dbt_copy_tensor_to_matrix(work_sub, mat_der_pot_sub(i_img, i_xyz))
3567 CALL dbcsr_filter(mat_der_pot_sub(i_img, i_xyz), ri_data%filter_eps)
3568 CALL dbt_clear(work_sub)
3569 END DO
3570 END DO
3571
3572 CALL dbt_destroy(work)
3573 CALL dbt_destroy(work_sub)
3574 CALL dbt_pgrid_destroy(pgrid_2d)
3575 CALL dbcsr_distribution_release(dbcsr_dist_sub)
3576 DEALLOCATE (col_dist, row_dist, ri_blk_size, dbcsr_pgrid)
3577
3578 CALL timestop(handle)
3579
3580 END SUBROUTINE get_subgroup_2c_derivs
3581
3582! **************************************************************************************************
3583!> \brief copy all required 3c derivative tensors from the main MPI group to the subgroups
3584!> \param t_3c_work_2 ...
3585!> \param t_3c_work_3 ...
3586!> \param t_3c_der_AO ...
3587!> \param t_3c_der_AO_sub ...
3588!> \param t_3c_der_RI ...
3589!> \param t_3c_der_RI_sub ...
3590!> \param t_3c_apc ...
3591!> \param t_3c_apc_sub ...
3592!> \param t_3c_der_stack ...
3593!> \param group_size ...
3594!> \param ngroups ...
3595!> \param para_env ...
3596!> \param para_env_sub ...
3597!> \param ri_data ...
3598!> \note the tensor containing the derivatives in the main MPI group are deleted for memory
3599! **************************************************************************************************
3600 SUBROUTINE get_subgroup_3c_derivs(t_3c_work_2, t_3c_work_3, t_3c_der_AO, t_3c_der_AO_sub, &
3601 t_3c_der_RI, t_3c_der_RI_sub, t_3c_apc, t_3c_apc_sub, &
3602 t_3c_der_stack, group_size, ngroups, para_env, para_env_sub, &
3603 ri_data)
3604 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_work_2, t_3c_work_3
3605 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_der_ao, t_3c_der_ao_sub, &
3606 t_3c_der_ri, t_3c_der_ri_sub, &
3607 t_3c_apc, t_3c_apc_sub
3608 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_der_stack
3609 INTEGER, INTENT(IN) :: group_size, ngroups
3610 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3611 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3612
3613 CHARACTER(len=*), PARAMETER :: routinen = 'get_subgroup_3c_derivs'
3614
3615 INTEGER :: batch_size, handle, i_img, i_ri, i_spin, &
3616 i_xyz, ib, nblks_ao, nblks_ri, nimg, &
3617 nspins, pdims(3)
3618 INTEGER(int_8) :: nze
3619 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_ri_ext, bsizes_ri_ext_split, &
3620 bsizes_stack, dist1, dist2, dist3, &
3621 dist_stack, idx_to_at
3622 REAL(dp) :: occ
3623 TYPE(dbt_distribution_type) :: t_dist
3624 TYPE(dbt_pgrid_type) :: pgrid
3625 TYPE(dbt_type) :: tmp, work_atom_block, work_atom_block_sub
3626
3627 CALL timeset(routinen, handle)
3628
3629 !We use intermediate tensors with larger block size for more optimized communication
3630 nblks_ri = SIZE(ri_data%bsizes_RI)
3631 ALLOCATE (bsizes_ri_ext(ri_data%ncell_RI*nblks_ri))
3632 DO i_ri = 1, ri_data%ncell_RI
3633 bsizes_ri_ext((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = ri_data%bsizes_RI(:)
3634 END DO
3635
3636 CALL dbt_get_info(ri_data%kp_t_3c_int(1), pdims=pdims)
3637 CALL dbt_pgrid_create(para_env_sub, pdims, pgrid)
3638
3639 CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3640 pgrid, bsizes_ri_ext, ri_data%bsizes_AO, &
3641 ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3642 DEALLOCATE (dist1, dist2, dist3)
3643
3644 CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3645 ri_data%pgrid_2, bsizes_ri_ext, ri_data%bsizes_AO, &
3646 ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3647 DEALLOCATE (dist1, dist2, dist3)
3648 CALL dbt_pgrid_destroy(pgrid)
3649
3650 !We use the 3c integrals on the subgroup as template for the derivatives
3651 nimg = ri_data%nimg
3652 DO i_xyz = 1, 3
3653 DO i_img = 1, nimg
3654 CALL dbt_create(ri_data%kp_t_3c_int(1), t_3c_der_ao_sub(i_img, i_xyz))
3655 CALL get_tensor_occupancy(t_3c_der_ao(i_img, i_xyz), nze, occ)
3656 IF (nze == 0) cycle
3657
3658 CALL dbt_copy(t_3c_der_ao(i_img, i_xyz), work_atom_block, move_data=.true.)
3659 CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, &
3660 group_size, ngroups, para_env)
3661 CALL dbt_copy(work_atom_block_sub, t_3c_der_ao_sub(i_img, i_xyz), move_data=.true.)
3662 END DO
3663
3664 DO i_img = 1, nimg
3665 CALL dbt_create(ri_data%kp_t_3c_int(1), t_3c_der_ri_sub(i_img, i_xyz))
3666 CALL get_tensor_occupancy(t_3c_der_ri(i_img, i_xyz), nze, occ)
3667 IF (nze == 0) cycle
3668
3669 CALL dbt_copy(t_3c_der_ri(i_img, i_xyz), work_atom_block, move_data=.true.)
3670 CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, &
3671 group_size, ngroups, para_env)
3672 CALL dbt_copy(work_atom_block_sub, t_3c_der_ri_sub(i_img, i_xyz), move_data=.true.)
3673 END DO
3674
3675 DO i_img = 1, nimg
3676 CALL dbt_destroy(t_3c_der_ri(i_img, i_xyz))
3677 CALL dbt_destroy(t_3c_der_ao(i_img, i_xyz))
3678 END DO
3679 END DO
3680 CALL dbt_destroy(work_atom_block_sub)
3681 CALL dbt_destroy(work_atom_block)
3682
3683 !Deal with t_3c_apc
3684 nblks_ri = SIZE(ri_data%bsizes_RI_split)
3685 ALLOCATE (bsizes_ri_ext_split(ri_data%ncell_RI*nblks_ri))
3686 DO i_ri = 1, ri_data%ncell_RI
3687 bsizes_ri_ext_split((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = ri_data%bsizes_RI_split(:)
3688 END DO
3689
3690 pdims = 0
3691 CALL dbt_pgrid_create(para_env_sub, pdims, pgrid, &
3692 tensor_dims=[1, SIZE(bsizes_ri_ext_split), batch_size*SIZE(ri_data%bsizes_AO_split)])
3693
3694 CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3695 pgrid, ri_data%bsizes_AO, bsizes_ri_ext, &
3696 ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3697 DEALLOCATE (dist1, dist2, dist3)
3698
3699 CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3700 ri_data%pgrid_1, ri_data%bsizes_AO, bsizes_ri_ext, &
3701 ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3702 DEALLOCATE (dist1, dist2, dist3)
3703
3704 CALL create_3c_tensor(tmp, dist1, dist2, dist3, &
3705 pgrid, ri_data%bsizes_AO_split, bsizes_ri_ext_split, &
3706 ri_data%bsizes_AO_split, [1], [2, 3], name="(AO RI | AO)")
3707 DEALLOCATE (dist1, dist2, dist3)
3708
3709 ALLOCATE (idx_to_at(SIZE(ri_data%bsizes_AO)))
3710 CALL get_idx_to_atom(idx_to_at, ri_data%bsizes_AO, ri_data%bsizes_AO)
3711 nspins = SIZE(t_3c_apc, 1)
3712 DO i_img = 1, nimg
3713 DO i_spin = 1, nspins
3714 CALL dbt_create(tmp, t_3c_apc_sub(i_spin, i_img))
3715 CALL get_tensor_occupancy(t_3c_apc(i_spin, i_img), nze, occ)
3716 IF (nze == 0) cycle
3717 CALL dbt_copy(t_3c_apc(i_spin, i_img), work_atom_block, move_data=.true.)
3718 CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, group_size, &
3719 ngroups, para_env, ri_data%iatom_to_subgroup, 1, idx_to_at)
3720 CALL dbt_copy(work_atom_block_sub, t_3c_apc_sub(i_spin, i_img), move_data=.true.)
3721 END DO
3722 DO i_spin = 1, nspins
3723 CALL dbt_destroy(t_3c_apc(i_spin, i_img))
3724 END DO
3725 END DO
3726 CALL dbt_destroy(tmp)
3727 CALL dbt_destroy(work_atom_block)
3728 CALL dbt_destroy(work_atom_block_sub)
3729 CALL dbt_pgrid_destroy(pgrid)
3730
3731 !t_3c_work_3 based on structure of 3c integrals/derivs
3732 batch_size = ri_data%kp_stack_size
3733 nblks_ao = SIZE(ri_data%bsizes_AO_split)
3734 ALLOCATE (bsizes_stack(batch_size*nblks_ao))
3735 DO ib = 1, batch_size
3736 bsizes_stack((ib - 1)*nblks_ao + 1:ib*nblks_ao) = ri_data%bsizes_AO_split(:)
3737 END DO
3738
3739 ALLOCATE (dist1(ri_data%ncell_RI*nblks_ri), dist2(nblks_ao), dist3(nblks_ao))
3740 CALL dbt_get_info(ri_data%kp_t_3c_int(1), proc_dist_1=dist1, proc_dist_2=dist2, &
3741 proc_dist_3=dist3, pdims=pdims)
3742
3743 ALLOCATE (dist_stack(batch_size*nblks_ao))
3744 DO ib = 1, batch_size
3745 dist_stack((ib - 1)*nblks_ao + 1:ib*nblks_ao) = dist3(:)
3746 END DO
3747
3748 CALL dbt_pgrid_create(para_env_sub, pdims, pgrid)
3749 CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3750 CALL dbt_create(t_3c_work_3(1), "work_3_stack", t_dist, [1], [2, 3], &
3751 bsizes_ri_ext_split, ri_data%bsizes_AO_split, bsizes_stack)
3752 CALL dbt_create(t_3c_work_3(1), t_3c_work_3(2))
3753 CALL dbt_create(t_3c_work_3(1), t_3c_work_3(3))
3754 CALL dbt_create(t_3c_work_3(1), t_3c_work_3(4))
3755 CALL dbt_distribution_destroy(t_dist)
3756 CALL dbt_pgrid_destroy(pgrid)
3757 DEALLOCATE (dist1, dist2, dist3, dist_stack)
3758
3759 !the derivatives are stacked in the same way
3760 CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(1))
3761 CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(2))
3762 CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(3))
3763 CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(4))
3764 CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(5))
3765 CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(6))
3766
3767 !t_3c_work_2 based on structure of t_3c_apc
3768 ALLOCATE (dist1(nblks_ao), dist2(ri_data%ncell_RI*nblks_ri), dist3(nblks_ao))
3769 CALL dbt_get_info(t_3c_apc_sub(1, 1), proc_dist_1=dist1, proc_dist_2=dist2, &
3770 proc_dist_3=dist3, pdims=pdims)
3771
3772 ALLOCATE (dist_stack(batch_size*nblks_ao))
3773 DO ib = 1, batch_size
3774 dist_stack((ib - 1)*nblks_ao + 1:ib*nblks_ao) = dist3(:)
3775 END DO
3776
3777 CALL dbt_pgrid_create(para_env_sub, pdims, pgrid)
3778 CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3779 CALL dbt_create(t_3c_work_2(1), "work_3_stack", t_dist, [1], [2, 3], &
3780 ri_data%bsizes_AO_split, bsizes_ri_ext_split, bsizes_stack)
3781 CALL dbt_create(t_3c_work_2(1), t_3c_work_2(2))
3782 CALL dbt_create(t_3c_work_2(1), t_3c_work_2(3))
3783 CALL dbt_distribution_destroy(t_dist)
3784 CALL dbt_pgrid_destroy(pgrid)
3785 DEALLOCATE (dist1, dist2, dist3, dist_stack)
3786
3787 CALL timestop(handle)
3788
3789 END SUBROUTINE get_subgroup_3c_derivs
3790
3791! **************************************************************************************************
3792!> \brief A routine that reorders the t_3c_int tensors such that all items which are fully empty
3793!> are bunched together. This way, we can get much more efficient screening based on NZE
3794!> \param t_3c_ints ...
3795!> \param ri_data ...
3796! **************************************************************************************************
3797 SUBROUTINE reorder_3c_ints(t_3c_ints, ri_data)
3798 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_ints
3799 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3800
3801 CHARACTER(LEN=*), PARAMETER :: routinen = 'reorder_3c_ints'
3802
3803 INTEGER :: handle, i_img, idx, idx_empty, idx_full, &
3804 nimg
3805 INTEGER(int_8) :: nze
3806 REAL(dp) :: occ
3807 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_3c_tmp
3808
3809 CALL timeset(routinen, handle)
3810
3811 nimg = ri_data%nimg
3812 ALLOCATE (t_3c_tmp(nimg))
3813 DO i_img = 1, nimg
3814 CALL dbt_create(t_3c_ints(i_img), t_3c_tmp(i_img))
3815 CALL dbt_copy(t_3c_ints(i_img), t_3c_tmp(i_img), move_data=.true.)
3816 END DO
3817
3818 !Loop over the images, check if ints have NZE == 0, and put them at the start or end of the
3819 !initial tensor array. Keep the mapping in an array
3820 ALLOCATE (ri_data%idx_to_img(nimg))
3821 idx_full = 0
3822 idx_empty = nimg + 1
3823
3824 DO i_img = 1, nimg
3825 CALL get_tensor_occupancy(t_3c_tmp(i_img), nze, occ)
3826 IF (nze == 0) THEN
3827 idx_empty = idx_empty - 1
3828 CALL dbt_copy(t_3c_tmp(i_img), t_3c_ints(idx_empty), move_data=.true.)
3829 ri_data%idx_to_img(idx_empty) = i_img
3830 ELSE
3831 idx_full = idx_full + 1
3832 CALL dbt_copy(t_3c_tmp(i_img), t_3c_ints(idx_full), move_data=.true.)
3833 ri_data%idx_to_img(idx_full) = i_img
3834 END IF
3835 CALL dbt_destroy(t_3c_tmp(i_img))
3836 END DO
3837
3838 !store the highest image index with non-zero integrals
3839 ri_data%nimg_nze = idx_full
3840
3841 ALLOCATE (ri_data%img_to_idx(nimg))
3842 DO idx = 1, nimg
3843 ri_data%img_to_idx(ri_data%idx_to_img(idx)) = idx
3844 END DO
3845
3846 CALL timestop(handle)
3847
3848 END SUBROUTINE reorder_3c_ints
3849
3850! **************************************************************************************************
3851!> \brief A routine that reorders the 3c derivatives, the same way that the integrals are, also to
3852!> increase efficiency of screening
3853!> \param t_3c_derivs ...
3854!> \param ri_data ...
3855! **************************************************************************************************
3856 SUBROUTINE reorder_3c_derivs(t_3c_derivs, ri_data)
3857 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_derivs
3858 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3859
3860 CHARACTER(LEN=*), PARAMETER :: routinen = 'reorder_3c_derivs'
3861
3862 INTEGER :: handle, i_img, i_xyz, idx, nimg
3863 INTEGER(int_8) :: nze
3864 REAL(dp) :: occ
3865 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_3c_tmp
3866
3867 CALL timeset(routinen, handle)
3868
3869 nimg = ri_data%nimg
3870 ALLOCATE (t_3c_tmp(nimg))
3871 DO i_img = 1, nimg
3872 CALL dbt_create(t_3c_derivs(1, 1), t_3c_tmp(i_img))
3873 END DO
3874
3875 DO i_xyz = 1, 3
3876 DO i_img = 1, nimg
3877 CALL dbt_copy(t_3c_derivs(i_img, i_xyz), t_3c_tmp(i_img), move_data=.true.)
3878 END DO
3879 DO i_img = 1, nimg
3880 idx = ri_data%img_to_idx(i_img)
3881 CALL dbt_copy(t_3c_tmp(i_img), t_3c_derivs(idx, i_xyz), move_data=.true.)
3882 CALL get_tensor_occupancy(t_3c_derivs(idx, i_xyz), nze, occ)
3883 IF (nze > 0) ri_data%nimg_nze = max(idx, ri_data%nimg_nze)
3884 END DO
3885 END DO
3886
3887 DO i_img = 1, nimg
3888 CALL dbt_destroy(t_3c_tmp(i_img))
3889 END DO
3890
3891 CALL timestop(handle)
3892
3893 END SUBROUTINE reorder_3c_derivs
3894
3895! **************************************************************************************************
3896!> \brief Get the sparsity pattern related to the non-symmetric AO basis overlap neighbor list
3897!> \param pattern ...
3898!> \param ri_data ...
3899!> \param qs_env ...
3900! **************************************************************************************************
3901 SUBROUTINE get_sparsity_pattern(pattern, ri_data, qs_env)
3902 INTEGER, DIMENSION(:, :, :), INTENT(INOUT) :: pattern
3903 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3904 TYPE(qs_environment_type), POINTER :: qs_env
3905
3906 INTEGER :: iatom, j_img, jatom, mj_img, natom, nimg
3907 INTEGER, ALLOCATABLE, DIMENSION(:) :: bins
3908 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: tmp_pattern
3909 INTEGER, DIMENSION(3) :: cell_j
3910 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
3911 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
3912 TYPE(dft_control_type), POINTER :: dft_control
3913 TYPE(kpoint_type), POINTER :: kpoints
3914 TYPE(mp_para_env_type), POINTER :: para_env
3916 DIMENSION(:), POINTER :: nl_iterator
3917 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
3918 POINTER :: nl_2c
3919
3920 NULLIFY (nl_2c, nl_iterator, kpoints, cell_to_index, dft_control, index_to_cell, para_env)
3921
3922 CALL get_qs_env(qs_env, kpoints=kpoints, dft_control=dft_control, para_env=para_env, natom=natom)
3923 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell, sab_nl=nl_2c)
3924
3925 nimg = ri_data%nimg
3926 pattern(:, :, :) = 0
3927
3928 !We use the symmetric nl for all images that have an opposite cell
3929 CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
3930 DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
3931 CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, cell=cell_j)
3932
3933 j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
3934 IF (j_img > nimg .OR. j_img < 1) cycle
3935
3936 mj_img = get_opp_index(j_img, qs_env)
3937 IF (mj_img > nimg .OR. mj_img < 1) cycle
3938
3939 IF (ri_data%present_images(j_img) == 0) cycle
3940
3941 pattern(iatom, jatom, j_img) = 1
3942 END DO
3943 CALL neighbor_list_iterator_release(nl_iterator)
3944
3945 !If there is no opposite cell present, then we take into account the non-symmetric nl
3946 CALL get_kpoint_info(kpoints, sab_nl_nosym=nl_2c)
3947
3948 CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
3949 DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
3950 CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, cell=cell_j)
3951
3952 j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
3953 IF (j_img > nimg .OR. j_img < 1) cycle
3954
3955 mj_img = get_opp_index(j_img, qs_env)
3956 IF (mj_img .LE. nimg .AND. mj_img > 0) cycle
3957
3958 IF (ri_data%present_images(j_img) == 0) cycle
3959
3960 pattern(iatom, jatom, j_img) = 1
3961 END DO
3962 CALL neighbor_list_iterator_release(nl_iterator)
3963
3964 CALL para_env%sum(pattern)
3965
3966 !If the opposite image is considered, then there is no need to compute diagonal twice
3967 DO j_img = 2, nimg
3968 DO iatom = 1, natom
3969 IF (pattern(iatom, iatom, j_img) .NE. 0) THEN
3970 mj_img = get_opp_index(j_img, qs_env)
3971 IF (mj_img > nimg .OR. mj_img < 1) cycle
3972 pattern(iatom, iatom, mj_img) = 0
3973 END IF
3974 END DO
3975 END DO
3976
3977 ! We want to equilibrate the sparsity pattern such that there are same amount of blocks
3978 ! for each atom i of i,j pairs
3979 ALLOCATE (bins(natom))
3980 bins(:) = 0
3981
3982 ALLOCATE (tmp_pattern(natom, natom, nimg))
3983 tmp_pattern(:, :, :) = 0
3984 DO j_img = 1, nimg
3985 DO jatom = 1, natom
3986 DO iatom = 1, natom
3987 IF (pattern(iatom, jatom, j_img) == 0) cycle
3988 mj_img = get_opp_index(j_img, qs_env)
3989
3990 !Should we take the i,j,b or th j,i,-b atomic block?
3991 IF (mj_img > nimg .OR. mj_img < 1) THEN
3992 !No opposite image, no choice
3993 bins(iatom) = bins(iatom) + 1
3994 tmp_pattern(iatom, jatom, j_img) = 1
3995 ELSE
3996
3997 IF (bins(iatom) > bins(jatom)) THEN
3998 bins(jatom) = bins(jatom) + 1
3999 tmp_pattern(jatom, iatom, mj_img) = 1
4000 ELSE
4001 bins(iatom) = bins(iatom) + 1
4002 tmp_pattern(iatom, jatom, j_img) = 1
4003 END IF
4004 END IF
4005 END DO
4006 END DO
4007 END DO
4008
4009 ! -1 => unoccupied, 0 => occupied
4010 pattern(:, :, :) = tmp_pattern(:, :, :) - 1
4011
4012 END SUBROUTINE get_sparsity_pattern
4013
4014! **************************************************************************************************
4015!> \brief Distribute the iatom, jatom, b_img triplet over the subgroupd to spread the load
4016!> the group id for each triplet is passed as the value of sparsity_pattern(i, j, b),
4017!> with -1 being an unoccupied block
4018!> \param sparsity_pattern ...
4019!> \param ngroups ...
4020!> \param ri_data ...
4021! **************************************************************************************************
4022 SUBROUTINE get_sub_dist(sparsity_pattern, ngroups, ri_data)
4023 INTEGER, DIMENSION(:, :, :), INTENT(INOUT) :: sparsity_pattern
4024 INTEGER, INTENT(IN) :: ngroups
4025 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4026
4027 INTEGER :: b_img, ctr, iat, iatom, igroup, jatom, &
4028 natom, nimg, ub
4029 INTEGER, ALLOCATABLE, DIMENSION(:) :: max_at_per_group
4030 REAL(dp) :: cost
4031 REAL(dp), ALLOCATABLE, DIMENSION(:) :: bins
4032
4033 natom = SIZE(sparsity_pattern, 2)
4034 nimg = SIZE(sparsity_pattern, 3)
4035
4036 !To avoid unnecessary data replication accross the subgroups, we want to have a limited number
4037 !of subgroup with the data of a given iatom. At the minimum, all groups have 1 atom
4038 !We assume that the cost associated to each iatom is roughly the same
4039 IF (.NOT. ALLOCATED(ri_data%iatom_to_subgroup)) THEN
4040 ALLOCATE (ri_data%iatom_to_subgroup(natom), max_at_per_group(ngroups))
4041 DO iatom = 1, natom
4042 NULLIFY (ri_data%iatom_to_subgroup(iatom)%array)
4043 ALLOCATE (ri_data%iatom_to_subgroup(iatom)%array(ngroups))
4044 ri_data%iatom_to_subgroup(iatom)%array(:) = .false.
4045 END DO
4046
4047 ub = natom/ngroups
4048 IF (ub*ngroups < natom) ub = ub + 1
4049 max_at_per_group(:) = max(1, ub)
4050
4051 !We want each atom to be present the same amount of times. Some groups might have more atoms
4052 !than other to achieve this.
4053 ctr = 0
4054 DO WHILE (modulo(sum(max_at_per_group), natom) .NE. 0)
4055 igroup = modulo(ctr, ngroups) + 1
4056 max_at_per_group(igroup) = max_at_per_group(igroup) + 1
4057 ctr = ctr + 1
4058 END DO
4059
4060 ctr = 0
4061 DO igroup = 1, ngroups
4062 DO iat = 1, max_at_per_group(igroup)
4063 iatom = modulo(ctr, natom) + 1
4064 ri_data%iatom_to_subgroup(iatom)%array(igroup) = .true.
4065 ctr = ctr + 1
4066 END DO
4067 END DO
4068 END IF
4069
4070 ALLOCATE (bins(ngroups))
4071 bins = 0.0_dp
4072 DO b_img = 1, nimg
4073 DO jatom = 1, natom
4074 DO iatom = 1, natom
4075 IF (sparsity_pattern(iatom, jatom, b_img) == -1) cycle
4076 igroup = minloc(bins, 1, mask=ri_data%iatom_to_subgroup(iatom)%array) - 1
4077
4078 !Use cost information from previous SCF if available
4079 IF (any(ri_data%kp_cost > epsilon(0.0_dp))) THEN
4080 cost = ri_data%kp_cost(iatom, jatom, b_img)
4081 ELSE
4082 cost = real(ri_data%bsizes_AO(iatom)*ri_data%bsizes_AO(jatom), dp)
4083 END IF
4084 bins(igroup + 1) = bins(igroup + 1) + cost
4085 sparsity_pattern(iatom, jatom, b_img) = igroup
4086 END DO
4087 END DO
4088 END DO
4089
4090 END SUBROUTINE get_sub_dist
4091
4092! **************************************************************************************************
4093!> \brief A rouine that updates the sparsity pattern for force calculation, where all i,j,b combinations
4094!> are visited.
4095!> \param force_pattern ...
4096!> \param scf_pattern ...
4097!> \param ngroups ...
4098!> \param ri_data ...
4099!> \param qs_env ...
4100! **************************************************************************************************
4101 SUBROUTINE update_pattern_to_forces(force_pattern, scf_pattern, ngroups, ri_data, qs_env)
4102 INTEGER, DIMENSION(:, :, :), INTENT(INOUT) :: force_pattern, scf_pattern
4103 INTEGER, INTENT(IN) :: ngroups
4104 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4105 TYPE(qs_environment_type), POINTER :: qs_env
4106
4107 INTEGER :: b_img, iatom, igroup, jatom, mb_img, &
4108 natom, nimg
4109 REAL(dp), ALLOCATABLE, DIMENSION(:) :: bins
4110
4111 natom = SIZE(scf_pattern, 2)
4112 nimg = SIZE(scf_pattern, 3)
4113
4114 ALLOCATE (bins(ngroups))
4115 bins = 0.0_dp
4116
4117 DO b_img = 1, nimg
4118 mb_img = get_opp_index(b_img, qs_env)
4119 DO jatom = 1, natom
4120 DO iatom = 1, natom
4121 !Important: same distribution as KS matrix, because reuse t_3c_apc
4122 igroup = minloc(bins, 1, mask=ri_data%iatom_to_subgroup(iatom)%array) - 1
4123
4124 !check that block not already treated
4125 IF (scf_pattern(iatom, jatom, b_img) > -1) cycle
4126
4127 !If not, take the cost of block j, i, -b (same energy contribution)
4128 IF (mb_img > 0 .AND. mb_img .LE. nimg) THEN
4129 IF (scf_pattern(jatom, iatom, mb_img) == -1) cycle
4130 bins(igroup + 1) = bins(igroup + 1) + ri_data%kp_cost(jatom, iatom, mb_img)
4131 force_pattern(iatom, jatom, b_img) = igroup
4132 END IF
4133 END DO
4134 END DO
4135 END DO
4136
4137 END SUBROUTINE update_pattern_to_forces
4138
4139! **************************************************************************************************
4140!> \brief A routine that determines the extend of the KP RI-HFX periodic images, including for the
4141!> extension of the RI basis
4142!> \param ri_data ...
4143!> \param qs_env ...
4144! **************************************************************************************************
4145 SUBROUTINE get_kp_and_ri_images(ri_data, qs_env)
4146 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4147 TYPE(qs_environment_type), POINTER :: qs_env
4148
4149 CHARACTER(LEN=*), PARAMETER :: routinen = 'get_kp_and_ri_images'
4150
4151 INTEGER :: cell_j(3), cell_k(3), handle, i_img, iatom, ikind, j_img, jatom, jcell, katom, &
4152 kcell, kp_index_lbounds(3), kp_index_ubounds(3), natom, ngroups, nimg, nkind, pcoord(3), &
4153 pdims(3)
4154 INTEGER, ALLOCATABLE, DIMENSION(:) :: dist_ao_1, dist_ao_2, dist_ri, &
4155 nri_per_atom, present_img, ri_cells
4156 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
4157 REAL(dp) :: bump_fact, dij, dik, image_range, &
4158 ri_range, rij(3), rik(3)
4159 TYPE(dbt_type) :: t_dummy
4160 TYPE(dft_control_type), POINTER :: dft_control
4161 TYPE(distribution_2d_type), POINTER :: dist_2d
4162 TYPE(distribution_3d_type) :: dist_3d
4163 TYPE(gto_basis_set_p_type), ALLOCATABLE, &
4164 DIMENSION(:), TARGET :: basis_set_ao, basis_set_ri
4165 TYPE(kpoint_type), POINTER :: kpoints
4166 TYPE(mp_cart_type) :: mp_comm_t3c
4167 TYPE(mp_para_env_type), POINTER :: para_env
4168 TYPE(neighbor_list_3c_iterator_type) :: nl_3c_iter
4169 TYPE(neighbor_list_3c_type) :: nl_3c
4171 DIMENSION(:), POINTER :: nl_iterator
4172 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
4173 POINTER :: nl_2c
4174 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
4175 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
4176 TYPE(section_vals_type), POINTER :: hfx_section
4177
4178 NULLIFY (qs_kind_set, dist_2d, nl_2c, nl_iterator, dft_control, &
4179 particle_set, kpoints, para_env, cell_to_index, hfx_section)
4180
4181 CALL timeset(routinen, handle)
4182
4183 CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, distribution_2d=dist_2d, &
4184 dft_control=dft_control, particle_set=particle_set, kpoints=kpoints, &
4185 para_env=para_env, natom=natom)
4186 nimg = dft_control%nimages
4187 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index)
4188 kp_index_lbounds = lbound(cell_to_index)
4189 kp_index_ubounds = ubound(cell_to_index)
4190
4191 hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
4192 CALL section_vals_val_get(hfx_section, "KP_NGROUPS", i_val=ngroups)
4193
4194 ALLOCATE (basis_set_ri(nkind), basis_set_ao(nkind))
4195 CALL basis_set_list_setup(basis_set_ri, ri_data%ri_basis_type, qs_kind_set)
4196 CALL basis_set_list_setup(basis_set_ao, ri_data%orb_basis_type, qs_kind_set)
4197
4198 !In case of shortrange HFX potential, it is imprtant to be consistent with the rest of the KP
4199 !code, and use EPS_SCHWARZ to determine the range (rather than eps_filter_2c in normal RI-HFX)
4200 IF (ri_data%hfx_pot%potential_type == do_potential_short) THEN
4201 CALL erfc_cutoff(ri_data%eps_schwarz, ri_data%hfx_pot%omega, ri_data%hfx_pot%cutoff_radius)
4202 END IF
4203
4204 !Determine the range for contributing periodic images, and for the RI basis extension
4205 ri_data%kp_RI_range = 0.0_dp
4206 ri_data%kp_image_range = 0.0_dp
4207 DO ikind = 1, nkind
4208
4209 CALL init_interaction_radii_orb_basis(basis_set_ao(ikind)%gto_basis_set, ri_data%eps_pgf_orb)
4210 CALL get_gto_basis_set(basis_set_ao(ikind)%gto_basis_set, kind_radius=ri_range)
4211 ri_data%kp_RI_range = max(ri_range, ri_data%kp_RI_range)
4212
4213 CALL init_interaction_radii_orb_basis(basis_set_ao(ikind)%gto_basis_set, ri_data%eps_pgf_orb)
4214 CALL init_interaction_radii_orb_basis(basis_set_ri(ikind)%gto_basis_set, ri_data%eps_pgf_orb)
4215 CALL get_gto_basis_set(basis_set_ri(ikind)%gto_basis_set, kind_radius=image_range)
4216
4217 image_range = 2.0_dp*image_range + cutoff_screen_factor*ri_data%hfx_pot%cutoff_radius
4218 ri_data%kp_image_range = max(image_range, ri_data%kp_image_range)
4219 END DO
4220
4221 CALL section_vals_val_get(hfx_section, "KP_RI_BUMP_FACTOR", r_val=bump_fact)
4222 ri_data%kp_bump_rad = bump_fact*ri_data%kp_RI_range
4223
4224 !For the extent of the KP RI-HFX images, we are limited by the RI-HFX potential in
4225 !(mu^0 sigma^a|P^0) (P^0|Q^b) (Q^b|nu^b lambda^a+c), if there is no contact between
4226 !any P^0 and Q^b, then image b does not contribute
4227 CALL build_2c_neighbor_lists(nl_2c, basis_set_ri, basis_set_ri, ri_data%hfx_pot, &
4228 "HFX_2c_nl_RI", qs_env, sym_ij=.false., dist_2d=dist_2d)
4229
4230 ALLOCATE (present_img(nimg))
4231 present_img = 0
4232 ri_data%nimg = 0
4233 CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
4234 DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
4235 CALL get_iterator_info(nl_iterator, r=rij, cell=cell_j)
4236
4237 dij = norm2(rij)
4238
4239 j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
4240 IF (j_img > nimg .OR. j_img < 1) cycle
4241
4242 IF (dij > ri_data%kp_image_range) cycle
4243
4244 ri_data%nimg = max(j_img, ri_data%nimg)
4245 present_img(j_img) = 1
4246
4247 END DO
4248 CALL neighbor_list_iterator_release(nl_iterator)
4249 CALL release_neighbor_list_sets(nl_2c)
4250 CALL para_env%max(ri_data%nimg)
4251 IF (ri_data%nimg > nimg) &
4252 cpabort("Make sure the smallest exponent of the RI-HFX basis is larger than that of the ORB basis.")
4253
4254 !Keep track of which images will not contribute, so that can be ignored before calculation
4255 CALL para_env%sum(present_img)
4256 ALLOCATE (ri_data%present_images(ri_data%nimg))
4257 ri_data%present_images = 0
4258 DO i_img = 1, ri_data%nimg
4259 IF (present_img(i_img) > 0) ri_data%present_images(i_img) = 1
4260 END DO
4261
4262 CALL create_3c_tensor(t_dummy, dist_ao_1, dist_ao_2, dist_ri, &
4263 ri_data%pgrid, ri_data%bsizes_AO, ri_data%bsizes_AO, ri_data%bsizes_RI, &
4264 map1=[1, 2], map2=[3], name="(AO AO | RI)")
4265
4266 CALL dbt_mp_environ_pgrid(ri_data%pgrid, pdims, pcoord)
4267 CALL mp_comm_t3c%create(ri_data%pgrid%mp_comm_2d, 3, pdims)
4268 CALL distribution_3d_create(dist_3d, dist_ao_1, dist_ao_2, dist_ri, &
4269 nkind, particle_set, mp_comm_t3c, own_comm=.true.)
4270 DEALLOCATE (dist_ri, dist_ao_1, dist_ao_2)
4271 CALL dbt_destroy(t_dummy)
4272
4273 !For the extension of the RI basis P in (mu^0 sigma^a |P^i), we consider an atom if the distance,
4274 !between mu^0 and P^i if smaller or equal to the kind radius of mu^0
4275 CALL build_3c_neighbor_lists(nl_3c, basis_set_ao, basis_set_ao, basis_set_ri, dist_3d, &
4276 ri_data%ri_metric, "HFX_3c_nl", qs_env, op_pos=2, sym_ij=.false., &
4277 own_dist=.true.)
4278
4279 ALLOCATE (ri_cells(nimg))
4280 ri_cells = 0
4281
4282 ALLOCATE (nri_per_atom(natom))
4283 nri_per_atom = 0
4284
4285 CALL neighbor_list_3c_iterator_create(nl_3c_iter, nl_3c)
4286 DO WHILE (neighbor_list_3c_iterate(nl_3c_iter) == 0)
4287 CALL get_3c_iterator_info(nl_3c_iter, cell_k=cell_k, rik=rik, cell_j=cell_j, &
4288 iatom=iatom, jatom=jatom, katom=katom)
4289 dik = norm2(rik)
4290
4291 IF (any([cell_j(1), cell_j(2), cell_j(3)] < kp_index_lbounds) .OR. &
4292 any([cell_j(1), cell_j(2), cell_j(3)] > kp_index_ubounds)) cycle
4293
4294 jcell = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
4295 IF (jcell > nimg .OR. jcell < 1) cycle
4296
4297 IF (any([cell_k(1), cell_k(2), cell_k(3)] < kp_index_lbounds) .OR. &
4298 any([cell_k(1), cell_k(2), cell_k(3)] > kp_index_ubounds)) cycle
4299
4300 kcell = cell_to_index(cell_k(1), cell_k(2), cell_k(3))
4301 IF (kcell > nimg .OR. kcell < 1) cycle
4302
4303 IF (dik > ri_data%kp_RI_range) cycle
4304 ri_cells(kcell) = 1
4305
4306 IF (jcell == 1 .AND. iatom == jatom) nri_per_atom(iatom) = nri_per_atom(iatom) + ri_data%bsizes_RI(katom)
4307 END DO
4308 CALL neighbor_list_3c_iterator_destroy(nl_3c_iter)
4309 CALL neighbor_list_3c_destroy(nl_3c)
4310 CALL para_env%sum(ri_cells)
4311 CALL para_env%sum(nri_per_atom)
4312
4313 ALLOCATE (ri_data%img_to_RI_cell(nimg))
4314 ri_data%ncell_RI = 0
4315 ri_data%img_to_RI_cell = 0
4316 DO i_img = 1, nimg
4317 IF (ri_cells(i_img) > 0) THEN
4318 ri_data%ncell_RI = ri_data%ncell_RI + 1
4319 ri_data%img_to_RI_cell(i_img) = ri_data%ncell_RI
4320 END IF
4321 END DO
4322
4323 ALLOCATE (ri_data%RI_cell_to_img(ri_data%ncell_RI))
4324 DO i_img = 1, nimg
4325 IF (ri_data%img_to_RI_cell(i_img) > 0) ri_data%RI_cell_to_img(ri_data%img_to_RI_cell(i_img)) = i_img
4326 END DO
4327
4328 !Print some info
4329 IF (ri_data%unit_nr > 0) THEN
4330 WRITE (ri_data%unit_nr, fmt="(/T3,A,I29)") &
4331 "KP-HFX_RI_INFO| Number of RI-KP parallel groups:", ngroups
4332 WRITE (ri_data%unit_nr, fmt="(T3,A,F31.3,A)") &
4333 "KP-HFX_RI_INFO| RI basis extension radius:", ri_data%kp_RI_range*angstrom, " Ang"
4334 WRITE (ri_data%unit_nr, fmt="(T3,A,F12.3,A, F6.3, A)") &
4335 "KP-HFX_RI_INFO| RI basis bump factor and bump radius:", bump_fact, " /", &
4336 ri_data%kp_bump_rad*angstrom, " Ang"
4337 WRITE (ri_data%unit_nr, fmt="(T3,A,I16,A)") &
4338 "KP-HFX_RI_INFO| The extended RI bases cover up to ", ri_data%ncell_RI, " unit cells"
4339 WRITE (ri_data%unit_nr, fmt="(T3,A,I18)") &
4340 "KP-HFX_RI_INFO| Average number of sgf in extended RI bases:", sum(nri_per_atom)/natom
4341 WRITE (ri_data%unit_nr, fmt="(T3,A,F13.3,A)") &
4342 "KP-HFX_RI_INFO| Consider all image cells within a radius of ", ri_data%kp_image_range*angstrom, " Ang"
4343 WRITE (ri_data%unit_nr, fmt="(T3,A,I27/)") &
4344 "KP-HFX_RI_INFO| Number of image cells considered: ", ri_data%nimg
4345 CALL m_flush(ri_data%unit_nr)
4346 END IF
4347
4348 CALL timestop(handle)
4349
4350 END SUBROUTINE get_kp_and_ri_images
4351
4352! **************************************************************************************************
4353!> \brief A routine that creates tensors structure for rho_ao and 3c_ints in a stacked format for
4354!> the efficient contractions of rho_sigma^0,lambda^c * (mu^0 sigam^a | P) => TAS tensors
4355!> \param res_stack ...
4356!> \param rho_stack ...
4357!> \param ints_stack ...
4358!> \param rho_template ...
4359!> \param ints_template ...
4360!> \param stack_size ...
4361!> \param ri_data ...
4362!> \param qs_env ...
4363!> \note The result tensor has the exact same shape and distribution as the integral tensor
4364! **************************************************************************************************
4365 SUBROUTINE get_stack_tensors(res_stack, rho_stack, ints_stack, rho_template, ints_template, &
4366 stack_size, ri_data, qs_env)
4367 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: res_stack, rho_stack, ints_stack
4368 TYPE(dbt_type), INTENT(INOUT) :: rho_template, ints_template
4369 INTEGER, INTENT(IN) :: stack_size
4370 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4371 TYPE(qs_environment_type), POINTER :: qs_env
4372
4373 INTEGER :: is, nblks, nblks_3c(3), pdims_3d(3)
4374 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_ri_ext, bsizes_stack, dist1, &
4375 dist2, dist3, dist_stack1, &
4376 dist_stack2, dist_stack3
4377 TYPE(dbt_distribution_type) :: t_dist
4378 TYPE(dbt_pgrid_type) :: pgrid
4379 TYPE(mp_para_env_type), POINTER :: para_env
4380
4381 NULLIFY (para_env)
4382
4383 CALL get_qs_env(qs_env, para_env=para_env)
4384
4385 nblks = SIZE(ri_data%bsizes_AO_split)
4386 ALLOCATE (bsizes_stack(stack_size*nblks))
4387 DO is = 1, stack_size
4388 bsizes_stack((is - 1)*nblks + 1:is*nblks) = ri_data%bsizes_AO_split(:)
4389 END DO
4390
4391 ALLOCATE (dist1(nblks), dist2(nblks), dist_stack1(stack_size*nblks), dist_stack2(stack_size*nblks))
4392 CALL dbt_get_info(rho_template, proc_dist_1=dist1, proc_dist_2=dist2)
4393 DO is = 1, stack_size
4394 dist_stack1((is - 1)*nblks + 1:is*nblks) = dist1(:)
4395 dist_stack2((is - 1)*nblks + 1:is*nblks) = dist2(:)
4396 END DO
4397
4398 !First 2c tensor matches the distribution of template
4399 !It is stacked in both directions
4400 CALL dbt_distribution_new(t_dist, ri_data%pgrid_2d, dist_stack1, dist_stack2)
4401 CALL dbt_create(rho_stack(1), "RHO_stack", t_dist, [1], [2], bsizes_stack, bsizes_stack)
4402 CALL dbt_distribution_destroy(t_dist)
4403 DEALLOCATE (dist1, dist2, dist_stack1, dist_stack2)
4404
4405 !Second 2c tensor has optimal distribution on the 2d pgrid
4406 CALL create_2c_tensor(rho_stack(2), dist1, dist2, ri_data%pgrid_2d, bsizes_stack, bsizes_stack, name="RHO_stack")
4407 DEALLOCATE (dist1, dist2)
4408
4409 CALL dbt_get_info(ints_template, nblks_total=nblks_3c)
4410 ALLOCATE (dist1(nblks_3c(1)), dist2(nblks_3c(2)), dist3(nblks_3c(3)))
4411 ALLOCATE (dist_stack3(stack_size*nblks_3c(3)), bsizes_ri_ext(nblks_3c(2)))
4412 CALL dbt_get_info(ints_template, proc_dist_1=dist1, proc_dist_2=dist2, &
4413 proc_dist_3=dist3, blk_size_2=bsizes_ri_ext)
4414 DO is = 1, stack_size
4415 dist_stack3((is - 1)*nblks_3c(3) + 1:is*nblks_3c(3)) = dist3(:)
4416 END DO
4417
4418 !First 3c tensor matches the distribution of template
4419 CALL dbt_distribution_new(t_dist, ri_data%pgrid_1, dist1, dist2, dist_stack3)
4420 CALL dbt_create(ints_stack(1), "ints_stack", t_dist, [1, 2], [3], ri_data%bsizes_AO_split, &
4421 bsizes_ri_ext, bsizes_stack)
4422 CALL dbt_distribution_destroy(t_dist)
4423 DEALLOCATE (dist1, dist2, dist3, dist_stack3)
4424
4425 !Second 3c tensor has optimal pgrid
4426 pdims_3d = 0
4427 CALL dbt_pgrid_create(para_env, pdims_3d, pgrid, tensor_dims=[nblks_3c(1), nblks_3c(2), stack_size*nblks_3c(3)])
4428 CALL create_3c_tensor(ints_stack(2), dist1, dist2, dist3, pgrid, ri_data%bsizes_AO_split, &
4429 bsizes_ri_ext, bsizes_stack, [1, 2], [3], name="ints_stack")
4430 DEALLOCATE (dist1, dist2, dist3)
4431 CALL dbt_pgrid_destroy(pgrid)
4432
4433 !The result tensor has the same shape and dist as the integral tensor
4434 CALL dbt_create(ints_stack(1), res_stack(1))
4435 CALL dbt_create(ints_stack(2), res_stack(2))
4436
4437 END SUBROUTINE get_stack_tensors
4438
4439! **************************************************************************************************
4440!> \brief Fill the stack of 3c tensors accrding to the order in the images input
4441!> \param t_3c_stack ...
4442!> \param t_3c_in ...
4443!> \param images ...
4444!> \param stack_dim ...
4445!> \param ri_data ...
4446!> \param filter_at ...
4447!> \param filter_dim ...
4448!> \param idx_to_at ...
4449!> \param img_bounds ...
4450! **************************************************************************************************
4451 SUBROUTINE fill_3c_stack(t_3c_stack, t_3c_in, images, stack_dim, ri_data, filter_at, filter_dim, &
4452 idx_to_at, img_bounds)
4453 TYPE(dbt_type), INTENT(INOUT) :: t_3c_stack
4454 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_in
4455 INTEGER, DIMENSION(:), INTENT(INOUT) :: images
4456 INTEGER, INTENT(IN) :: stack_dim
4457 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4458 INTEGER, INTENT(IN), OPTIONAL :: filter_at, filter_dim
4459 INTEGER, DIMENSION(:), INTENT(INOUT), OPTIONAL :: idx_to_at
4460 INTEGER, INTENT(IN), OPTIONAL :: img_bounds(2)
4461
4462 INTEGER :: dest(3), i_img, idx, ind(3), lb, nblks, &
4463 nimg, offset, ub
4464 LOGICAL :: do_filter, found
4465 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
4466 TYPE(dbt_iterator_type) :: iter
4467
4468 !We loop over the a images from the ac_pairs, then copy the 3c ints to the correct spot in
4469 !in the stack tensor (corresponding to pair index). Distributions match by construction
4470 nimg = ri_data%nimg
4471 nblks = SIZE(ri_data%bsizes_AO_split)
4472
4473 do_filter = .false.
4474 IF (PRESENT(filter_at) .AND. PRESENT(filter_dim) .AND. PRESENT(idx_to_at)) do_filter = .true.
4475
4476 lb = 1
4477 ub = nimg
4478 offset = 0
4479 IF (PRESENT(img_bounds)) THEN
4480 lb = img_bounds(1)
4481 ub = img_bounds(2) - 1
4482 offset = lb - 1
4483 END IF
4484
4485 DO idx = lb, ub
4486 i_img = images(idx)
4487 IF (i_img == 0 .OR. i_img > nimg) cycle
4488
4489!$OMP PARALLEL DEFAULT(NONE) &
4490!$OMP SHARED(idx,i_img,t_3c_in,t_3c_stack,nblks,stack_dim,filter_at,filter_dim,idx_to_at,do_filter,offset) &
4491!$OMP PRIVATE(iter,ind,blk,found,dest)
4492 CALL dbt_iterator_start(iter, t_3c_in(i_img))
4493 DO WHILE (dbt_iterator_blocks_left(iter))
4494 CALL dbt_iterator_next_block(iter, ind)
4495 CALL dbt_get_block(t_3c_in(i_img), ind, blk, found)
4496 IF (.NOT. found) cycle
4497
4498 IF (do_filter) THEN
4499 IF (.NOT. idx_to_at(ind(filter_dim)) == filter_at) cycle
4500 END IF
4501
4502 IF (stack_dim == 1) THEN
4503 dest = [(idx - offset - 1)*nblks + ind(1), ind(2), ind(3)]
4504 ELSE IF (stack_dim == 2) THEN
4505 dest = [ind(1), (idx - offset - 1)*nblks + ind(2), ind(3)]
4506 ELSE
4507 dest = [ind(1), ind(2), (idx - offset - 1)*nblks + ind(3)]
4508 END IF
4509
4510 CALL dbt_put_block(t_3c_stack, dest, shape(blk), blk)
4511 DEALLOCATE (blk)
4512 END DO
4513 CALL dbt_iterator_stop(iter)
4514!$OMP END PARALLEL
4515 END DO !i_img
4516 CALL dbt_finalize(t_3c_stack)
4517
4518 END SUBROUTINE fill_3c_stack
4519
4520! **************************************************************************************************
4521!> \brief Fill the stack of 2c tensors based on the content of images input
4522!> \param t_2c_stack ...
4523!> \param t_2c_in ...
4524!> \param images ...
4525!> \param stack_dim ...
4526!> \param ri_data ...
4527!> \param img_bounds ...
4528!> \param shift ...
4529! **************************************************************************************************
4530 SUBROUTINE fill_2c_stack(t_2c_stack, t_2c_in, images, stack_dim, ri_data, img_bounds, shift)
4531 TYPE(dbt_type), INTENT(INOUT) :: t_2c_stack
4532 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_in
4533 INTEGER, DIMENSION(:), INTENT(INOUT) :: images
4534 INTEGER, INTENT(IN) :: stack_dim
4535 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4536 INTEGER, INTENT(IN), OPTIONAL :: img_bounds(2), shift
4537
4538 INTEGER :: dest(2), i_img, idx, ind(2), lb, &
4539 my_shift, nblks, nimg, offset, ub
4540 LOGICAL :: found
4541 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
4542 TYPE(dbt_iterator_type) :: iter
4543
4544 !We loop over the a images from the ac_pairs, then copy the 3c ints to the correct spot in
4545 !in the stack tensor (corresponding to pair index). Distributions match by construction
4546 nimg = ri_data%nimg
4547 nblks = SIZE(ri_data%bsizes_AO_split)
4548
4549 lb = 1
4550 ub = nimg
4551 offset = 0
4552 IF (PRESENT(img_bounds)) THEN
4553 lb = img_bounds(1)
4554 ub = img_bounds(2) - 1
4555 offset = lb - 1
4556 END IF
4557
4558 my_shift = 1
4559 IF (PRESENT(shift)) my_shift = shift
4560
4561 DO idx = lb, ub
4562 i_img = images(idx)
4563 IF (i_img == 0 .OR. i_img > nimg) cycle
4564
4565!$OMP PARALLEL DEFAULT(NONE) SHARED(idx,i_img,t_2c_in,t_2c_stack,nblks,stack_dim,offset,my_shift) &
4566!$OMP PRIVATE(iter,ind,blk,found,dest)
4567 CALL dbt_iterator_start(iter, t_2c_in(i_img))
4568 DO WHILE (dbt_iterator_blocks_left(iter))
4569 CALL dbt_iterator_next_block(iter, ind)
4570 CALL dbt_get_block(t_2c_in(i_img), ind, blk, found)
4571 IF (.NOT. found) cycle
4572
4573 IF (stack_dim == 1) THEN
4574 dest = [(idx - offset - 1)*nblks + ind(1), (my_shift - 1)*nblks + ind(2)]
4575 ELSE
4576 dest = [(my_shift - 1)*nblks + ind(1), (idx - offset - 1)*nblks + ind(2)]
4577 END IF
4578
4579 CALL dbt_put_block(t_2c_stack, dest, shape(blk), blk)
4580 DEALLOCATE (blk)
4581 END DO
4582 CALL dbt_iterator_stop(iter)
4583!$OMP END PARALLEL
4584 END DO !idx
4585 CALL dbt_finalize(t_2c_stack)
4586
4587 END SUBROUTINE fill_2c_stack
4588
4589! **************************************************************************************************
4590!> \brief Unstacks a stacked 3c tensor containing t_3c_apc
4591!> \param t_3c_apc ...
4592!> \param t_stacked ...
4593!> \param idx ...
4594! **************************************************************************************************
4595 SUBROUTINE unstack_t_3c_apc(t_3c_apc, t_stacked, idx)
4596 TYPE(dbt_type), INTENT(INOUT) :: t_3c_apc, t_stacked
4597 INTEGER, INTENT(IN) :: idx
4598
4599 INTEGER :: current_idx
4600 INTEGER, DIMENSION(3) :: ind, nblks_3c
4601 LOGICAL :: found
4602 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
4603 TYPE(dbt_iterator_type) :: iter
4604
4605 !Note: t_3c_apc and t_stacked must have the same ditribution
4606 CALL dbt_get_info(t_3c_apc, nblks_total=nblks_3c)
4607
4608!$OMP PARALLEL DEFAULT(NONE) SHARED(t_3c_apc,t_stacked,idx,nblks_3c) PRIVATE(iter,ind,blk,found,current_idx)
4609 CALL dbt_iterator_start(iter, t_stacked)
4610 DO WHILE (dbt_iterator_blocks_left(iter))
4611 CALL dbt_iterator_next_block(iter, ind)
4612
4613 !tensor is stacked along the 3rd dimension
4614 current_idx = (ind(3) - 1)/nblks_3c(3) + 1
4615 IF (.NOT. idx == current_idx) cycle
4616
4617 CALL dbt_get_block(t_stacked, ind, blk, found)
4618 IF (.NOT. found) cycle
4619
4620 CALL dbt_put_block(t_3c_apc, [ind(1), ind(2), ind(3) - (idx - 1)*nblks_3c(3)], shape(blk), blk)
4621 DEALLOCATE (blk)
4622 END DO
4623 CALL dbt_iterator_stop(iter)
4624!$OMP END PARALLEL
4625
4626 END SUBROUTINE unstack_t_3c_apc
4627
4628! **************************************************************************************************
4629!> \brief copies the 3c integrals correspoinding to a single atom mu from the general (P^0| mu^0 sigam^a)
4630!> \param t_3c_at ...
4631!> \param t_3c_ints ...
4632!> \param iatom ...
4633!> \param dim_at ...
4634!> \param idx_to_at ...
4635! **************************************************************************************************
4636 SUBROUTINE get_atom_3c_ints(t_3c_at, t_3c_ints, iatom, dim_at, idx_to_at)
4637 TYPE(dbt_type), INTENT(INOUT) :: t_3c_at, t_3c_ints
4638 INTEGER, INTENT(IN) :: iatom, dim_at
4639 INTEGER, DIMENSION(:), INTENT(IN) :: idx_to_at
4640
4641 INTEGER, DIMENSION(3) :: ind
4642 LOGICAL :: found
4643 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
4644 TYPE(dbt_iterator_type) :: iter
4645
4646!$OMP PARALLEL DEFAULT(NONE) SHARED(t_3c_ints,t_3c_at,iatom,idx_to_at,dim_at) PRIVATE(iter,ind,blk,found)
4647 CALL dbt_iterator_start(iter, t_3c_ints)
4648 DO WHILE (dbt_iterator_blocks_left(iter))
4649 CALL dbt_iterator_next_block(iter, ind)
4650 IF (.NOT. idx_to_at(ind(dim_at)) == iatom) cycle
4651
4652 CALL dbt_get_block(t_3c_ints, ind, blk, found)
4653 IF (.NOT. found) cycle
4654
4655 CALL dbt_put_block(t_3c_at, ind, shape(blk), blk)
4656 DEALLOCATE (blk)
4657 END DO
4658 CALL dbt_iterator_stop(iter)
4659!$OMP END PARALLEL
4660 CALL dbt_finalize(t_3c_at)
4661
4662 END SUBROUTINE get_atom_3c_ints
4663
4664! **************************************************************************************************
4665!> \brief Precalculate the 3c and 2c derivatives tensors
4666!> \param t_3c_der_RI ...
4667!> \param t_3c_der_AO ...
4668!> \param mat_der_pot ...
4669!> \param t_2c_der_metric ...
4670!> \param ri_data ...
4671!> \param qs_env ...
4672! **************************************************************************************************
4673 SUBROUTINE precalc_derivatives(t_3c_der_RI, t_3c_der_AO, mat_der_pot, t_2c_der_metric, ri_data, qs_env)
4674 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_der_ri, t_3c_der_ao
4675 TYPE(dbcsr_type), DIMENSION(:, :), INTENT(INOUT) :: mat_der_pot
4676 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_2c_der_metric
4677 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4678 TYPE(qs_environment_type), POINTER :: qs_env
4679
4680 CHARACTER(LEN=*), PARAMETER :: routinen = 'precalc_derivatives'
4681
4682 INTEGER :: handle, handle2, i_img, i_mem, i_ri, &
4683 i_xyz, iatom, n_mem, natom, nblks_ri, &
4684 ncell_ri, nimg, nkind, nthreads
4685 INTEGER(int_8) :: nze
4686 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_ri_ext, bsizes_ri_ext_split, dist_ao_1, &
4687 dist_ao_2, dist_ri, dist_ri_ext, dummy_end, dummy_start, end_blocks, start_blocks
4688 INTEGER, DIMENSION(3) :: pcoord, pdims
4689 INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
4690 REAL(dp) :: occ
4691 TYPE(dbcsr_distribution_type) :: dbcsr_dist
4692 TYPE(dbcsr_type) :: dbcsr_template
4693 TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:, :) :: mat_der_metric
4694 TYPE(dbt_distribution_type) :: t_dist
4695 TYPE(dbt_pgrid_type) :: pgrid
4696 TYPE(dbt_type) :: t_3c_template
4697 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :, :) :: t_3c_der_ao_prv, t_3c_der_ri_prv
4698 TYPE(dft_control_type), POINTER :: dft_control
4699 TYPE(distribution_2d_type), POINTER :: dist_2d
4700 TYPE(distribution_3d_type) :: dist_3d
4701 TYPE(gto_basis_set_p_type), ALLOCATABLE, &
4702 DIMENSION(:), TARGET :: basis_set_ao, basis_set_ri
4703 TYPE(mp_cart_type) :: mp_comm_t3c
4704 TYPE(mp_para_env_type), POINTER :: para_env
4705 TYPE(neighbor_list_3c_type) :: nl_3c
4706 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
4707 POINTER :: nl_2c
4708 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
4709 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
4710
4711 NULLIFY (qs_kind_set, dist_2d, nl_2c, particle_set, dft_control, para_env, row_bsize, col_bsize)
4712
4713 CALL timeset(routinen, handle)
4714
4715 CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, distribution_2d=dist_2d, natom=natom, &
4716 particle_set=particle_set, dft_control=dft_control, para_env=para_env)
4717
4718 nimg = ri_data%nimg
4719 ncell_ri = ri_data%ncell_RI
4720
4721 ALLOCATE (basis_set_ri(nkind), basis_set_ao(nkind))
4722 CALL basis_set_list_setup(basis_set_ri, ri_data%ri_basis_type, qs_kind_set)
4723 CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_ri)
4724 CALL basis_set_list_setup(basis_set_ao, ri_data%orb_basis_type, qs_kind_set)
4725 CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_ao)
4726
4727 !Dealing with the 3c derivatives
4728 nthreads = 1
4729!$ nthreads = omp_get_num_threads()
4730 pdims = 0
4731 CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[max(1, natom/(ri_data%n_mem*nthreads)), natom, natom])
4732
4733 CALL create_3c_tensor(t_3c_template, dist_ao_1, dist_ao_2, dist_ri, pgrid, &
4734 ri_data%bsizes_AO, ri_data%bsizes_AO, ri_data%bsizes_RI, &
4735 map1=[1, 2], map2=[3], name="tmp")
4736 CALL dbt_destroy(t_3c_template)
4737
4738 !We stack the RI basis images. Keep consistent distribution
4739 nblks_ri = SIZE(ri_data%bsizes_RI_split)
4740 ALLOCATE (dist_ri_ext(natom*ncell_ri))
4741 ALLOCATE (bsizes_ri_ext(natom*ncell_ri))
4742 ALLOCATE (bsizes_ri_ext_split(nblks_ri*ncell_ri))
4743 DO i_ri = 1, ncell_ri
4744 bsizes_ri_ext((i_ri - 1)*natom + 1:i_ri*natom) = ri_data%bsizes_RI(:)
4745 dist_ri_ext((i_ri - 1)*natom + 1:i_ri*natom) = dist_ri(:)
4746 bsizes_ri_ext_split((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = ri_data%bsizes_RI_split(:)
4747 END DO
4748
4749 CALL dbt_distribution_new(t_dist, pgrid, dist_ao_1, dist_ao_2, dist_ri_ext)
4750 CALL dbt_create(t_3c_template, "KP_3c_der", t_dist, [1, 2], [3], &
4751 ri_data%bsizes_AO, ri_data%bsizes_AO, bsizes_ri_ext)
4752 CALL dbt_distribution_destroy(t_dist)
4753
4754 ALLOCATE (t_3c_der_ri_prv(nimg, 1, 3), t_3c_der_ao_prv(nimg, 1, 3))
4755 DO i_xyz = 1, 3
4756 DO i_img = 1, nimg
4757 CALL dbt_create(t_3c_template, t_3c_der_ri_prv(i_img, 1, i_xyz))
4758 CALL dbt_create(t_3c_template, t_3c_der_ao_prv(i_img, 1, i_xyz))
4759 END DO
4760 END DO
4761 CALL dbt_destroy(t_3c_template)
4762
4763 CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
4764 CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
4765 CALL distribution_3d_create(dist_3d, dist_ao_1, dist_ao_2, dist_ri, &
4766 nkind, particle_set, mp_comm_t3c, own_comm=.true.)
4767 DEALLOCATE (dist_ri, dist_ao_1, dist_ao_2)
4768 CALL dbt_pgrid_destroy(pgrid)
4769
4770 CALL build_3c_neighbor_lists(nl_3c, basis_set_ao, basis_set_ao, basis_set_ri, dist_3d, ri_data%ri_metric, &
4771 "HFX_3c_nl", qs_env, op_pos=2, sym_jk=.false., own_dist=.true.)
4772
4773 n_mem = ri_data%n_mem
4774 CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy_start, dummy_end, &
4775 start_blocks, end_blocks)
4776 DEALLOCATE (dummy_start, dummy_end)
4777
4778 CALL create_3c_tensor(t_3c_template, dist_ri, dist_ao_1, dist_ao_2, ri_data%pgrid_2, &
4779 bsizes_ri_ext_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
4780 map1=[1], map2=[2, 3], name="der (RI | AO AO)")
4781 DO i_xyz = 1, 3
4782 DO i_img = 1, nimg
4783 CALL dbt_create(t_3c_template, t_3c_der_ri(i_img, i_xyz))
4784 CALL dbt_create(t_3c_template, t_3c_der_ao(i_img, i_xyz))
4785 END DO
4786 END DO
4787
4788 DO i_mem = 1, n_mem
4789 CALL build_3c_derivatives(t_3c_der_ao_prv, t_3c_der_ri_prv, ri_data%filter_eps, qs_env, &
4790 nl_3c, basis_set_ao, basis_set_ao, basis_set_ri, &
4791 ri_data%ri_metric, der_eps=ri_data%eps_schwarz_forces, op_pos=2, &
4792 do_kpoints=.true., do_hfx_kpoints=.true., &
4793 bounds_k=[start_blocks(i_mem), end_blocks(i_mem)], &
4794 ri_range=ri_data%kp_RI_range, img_to_ri_cell=ri_data%img_to_RI_cell)
4795
4796 CALL timeset(routinen//"_cpy", handle2)
4797 !We go from (mu^0 sigma^i | P^j) to (P^i| sigma^j mu^0) and finally to (P^i| mu^0 sigma^j)
4798 DO i_img = 1, nimg
4799 DO i_xyz = 1, 3
4800 !derivative wrt to mu^0
4801 CALL get_tensor_occupancy(t_3c_der_ao_prv(i_img, 1, i_xyz), nze, occ)
4802 IF (nze > 0) THEN
4803 CALL dbt_copy(t_3c_der_ao_prv(i_img, 1, i_xyz), t_3c_template, &
4804 order=[3, 2, 1], move_data=.true.)
4805 CALL dbt_filter(t_3c_template, ri_data%filter_eps)
4806 CALL dbt_copy(t_3c_template, t_3c_der_ao(i_img, i_xyz), &
4807 order=[1, 3, 2], move_data=.true., summation=.true.)
4808 END IF
4809
4810 !derivative wrt to P^i
4811 CALL get_tensor_occupancy(t_3c_der_ri_prv(i_img, 1, i_xyz), nze, occ)
4812 IF (nze > 0) THEN
4813 CALL dbt_copy(t_3c_der_ri_prv(i_img, 1, i_xyz), t_3c_template, &
4814 order=[3, 2, 1], move_data=.true.)
4815 CALL dbt_filter(t_3c_template, ri_data%filter_eps)
4816 CALL dbt_copy(t_3c_template, t_3c_der_ri(i_img, i_xyz), &
4817 order=[1, 3, 2], move_data=.true., summation=.true.)
4818 END IF
4819 END DO
4820 END DO
4821 CALL timestop(handle2)
4822 END DO
4823 CALL dbt_destroy(t_3c_template)
4824
4825 CALL neighbor_list_3c_destroy(nl_3c)
4826 DO i_xyz = 1, 3
4827 DO i_img = 1, nimg
4828 CALL dbt_destroy(t_3c_der_ri_prv(i_img, 1, i_xyz))
4829 CALL dbt_destroy(t_3c_der_ao_prv(i_img, 1, i_xyz))
4830 END DO
4831 END DO
4832 DEALLOCATE (t_3c_der_ri_prv, t_3c_der_ao_prv)
4833
4834 !Reorder 3c derivatives to be consistant with ints
4835 CALL reorder_3c_derivs(t_3c_der_ri, ri_data)
4836 CALL reorder_3c_derivs(t_3c_der_ao, ri_data)
4837
4838 CALL timeset(routinen//"_2c", handle2)
4839 !The 2-center derivatives
4840 CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
4841 ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
4842 ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
4843 row_bsize(:) = ri_data%bsizes_RI
4844 col_bsize(:) = ri_data%bsizes_RI
4845
4846 CALL dbcsr_create(dbcsr_template, "2c_der", dbcsr_dist, dbcsr_type_no_symmetry, &
4847 row_bsize, col_bsize)
4848 CALL dbcsr_distribution_release(dbcsr_dist)
4849 DEALLOCATE (col_bsize, row_bsize)
4850
4851 ALLOCATE (mat_der_metric(nimg, 3))
4852 DO i_xyz = 1, 3
4853 DO i_img = 1, nimg
4854 CALL dbcsr_create(mat_der_pot(i_img, i_xyz), template=dbcsr_template)
4855 CALL dbcsr_create(mat_der_metric(i_img, i_xyz), template=dbcsr_template)
4856 END DO
4857 END DO
4858 CALL dbcsr_release(dbcsr_template)
4859
4860 !HFX potential derivatives
4861 CALL build_2c_neighbor_lists(nl_2c, basis_set_ri, basis_set_ri, ri_data%hfx_pot, &
4862 "HFX_2c_nl_pot", qs_env, sym_ij=.false., dist_2d=dist_2d)
4863 CALL build_2c_derivatives(mat_der_pot, ri_data%filter_eps_2c, qs_env, nl_2c, &
4864 basis_set_ri, basis_set_ri, ri_data%hfx_pot, do_kpoints=.true.)
4865 CALL release_neighbor_list_sets(nl_2c)
4866
4867 !RI metric derivatives
4868 CALL build_2c_neighbor_lists(nl_2c, basis_set_ri, basis_set_ri, ri_data%ri_metric, &
4869 "HFX_2c_nl_pot", qs_env, sym_ij=.false., dist_2d=dist_2d)
4870 CALL build_2c_derivatives(mat_der_metric, ri_data%filter_eps_2c, qs_env, nl_2c, &
4871 basis_set_ri, basis_set_ri, ri_data%ri_metric, do_kpoints=.true.)
4872 CALL release_neighbor_list_sets(nl_2c)
4873
4874 !Get into extended RI basis and tensor format
4875 DO i_xyz = 1, 3
4876 DO iatom = 1, natom
4877 CALL dbt_create(ri_data%t_2c_inv(1, 1), t_2c_der_metric(iatom, i_xyz))
4878 CALL get_ext_2c_int(t_2c_der_metric(iatom, i_xyz), mat_der_metric(:, i_xyz), &
4879 iatom, iatom, 1, ri_data, qs_env)
4880 END DO
4881 DO i_img = 1, nimg
4882 CALL dbcsr_release(mat_der_metric(i_img, i_xyz))
4883 END DO
4884 END DO
4885 CALL timestop(handle2)
4886
4887 CALL timestop(handle)
4888
4889 END SUBROUTINE precalc_derivatives
4890
4891! **************************************************************************************************
4892!> \brief Update the forces due to the derivative of the a 2-center product d/dR (Q|R)
4893!> \param force ...
4894!> \param t_2c_contr A precontracted tensor containing sum_abcdPS (ab|P)(P|Q)^-1 (R|S)^-1 (S|cd) P_ac P_bd
4895!> \param t_2c_der the d/dR (Q|R) tensor, in all 3 cartesian directions
4896!> \param atom_of_kind ...
4897!> \param kind_of ...
4898!> \param img in which periodic image the second center of the tensor is
4899!> \param pref ...
4900!> \param ri_data ...
4901!> \param qs_env ...
4902!> \param work_virial ...
4903!> \param cell ...
4904!> \param particle_set ...
4905!> \param diag ...
4906!> \param offdiag ...
4907!> \note IMPORTANT: t_tc_contr and t_2c_der need to have the same distribution. Atomic block sizes are
4908!> assumed
4909! **************************************************************************************************
4910 SUBROUTINE get_2c_der_force(force, t_2c_contr, t_2c_der, atom_of_kind, kind_of, img, pref, &
4911 ri_data, qs_env, work_virial, cell, particle_set, diag, offdiag)
4912
4913 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
4914 TYPE(dbt_type), INTENT(INOUT) :: t_2c_contr
4915 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_der
4916 INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of
4917 INTEGER, INTENT(IN) :: img
4918 REAL(dp), INTENT(IN) :: pref
4919 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4920 TYPE(qs_environment_type), POINTER :: qs_env
4921 REAL(dp), DIMENSION(3, 3), INTENT(INOUT), OPTIONAL :: work_virial
4922 TYPE(cell_type), OPTIONAL, POINTER :: cell
4923 TYPE(particle_type), DIMENSION(:), OPTIONAL, &
4924 POINTER :: particle_set
4925 LOGICAL, INTENT(IN), OPTIONAL :: diag, offdiag
4926
4927 CHARACTER(LEN=*), PARAMETER :: routinen = 'get_2c_der_force'
4928
4929 INTEGER :: handle, i_img, i_ri, i_xyz, iat, &
4930 iat_of_kind, ikind, j_img, j_ri, &
4931 j_xyz, jat, jat_of_kind, jkind, natom
4932 INTEGER, DIMENSION(2) :: ind
4933 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
4934 LOGICAL :: found, my_diag, my_offdiag, use_virial
4935 REAL(dp) :: new_force
4936 REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET :: contr_blk, der_blk
4937 REAL(dp), DIMENSION(3) :: scoord
4938 TYPE(dbt_iterator_type) :: iter
4939 TYPE(kpoint_type), POINTER :: kpoints
4940
4941 NULLIFY (kpoints, index_to_cell)
4942
4943 !Loop over the blocks of d/dR (Q|R), contract with the corresponding block of t_2c_contr and
4944 !update the relevant force
4945
4946 CALL timeset(routinen, handle)
4947
4948 use_virial = .false.
4949 IF (PRESENT(work_virial) .AND. PRESENT(cell) .AND. PRESENT(particle_set)) use_virial = .true.
4950
4951 my_diag = .false.
4952 IF (PRESENT(diag)) my_diag = diag
4953
4954 my_offdiag = .false.
4955 IF (PRESENT(diag)) my_offdiag = offdiag
4956
4957 CALL get_qs_env(qs_env, kpoints=kpoints, natom=natom)
4958 CALL get_kpoint_info(kpoints, index_to_cell=index_to_cell)
4959
4960!$OMP PARALLEL DEFAULT(NONE) &
4961!$OMP SHARED(t_2c_der,t_2c_contr,work_virial,force,use_virial,natom,index_to_cell,ri_data,img) &
4962!$OMP SHARED(pref,atom_of_kind,kind_of,particle_set,cell,my_diag,my_offdiag) &
4963!$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force,i_RI,i_img,j_RI,j_img) &
4964!$OMP PRIVATE(iat,jat,iat_of_kind,jat_of_kind,ikind,jkind,scoord)
4965 DO i_xyz = 1, 3
4966 CALL dbt_iterator_start(iter, t_2c_der(i_xyz))
4967 DO WHILE (dbt_iterator_blocks_left(iter))
4968 CALL dbt_iterator_next_block(iter, ind)
4969
4970 !Only take forecs due to block diagonal or block off-diagonal, depending on arguments
4971 IF ((my_diag .AND. .NOT. my_offdiag) .OR. (.NOT. my_diag .AND. my_offdiag)) THEN
4972 IF (my_diag .AND. (ind(1) .NE. ind(2))) cycle
4973 IF (my_offdiag .AND. (ind(1) == ind(2))) cycle
4974 END IF
4975
4976 CALL dbt_get_block(t_2c_der(i_xyz), ind, der_blk, found)
4977 cpassert(found)
4978 CALL dbt_get_block(t_2c_contr, ind, contr_blk, found)
4979
4980 IF (found) THEN
4981
4982 !an element of d/dR (Q|R) corresponds to 2 things because of translational invariance
4983 !(Q'| R) = - (Q| R'), once wrt the center on Q, and once on R
4984 new_force = pref*sum(der_blk(:, :)*contr_blk(:, :))
4985
4986 i_ri = (ind(1) - 1)/natom + 1
4987 i_img = ri_data%RI_cell_to_img(i_ri)
4988 iat = ind(1) - (i_ri - 1)*natom
4989 iat_of_kind = atom_of_kind(iat)
4990 ikind = kind_of(iat)
4991
4992 j_ri = (ind(2) - 1)/natom + 1
4993 j_img = ri_data%RI_cell_to_img(j_ri)
4994 jat = ind(2) - (j_ri - 1)*natom
4995 jat_of_kind = atom_of_kind(jat)
4996 jkind = kind_of(jat)
4997
4998 !Force on iatom (first center)
4999!$OMP ATOMIC
5000 force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
5001 + new_force
5002
5003 IF (use_virial) THEN
5004
5005 CALL real_to_scaled(scoord, pbc(particle_set(iat)%r, cell), cell)
5006 scoord(:) = scoord(:) + real(index_to_cell(:, i_img), dp)
5007
5008 DO j_xyz = 1, 3
5009!$OMP ATOMIC
5010 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5011 END DO
5012 END IF
5013
5014 !Force on jatom (second center)
5015!$OMP ATOMIC
5016 force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) &
5017 - new_force
5018
5019 IF (use_virial) THEN
5020
5021 CALL real_to_scaled(scoord, pbc(particle_set(jat)%r, cell), cell)
5022 scoord(:) = scoord(:) + real(index_to_cell(:, j_img) + index_to_cell(:, img), dp)
5023
5024 DO j_xyz = 1, 3
5025!$OMP ATOMIC
5026 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) - new_force*scoord(j_xyz)
5027 END DO
5028 END IF
5029
5030 DEALLOCATE (contr_blk)
5031 END IF
5032
5033 DEALLOCATE (der_blk)
5034 END DO !iter
5035 CALL dbt_iterator_stop(iter)
5036
5037 END DO !i_xyz
5038!$OMP END PARALLEL
5039 CALL timestop(handle)
5040
5041 END SUBROUTINE get_2c_der_force
5042
5043! **************************************************************************************************
5044!> \brief This routines calculates the force contribution from a trace over 3D tensors, i.e.
5045!> force = sum_ijk A_ijk B_ijk., the B tensor is (P^0| sigma^0 lambda^img), with P in the
5046!> extended RI basis. Note that all tensors are stacked along the 3rd dimension
5047!> \param force ...
5048!> \param t_3c_contr ...
5049!> \param t_3c_der_1 ...
5050!> \param t_3c_der_2 ...
5051!> \param atom_of_kind ...
5052!> \param kind_of ...
5053!> \param idx_to_at_RI ...
5054!> \param idx_to_at_AO ...
5055!> \param i_images ...
5056!> \param lb_img ...
5057!> \param pref ...
5058!> \param ri_data ...
5059!> \param qs_env ...
5060!> \param work_virial ...
5061!> \param cell ...
5062!> \param particle_set ...
5063! **************************************************************************************************
5064 SUBROUTINE get_force_from_3c_trace(force, t_3c_contr, t_3c_der_1, t_3c_der_2, atom_of_kind, kind_of, &
5065 idx_to_at_RI, idx_to_at_AO, i_images, lb_img, pref, &
5066 ri_data, qs_env, work_virial, cell, particle_set)
5067
5068 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
5069 TYPE(dbt_type), INTENT(INOUT) :: t_3c_contr
5070 TYPE(dbt_type), DIMENSION(3), INTENT(INOUT) :: t_3c_der_1, t_3c_der_2
5071 INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at_ri, &
5072 idx_to_at_ao, i_images
5073 INTEGER, INTENT(IN) :: lb_img
5074 REAL(dp), INTENT(IN) :: pref
5075 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
5076 TYPE(qs_environment_type), POINTER :: qs_env
5077 REAL(dp), DIMENSION(3, 3), INTENT(INOUT), OPTIONAL :: work_virial
5078 TYPE(cell_type), OPTIONAL, POINTER :: cell
5079 TYPE(particle_type), DIMENSION(:), OPTIONAL, &
5080 POINTER :: particle_set
5081
5082 CHARACTER(LEN=*), PARAMETER :: routinen = 'get_force_from_3c_trace'
5083
5084 INTEGER :: handle, i_ri, i_xyz, iat, iat_of_kind, idx, ikind, j_xyz, jat, jat_of_kind, &
5085 jkind, kat, kat_of_kind, kkind, nblks_ao, nblks_ri, ri_img
5086 INTEGER, DIMENSION(3) :: ind
5087 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
5088 LOGICAL :: found, found_1, found_2, use_virial
5089 REAL(dp) :: new_force
5090 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), TARGET :: contr_blk, der_blk_1, der_blk_2, &
5091 der_blk_3
5092 REAL(dp), DIMENSION(3) :: scoord
5093 TYPE(dbt_iterator_type) :: iter
5094 TYPE(kpoint_type), POINTER :: kpoints
5095
5096 NULLIFY (kpoints, index_to_cell)
5097
5098 CALL timeset(routinen, handle)
5099
5100 CALL get_qs_env(qs_env, kpoints=kpoints)
5101 CALL get_kpoint_info(kpoints, index_to_cell=index_to_cell)
5102
5103 nblks_ri = SIZE(ri_data%bsizes_RI_split)
5104 nblks_ao = SIZE(ri_data%bsizes_AO_split)
5105
5106 use_virial = .false.
5107 IF (PRESENT(work_virial) .AND. PRESENT(cell) .AND. PRESENT(particle_set)) use_virial = .true.
5108
5109!$OMP PARALLEL DEFAULT(NONE) &
5110!$OMP SHARED(t_3c_der_1, t_3c_der_2,t_3c_contr,work_virial,force,use_virial,index_to_cell,i_images,lb_img) &
5111!$OMP SHARED(pref,idx_to_at_AO,atom_of_kind,kind_of,particle_set,cell,idx_to_at_RI,ri_data,nblks_RI,nblks_AO) &
5112!$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk_1,contr_blk,found,new_force,iat,iat_of_kind,ikind,scoord) &
5113!$OMP PRIVATE(jat,kat,jat_of_kind,kat_of_kind,jkind,kkind,i_RI,RI_img,der_blk_2,der_blk_3,found_1,found_2,idx)
5114 CALL dbt_iterator_start(iter, t_3c_contr)
5115 DO WHILE (dbt_iterator_blocks_left(iter))
5116 CALL dbt_iterator_next_block(iter, ind)
5117
5118 CALL dbt_get_block(t_3c_contr, ind, contr_blk, found)
5119 IF (found) THEN
5120
5121 DO i_xyz = 1, 3
5122 CALL dbt_get_block(t_3c_der_1(i_xyz), ind, der_blk_1, found_1)
5123 IF (.NOT. found_1) THEN
5124 DEALLOCATE (der_blk_1)
5125 ALLOCATE (der_blk_1(SIZE(contr_blk, 1), SIZE(contr_blk, 2), SIZE(contr_blk, 3)))
5126 der_blk_1(:, :, :) = 0.0_dp
5127 END IF
5128 CALL dbt_get_block(t_3c_der_2(i_xyz), ind, der_blk_2, found_2)
5129 IF (.NOT. found_2) THEN
5130 DEALLOCATE (der_blk_2)
5131 ALLOCATE (der_blk_2(SIZE(contr_blk, 1), SIZE(contr_blk, 2), SIZE(contr_blk, 3)))
5132 der_blk_2(:, :, :) = 0.0_dp
5133 END IF
5134
5135 ALLOCATE (der_blk_3(SIZE(contr_blk, 1), SIZE(contr_blk, 2), SIZE(contr_blk, 3)))
5136 der_blk_3(:, :, :) = -(der_blk_1(:, :, :) + der_blk_2(:, :, :))
5137
5138 !We assume the tensors are in the format (P^0| sigma^0 mu^a+c-b), with P a member of the
5139 !extended RI basis set
5140
5141 !Force for the first center (RI extended basis, zero cell)
5142 new_force = pref*sum(der_blk_1(:, :, :)*contr_blk(:, :, :))
5143
5144 i_ri = (ind(1) - 1)/nblks_ri + 1
5145 ri_img = ri_data%RI_cell_to_img(i_ri)
5146 iat = idx_to_at_ri(ind(1) - (i_ri - 1)*nblks_ri)
5147 iat_of_kind = atom_of_kind(iat)
5148 ikind = kind_of(iat)
5149
5150!$OMP ATOMIC
5151 force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
5152 + new_force
5153
5154 IF (use_virial) THEN
5155
5156 CALL real_to_scaled(scoord, pbc(particle_set(iat)%r, cell), cell)
5157 scoord(:) = scoord(:) + real(index_to_cell(:, ri_img), dp)
5158
5159 DO j_xyz = 1, 3
5160!$OMP ATOMIC
5161 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5162 END DO
5163 END IF
5164
5165 !Force with respect to the second center (AO basis, zero cell)
5166 new_force = pref*sum(der_blk_2(:, :, :)*contr_blk(:, :, :))
5167 jat = idx_to_at_ao(ind(2))
5168 jat_of_kind = atom_of_kind(jat)
5169 jkind = kind_of(jat)
5170
5171!$OMP ATOMIC
5172 force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) &
5173 + new_force
5174
5175 IF (use_virial) THEN
5176
5177 CALL real_to_scaled(scoord, pbc(particle_set(jat)%r, cell), cell)
5178
5179 DO j_xyz = 1, 3
5180!$OMP ATOMIC
5181 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5182 END DO
5183 END IF
5184
5185 !Force with respect to the third center (AO basis, apc_img - b_img)
5186 !Note: tensors are stacked along the 3rd direction
5187 new_force = pref*sum(der_blk_3(:, :, :)*contr_blk(:, :, :))
5188 idx = (ind(3) - 1)/nblks_ao + 1
5189 kat = idx_to_at_ao(ind(3) - (idx - 1)*nblks_ao)
5190 kat_of_kind = atom_of_kind(kat)
5191 kkind = kind_of(kat)
5192
5193!$OMP ATOMIC
5194 force(kkind)%fock_4c(i_xyz, kat_of_kind) = force(kkind)%fock_4c(i_xyz, kat_of_kind) &
5195 + new_force
5196
5197 IF (use_virial) THEN
5198 CALL real_to_scaled(scoord, pbc(particle_set(kat)%r, cell), cell)
5199 scoord(:) = scoord(:) + real(index_to_cell(:, i_images(lb_img - 1 + idx)), dp)
5200
5201 DO j_xyz = 1, 3
5202!$OMP ATOMIC
5203 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5204 END DO
5205 END IF
5206
5207 DEALLOCATE (der_blk_1, der_blk_2, der_blk_3)
5208 END DO !i_xyz
5209 DEALLOCATE (contr_blk)
5210 END IF !found
5211 END DO !iter
5212 CALL dbt_iterator_stop(iter)
5213!$OMP END PARALLEL
5214 CALL timestop(handle)
5215
5216 END SUBROUTINE get_force_from_3c_trace
5217
5218END MODULE
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static GRID_HOST_DEVICE double fac(const int i)
Factorial function, e.g. fac(5) = 5! = 120.
Definition grid_common.h:48
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
Types and set/get functions for auxiliary density matrix methods.
Definition admm_types.F:15
subroutine, public get_admm_env(admm_env, mo_derivs_aux_fit, mos_aux_fit, sab_aux_fit, sab_aux_fit_asymm, sab_aux_fit_vs_orb, matrix_s_aux_fit, matrix_s_aux_fit_kp, matrix_s_aux_fit_vs_orb, matrix_s_aux_fit_vs_orb_kp, task_list_aux_fit, matrix_ks_aux_fit, matrix_ks_aux_fit_kp, matrix_ks_aux_fit_im, matrix_ks_aux_fit_dft, matrix_ks_aux_fit_hfx, matrix_ks_aux_fit_dft_kp, matrix_ks_aux_fit_hfx_kp, rho_aux_fit, rho_aux_fit_buffer, admm_dm)
Get routine for the ADMM env.
Definition admm_types.F:593
Define the atomic kind types and their sub types.
subroutine, public get_atomic_kind_set(atomic_kind_set, atom_of_kind, kind_of, natom_of_kind, maxatom, natom, nshell, fist_potential_present, shell_present, shell_adiabatic, shell_check_distance, damping_present)
Get attributes of an atomic kind set.
subroutine, public get_gto_basis_set(gto_basis_set, name, aliases, norm_type, kind_radius, ncgf, nset, nsgf, cgf_symbol, sgf_symbol, norm_cgf, set_radius, lmax, lmin, lx, ly, lz, m, ncgf_set, npgf, nsgf_set, nshell, cphi, pgf_radius, sphi, scon, zet, first_cgf, first_sgf, l, last_cgf, last_sgf, n, gcc, maxco, maxl, maxpgf, maxsgf_set, maxshell, maxso, nco_sum, npgf_sum, nshell_sum, maxder, short_kind_radius)
...
Handles all functions related to the CELL.
Definition cell_types.F:15
subroutine, public scaled_to_real(r, s, cell)
Transform scaled cell coordinates real coordinates. r=h*s.
Definition cell_types.F:516
subroutine, public real_to_scaled(s, r, cell)
Transform real to scaled cell coordinates. s=h_inv*r.
Definition cell_types.F:486
various utilities that regard array of different kinds: output, allocation,... maybe it is not a good...
methods related to the blacs parallel environment
subroutine, public cp_blacs_env_release(blacs_env)
releases the given blacs_env
subroutine, public cp_blacs_env_create(blacs_env, para_env, blacs_grid_layout, blacs_repeatable, row_major, grid_2d)
allocates and initializes a type that represent a blacs context
Defines control structures, which contain the parameters and the settings for the DFT-based calculati...
Interface to (sca)lapack for the Cholesky based procedures.
subroutine, public cp_dbcsr_cholesky_decompose(matrix, n, para_env, blacs_env)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
subroutine, public cp_dbcsr_cholesky_invert(matrix, n, para_env, blacs_env, upper_to_full)
used to replace the cholesky decomposition by the inverse
Interface to (sca)lapack for the Cholesky based procedures.
subroutine, public cp_dbcsr_power(matrix, exponent, threshold, n_dependent, para_env, blacs_env, verbose, eigenvectors, eigenvalues)
...
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_dist2d_to_dist(dist2d, dist)
Creates a DBCSR distribution from a distribution_2d.
This is the start of a dbt_api, all publically needed functions are exported here....
Definition dbt_api.F:17
stores a mapping of 2D info (e.g. matrix) on a 2D processor distribution (i.e. blacs grid) where cpus...
subroutine, public distribution_2d_create(distribution_2d, blacs_env, local_rows_ptr, n_local_rows, local_cols_ptr, row_distribution_ptr, col_distribution_ptr, n_local_cols, n_row_distribution, n_col_distribution)
initializes the distribution_2d
subroutine, public distribution_2d_release(distribution_2d)
...
RI-methods for HFX and K-points. \auhtor Augustin Bussy (01.2023)
Definition hfx_ri_kp.F:13
subroutine, public hfx_ri_update_forces_kp(qs_env, ri_data, nspins, hf_fraction, rho_ao, use_virial)
Update the K-points RI-HFX forces.
Definition hfx_ri_kp.F:771
subroutine, public hfx_ri_update_ks_kp(qs_env, ri_data, ks_matrix, ehfx, rho_ao, geometry_did_change, nspins, hf_fraction)
Update the KS matrices for each real-space image.
Definition hfx_ri_kp.F:420
RI-methods for HFX.
Definition hfx_ri.F:12
subroutine, public get_force_from_3c_trace(force, t_3c_contr, t_3c_der, atom_of_kind, kind_of, idx_to_at, pref, do_mp2, deriv_dim)
This routines calculates the force contribution from a trace over 3D tensors, i.e....
Definition hfx_ri.F:3360
subroutine, public get_idx_to_atom(idx_to_at, bsizes_split, bsizes_orig)
a small utility function that returns the atom corresponding to a block of a split tensor
Definition hfx_ri.F:4106
subroutine, public hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_int_ri, t_2c_int_pot, t_3c_int, do_kpoints)
Calculate 2-center and 3-center integrals.
Definition hfx_ri.F:441
subroutine, public get_2c_der_force(force, t_2c_contr, t_2c_der, atom_of_kind, kind_of, idx_to_at, pref, do_mp2, do_ovlp)
Update the forces due to the derivative of the a 2-center product d/dR (Q|R)
Definition hfx_ri.F:3446
Types and set/get functions for HFX.
Definition hfx_types.F:15
collects all constants needed in input so that they can be used without circular dependencies
integer, parameter, public hfx_ri_do_2c_cholesky
integer, parameter, public hfx_ri_do_2c_diag
integer, parameter, public hfx_ri_do_2c_iter
integer, parameter, public do_potential_short
function that builds the hartree fock exchange section of the input
integer, parameter, public ri_pmat
objects that represent the structure of input sections and the data contained in an input section
subroutine, public section_vals_val_set(section_vals, keyword_name, i_rep_section, i_rep_val, val, l_val, i_val, r_val, c_val, l_vals_ptr, i_vals_ptr, r_vals_ptr, c_vals_ptr)
sets the requested value
recursive type(section_vals_type) function, pointer, public section_vals_get_subs_vals(section_vals, subsection_name, i_rep_section, can_return_null)
returns the values of the requested subsection
subroutine, public section_vals_val_get(section_vals, keyword_name, i_rep_section, i_rep_val, n_rep_val, val, l_val, i_val, r_val, c_val, l_vals, i_vals, r_vals, c_vals, explicit)
returns the requested value
Routines useful for iterative matrix calculations.
subroutine, public invert_hotelling(matrix_inverse, matrix, threshold, use_inv_as_guess, norm_convergence, filter_eps, accelerator_order, max_iter_lanczos, eps_lanczos, silent)
invert a symmetric positive definite matrix by Hotelling's method explicit symmetrization makes this ...
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public int_8
Definition kinds.F:54
integer, parameter, public dp
Definition kinds.F:34
Types and basic routines needed for a kpoint calculation.
subroutine, public get_kpoint_info(kpoint, kp_scheme, nkp_grid, kp_shift, symmetry, verbose, full_grid, use_real_wfn, eps_geo, parallel_group_size, kp_range, nkp, xkp, wkp, para_env, blacs_env_all, para_env_kp, para_env_inter_kp, blacs_env, kp_env, kp_aux_env, mpools, iogrp, nkp_groups, kp_dist, cell_to_index, index_to_cell, sab_nl, sab_nl_nosym)
Retrieve information from a kpoint environment.
2- and 3-center electron repulsion integral routines based on libint2 Currently available operators: ...
real(kind=dp), parameter, public cutoff_screen_factor
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition machine.F:106
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition machine.F:123
Collection of simple mathematical functions and subroutines.
Definition mathlib.F:15
subroutine, public diag(n, a, d, v)
Diagonalize matrix a. The eigenvalues are returned in vector d and the eigenvectors are returned in m...
Definition mathlib.F:1507
subroutine, public erfc_cutoff(eps, omg, r_cutoff)
compute a truncation radius for the shortrange operator
Definition mathlib.F:1689
Interface to the message passing library MPI.
Define methods related to particle_type.
subroutine, public get_particle_set(particle_set, qs_kind_set, first_sgf, last_sgf, nsgf, nmao, basis)
Get the components of a particle set.
Define the data structure for the particle information.
Definition of physical constants:
Definition physcon.F:68
real(kind=dp), parameter, public angstrom
Definition physcon.F:144
subroutine, public get_qs_env(qs_env, atomic_kind_set, qs_kind_set, cell, super_cell, cell_ref, use_ref_cell, kpoints, dft_control, mos, sab_orb, sab_all, qmmm, qmmm_periodic, sac_ae, sac_ppl, sac_lri, sap_ppnl, sab_vdw, sab_scp, sap_oce, sab_lrc, sab_se, sab_xtbe, sab_tbe, sab_core, sab_xb, sab_xtb_nonbond, sab_almo, sab_kp, sab_kp_nosym, particle_set, energy, force, matrix_h, matrix_h_im, matrix_ks, matrix_ks_im, matrix_vxc, run_rtp, rtp, matrix_h_kp, matrix_h_im_kp, matrix_ks_kp, matrix_ks_im_kp, matrix_vxc_kp, kinetic_kp, matrix_s_kp, matrix_w_kp, matrix_s_ri_aux_kp, matrix_s, matrix_s_ri_aux, matrix_w, matrix_p_mp2, matrix_p_mp2_admm, rho, rho_xc, pw_env, ewald_env, ewald_pw, active_space, mpools, input, para_env, blacs_env, scf_control, rel_control, kinetic, qs_charges, vppl, rho_core, rho_nlcc, rho_nlcc_g, ks_env, ks_qmmm_env, wf_history, scf_env, local_particles, local_molecules, distribution_2d, dbcsr_dist, molecule_kind_set, molecule_set, subsys, cp_subsys, oce, local_rho_set, rho_atom_set, task_list, task_list_soft, rho0_atom_set, rho0_mpole, rhoz_set, ecoul_1c, rho0_s_rs, rho0_s_gs, do_kpoints, has_unit_metric, requires_mo_derivs, mo_derivs, mo_loc_history, nkind, natom, nelectron_total, nelectron_spin, efield, neighbor_list_id, linres_control, xas_env, virial, cp_ddapc_env, cp_ddapc_ewald, outer_scf_history, outer_scf_ihistory, x_data, et_coupling, dftb_potential, results, se_taper, se_store_int_env, se_nddo_mpole, se_nonbond_env, admm_env, lri_env, lri_density, exstate_env, ec_env, dispersion_env, gcp_env, vee, rho_external, external_vxc, mask, mp2_env, bs_env, kg_env, wanniercentres, atprop, ls_scf_env, do_transport, transport_env, v_hartree_rspace, s_mstruct_changed, rho_changed, potential_changed, forces_up_to_date, mscfg_env, almo_scf_env, gradient_history, variable_history, embed_pot, spin_embed_pot, polar_env, mos_last_converged, rhs)
Get the QUICKSTEP environment.
Some utility functions for the calculation of integrals.
subroutine, public basis_set_list_setup(basis_set_list, basis_type, qs_kind_set)
Set up an easy accessible list of the basis sets for all kinds.
Calculate the interaction radii for the operator matrix calculation.
subroutine, public init_interaction_radii_orb_basis(orb_basis_set, eps_pgf_orb, eps_pgf_short)
...
Define the quickstep kind type and their sub types.
Define the neighbor list data types and the corresponding functionality.
subroutine, public release_neighbor_list_sets(nlists)
releases an array of neighbor_list_sets
subroutine, public neighbor_list_iterator_create(iterator_set, nl, search, nthread)
Neighbor list iterator functions.
subroutine, public neighbor_list_iterator_release(iterator_set)
...
integer function, public neighbor_list_iterate(iterator_set, mepos)
...
subroutine, public get_iterator_info(iterator_set, mepos, ikind, jkind, nkind, ilist, nlist, inode, nnode, iatom, jatom, r, cell)
...
module that contains the definitions of the scf types
Utility methods to build 3-center integral tensors of various types.
subroutine, public distribution_3d_create(dist_3d, dist1, dist2, dist3, nkind, particle_set, mp_comm_3d, own_comm)
Create a 3d distribution.
subroutine, public create_2c_tensor(t2c, dist_1, dist_2, pgrid, sizes_1, sizes_2, order, name)
...
subroutine, public create_tensor_batches(sizes, nbatches, starts_array, ends_array, starts_array_block, ends_array_block)
...
subroutine, public create_3c_tensor(t3c, dist_1, dist_2, dist_3, pgrid, sizes_1, sizes_2, sizes_3, map1, map2, name)
...
Utility methods to build 3-center integral tensors of various types.
Definition qs_tensors.F:11
subroutine, public build_3c_derivatives(t3c_der_i, t3c_der_k, filter_eps, qs_env, nl_3c, basis_i, basis_j, basis_k, potential_parameter, der_eps, op_pos, do_kpoints, do_hfx_kpoints, bounds_i, bounds_j, bounds_k, ri_range, img_to_ri_cell)
Build 3-center derivative tensors.
subroutine, public build_2c_neighbor_lists(ij_list, basis_i, basis_j, potential_parameter, name, qs_env, sym_ij, molecular, dist_2d, pot_to_rad)
Build 2-center neighborlists adapted to different operators This mainly wraps build_neighbor_lists fo...
Definition qs_tensors.F:143
recursive integer function, public neighbor_list_3c_iterate(iterator)
Iterate 3c-nl iterator.
Definition qs_tensors.F:467
subroutine, public neighbor_list_3c_iterator_destroy(iterator)
Destroy 3c-nl iterator.
Definition qs_tensors.F:445
subroutine, public neighbor_list_3c_destroy(ijk_list)
Destroy 3c neighborlist.
Definition qs_tensors.F:383
subroutine, public build_2c_derivatives(t2c_der, filter_eps, qs_env, nl_2c, basis_i, basis_j, potential_parameter, do_kpoints)
Calculates the derivatives of 2-center integrals, wrt to the first center.
subroutine, public get_tensor_occupancy(tensor, nze, occ)
...
subroutine, public build_3c_neighbor_lists(ijk_list, basis_i, basis_j, basis_k, dist_3d, potential_parameter, name, qs_env, sym_ij, sym_jk, sym_ik, molecular, op_pos, own_dist)
Build a 3-center neighbor list.
Definition qs_tensors.F:282
subroutine, public neighbor_list_3c_iterator_create(iterator, ijk_nl)
Create a 3-center neighborlist iterator.
Definition qs_tensors.F:400
subroutine, public get_3c_iterator_info(iterator, ikind, jkind, kkind, nkind, iatom, jatom, katom, rij, rjk, rik, cell_j, cell_k)
Get info of current iteration.
Definition qs_tensors.F:564
All kind of helpful little routines.
Definition util.F:14
pure integer function, dimension(2), public get_limit(m, n, me)
divide m entries into n parts, return size of part me
Definition util.F:333
Provides all information about an atomic kind.
Type defining parameters related to the simulation cell.
Definition cell_types.F:55
represent a pointer to a 1d array
represent a pointer to a 2d array
represent a pointer to a 3d array
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
distributes pairs on a 2d grid of processors
Contains information about kpoints.
stores all the informations relevant to an mpi environment
Provides all information about a quickstep kind.