(git:ed6f26b)
Loading...
Searching...
No Matches
hfx_ri_kp.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief RI-methods for HFX and K-points.
10!> \auhtor Augustin Bussy (01.2023)
11! **************************************************************************************************
12
14 USE admm_types, ONLY: get_admm_env
19 USE bibliography, ONLY: bussy2024,&
20 cite_reference
21 USE cell_types, ONLY: cell_type,&
22 pbc,&
32 USE cp_dbcsr_api, ONLY: &
37 dbcsr_p_type, dbcsr_put_block, dbcsr_release, dbcsr_type, dbcsr_type_no_symmetry, &
38 dbcsr_type_symmetric
45 USE dbt_api, ONLY: &
46 dbt_batched_contract_finalize, dbt_batched_contract_init, dbt_clear, dbt_contract, &
47 dbt_copy, dbt_copy_matrix_to_tensor, dbt_copy_tensor_to_matrix, dbt_create, dbt_destroy, &
48 dbt_distribution_destroy, dbt_distribution_new, dbt_distribution_type, dbt_filter, &
49 dbt_finalize, dbt_get_block, dbt_get_info, dbt_get_stored_coordinates, &
50 dbt_iterator_blocks_left, dbt_iterator_next_block, dbt_iterator_start, dbt_iterator_stop, &
51 dbt_iterator_type, dbt_mp_environ_pgrid, dbt_pgrid_create, dbt_pgrid_destroy, &
52 dbt_pgrid_type, dbt_put_block, dbt_scale, dbt_type
55 USE hfx_ri, ONLY: get_idx_to_atom,&
57 USE hfx_types, ONLY: hfx_ri_type
62 USE input_cp2k_hfx, ONLY: ri_pmat
68 USE kinds, ONLY: dp,&
69 int_8
70 USE kpoint_types, ONLY: get_kpoint_info,&
73 USE machine, ONLY: m_flush,&
75 USE mathlib, ONLY: erfc_cutoff
76 USE message_passing, ONLY: mp_cart_type,&
82 USE physcon, ONLY: angstrom
97 USE qs_tensors, ONLY: &
110 USE util, ONLY: get_limit
111 USE virial_types, ONLY: virial_type
112#include "./base/base_uses.f90"
113
114!$ USE OMP_LIB, ONLY: omp_get_num_threads
115
116 IMPLICIT NONE
117 PRIVATE
118
120
121 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_ri_kp'
122CONTAINS
123
124! NOTES: for a start, we do not seek performance, but accuracy. So in this first implementation,
125! we give little consideration to batching, load balance and such.
126! We also put everything here, even if there is some code replication with the original RI_HFX
127! We will only work in the RHO flavor
128! For now, we will also always assume that there is a single para_env, and that there is no
129! K-point subgroup. This might change in the future
130
131! **************************************************************************************************
132!> \brief I_1nitialize the ri_data for K-point. For now, we take the normal, usual existing ri_data
133!> and we adapt it to our needs
134!> \param dbcsr_template ...
135!> \param ri_data ...
136!> \param qs_env ...
137! **************************************************************************************************
138 SUBROUTINE adapt_ri_data_to_kp(dbcsr_template, ri_data, qs_env)
139 TYPE(dbcsr_type), INTENT(INOUT) :: dbcsr_template
140 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
141 TYPE(qs_environment_type), POINTER :: qs_env
142
143 INTEGER :: i_img, i_RI, i_spin, iatom, natom, &
144 nblks_RI, nimg, nkind, nspins
145 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_RI_ext, dist1, dist2, dist3
146 TYPE(dft_control_type), POINTER :: dft_control
147 TYPE(mp_para_env_type), POINTER :: para_env
148
149 NULLIFY (dft_control, para_env)
150
151 !The main thing that we need to do is to allocate more space for the integrals, such that there
152 !is room for each periodic image. Note that we only go in 1D, i.e. we store (mu^0 sigma^a|P^0),
153 !and (P^0|Q^a) => the RI basis is always in the main cell.
154
155 !Get kpoint info
156 CALL get_qs_env(qs_env, dft_control=dft_control, natom=natom, para_env=para_env, nkind=nkind)
157 nimg = ri_data%nimg
158
159 !Along the RI direction we have basis elements spread accross ncell_RI images.
160 nblks_ri = SIZE(ri_data%bsizes_RI_split)
161 ALLOCATE (bsizes_ri_ext(nblks_ri*ri_data%ncell_RI))
162 DO i_ri = 1, ri_data%ncell_RI
163 bsizes_ri_ext((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = ri_data%bsizes_RI_split(:)
164 END DO
165
166 ALLOCATE (ri_data%t_3c_int_ctr_1(1, nimg))
167 CALL create_3c_tensor(ri_data%t_3c_int_ctr_1(1, 1), dist1, dist2, dist3, &
168 ri_data%pgrid_1, ri_data%bsizes_AO_split, bsizes_ri_ext, &
169 ri_data%bsizes_AO_split, [1, 2], [3], name="(AO RI | AO)")
170
171 DO i_img = 2, nimg
172 CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), ri_data%t_3c_int_ctr_1(1, i_img))
173 END DO
174 DEALLOCATE (dist1, dist2, dist3)
175
176 ALLOCATE (ri_data%t_3c_int_ctr_2(1, 1))
177 CALL create_3c_tensor(ri_data%t_3c_int_ctr_2(1, 1), dist1, dist2, dist3, &
178 ri_data%pgrid_1, ri_data%bsizes_AO_split, bsizes_ri_ext, &
179 ri_data%bsizes_AO_split, [1], [2, 3], name="(AO RI | AO)")
180 DEALLOCATE (dist1, dist2, dist3)
181
182 !We use full block sizes for the 2c quantities
183 DEALLOCATE (bsizes_ri_ext)
184 nblks_ri = SIZE(ri_data%bsizes_RI)
185 ALLOCATE (bsizes_ri_ext(nblks_ri*ri_data%ncell_RI))
186 DO i_ri = 1, ri_data%ncell_RI
187 bsizes_ri_ext((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = ri_data%bsizes_RI(:)
188 END DO
189
190 ALLOCATE (ri_data%t_2c_inv(1, natom), ri_data%t_2c_int(1, natom), ri_data%t_2c_pot(1, natom))
191 CALL create_2c_tensor(ri_data%t_2c_inv(1, 1), dist1, dist2, ri_data%pgrid_2d, &
192 bsizes_ri_ext, bsizes_ri_ext, &
193 name="(RI | RI)")
194 DEALLOCATE (dist1, dist2)
195 CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_int(1, 1))
196 CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1))
197 DO iatom = 2, natom
198 CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_inv(1, iatom))
199 CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_int(1, iatom))
200 CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, iatom))
201 END DO
202
203 ALLOCATE (ri_data%kp_cost(natom, natom, nimg))
204 ri_data%kp_cost = 0.0_dp
205
206 !We store the density and KS matrix in tensor format
207 nspins = dft_control%nspins
208 ALLOCATE (ri_data%rho_ao_t(nspins, nimg), ri_data%ks_t(nspins, nimg))
209 CALL create_2c_tensor(ri_data%rho_ao_t(1, 1), dist1, dist2, ri_data%pgrid_2d, &
210 ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
211 name="(AO | AO)")
212 DEALLOCATE (dist1, dist2)
213
214 CALL dbt_create(dbcsr_template, ri_data%ks_t(1, 1))
215
216 IF (nspins == 2) THEN
217 CALL dbt_create(ri_data%rho_ao_t(1, 1), ri_data%rho_ao_t(2, 1))
218 CALL dbt_create(ri_data%ks_t(1, 1), ri_data%ks_t(2, 1))
219 END IF
220 DO i_img = 2, nimg
221 DO i_spin = 1, nspins
222 CALL dbt_create(ri_data%rho_ao_t(1, 1), ri_data%rho_ao_t(i_spin, i_img))
223 CALL dbt_create(ri_data%ks_t(1, 1), ri_data%ks_t(i_spin, i_img))
224 END DO
225 END DO
226
227 END SUBROUTINE adapt_ri_data_to_kp
228
229! **************************************************************************************************
230!> \brief The pre-scf steps for RI-HFX k-points calculation. Namely the calculation of the integrals
231!> \param dbcsr_template ...
232!> \param ri_data ...
233!> \param qs_env ...
234! **************************************************************************************************
235 SUBROUTINE hfx_ri_pre_scf_kp(dbcsr_template, ri_data, qs_env)
236 TYPE(dbcsr_type), INTENT(INOUT) :: dbcsr_template
237 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
238 TYPE(qs_environment_type), POINTER :: qs_env
239
240 CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_kp'
241
242 INTEGER :: handle, i_img, iatom, natom, nimg, nkind
243 TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: t_2c_op_pot, t_2c_op_RI
244 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int
245 TYPE(dft_control_type), POINTER :: dft_control
246
247 NULLIFY (dft_control)
248
249 CALL timeset(routinen, handle)
250
251 CALL get_qs_env(qs_env, dft_control=dft_control, natom=natom, nkind=nkind)
252
253 CALL cleanup_kp(ri_data)
254
255 !We do all the checks on what we allow in this initial implementation
256 IF (ri_data%flavor .NE. ri_pmat) cpabort("K-points RI-HFX only with RHO flavor")
257 IF (ri_data%same_op) ri_data%same_op = .false. !force the full calculation with RI metric
258 IF (abs(ri_data%eps_pgf_orb - dft_control%qs_control%eps_pgf_orb) > 1.0e-16_dp) &
259 cpabort("RI%EPS_PGF_ORB and QS%EPS_PGF_ORB must be identical for RI-HFX k-points")
260
261 CALL get_kp_and_ri_images(ri_data, qs_env)
262 nimg = ri_data%nimg
263
264 !Calculate the integrals
265 ALLOCATE (t_2c_op_pot(nimg), t_2c_op_ri(nimg))
266 ALLOCATE (t_3c_int(1, nimg))
267 CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_ri, t_2c_op_pot, t_3c_int, do_kpoints=.true.)
268
269 !Make sure the internals have the k-point format
270 CALL adapt_ri_data_to_kp(dbcsr_template, ri_data, qs_env)
271
272 !For each atom i, we calculate the inverse RI metric (P^0 | Q^0)^-1 without external bumping yet
273 !Also store the off-diagonal integrals of the RI metric in case of forces, bumped from the left
274 DO iatom = 1, natom
275 CALL get_ext_2c_int(ri_data%t_2c_inv(1, iatom), t_2c_op_ri, iatom, iatom, 1, ri_data, qs_env, &
276 do_inverse=.true.)
277 !for the forces:
278 !off-diagonl RI metric bumped from the left
279 CALL get_ext_2c_int(ri_data%t_2c_int(1, iatom), t_2c_op_ri, iatom, iatom, 1, ri_data, &
280 qs_env, off_diagonal=.true.)
281 CALL apply_bump(ri_data%t_2c_int(1, iatom), iatom, ri_data, qs_env, from_left=.true., from_right=.false.)
282
283 !RI metric with bumped off-diagonal blocks (but not inverted), depumed from left and right
284 CALL get_ext_2c_int(ri_data%t_2c_pot(1, iatom), t_2c_op_ri, iatom, iatom, 1, ri_data, qs_env, &
285 do_inverse=.true., skip_inverse=.true.)
286 CALL apply_bump(ri_data%t_2c_pot(1, iatom), iatom, ri_data, qs_env, from_left=.true., &
287 from_right=.true., debump=.true.)
288
289 END DO
290
291 DO i_img = 1, nimg
292 CALL dbcsr_release(t_2c_op_ri(i_img))
293 END DO
294
295 ALLOCATE (ri_data%kp_mat_2c_pot(1, nimg))
296 DO i_img = 1, nimg
297 CALL dbcsr_create(ri_data%kp_mat_2c_pot(1, i_img), template=t_2c_op_pot(i_img))
298 CALL dbcsr_copy(ri_data%kp_mat_2c_pot(1, i_img), t_2c_op_pot(i_img))
299 CALL dbcsr_release(t_2c_op_pot(i_img))
300 END DO
301
302 !Pre-contract all 3c integrals with the bumped inverse RI metric (P^0|Q^0)^-1,
303 !and store in ri_data%t_3c_int_ctr_1
304 CALL precontract_3c_ints(t_3c_int, ri_data, qs_env)
305
306 !reorder the 3c integrals such that empty images are bunched up together
307 CALL reorder_3c_ints(ri_data%t_3c_int_ctr_1(1, :), ri_data)
308
309 CALL timestop(handle)
310
311 END SUBROUTINE hfx_ri_pre_scf_kp
312
313! **************************************************************************************************
314!> \brief clean-up the KP specific data from ri_data
315!> \param ri_data ...
316! **************************************************************************************************
317 SUBROUTINE cleanup_kp(ri_data)
318 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
319
320 INTEGER :: i, j
321
322 IF (ALLOCATED(ri_data%kp_cost)) DEALLOCATE (ri_data%kp_cost)
323 IF (ALLOCATED(ri_data%idx_to_img)) DEALLOCATE (ri_data%idx_to_img)
324 IF (ALLOCATED(ri_data%img_to_idx)) DEALLOCATE (ri_data%img_to_idx)
325 IF (ALLOCATED(ri_data%present_images)) DEALLOCATE (ri_data%present_images)
326 IF (ALLOCATED(ri_data%img_to_RI_cell)) DEALLOCATE (ri_data%img_to_RI_cell)
327 IF (ALLOCATED(ri_data%RI_cell_to_img)) DEALLOCATE (ri_data%RI_cell_to_img)
328
329 IF (ALLOCATED(ri_data%kp_mat_2c_pot)) THEN
330 DO j = 1, SIZE(ri_data%kp_mat_2c_pot, 2)
331 DO i = 1, SIZE(ri_data%kp_mat_2c_pot, 1)
332 CALL dbcsr_release(ri_data%kp_mat_2c_pot(i, j))
333 END DO
334 END DO
335 DEALLOCATE (ri_data%kp_mat_2c_pot)
336 END IF
337
338 IF (ALLOCATED(ri_data%kp_t_3c_int)) THEN
339 DO i = 1, SIZE(ri_data%kp_t_3c_int)
340 CALL dbt_destroy(ri_data%kp_t_3c_int(i))
341 END DO
342 DEALLOCATE (ri_data%kp_t_3c_int)
343 END IF
344
345 IF (ALLOCATED(ri_data%t_2c_inv)) THEN
346 DO j = 1, SIZE(ri_data%t_2c_inv, 2)
347 DO i = 1, SIZE(ri_data%t_2c_inv, 1)
348 CALL dbt_destroy(ri_data%t_2c_inv(i, j))
349 END DO
350 END DO
351 DEALLOCATE (ri_data%t_2c_inv)
352 END IF
353
354 IF (ALLOCATED(ri_data%t_2c_int)) THEN
355 DO j = 1, SIZE(ri_data%t_2c_int, 2)
356 DO i = 1, SIZE(ri_data%t_2c_int, 1)
357 CALL dbt_destroy(ri_data%t_2c_int(i, j))
358 END DO
359 END DO
360 DEALLOCATE (ri_data%t_2c_int)
361 END IF
362
363 IF (ALLOCATED(ri_data%t_2c_pot)) THEN
364 DO j = 1, SIZE(ri_data%t_2c_pot, 2)
365 DO i = 1, SIZE(ri_data%t_2c_pot, 1)
366 CALL dbt_destroy(ri_data%t_2c_pot(i, j))
367 END DO
368 END DO
369 DEALLOCATE (ri_data%t_2c_pot)
370 END IF
371
372 IF (ALLOCATED(ri_data%t_3c_int_ctr_1)) THEN
373 DO j = 1, SIZE(ri_data%t_3c_int_ctr_1, 2)
374 DO i = 1, SIZE(ri_data%t_3c_int_ctr_1, 1)
375 CALL dbt_destroy(ri_data%t_3c_int_ctr_1(i, j))
376 END DO
377 END DO
378 DEALLOCATE (ri_data%t_3c_int_ctr_1)
379 END IF
380
381 IF (ALLOCATED(ri_data%t_3c_int_ctr_2)) THEN
382 DO j = 1, SIZE(ri_data%t_3c_int_ctr_2, 2)
383 DO i = 1, SIZE(ri_data%t_3c_int_ctr_2, 1)
384 CALL dbt_destroy(ri_data%t_3c_int_ctr_2(i, j))
385 END DO
386 END DO
387 DEALLOCATE (ri_data%t_3c_int_ctr_2)
388 END IF
389
390 IF (ALLOCATED(ri_data%rho_ao_t)) THEN
391 DO j = 1, SIZE(ri_data%rho_ao_t, 2)
392 DO i = 1, SIZE(ri_data%rho_ao_t, 1)
393 CALL dbt_destroy(ri_data%rho_ao_t(i, j))
394 END DO
395 END DO
396 DEALLOCATE (ri_data%rho_ao_t)
397 END IF
398
399 IF (ALLOCATED(ri_data%ks_t)) THEN
400 DO j = 1, SIZE(ri_data%ks_t, 2)
401 DO i = 1, SIZE(ri_data%ks_t, 1)
402 CALL dbt_destroy(ri_data%ks_t(i, j))
403 END DO
404 END DO
405 DEALLOCATE (ri_data%ks_t)
406 END IF
407
408 END SUBROUTINE cleanup_kp
409
410! **************************************************************************************************
411!> \brief Update the KS matrices for each real-space image
412!> \param qs_env ...
413!> \param ri_data ...
414!> \param ks_matrix ...
415!> \param ehfx ...
416!> \param rho_ao ...
417!> \param geometry_did_change ...
418!> \param nspins ...
419!> \param hf_fraction ...
420! **************************************************************************************************
421 SUBROUTINE hfx_ri_update_ks_kp(qs_env, ri_data, ks_matrix, ehfx, rho_ao, &
422 geometry_did_change, nspins, hf_fraction)
423
424 TYPE(qs_environment_type), POINTER :: qs_env
425 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
426 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: ks_matrix
427 REAL(kind=dp), INTENT(OUT) :: ehfx
428 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
429 LOGICAL, INTENT(IN) :: geometry_did_change
430 INTEGER, INTENT(IN) :: nspins
431 REAL(kind=dp), INTENT(IN) :: hf_fraction
432
433 CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_update_ks_kp'
434
435 INTEGER :: b_img, batch_size, group_size, handle, handle2, i_batch, i_img, i_spin, iatom, &
436 iblk, igroup, jatom, mb_img, n_batch_nze, natom, ngroups, nimg, nimg_nze
437 INTEGER(int_8) :: nflop, nze
438 INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_at, batch_ranges_nze, &
439 idx_to_at_ao
440 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: iapc_pairs
441 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: sparsity_pattern
442 LOGICAL :: use_delta_p
443 REAL(dp) :: etmp, fac, occ, pfac, pref, t1, t2, t3, &
444 t4
445 TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
446 TYPE(dbcsr_type) :: ks_desymm, rho_desymm, tmp
447 TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: mat_2c_pot
448 TYPE(dbcsr_type), POINTER :: dbcsr_template
449 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: ks_t_split, t_2c_ao_tmp, t_2c_work, &
450 t_3c_int, t_3c_work_2, t_3c_work_3
451 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: ks_t, ks_t_sub, t_3c_apc, t_3c_apc_sub
452 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
453 TYPE(section_vals_type), POINTER :: hfx_section
454
455 NULLIFY (para_env, para_env_sub, blacs_env_sub, hfx_section, dbcsr_template)
456
457 CALL cite_reference(bussy2024)
458
459 CALL timeset(routinen, handle)
460
461 CALL get_qs_env(qs_env, para_env=para_env, natom=natom)
462
463 IF (nspins == 1) THEN
464 fac = 0.5_dp*hf_fraction
465 ELSE
466 fac = 1.0_dp*hf_fraction
467 END IF
468
469 IF (geometry_did_change) THEN
470 CALL hfx_ri_pre_scf_kp(ks_matrix(1, 1)%matrix, ri_data, qs_env)
471 END IF
472 nimg = ri_data%nimg
473 nimg_nze = ri_data%nimg_nze
474
475 !We need to calculate the KS matrix for each periodic cell with index b: F_mu^0,nu^b
476 !F_mu^0,nu^b = -0.5 sum_a,c P_sigma^0,lambda^c (mu^0, sigma^a| P^0) V_P^0,Q^b (Q^b| nu^b lambda^a+c)
477 !with V_P^0,Q^b = (P^0|R^0)^-1 * (R^0|S^b) * (S^b|Q^b)^-1
478
479 !We use a local RI basis set for each atom in the system, which inlcudes RI basis elements for
480 !each neighboring atom standing within the KIND radius (decay of Gaussian with smallest exponent)
481
482 !We also limit the number of periodic images we consider accorrding to the HFX potentail in the
483 !RI basis, because if V_P^0,Q^b is zero everywhere, then image b can be ignored (RI basis less diffuse)
484
485 !We manage to calculate each KS matrix doing a double loop on iamges, and a double loop on atoms
486 !First, we pre-contract and store P_sigma^0,lambda^c (mu^0, sigma^a| P^0) (P^0|R^0)^-1 into T_mu^0,lambda^a+c,P^0
487 !Then, we loop over b_img, iatom, jatom to get (R^0|S^b)
488 !Finally, we do an additional loop over a+c images where we do (R^0|S^b) (S^b|Q^b)^-1 (Q^b| nu^b lambda^a+c)
489 !and the final contraction with T_mu^0,lambda^a+c,P^0
490
491 !Note that the 3-center integrals are pre-contracted with the RI metric, and that the same tensor can be used
492 !(mu^0, sigma^a| P^0) (P^0|R^0) <===> (S^b|Q^b)^-1 (Q^b| nu^b lambda^a+c) by relabelling the images
493
494 hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
495 CALL section_vals_val_get(hfx_section, "KP_USE_DELTA_P", l_val=use_delta_p)
496
497 !By default, build the density tensor based on the difference of this SCF P and that of the prev. SCF
498 pfac = -1.0_dp
499 IF (.NOT. use_delta_p) pfac = 0.0_dp
500 CALL get_pmat_images(ri_data%rho_ao_t, rho_ao, pfac, ri_data, qs_env)
501
502 ALLOCATE (ks_t(nspins, nimg))
503 DO i_img = 1, nimg
504 DO i_spin = 1, nspins
505 CALL dbt_create(ri_data%ks_t(1, 1), ks_t(i_spin, i_img))
506 END DO
507 END DO
508
509 ALLOCATE (idx_to_at_ao(SIZE(ri_data%bsizes_AO_split)))
510 CALL get_idx_to_atom(idx_to_at_ao, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
511
512 !First we calculate and store T^1_mu^0,lambda^a+c,P = P_mu^0,lambda^c * (mu_0 sigma^a | P^0) (P^0|R^0)^-1
513 !To avoid doing nimg**2 tiny contractions that do not scale well with a large number of CPUs,
514 !we instead do a single loop over the a+c image index. For each a+c, we get a list of allowed
515 !combination of a,c indices. Then we build TAS tensors P_mu^0,lambda^c with all concerned c's
516 !and (mu^0 sigma^a | P^0)*(P^0|R^0)^-1 with all a's. Then we perform a single contraction with larger tensors,
517 !were the sum over a,c is automatically taken care of
518 ALLOCATE (t_3c_apc(nspins, nimg))
519 DO i_img = 1, nimg
520 DO i_spin = 1, nspins
521 CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_apc(i_spin, i_img))
522 END DO
523 END DO
524 CALL contract_pmat_3c(t_3c_apc, ri_data%rho_ao_t, ri_data, qs_env)
525
526 hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
527 CALL section_vals_val_get(hfx_section, "KP_NGROUPS", i_val=ngroups)
528 CALL section_vals_val_get(hfx_section, "KP_STACK_SIZE", i_val=batch_size)
529 ri_data%kp_stack_size = batch_size
530
531 IF (mod(para_env%num_pe, ngroups) .NE. 0) THEN
532 cpwarn("KP_NGROUPS must be an integer divisor of the total number of MPI ranks. It was set to 1.")
533 ngroups = 1
534 CALL section_vals_val_set(hfx_section, "KP_NGROUPS", i_val=ngroups)
535 END IF
536 IF ((mod(ngroups, natom) .NE. 0) .AND. (mod(natom, ngroups) .NE. 0) .AND. geometry_did_change) THEN
537 IF (ngroups > 1) THEN
538 cpwarn("Better load balancing is reached if NGROUPS is a multiple/divisor of the number of atoms")
539 END IF
540 END IF
541 group_size = para_env%num_pe/ngroups
542 igroup = para_env%mepos/group_size
543
544 ALLOCATE (para_env_sub)
545 CALL para_env_sub%from_split(para_env, igroup)
546 CALL cp_blacs_env_create(blacs_env_sub, para_env_sub)
547
548 ! The sparsity pattern of each iatom, jatom pair, on each b_img, and on which subgroup
549 ALLOCATE (sparsity_pattern(natom, natom, nimg))
550 CALL get_sparsity_pattern(sparsity_pattern, ri_data, qs_env)
551 CALL get_sub_dist(sparsity_pattern, ngroups, ri_data)
552
553 !Get all the required tensors in the subgroups
554 ALLOCATE (mat_2c_pot(nimg), ks_t_sub(nspins, nimg), t_2c_ao_tmp(1), ks_t_split(2), t_2c_work(3))
555 CALL get_subgroup_2c_tensors(mat_2c_pot, t_2c_work, t_2c_ao_tmp, ks_t_split, ks_t_sub, &
556 group_size, ngroups, para_env, para_env_sub, ri_data)
557
558 ALLOCATE (t_3c_int(nimg), t_3c_apc_sub(nspins, nimg), t_3c_work_2(3), t_3c_work_3(3))
559 CALL get_subgroup_3c_tensors(t_3c_int, t_3c_work_2, t_3c_work_3, t_3c_apc, t_3c_apc_sub, &
560 group_size, ngroups, para_env, para_env_sub, ri_data)
561
562 !We go atom by atom, therefore there is an automatic batching along that direction
563 !Also, because we stack the 3c tensors nimg times, we naturally do some batching there too
564 ALLOCATE (batch_ranges_at(natom + 1))
565 batch_ranges_at(natom + 1) = SIZE(ri_data%bsizes_AO_split) + 1
566 iatom = 0
567 DO iblk = 1, SIZE(ri_data%bsizes_AO_split)
568 IF (idx_to_at_ao(iblk) == iatom + 1) THEN
569 iatom = iatom + 1
570 batch_ranges_at(iatom) = iblk
571 END IF
572 END DO
573
574 n_batch_nze = nimg_nze/batch_size
575 IF (modulo(nimg_nze, batch_size) .NE. 0) n_batch_nze = n_batch_nze + 1
576 ALLOCATE (batch_ranges_nze(n_batch_nze + 1))
577 DO i_batch = 1, n_batch_nze
578 batch_ranges_nze(i_batch) = (i_batch - 1)*batch_size + 1
579 END DO
580 batch_ranges_nze(n_batch_nze + 1) = nimg_nze + 1
581
582 CALL dbt_batched_contract_init(t_3c_work_3(1), batch_range_2=batch_ranges_at)
583 CALL dbt_batched_contract_init(t_3c_work_3(2), batch_range_2=batch_ranges_at)
584 CALL dbt_batched_contract_init(t_3c_work_2(1), batch_range_1=batch_ranges_at)
585 CALL dbt_batched_contract_init(t_3c_work_2(2), batch_range_1=batch_ranges_at)
586
587 t1 = m_walltime()
588 ri_data%kp_cost(:, :, :) = 0.0_dp
589 ALLOCATE (iapc_pairs(nimg, 2))
590 DO b_img = 1, nimg
591 CALL dbt_batched_contract_init(ks_t_split(1))
592 CALL dbt_batched_contract_init(ks_t_split(2))
593 DO jatom = 1, natom
594 DO iatom = 1, natom
595 IF (.NOT. sparsity_pattern(iatom, jatom, b_img) == igroup) cycle
596 pref = 1.0_dp
597 IF (iatom == jatom .AND. b_img == 1) pref = 0.5_dp
598
599 !measure the cost of the given i, j, b configuration
600 t3 = m_walltime()
601
602 !Get the proper HFX potential 2c integrals (R_i^0|S_j^b)
603 CALL timeset(routinen//"_2c", handle2)
604 CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, iatom, jatom, b_img, ri_data, qs_env, &
605 blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
606 dbcsr_template=dbcsr_template)
607 CALL dbt_copy(t_2c_work(1), t_2c_work(2), move_data=.true.) !move to split blocks
608 CALL dbt_filter(t_2c_work(2), ri_data%filter_eps)
609 CALL timestop(handle2)
610
611 CALL dbt_batched_contract_init(t_2c_work(2))
612 CALL get_iapc_pairs(iapc_pairs, b_img, ri_data, qs_env)
613 CALL timeset(routinen//"_3c", handle2)
614
615 !Stack the (S^b|Q^b)^-1 * (Q^b| nu^b lambda^a+c) integrals over a+c and multiply by (R_i^0|S_j^b)
616 DO i_batch = 1, n_batch_nze
617 CALL fill_3c_stack(t_3c_work_3(3), t_3c_int, iapc_pairs(:, 1), 3, ri_data, &
618 filter_at=jatom, filter_dim=2, idx_to_at=idx_to_at_ao, &
619 img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
620 CALL dbt_copy(t_3c_work_3(3), t_3c_work_3(1), move_data=.true.)
621
622 CALL dbt_contract(1.0_dp, t_2c_work(2), t_3c_work_3(1), &
623 0.0_dp, t_3c_work_3(2), map_1=[1], map_2=[2, 3], &
624 contract_1=[2], notcontract_1=[1], &
625 contract_2=[1], notcontract_2=[2, 3], &
626 filter_eps=ri_data%filter_eps, flop=nflop)
627 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
628 CALL dbt_copy(t_3c_work_3(2), t_3c_work_2(2), order=[2, 1, 3], move_data=.true.)
629 CALL dbt_copy(t_3c_work_3(3), t_3c_work_3(1))
630
631 !Stack the P_sigma^a,lambda^a+c * (mu^0 sigma^a | P^0)*(P^0|R^0)^-1 integrals over a+c and contract
632 !to get the final block of the KS matrix
633 DO i_spin = 1, nspins
634 CALL fill_3c_stack(t_3c_work_2(3), t_3c_apc_sub(i_spin, :), iapc_pairs(:, 2), 3, &
635 ri_data, filter_at=iatom, filter_dim=1, idx_to_at=idx_to_at_ao, &
636 img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
637 CALL get_tensor_occupancy(t_3c_work_2(3), nze, occ)
638 IF (nze == 0) cycle
639 CALL dbt_copy(t_3c_work_2(3), t_3c_work_2(1), move_data=.true.)
640 CALL dbt_contract(-pref*fac, t_3c_work_2(1), t_3c_work_2(2), &
641 1.0_dp, ks_t_split(i_spin), map_1=[1], map_2=[2], &
642 contract_1=[2, 3], notcontract_1=[1], &
643 contract_2=[2, 3], notcontract_2=[1], &
644 filter_eps=ri_data%filter_eps, &
645 move_data=i_spin == nspins, flop=nflop)
646 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
647 END DO
648 END DO !i_batch
649 CALL timestop(handle2)
650 CALL dbt_batched_contract_finalize(t_2c_work(2))
651
652 t4 = m_walltime()
653 ri_data%kp_cost(iatom, jatom, b_img) = t4 - t3
654 END DO !iatom
655 END DO !jatom
656 CALL dbt_batched_contract_finalize(ks_t_split(1))
657 CALL dbt_batched_contract_finalize(ks_t_split(2))
658
659 DO i_spin = 1, nspins
660 CALL dbt_copy(ks_t_split(i_spin), t_2c_ao_tmp(1), move_data=.true.)
661 CALL dbt_copy(t_2c_ao_tmp(1), ks_t_sub(i_spin, b_img), summation=.true.)
662 END DO
663 END DO !b_img
664 CALL dbt_batched_contract_finalize(t_3c_work_3(1))
665 CALL dbt_batched_contract_finalize(t_3c_work_3(2))
666 CALL dbt_batched_contract_finalize(t_3c_work_2(1))
667 CALL dbt_batched_contract_finalize(t_3c_work_2(2))
668 CALL para_env%sync()
669 CALL para_env%sum(ri_data%dbcsr_nflop)
670 CALL para_env%sum(ri_data%kp_cost)
671 t2 = m_walltime()
672 ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
673
674 !transfer KS tensor from subgroup to main group
675 CALL gather_ks_matrix(ks_t, ks_t_sub, group_size, sparsity_pattern, para_env, ri_data)
676
677 !Keep the 3c integrals on the subgroups to avoid communication at next SCF step
678 DO i_img = 1, nimg
679 CALL dbt_copy(t_3c_int(i_img), ri_data%kp_t_3c_int(i_img), move_data=.true.)
680 END DO
681
682 !clean-up subgroup tensors
683 CALL dbt_destroy(t_2c_ao_tmp(1))
684 CALL dbt_destroy(ks_t_split(1))
685 CALL dbt_destroy(ks_t_split(2))
686 CALL dbt_destroy(t_2c_work(1))
687 CALL dbt_destroy(t_2c_work(2))
688 CALL dbt_destroy(t_3c_work_2(1))
689 CALL dbt_destroy(t_3c_work_2(2))
690 CALL dbt_destroy(t_3c_work_2(3))
691 CALL dbt_destroy(t_3c_work_3(1))
692 CALL dbt_destroy(t_3c_work_3(2))
693 CALL dbt_destroy(t_3c_work_3(3))
694 DO i_img = 1, nimg
695 CALL dbt_destroy(t_3c_int(i_img))
696 CALL dbcsr_release(mat_2c_pot(i_img))
697 DO i_spin = 1, nspins
698 CALL dbt_destroy(t_3c_apc_sub(i_spin, i_img))
699 CALL dbt_destroy(ks_t_sub(i_spin, i_img))
700 END DO
701 END DO
702 IF (ASSOCIATED(dbcsr_template)) THEN
703 CALL dbcsr_release(dbcsr_template)
704 DEALLOCATE (dbcsr_template)
705 END IF
706
707 !End of subgroup parallelization
708 CALL cp_blacs_env_release(blacs_env_sub)
709 CALL para_env_sub%free()
710 DEALLOCATE (para_env_sub)
711
712 !Currently, rho_ao_t holds the density difference (wrt to pref SCF step).
713 !ks_t also hold that diff, while only having half the blocks => need to add to prev ks_t and symmetrize
714 !We need the full thing for the energy, on the next SCF step
715 CALL get_pmat_images(ri_data%rho_ao_t, rho_ao, 0.0_dp, ri_data, qs_env)
716 DO i_spin = 1, nspins
717 DO b_img = 1, nimg
718 CALL dbt_copy(ks_t(i_spin, b_img), ri_data%ks_t(i_spin, b_img), summation=.true.)
719
720 !desymmetrize
721 mb_img = get_opp_index(b_img, qs_env)
722 IF (mb_img > 0 .AND. mb_img .LE. nimg) THEN
723 CALL dbt_copy(ks_t(i_spin, mb_img), ri_data%ks_t(i_spin, b_img), order=[2, 1], summation=.true.)
724 END IF
725 END DO
726 END DO
727 DO b_img = 1, nimg
728 DO i_spin = 1, nspins
729 CALL dbt_destroy(ks_t(i_spin, b_img))
730 END DO
731 END DO
732
733 !calculate the energy
734 CALL dbt_create(ri_data%ks_t(1, 1), t_2c_ao_tmp(1))
735 CALL dbcsr_create(tmp, template=ks_matrix(1, 1)%matrix, matrix_type=dbcsr_type_symmetric)
736 CALL dbcsr_create(ks_desymm, template=ks_matrix(1, 1)%matrix, matrix_type=dbcsr_type_no_symmetry)
737 CALL dbcsr_create(rho_desymm, template=ks_matrix(1, 1)%matrix, matrix_type=dbcsr_type_no_symmetry)
738 ehfx = 0.0_dp
739 DO i_img = 1, nimg
740 DO i_spin = 1, nspins
741 CALL dbt_filter(ri_data%ks_t(i_spin, i_img), ri_data%filter_eps)
742 CALL dbt_copy(ri_data%ks_t(i_spin, i_img), t_2c_ao_tmp(1))
743 CALL dbt_copy_tensor_to_matrix(t_2c_ao_tmp(1), ks_desymm)
744 CALL dbt_copy_tensor_to_matrix(t_2c_ao_tmp(1), tmp)
745 CALL dbcsr_add(ks_matrix(i_spin, i_img)%matrix, tmp, 1.0_dp, 1.0_dp)
746
747 CALL dbt_copy(ri_data%rho_ao_t(i_spin, i_img), t_2c_ao_tmp(1))
748 CALL dbt_copy_tensor_to_matrix(t_2c_ao_tmp(1), rho_desymm)
749
750 CALL dbcsr_dot(ks_desymm, rho_desymm, etmp)
751 ehfx = ehfx + 0.5_dp*etmp
752
753 IF (.NOT. use_delta_p) CALL dbt_clear(ri_data%ks_t(i_spin, i_img))
754 END DO
755 END DO
756 CALL dbcsr_release(rho_desymm)
757 CALL dbcsr_release(ks_desymm)
758 CALL dbcsr_release(tmp)
759 CALL dbt_destroy(t_2c_ao_tmp(1))
760
761 CALL timestop(handle)
762
763 END SUBROUTINE hfx_ri_update_ks_kp
764
765! **************************************************************************************************
766!> \brief Update the K-points RI-HFX forces
767!> \param qs_env ...
768!> \param ri_data ...
769!> \param nspins ...
770!> \param hf_fraction ...
771!> \param rho_ao ...
772!> \param use_virial ...
773!> \note Because this routine uses stored quantities calculated in the energy calculation, they should
774!> always be called by pairs, and with the same input densities
775! **************************************************************************************************
776 SUBROUTINE hfx_ri_update_forces_kp(qs_env, ri_data, nspins, hf_fraction, rho_ao, use_virial)
777
778 TYPE(qs_environment_type), POINTER :: qs_env
779 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
780 INTEGER, INTENT(IN) :: nspins
781 REAL(kind=dp), INTENT(IN) :: hf_fraction
782 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
783 LOGICAL, INTENT(IN), OPTIONAL :: use_virial
784
785 CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_update_forces_kp'
786
787 INTEGER :: b_img, batch_size, group_size, handle, handle2, i_batch, i_img, i_loop, i_spin, &
788 i_xyz, iatom, iblk, igroup, j_xyz, jatom, k_xyz, n_batch, natom, ngroups, nimg, nimg_nze
789 INTEGER(int_8) :: nflop, nze
790 INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_ranges_at, &
791 batch_ranges_nze, dist1, dist2, &
792 i_images, idx_to_at_ao, idx_to_at_ri, &
793 kind_of
794 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: iapc_pairs
795 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: force_pattern, sparsity_pattern
796 INTEGER, DIMENSION(2, 1) :: bounds_iat, bounds_jat
797 LOGICAL :: use_virial_prv
798 REAL(dp) :: fac, occ, pref, t1, t2
799 REAL(dp), DIMENSION(3, 3) :: work_virial
800 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
801 TYPE(cell_type), POINTER :: cell
802 TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
803 TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: mat_2c_pot
804 TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:, :) :: mat_der_pot, mat_der_pot_sub
805 TYPE(dbcsr_type), POINTER :: dbcsr_template
806 TYPE(dbt_type) :: t_2c_r, t_2c_r_split
807 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_bint, t_2c_binv, t_2c_der_pot, &
808 t_2c_inv, t_2c_metric, t_2c_work, &
809 t_3c_der_stack, t_3c_work_2, &
810 t_3c_work_3
811 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: rho_ao_t, rho_ao_t_sub, t_2c_der_metric, &
812 t_2c_der_metric_sub, t_3c_apc, t_3c_apc_sub, t_3c_der_ao, t_3c_der_ao_sub, t_3c_der_ri, &
813 t_3c_der_ri_sub
814 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
815 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
816 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
817 TYPE(section_vals_type), POINTER :: hfx_section
818 TYPE(virial_type), POINTER :: virial
819
820 NULLIFY (para_env, para_env_sub, hfx_section, blacs_env_sub, dbcsr_template, force, atomic_kind_set, &
821 virial, particle_set, cell)
822
823 CALL timeset(routinen, handle)
824
825 use_virial_prv = .false.
826 IF (PRESENT(use_virial)) use_virial_prv = use_virial
827
828 IF (nspins == 1) THEN
829 fac = 0.5_dp*hf_fraction
830 ELSE
831 fac = 1.0_dp*hf_fraction
832 END IF
833
834 CALL get_qs_env(qs_env, natom=natom, para_env=para_env, force=force, cell=cell, virial=virial, &
835 atomic_kind_set=atomic_kind_set, particle_set=particle_set)
836 CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
837
838 ALLOCATE (idx_to_at_ao(SIZE(ri_data%bsizes_AO_split)))
839 CALL get_idx_to_atom(idx_to_at_ao, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
840
841 ALLOCATE (idx_to_at_ri(SIZE(ri_data%bsizes_RI_split)))
842 CALL get_idx_to_atom(idx_to_at_ri, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
843
844 nimg = ri_data%nimg
845 ALLOCATE (t_3c_der_ri(nimg, 3), t_3c_der_ao(nimg, 3), mat_der_pot(nimg, 3), t_2c_der_metric(natom, 3))
846
847 !We assume that the integrals are available from the SCF
848 !pre-calculate the derivs. 3c tensors as (P^0| sigma^a mu^0), with t_3c_der_AO holding deriv wrt mu^0
849 CALL precalc_derivatives(t_3c_der_ri, t_3c_der_ao, mat_der_pot, t_2c_der_metric, ri_data, qs_env)
850
851 !Calculate the density matrix at each image
852 ALLOCATE (rho_ao_t(nspins, nimg))
853 CALL create_2c_tensor(rho_ao_t(1, 1), dist1, dist2, ri_data%pgrid_2d, &
854 ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
855 name="(AO | AO)")
856 DEALLOCATE (dist1, dist2)
857 IF (nspins == 2) CALL dbt_create(rho_ao_t(1, 1), rho_ao_t(2, 1))
858 DO i_img = 2, nimg
859 DO i_spin = 1, nspins
860 CALL dbt_create(rho_ao_t(1, 1), rho_ao_t(i_spin, i_img))
861 END DO
862 END DO
863 CALL get_pmat_images(rho_ao_t, rho_ao, 0.0_dp, ri_data, qs_env)
864
865 !Contract integrals with the density matrix
866 ALLOCATE (t_3c_apc(nspins, nimg))
867 DO i_img = 1, nimg
868 DO i_spin = 1, nspins
869 CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_apc(i_spin, i_img))
870 END DO
871 END DO
872 CALL contract_pmat_3c(t_3c_apc, rho_ao_t, ri_data, qs_env)
873
874 !Setup the subgroups
875 hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
876 CALL section_vals_val_get(hfx_section, "KP_NGROUPS", i_val=ngroups)
877 group_size = para_env%num_pe/ngroups
878 igroup = para_env%mepos/group_size
879
880 ALLOCATE (para_env_sub)
881 CALL para_env_sub%from_split(para_env, igroup)
882 CALL cp_blacs_env_create(blacs_env_sub, para_env_sub)
883
884 !Get the ususal sparsity pattern
885 ALLOCATE (sparsity_pattern(natom, natom, nimg))
886 CALL get_sparsity_pattern(sparsity_pattern, ri_data, qs_env)
887 CALL get_sub_dist(sparsity_pattern, ngroups, ri_data)
888
889 !Get the 2-center quantities in the subgroups (note: main group derivs are deleted wihtin)
890 ALLOCATE (t_2c_inv(natom), mat_2c_pot(nimg), rho_ao_t_sub(nspins, nimg), t_2c_work(5), &
891 t_2c_der_metric_sub(natom, 3), mat_der_pot_sub(nimg, 3), t_2c_bint(natom), &
892 t_2c_metric(natom), t_2c_binv(natom))
893 CALL get_subgroup_2c_derivs(t_2c_inv, t_2c_bint, t_2c_metric, mat_2c_pot, t_2c_work, rho_ao_t, &
894 rho_ao_t_sub, t_2c_der_metric, t_2c_der_metric_sub, mat_der_pot, &
895 mat_der_pot_sub, group_size, ngroups, para_env, para_env_sub, ri_data)
896 CALL dbt_create(t_2c_work(1), t_2c_r) !nRI x nRI
897 CALL dbt_create(t_2c_work(5), t_2c_r_split) !nRI x nRI with split blocks
898
899 ALLOCATE (t_2c_der_pot(3))
900 DO i_xyz = 1, 3
901 CALL dbt_create(t_2c_r, t_2c_der_pot(i_xyz))
902 END DO
903
904 !Get the 3-center quantities in the subgroups. The integrals and t_3c_apc already there
905 ALLOCATE (t_3c_work_2(3), t_3c_work_3(4), t_3c_der_stack(6), t_3c_der_ao_sub(nimg, 3), &
906 t_3c_der_ri_sub(nimg, 3), t_3c_apc_sub(nspins, nimg))
907 CALL get_subgroup_3c_derivs(t_3c_work_2, t_3c_work_3, t_3c_der_ao, t_3c_der_ao_sub, &
908 t_3c_der_ri, t_3c_der_ri_sub, t_3c_apc, t_3c_apc_sub, t_3c_der_stack, &
909 group_size, ngroups, para_env, para_env_sub, ri_data)
910
911 !Set up batched contraction (go atom by atom)
912 ALLOCATE (batch_ranges_at(natom + 1))
913 batch_ranges_at(natom + 1) = SIZE(ri_data%bsizes_AO_split) + 1
914 iatom = 0
915 DO iblk = 1, SIZE(ri_data%bsizes_AO_split)
916 IF (idx_to_at_ao(iblk) == iatom + 1) THEN
917 iatom = iatom + 1
918 batch_ranges_at(iatom) = iblk
919 END IF
920 END DO
921
922 CALL dbt_batched_contract_init(t_3c_work_3(1), batch_range_2=batch_ranges_at)
923 CALL dbt_batched_contract_init(t_3c_work_3(2), batch_range_2=batch_ranges_at)
924 CALL dbt_batched_contract_init(t_3c_work_3(3), batch_range_2=batch_ranges_at)
925 CALL dbt_batched_contract_init(t_3c_work_2(1), batch_range_1=batch_ranges_at)
926 CALL dbt_batched_contract_init(t_3c_work_2(2), batch_range_1=batch_ranges_at)
927
928 !Preparing for the stacking of 3c tensors
929 nimg_nze = ri_data%nimg_nze
930 batch_size = ri_data%kp_stack_size
931 n_batch = nimg_nze/batch_size
932 IF (modulo(nimg_nze, batch_size) .NE. 0) n_batch = n_batch + 1
933 ALLOCATE (batch_ranges_nze(n_batch + 1))
934 DO i_batch = 1, n_batch
935 batch_ranges_nze(i_batch) = (i_batch - 1)*batch_size + 1
936 END DO
937 batch_ranges_nze(n_batch + 1) = nimg_nze + 1
938
939 !Applying the external bump to ((P|Q)_D + B*(P|Q)_OD*B)^-1 from left and right
940 !And keep the bump on LHS only version as well, with B*M^-1 = (M^-1*B)^T
941 DO iatom = 1, natom
942 CALL dbt_create(t_2c_inv(iatom), t_2c_binv(iatom))
943 CALL dbt_copy(t_2c_inv(iatom), t_2c_binv(iatom))
944 CALL apply_bump(t_2c_binv(iatom), iatom, ri_data, qs_env, from_left=.true., from_right=.false.)
945 CALL apply_bump(t_2c_inv(iatom), iatom, ri_data, qs_env, from_left=.true., from_right=.true.)
946 END DO
947
948 t1 = m_walltime()
949 work_virial = 0.0_dp
950 ALLOCATE (iapc_pairs(nimg, 2), i_images(nimg))
951 ALLOCATE (force_pattern(natom, natom, nimg))
952 force_pattern(:, :, :) = -1
953 !We proceed with 2 loops: one over the sparsity pattern from the SCF, one over the rest
954 !We use the SCF cost model for the first loop, while we calculate the cost of the upcoming loop
955 DO i_loop = 1, 2
956 DO b_img = 1, nimg
957 DO jatom = 1, natom
958 DO iatom = 1, natom
959
960 pref = -0.5_dp*fac
961 IF (i_loop == 1 .AND. (.NOT. sparsity_pattern(iatom, jatom, b_img) == igroup)) cycle
962 IF (i_loop == 2 .AND. (.NOT. force_pattern(iatom, jatom, b_img) == igroup)) cycle
963
964 !Get the proper HFX potential 2c integrals (R_i^0|S_j^b), times (S_j^b|Q_j^b)^-1
965 CALL timeset(routinen//"_2c_1", handle2)
966 CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, iatom, jatom, b_img, ri_data, qs_env, &
967 blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
968 dbcsr_template=dbcsr_template)
969 CALL dbt_contract(1.0_dp, t_2c_work(1), t_2c_inv(jatom), &
970 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
971 contract_1=[2], notcontract_1=[1], &
972 contract_2=[1], notcontract_2=[2], &
973 filter_eps=ri_data%filter_eps, flop=nflop)
974 CALL dbt_copy(t_2c_work(2), t_2c_work(5), move_data=.true.) !move to split blocks
975 CALL dbt_filter(t_2c_work(5), ri_data%filter_eps)
976 CALL timestop(handle2)
977
978 CALL timeset(routinen//"_3c", handle2)
979 bounds_iat(:, 1) = [sum(ri_data%bsizes_AO(1:iatom - 1)) + 1, sum(ri_data%bsizes_AO(1:iatom))]
980 bounds_jat(:, 1) = [sum(ri_data%bsizes_AO(1:jatom - 1)) + 1, sum(ri_data%bsizes_AO(1:jatom))]
981 CALL dbt_clear(t_2c_r_split)
982
983 DO i_spin = 1, nspins
984 CALL dbt_batched_contract_init(rho_ao_t_sub(i_spin, b_img))
985 END DO
986
987 CALL get_iapc_pairs(iapc_pairs, b_img, ri_data, qs_env, i_images) !i = a+c-b
988 DO i_batch = 1, n_batch
989
990 !Stack the 3c derivatives to take the trace later on
991 DO i_xyz = 1, 3
992 CALL dbt_clear(t_3c_der_stack(i_xyz))
993 CALL fill_3c_stack(t_3c_der_stack(i_xyz), t_3c_der_ri_sub(:, i_xyz), &
994 iapc_pairs(:, 1), 3, ri_data, filter_at=jatom, &
995 filter_dim=2, idx_to_at=idx_to_at_ao, &
996 img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
997
998 CALL dbt_clear(t_3c_der_stack(3 + i_xyz))
999 CALL fill_3c_stack(t_3c_der_stack(3 + i_xyz), t_3c_der_ao_sub(:, i_xyz), &
1000 iapc_pairs(:, 1), 3, ri_data, filter_at=jatom, &
1001 filter_dim=2, idx_to_at=idx_to_at_ao, &
1002 img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
1003 END DO
1004
1005 DO i_spin = 1, nspins
1006 !stack the t_3c_apc tensors
1007 CALL dbt_clear(t_3c_work_2(3))
1008 CALL fill_3c_stack(t_3c_work_2(3), t_3c_apc_sub(i_spin, :), iapc_pairs(:, 2), 3, &
1009 ri_data, filter_at=iatom, filter_dim=1, idx_to_at=idx_to_at_ao, &
1010 img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
1011 CALL get_tensor_occupancy(t_3c_work_2(3), nze, occ)
1012 IF (nze == 0) cycle
1013 CALL dbt_copy(t_3c_work_2(3), t_3c_work_2(1), move_data=.true.)
1014
1015 !Contract with the second density matrix: P_mu^0,nu^b * t_3c_apc,
1016 !where t_3c_apc = P_sigma^a,lambda^a+c (mu^0 P^0 sigma^a) *(P^0|R^0)^-1 (stacked along a+c)
1017 CALL dbt_contract(1.0_dp, rho_ao_t_sub(i_spin, b_img), t_3c_work_2(1), &
1018 0.0_dp, t_3c_work_2(2), map_1=[1], map_2=[2, 3], &
1019 contract_1=[1], notcontract_1=[2], &
1020 contract_2=[1], notcontract_2=[2, 3], &
1021 bounds_1=bounds_iat, bounds_2=bounds_jat, &
1022 filter_eps=ri_data%filter_eps, flop=nflop)
1023 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1024
1025 CALL get_tensor_occupancy(t_3c_work_2(2), nze, occ)
1026 IF (nze == 0) cycle
1027
1028 !Contract with V_PQ so that we can take the trace with (Q^b|nu^b lmabda^a+c)^(x)
1029 CALL dbt_copy(t_3c_work_2(2), t_3c_work_3(1), order=[2, 1, 3], move_data=.true.)
1030 CALL dbt_batched_contract_init(t_2c_work(5))
1031 CALL dbt_contract(1.0_dp, t_2c_work(5), t_3c_work_3(1), &
1032 0.0_dp, t_3c_work_3(2), map_1=[1], map_2=[2, 3], &
1033 contract_1=[1], notcontract_1=[2], &
1034 contract_2=[1], notcontract_2=[2, 3], &
1035 filter_eps=ri_data%filter_eps, flop=nflop)
1036 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1037 CALL dbt_batched_contract_finalize(t_2c_work(5))
1038
1039 !Contract with the 3c derivatives to get the force/virial
1040 CALL dbt_copy(t_3c_work_3(2), t_3c_work_3(4), move_data=.true.)
1041 IF (use_virial_prv) THEN
1042 CALL get_force_from_3c_trace(force, t_3c_work_3(4), t_3c_der_stack(1:3), &
1043 t_3c_der_stack(4:6), atom_of_kind, kind_of, &
1044 idx_to_at_ri, idx_to_at_ao, i_images, &
1045 batch_ranges_nze(i_batch), 2.0_dp*pref, &
1046 ri_data, qs_env, work_virial, cell, particle_set)
1047 ELSE
1048 CALL get_force_from_3c_trace(force, t_3c_work_3(4), t_3c_der_stack(1:3), &
1049 t_3c_der_stack(4:6), atom_of_kind, kind_of, &
1050 idx_to_at_ri, idx_to_at_ao, i_images, &
1051 batch_ranges_nze(i_batch), 2.0_dp*pref, &
1052 ri_data, qs_env)
1053 END IF
1054 CALL dbt_clear(t_3c_work_3(4))
1055
1056 !Contract with the 3-center integrals in order to have a matrix R_PQ such that
1057 !we can take the trace sum_PQ R_PQ (P^0|Q^b)^(x)
1058 IF (i_loop == 2) cycle
1059
1060 !Stack the 3c integrals
1061 CALL fill_3c_stack(t_3c_work_3(4), ri_data%kp_t_3c_int, iapc_pairs(:, 1), 3, ri_data, &
1062 filter_at=jatom, filter_dim=2, idx_to_at=idx_to_at_ao, &
1063 img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
1064 CALL dbt_copy(t_3c_work_3(4), t_3c_work_3(3), move_data=.true.)
1065
1066 CALL dbt_batched_contract_init(t_2c_r_split)
1067 CALL dbt_contract(1.0_dp, t_3c_work_3(1), t_3c_work_3(3), &
1068 1.0_dp, t_2c_r_split, map_1=[1], map_2=[2], &
1069 contract_1=[2, 3], notcontract_1=[1], &
1070 contract_2=[2, 3], notcontract_2=[1], &
1071 filter_eps=ri_data%filter_eps, flop=nflop)
1072 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1073 CALL dbt_batched_contract_finalize(t_2c_r_split)
1074 CALL dbt_copy(t_3c_work_3(4), t_3c_work_3(1))
1075 END DO
1076 END DO
1077 DO i_spin = 1, nspins
1078 CALL dbt_batched_contract_finalize(rho_ao_t_sub(i_spin, b_img))
1079 END DO
1080 CALL timestop(handle2)
1081
1082 IF (i_loop == 2) cycle
1083 pref = 2.0_dp*pref
1084 IF (iatom == jatom .AND. b_img == 1) pref = 0.5_dp*pref
1085
1086 CALL timeset(routinen//"_2c_2", handle2)
1087 !Note that the derivatives are in atomic block format (not split)
1088 CALL dbt_copy(t_2c_r_split, t_2c_r, move_data=.true.)
1089
1090 CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, iatom, jatom, b_img, ri_data, qs_env, &
1091 blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
1092 dbcsr_template=dbcsr_template)
1093
1094 !We have to calculate: S^-1(iat) * R_PQ * S^-1(jat) to trace with HFX pot der
1095 ! + R_PQ * S^-1(jat) * pot^T to trace with S^(x) (iat)
1096 ! + pot^T * S^-1(iat) *R_PQ to trace with S^(x) (jat)
1097
1098 !Because 3c tensors are all precontracted with the inverse RI metric,
1099 !t_2c_R is currently implicitely multiplied by S^-1(iat) from the left
1100 !and S^-1(jat) from the right, directly in the proper format for the trace
1101 !with the HFX potential derivative
1102
1103 !Trace with HFX pot deriv, that we need to build first
1104 DO i_xyz = 1, 3
1105 CALL get_ext_2c_int(t_2c_der_pot(i_xyz), mat_der_pot_sub(:, i_xyz), iatom, jatom, &
1106 b_img, ri_data, qs_env, blacs_env_ext=blacs_env_sub, &
1107 para_env_ext=para_env_sub, dbcsr_template=dbcsr_template)
1108 END DO
1109
1110 IF (use_virial_prv) THEN
1111 CALL get_2c_der_force(force, t_2c_r, t_2c_der_pot, atom_of_kind, kind_of, &
1112 b_img, pref, ri_data, qs_env, work_virial, cell, particle_set)
1113 ELSE
1114 CALL get_2c_der_force(force, t_2c_r, t_2c_der_pot, atom_of_kind, kind_of, &
1115 b_img, pref, ri_data, qs_env)
1116 END IF
1117
1118 DO i_xyz = 1, 3
1119 CALL dbt_clear(t_2c_der_pot(i_xyz))
1120 END DO
1121
1122 !R_PQ * S^-1(jat) * pot^T (=A)
1123 CALL dbt_contract(1.0_dp, t_2c_metric(iatom), t_2c_r, & !get rid of implicit S^-1(iat)
1124 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1125 contract_1=[2], notcontract_1=[1], &
1126 contract_2=[1], notcontract_2=[2], &
1127 filter_eps=ri_data%filter_eps, flop=nflop)
1128 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1129 CALL dbt_contract(1.0_dp, t_2c_work(2), t_2c_work(1), &
1130 0.0_dp, t_2c_work(3), map_1=[1], map_2=[2], &
1131 contract_1=[2], notcontract_1=[1], &
1132 contract_2=[2], notcontract_2=[1], &
1133 filter_eps=ri_data%filter_eps, flop=nflop)
1134 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1135
1136 !With the RI bump function, things get more complex. M = (S|P)_D + B*(S|P)_OD*B
1137 !Calculate M^-1*B*A + A*B*M^-1 to contract with B^x. A is in t_2c_work(3)
1138 CALL dbt_contract(1.0_dp, t_2c_work(3), t_2c_binv(iatom), &
1139 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1140 contract_1=[2], notcontract_1=[1], &
1141 contract_2=[1], notcontract_2=[2], &
1142 filter_eps=ri_data%filter_eps, flop=nflop)
1143 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1144
1145 CALL dbt_contract(1.0_dp, t_2c_binv(iatom), t_2c_work(3), & !use transpose of B*M^-1 = M^-1*B
1146 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1147 contract_1=[1], notcontract_1=[2], &
1148 contract_2=[1], notcontract_2=[2], &
1149 filter_eps=ri_data%filter_eps, flop=nflop)
1150 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1151
1152 CALL dbt_copy(t_2c_work(2), t_2c_work(4), summation=.true.)
1153 CALL get_2c_bump_forces(force, t_2c_work(4), iatom, atom_of_kind, kind_of, pref, &
1154 ri_data, qs_env, work_virial)
1155
1156 !Calculate -M^-1*B*A*B*M^-1 to contracte with diagonal RI metric deriv. t_2c_work(2) holds A*B*M^-1
1157 CALL dbt_contract(1.0_dp, t_2c_binv(iatom), t_2c_work(2), &
1158 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1159 contract_1=[1], notcontract_1=[2], &
1160 contract_2=[1], notcontract_2=[2], &
1161 filter_eps=ri_data%filter_eps, flop=nflop)
1162 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1163
1164 IF (use_virial_prv) THEN
1165 CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1166 kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1167 diag=.true., offdiag=.false.)
1168 ELSE
1169 CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1170 kind_of, 1, -pref, ri_data, qs_env, diag=.true., offdiag=.false.)
1171 END IF
1172
1173 !Calculate -B*M^-1*B*A*B*M^-1*B to contract with off-diagonal RI metric derivs
1174 CALL dbt_copy(t_2c_work(4), t_2c_work(2))
1175 CALL apply_bump(t_2c_work(2), iatom, ri_data, qs_env, from_left=.true., from_right=.true.)
1176
1177 IF (use_virial_prv) THEN
1178 CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1179 kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1180 diag=.false., offdiag=.true.)
1181 ELSE
1182 CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1183 kind_of, 1, -pref, ri_data, qs_env, diag=.false., offdiag=.true.)
1184 END IF
1185
1186 !Calculate -O*B*M^-1*B*A*B*M^-1 - M^-1*B*A*B*M^-1*B*O, where O is off-diagonal integrals
1187 !t_2c_work(4) holds M^-1*B*A*B*M^-1, and exploit transpose of B*O (stored in t_2c_bint)
1188 CALL dbt_contract(1.0_dp, t_2c_work(4), t_2c_bint(iatom), &
1189 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1190 contract_1=[2], notcontract_1=[1], &
1191 contract_2=[1], notcontract_2=[2], &
1192 filter_eps=ri_data%filter_eps, flop=nflop)
1193 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1194
1195 CALL dbt_contract(1.0_dp, t_2c_bint(iatom), t_2c_work(4), &
1196 1.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1197 contract_1=[1], notcontract_1=[2], &
1198 contract_2=[1], notcontract_2=[2], &
1199 filter_eps=ri_data%filter_eps, flop=nflop)
1200 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1201
1202 CALL get_2c_bump_forces(force, t_2c_work(2), iatom, atom_of_kind, kind_of, -pref, &
1203 ri_data, qs_env, work_virial)
1204
1205 ! pot^T * S^-1(iat) * R_PQ (=A)
1206 CALL dbt_contract(1.0_dp, t_2c_work(1), t_2c_r, &
1207 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1208 contract_1=[1], notcontract_1=[2], &
1209 contract_2=[1], notcontract_2=[2], &
1210 filter_eps=ri_data%filter_eps, flop=nflop)
1211 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1212
1213 CALL dbt_contract(1.0_dp, t_2c_work(2), t_2c_metric(jatom), & !get rid of implicit S^-1(jat)
1214 0.0_dp, t_2c_work(3), map_1=[1], map_2=[2], &
1215 contract_1=[2], notcontract_1=[1], &
1216 contract_2=[1], notcontract_2=[2], &
1217 filter_eps=ri_data%filter_eps, flop=nflop)
1218 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1219
1220 !Do the same shenanigans with the S^(x) (jatom)
1221 !Calculate M^-1*B*A + A*B*M^-1 to contract with B^x. A is in t_2c_work(3)
1222 CALL dbt_contract(1.0_dp, t_2c_work(3), t_2c_binv(jatom), &
1223 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1224 contract_1=[2], notcontract_1=[1], &
1225 contract_2=[1], notcontract_2=[2], &
1226 filter_eps=ri_data%filter_eps, flop=nflop)
1227 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1228
1229 CALL dbt_contract(1.0_dp, t_2c_binv(jatom), t_2c_work(3), & !use transpose of B*M^-1 = M^-1*B
1230 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1231 contract_1=[1], notcontract_1=[2], &
1232 contract_2=[1], notcontract_2=[2], &
1233 filter_eps=ri_data%filter_eps, flop=nflop)
1234 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1235
1236 CALL dbt_copy(t_2c_work(2), t_2c_work(4), summation=.true.)
1237 CALL get_2c_bump_forces(force, t_2c_work(4), jatom, atom_of_kind, kind_of, pref, &
1238 ri_data, qs_env, work_virial)
1239
1240 !Calculate -M^-1*B*A*B*M^-1 to contracte with diagonal RI metric deriv. t_2c_work(2) holds A*B*M^-1
1241 CALL dbt_contract(1.0_dp, t_2c_binv(jatom), t_2c_work(2), &
1242 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1243 contract_1=[1], notcontract_1=[2], &
1244 contract_2=[1], notcontract_2=[2], &
1245 filter_eps=ri_data%filter_eps, flop=nflop)
1246 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1247
1248 IF (use_virial_prv) THEN
1249 CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1250 kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1251 diag=.true., offdiag=.false.)
1252 ELSE
1253 CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1254 kind_of, 1, -pref, ri_data, qs_env, diag=.true., offdiag=.false.)
1255 END IF
1256
1257 !Calculate -B*M^-1*B*A*B*M^-1*B to contract with off-diagonal RI metric derivs
1258 CALL dbt_copy(t_2c_work(4), t_2c_work(2))
1259 CALL apply_bump(t_2c_work(2), jatom, ri_data, qs_env, from_left=.true., from_right=.true.)
1260
1261 IF (use_virial_prv) THEN
1262 CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1263 kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1264 diag=.false., offdiag=.true.)
1265 ELSE
1266 CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1267 kind_of, 1, -pref, ri_data, qs_env, diag=.false., offdiag=.true.)
1268 END IF
1269
1270 !Calculate -O*B*M^-1*B*A*B*M^-1 - M^-1*B*A*B*M^-1*B*O, where O is off-diagonal integrals
1271 !t_2c_work(4) holds M^-1*B*A*B*M^-1, and exploit transpose of B*O (stored in t_2c_bint)
1272 CALL dbt_contract(1.0_dp, t_2c_work(4), t_2c_bint(jatom), &
1273 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1274 contract_1=[2], notcontract_1=[1], &
1275 contract_2=[1], notcontract_2=[2], &
1276 filter_eps=ri_data%filter_eps, flop=nflop)
1277 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1278
1279 CALL dbt_contract(1.0_dp, t_2c_bint(jatom), t_2c_work(4), &
1280 1.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1281 contract_1=[1], notcontract_1=[2], &
1282 contract_2=[1], notcontract_2=[2], &
1283 filter_eps=ri_data%filter_eps, flop=nflop)
1284 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1285
1286 CALL get_2c_bump_forces(force, t_2c_work(2), jatom, atom_of_kind, kind_of, -pref, &
1287 ri_data, qs_env, work_virial)
1288
1289 CALL timestop(handle2)
1290 END DO !iatom
1291 END DO !jatom
1292 END DO !b_img
1293
1294 IF (i_loop == 1) THEN
1295 CALL update_pattern_to_forces(force_pattern, sparsity_pattern, ngroups, ri_data, qs_env)
1296 END IF
1297 END DO !i_loop
1298
1299 CALL dbt_batched_contract_finalize(t_3c_work_3(1))
1300 CALL dbt_batched_contract_finalize(t_3c_work_3(2))
1301 CALL dbt_batched_contract_finalize(t_3c_work_3(3))
1302 CALL dbt_batched_contract_finalize(t_3c_work_2(1))
1303 CALL dbt_batched_contract_finalize(t_3c_work_2(2))
1304
1305 IF (use_virial_prv) THEN
1306 DO k_xyz = 1, 3
1307 DO j_xyz = 1, 3
1308 DO i_xyz = 1, 3
1309 virial%pv_fock_4c(i_xyz, j_xyz) = virial%pv_fock_4c(i_xyz, j_xyz) &
1310 + work_virial(i_xyz, k_xyz)*cell%hmat(j_xyz, k_xyz)
1311 END DO
1312 END DO
1313 END DO
1314 END IF
1315
1316 !End of subgroup parallelization
1317 CALL cp_blacs_env_release(blacs_env_sub)
1318 CALL para_env_sub%free()
1319 DEALLOCATE (para_env_sub)
1320
1321 CALL para_env%sync()
1322 t2 = m_walltime()
1323 ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
1324
1325 !clean-up
1326 IF (ASSOCIATED(dbcsr_template)) THEN
1327 CALL dbcsr_release(dbcsr_template)
1328 DEALLOCATE (dbcsr_template)
1329 END IF
1330 CALL dbt_destroy(t_2c_r)
1331 CALL dbt_destroy(t_2c_r_split)
1332 CALL dbt_destroy(t_2c_work(1))
1333 CALL dbt_destroy(t_2c_work(2))
1334 CALL dbt_destroy(t_2c_work(3))
1335 CALL dbt_destroy(t_2c_work(4))
1336 CALL dbt_destroy(t_2c_work(5))
1337 CALL dbt_destroy(t_3c_work_2(1))
1338 CALL dbt_destroy(t_3c_work_2(2))
1339 CALL dbt_destroy(t_3c_work_2(3))
1340 CALL dbt_destroy(t_3c_work_3(1))
1341 CALL dbt_destroy(t_3c_work_3(2))
1342 CALL dbt_destroy(t_3c_work_3(3))
1343 CALL dbt_destroy(t_3c_work_3(4))
1344 CALL dbt_destroy(t_3c_der_stack(1))
1345 CALL dbt_destroy(t_3c_der_stack(2))
1346 CALL dbt_destroy(t_3c_der_stack(3))
1347 CALL dbt_destroy(t_3c_der_stack(4))
1348 CALL dbt_destroy(t_3c_der_stack(5))
1349 CALL dbt_destroy(t_3c_der_stack(6))
1350 DO i_xyz = 1, 3
1351 CALL dbt_destroy(t_2c_der_pot(i_xyz))
1352 END DO
1353 DO iatom = 1, natom
1354 CALL dbt_destroy(t_2c_inv(iatom))
1355 CALL dbt_destroy(t_2c_binv(iatom))
1356 CALL dbt_destroy(t_2c_bint(iatom))
1357 CALL dbt_destroy(t_2c_metric(iatom))
1358 DO i_xyz = 1, 3
1359 CALL dbt_destroy(t_2c_der_metric_sub(iatom, i_xyz))
1360 END DO
1361 END DO
1362 DO i_img = 1, nimg
1363 CALL dbcsr_release(mat_2c_pot(i_img))
1364 DO i_spin = 1, nspins
1365 CALL dbt_destroy(rho_ao_t_sub(i_spin, i_img))
1366 CALL dbt_destroy(t_3c_apc_sub(i_spin, i_img))
1367 END DO
1368 END DO
1369 DO i_xyz = 1, 3
1370 DO i_img = 1, nimg
1371 CALL dbt_destroy(t_3c_der_ri_sub(i_img, i_xyz))
1372 CALL dbt_destroy(t_3c_der_ao_sub(i_img, i_xyz))
1373 CALL dbcsr_release(mat_der_pot_sub(i_img, i_xyz))
1374 END DO
1375 END DO
1376
1377 CALL timestop(handle)
1378
1379 END SUBROUTINE hfx_ri_update_forces_kp
1380
1381! **************************************************************************************************
1382!> \brief A routine the applies the RI bump matrix from the left and/or the right, given an input
1383!> matrix and the central RI atom. We assume atomic block sizes
1384!> \param t_2c_inout ...
1385!> \param atom_i ...
1386!> \param ri_data ...
1387!> \param qs_env ...
1388!> \param from_left ...
1389!> \param from_right ...
1390!> \param debump ...
1391! **************************************************************************************************
1392 SUBROUTINE apply_bump(t_2c_inout, atom_i, ri_data, qs_env, from_left, from_right, debump)
1393 TYPE(dbt_type), INTENT(INOUT) :: t_2c_inout
1394 INTEGER, INTENT(IN) :: atom_i
1395 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1396 TYPE(qs_environment_type), POINTER :: qs_env
1397 LOGICAL, INTENT(IN), OPTIONAL :: from_left, from_right, debump
1398
1399 INTEGER :: i_img, i_ri, iatom, ind(2), j_img, j_ri, &
1400 jatom, natom, nblks(2), nimg, nkind
1401 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1402 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1403 LOGICAL :: found, my_debump, my_left, my_right
1404 REAL(dp) :: bval, r0, r1, ri(3), rj(3), rref(3), &
1405 scoord(3)
1406 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
1407 TYPE(cell_type), POINTER :: cell
1408 TYPE(dbt_iterator_type) :: iter
1409 TYPE(kpoint_type), POINTER :: kpoints
1410 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1411 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1412
1413 NULLIFY (qs_kind_set, particle_set, kpoints, index_to_cell, cell_to_index, cell)
1414
1415 CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, cell=cell, &
1416 kpoints=kpoints, particle_set=particle_set)
1417 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1418
1419 my_debump = .false.
1420 IF (PRESENT(debump)) my_debump = debump
1421
1422 my_left = .false.
1423 IF (PRESENT(from_left)) my_left = from_left
1424
1425 my_right = .false.
1426 IF (PRESENT(from_right)) my_right = from_right
1427 cpassert(my_left .OR. my_right)
1428
1429 CALL dbt_get_info(t_2c_inout, nblks_total=nblks)
1430 cpassert(nblks(1) == ri_data%ncell_RI*natom)
1431 cpassert(nblks(2) == ri_data%ncell_RI*natom)
1432
1433 nimg = ri_data%nimg
1434
1435 !Loop over the RI cells and atoms, and apply bump accordingly
1436 r1 = ri_data%kp_RI_range
1437 r0 = ri_data%kp_bump_rad
1438 rref = pbc(particle_set(atom_i)%r, cell)
1439
1440!$OMP PARALLEL DEFAULT(NONE) SHARED(t_2c_inout,natom,ri_data,cell,particle_set,index_to_cell,my_left, &
1441!$OMP my_right,r0,r1,rref,my_debump) &
1442!$OMP PRIVATE(iter,ind,blk,found,i_RI,i_img,iatom,j_RI,j_img,jatom,scoord,ri,rj,bval)
1443 CALL dbt_iterator_start(iter, t_2c_inout)
1444 DO WHILE (dbt_iterator_blocks_left(iter))
1445 CALL dbt_iterator_next_block(iter, ind)
1446 CALL dbt_get_block(t_2c_inout, ind, blk, found)
1447 IF (.NOT. found) cycle
1448
1449 i_ri = (ind(1) - 1)/natom + 1
1450 i_img = ri_data%RI_cell_to_img(i_ri)
1451 iatom = ind(1) - (i_ri - 1)*natom
1452
1453 CALL real_to_scaled(scoord, pbc(particle_set(iatom)%r, cell), cell)
1454 CALL scaled_to_real(ri, scoord(:) + index_to_cell(:, i_img), cell)
1455
1456 j_ri = (ind(2) - 1)/natom + 1
1457 j_img = ri_data%RI_cell_to_img(j_ri)
1458 jatom = ind(2) - (j_ri - 1)*natom
1459
1460 CALL real_to_scaled(scoord, pbc(particle_set(jatom)%r, cell), cell)
1461 CALL scaled_to_real(rj, scoord(:) + index_to_cell(:, j_img), cell)
1462
1463 IF (.NOT. my_debump) THEN
1464 IF (my_left) blk(:, :) = blk(:, :)*bump(norm2(ri - rref), r0, r1)
1465 IF (my_right) blk(:, :) = blk(:, :)*bump(norm2(rj - rref), r0, r1)
1466 ELSE
1467 !Note: by construction, the bump function is never quite zero, as its range is the same
1468 ! as that of the extended RI basis (but we are safe)
1469 bval = bump(norm2(ri - rref), r0, r1)
1470 IF (my_left .AND. bval > epsilon(1.0_dp)) blk(:, :) = blk(:, :)/bval
1471 bval = bump(norm2(rj - rref), r0, r1)
1472 IF (my_right .AND. bval > epsilon(1.0_dp)) blk(:, :) = blk(:, :)/bval
1473 END IF
1474
1475 CALL dbt_put_block(t_2c_inout, ind, shape(blk), blk)
1476
1477 DEALLOCATE (blk)
1478 END DO
1479 CALL dbt_iterator_stop(iter)
1480!$OMP END PARALLEL
1481 CALL dbt_filter(t_2c_inout, ri_data%filter_eps)
1482
1483 END SUBROUTINE apply_bump
1484
1485! **************************************************************************************************
1486!> \brief A routine that calculates the forces due to the derivative of the bump function
1487!> \param force ...
1488!> \param t_2c_in ...
1489!> \param atom_i ...
1490!> \param atom_of_kind ...
1491!> \param kind_of ...
1492!> \param pref ...
1493!> \param ri_data ...
1494!> \param qs_env ...
1495!> \param work_virial ...
1496! **************************************************************************************************
1497 SUBROUTINE get_2c_bump_forces(force, t_2c_in, atom_i, atom_of_kind, kind_of, pref, ri_data, &
1498 qs_env, work_virial)
1499 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
1500 TYPE(dbt_type), INTENT(INOUT) :: t_2c_in
1501 INTEGER, INTENT(IN) :: atom_i
1502 INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of
1503 REAL(dp), INTENT(IN) :: pref
1504 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1505 TYPE(qs_environment_type), POINTER :: qs_env
1506 REAL(dp), DIMENSION(3, 3), INTENT(INOUT) :: work_virial
1507
1508 INTEGER :: i, i_img, i_ri, i_xyz, iat_of_kind, iatom, ikind, ind(2), j_img, j_ri, j_xyz, &
1509 jat_of_kind, jatom, jkind, natom, nblks(2), nimg, nkind
1510 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1511 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1512 LOGICAL :: found
1513 REAL(dp) :: new_force, r0, r1, ri(3), rj(3), &
1514 rref(3), scoord(3), x
1515 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
1516 TYPE(cell_type), POINTER :: cell
1517 TYPE(dbt_iterator_type) :: iter
1518 TYPE(kpoint_type), POINTER :: kpoints
1519 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1520 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1521
1522 NULLIFY (qs_kind_set, particle_set, kpoints, index_to_cell, cell_to_index, cell)
1523
1524 CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, cell=cell, &
1525 kpoints=kpoints, particle_set=particle_set)
1526 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1527
1528 CALL dbt_get_info(t_2c_in, nblks_total=nblks)
1529 cpassert(nblks(1) == ri_data%ncell_RI*natom)
1530 cpassert(nblks(2) == ri_data%ncell_RI*natom)
1531
1532 nimg = ri_data%nimg
1533
1534 !Loop over the RI cells and atoms, and apply bump accordingly
1535 r1 = ri_data%kp_RI_range
1536 r0 = ri_data%kp_bump_rad
1537 rref = pbc(particle_set(atom_i)%r, cell)
1538
1539 iat_of_kind = atom_of_kind(atom_i)
1540 ikind = kind_of(atom_i)
1541
1542!$OMP PARALLEL DEFAULT(NONE) SHARED(t_2c_in,natom,ri_data,cell,particle_set,index_to_cell,pref, &
1543!$OMP force,r0,r1,rref,atom_of_kind,kind_of,iat_of_kind,ikind,work_virial) &
1544!$OMP PRIVATE(iter,ind,blk,found,i_RI,i_img,iatom,j_RI,j_img,jatom,scoord,ri,rj,jkind,jat_of_kind, &
1545!$OMP new_force,i_xyz,i,x,j_xyz)
1546 CALL dbt_iterator_start(iter, t_2c_in)
1547 DO WHILE (dbt_iterator_blocks_left(iter))
1548 CALL dbt_iterator_next_block(iter, ind)
1549 IF (ind(1) .NE. ind(2)) cycle !bump matrix is diagonal
1550
1551 CALL dbt_get_block(t_2c_in, ind, blk, found)
1552 IF (.NOT. found) cycle
1553
1554 !bump is a function of x = SQRT((R - Rref)^2). We refer to R as jatom, and Rref as atom_i
1555 j_ri = (ind(2) - 1)/natom + 1
1556 j_img = ri_data%RI_cell_to_img(j_ri)
1557 jatom = ind(2) - (j_ri - 1)*natom
1558 jat_of_kind = atom_of_kind(jatom)
1559 jkind = kind_of(jatom)
1560
1561 CALL real_to_scaled(scoord, pbc(particle_set(jatom)%r, cell), cell)
1562 CALL scaled_to_real(rj, scoord(:) + index_to_cell(:, j_img), cell)
1563 x = norm2(rj - rref)
1564 IF (x < r0 .OR. x > r1) cycle
1565
1566 new_force = 0.0_dp
1567 DO i = 1, SIZE(blk, 1)
1568 new_force = new_force + blk(i, i)
1569 END DO
1570 new_force = pref*new_force*dbump(x, r0, r1)
1571
1572 !x = SQRT((R - Rref)^2), so we multiply by dx/dR and dx/dRref
1573 DO i_xyz = 1, 3
1574 !Force acting on second atom
1575!$OMP ATOMIC
1576 force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) + &
1577 new_force*(rj(i_xyz) - rref(i_xyz))/x
1578
1579 !virial acting on second atom
1580 CALL real_to_scaled(scoord, rj, cell)
1581 DO j_xyz = 1, 3
1582!$OMP ATOMIC
1583 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) &
1584 + new_force*scoord(j_xyz)*(rj(i_xyz) - rref(i_xyz))/x
1585 END DO
1586
1587 !Force acting on reference atom, defining the RI basis
1588!$OMP ATOMIC
1589 force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) - &
1590 new_force*(rj(i_xyz) - rref(i_xyz))/x
1591
1592 !virial of ref atom
1593 CALL real_to_scaled(scoord, rref, cell)
1594 DO j_xyz = 1, 3
1595!$OMP ATOMIC
1596 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) &
1597 - new_force*scoord(j_xyz)*(rj(i_xyz) - rref(i_xyz))/x
1598 END DO
1599 END DO !i_xyz
1600
1601 DEALLOCATE (blk)
1602 END DO
1603 CALL dbt_iterator_stop(iter)
1604!$OMP END PARALLEL
1605
1606 END SUBROUTINE get_2c_bump_forces
1607
1608! **************************************************************************************************
1609!> \brief The bumb function as defined by Juerg
1610!> \param x ...
1611!> \param r0 ...
1612!> \param r1 ...
1613!> \return ...
1614! **************************************************************************************************
1615 FUNCTION bump(x, r0, r1) RESULT(b)
1616 REAL(dp), INTENT(IN) :: x, r0, r1
1617 REAL(dp) :: b
1618
1619 REAL(dp) :: r
1620
1621 !Head-Gordon
1622 !b = 1.0_dp/(1.0_dp+EXP((r1-r0)/(r1-x)-(r1-r0)/(x-r0)))
1623 !Juerg
1624 r = (x - r0)/(r1 - r0)
1625 b = -6.0_dp*r**5 + 15.0_dp*r**4 - 10.0_dp*r**3 + 1.0_dp
1626 IF (x .GE. r1) b = 0.0_dp
1627 IF (x .LE. r0) b = 1.0_dp
1628
1629 END FUNCTION bump
1630
1631! **************************************************************************************************
1632!> \brief The derivative of the bump function
1633!> \param x ...
1634!> \param r0 ...
1635!> \param r1 ...
1636!> \return ...
1637! **************************************************************************************************
1638 FUNCTION dbump(x, r0, r1) RESULT(b)
1639 REAL(dp), INTENT(IN) :: x, r0, r1
1640 REAL(dp) :: b
1641
1642 REAL(dp) :: r
1643
1644 r = (x - r0)/(r1 - r0)
1645 b = (-30.0_dp*r**4 + 60.0_dp*r**3 - 30.0_dp*r**2)/(r1 - r0)
1646 IF (x .GE. r1) b = 0.0_dp
1647 IF (x .LE. r0) b = 0.0_dp
1648
1649 END FUNCTION dbump
1650
1651! **************************************************************************************************
1652!> \brief return the cell index a+c corresponding to given cell index i and b, with i = a+c-b
1653!> \param i_index ...
1654!> \param b_index ...
1655!> \param qs_env ...
1656!> \return ...
1657! **************************************************************************************************
1658 FUNCTION get_apc_index_from_ib(i_index, b_index, qs_env) RESULT(apc_index)
1659 INTEGER, INTENT(IN) :: i_index, b_index
1660 TYPE(qs_environment_type), POINTER :: qs_env
1661 INTEGER :: apc_index
1662
1663 INTEGER, DIMENSION(3) :: cell_apc
1664 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1665 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1666 TYPE(kpoint_type), POINTER :: kpoints
1667
1668 CALL get_qs_env(qs_env, kpoints=kpoints)
1669 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1670
1671 !i = a+c-b => a+c = i+b
1672 cell_apc(:) = index_to_cell(:, i_index) + index_to_cell(:, b_index)
1673
1674 IF (any([cell_apc(1), cell_apc(2), cell_apc(3)] < lbound(cell_to_index)) .OR. &
1675 any([cell_apc(1), cell_apc(2), cell_apc(3)] > ubound(cell_to_index))) THEN
1676
1677 apc_index = 0
1678 ELSE
1679 apc_index = cell_to_index(cell_apc(1), cell_apc(2), cell_apc(3))
1680 END IF
1681
1682 END FUNCTION get_apc_index_from_ib
1683
1684! **************************************************************************************************
1685!> \brief return the cell index i corresponding to the summ of cell_a and cell_c
1686!> \param a_index ...
1687!> \param c_index ...
1688!> \param qs_env ...
1689!> \return ...
1690! **************************************************************************************************
1691 FUNCTION get_apc_index(a_index, c_index, qs_env) RESULT(i_index)
1692 INTEGER, INTENT(IN) :: a_index, c_index
1693 TYPE(qs_environment_type), POINTER :: qs_env
1694 INTEGER :: i_index
1695
1696 INTEGER, DIMENSION(3) :: cell_i
1697 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1698 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1699 TYPE(kpoint_type), POINTER :: kpoints
1700
1701 CALL get_qs_env(qs_env, kpoints=kpoints)
1702 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1703
1704 cell_i(:) = index_to_cell(:, a_index) + index_to_cell(:, c_index)
1705
1706 IF (any([cell_i(1), cell_i(2), cell_i(3)] < lbound(cell_to_index)) .OR. &
1707 any([cell_i(1), cell_i(2), cell_i(3)] > ubound(cell_to_index))) THEN
1708
1709 i_index = 0
1710 ELSE
1711 i_index = cell_to_index(cell_i(1), cell_i(2), cell_i(3))
1712 END IF
1713
1714 END FUNCTION get_apc_index
1715
1716! **************************************************************************************************
1717!> \brief return the cell index i corresponding to the summ of cell_a + cell_c - cell_b
1718!> \param apc_index ...
1719!> \param b_index ...
1720!> \param qs_env ...
1721!> \return ...
1722! **************************************************************************************************
1723 FUNCTION get_i_index(apc_index, b_index, qs_env) RESULT(i_index)
1724 INTEGER, INTENT(IN) :: apc_index, b_index
1725 TYPE(qs_environment_type), POINTER :: qs_env
1726 INTEGER :: i_index
1727
1728 INTEGER, DIMENSION(3) :: cell_i
1729 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1730 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1731 TYPE(kpoint_type), POINTER :: kpoints
1732
1733 CALL get_qs_env(qs_env, kpoints=kpoints)
1734 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1735
1736 cell_i(:) = index_to_cell(:, apc_index) - index_to_cell(:, b_index)
1737
1738 IF (any([cell_i(1), cell_i(2), cell_i(3)] < lbound(cell_to_index)) .OR. &
1739 any([cell_i(1), cell_i(2), cell_i(3)] > ubound(cell_to_index))) THEN
1740
1741 i_index = 0
1742 ELSE
1743 i_index = cell_to_index(cell_i(1), cell_i(2), cell_i(3))
1744 END IF
1745
1746 END FUNCTION get_i_index
1747
1748! **************************************************************************************************
1749!> \brief A routine that returns all allowed a,c pairs such that a+c images corresponds to the value
1750!> of the apc_index input. Takes into account that image a corresponds to 3c integrals, which
1751!> are ordered in their own way
1752!> \param ac_pairs ...
1753!> \param apc_index ...
1754!> \param ri_data ...
1755!> \param qs_env ...
1756! **************************************************************************************************
1757 SUBROUTINE get_ac_pairs(ac_pairs, apc_index, ri_data, qs_env)
1758 INTEGER, DIMENSION(:, :), INTENT(INOUT) :: ac_pairs
1759 INTEGER, INTENT(IN) :: apc_index
1760 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1761 TYPE(qs_environment_type), POINTER :: qs_env
1762
1763 INTEGER :: a_index, actual_img, c_index, nimg
1764
1765 nimg = SIZE(ac_pairs, 1)
1766
1767 ac_pairs(:, :) = 0
1768!$OMP PARALLEL DO DEFAULT(NONE) SHARED(ac_pairs,nimg,ri_data,qs_env,apc_index) &
1769!$OMP PRIVATE(a_index,actual_img,c_index)
1770 DO a_index = 1, nimg
1771 actual_img = ri_data%idx_to_img(a_index)
1772 !c = a+c - a
1773 c_index = get_i_index(apc_index, actual_img, qs_env)
1774 ac_pairs(a_index, 1) = a_index
1775 ac_pairs(a_index, 2) = c_index
1776 END DO
1777!$OMP END PARALLEL DO
1778
1779 END SUBROUTINE get_ac_pairs
1780
1781! **************************************************************************************************
1782!> \brief A routine that returns all allowed i,a+c pairs such that, for the given value of b, we have
1783!> i = a+c-b. Takes into account that image i corrsponds to the 3c ints, which are ordered in
1784!> their own way
1785!> \param iapc_pairs ...
1786!> \param b_index ...
1787!> \param ri_data ...
1788!> \param qs_env ...
1789!> \param actual_i_img ...
1790! **************************************************************************************************
1791 SUBROUTINE get_iapc_pairs(iapc_pairs, b_index, ri_data, qs_env, actual_i_img)
1792 INTEGER, DIMENSION(:, :), INTENT(INOUT) :: iapc_pairs
1793 INTEGER, INTENT(IN) :: b_index
1794 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1795 TYPE(qs_environment_type), POINTER :: qs_env
1796 INTEGER, DIMENSION(:), INTENT(INOUT), OPTIONAL :: actual_i_img
1797
1798 INTEGER :: actual_img, apc_index, i_index, nimg
1799
1800 nimg = SIZE(iapc_pairs, 1)
1801 IF (PRESENT(actual_i_img)) actual_i_img(:) = 0
1802
1803 iapc_pairs(:, :) = 0
1804!$OMP PARALLEL DO DEFAULT(NONE) SHARED(iapc_pairs,nimg,ri_data,qs_env,b_index,actual_i_img) &
1805!$OMP PRIVATE(i_index,actual_img,apc_index)
1806 DO i_index = 1, nimg
1807 actual_img = ri_data%idx_to_img(i_index)
1808 apc_index = get_apc_index_from_ib(actual_img, b_index, qs_env)
1809 IF (apc_index == 0) cycle
1810 iapc_pairs(i_index, 1) = i_index
1811 iapc_pairs(i_index, 2) = apc_index
1812 IF (PRESENT(actual_i_img)) actual_i_img(i_index) = actual_img
1813 END DO
1814
1815 END SUBROUTINE get_iapc_pairs
1816
1817! **************************************************************************************************
1818!> \brief A function that, given a cell index a, returun the index corresponding to -a, and zero if
1819!> if out of bounds
1820!> \param a_index ...
1821!> \param qs_env ...
1822!> \return ...
1823! **************************************************************************************************
1824 FUNCTION get_opp_index(a_index, qs_env) RESULT(opp_index)
1825 INTEGER, INTENT(IN) :: a_index
1826 TYPE(qs_environment_type), POINTER :: qs_env
1827 INTEGER :: opp_index
1828
1829 INTEGER, DIMENSION(3) :: opp_cell
1830 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1831 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1832 TYPE(kpoint_type), POINTER :: kpoints
1833
1834 NULLIFY (kpoints, cell_to_index, index_to_cell)
1835
1836 CALL get_qs_env(qs_env, kpoints=kpoints)
1837 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1838
1839 opp_cell(:) = -index_to_cell(:, a_index)
1840
1841 IF (any([opp_cell(1), opp_cell(2), opp_cell(3)] < lbound(cell_to_index)) .OR. &
1842 any([opp_cell(1), opp_cell(2), opp_cell(3)] > ubound(cell_to_index))) THEN
1843
1844 opp_index = 0
1845 ELSE
1846 opp_index = cell_to_index(opp_cell(1), opp_cell(2), opp_cell(3))
1847 END IF
1848
1849 END FUNCTION get_opp_index
1850
1851! **************************************************************************************************
1852!> \brief A routine that returns the actual non-symemtric density matrix for each image, by Fourier
1853!> transforming the kpoint density matrix
1854!> \param rho_ao_t ...
1855!> \param rho_ao ...
1856!> \param scale_prev_p ...
1857!> \param ri_data ...
1858!> \param qs_env ...
1859! **************************************************************************************************
1860 SUBROUTINE get_pmat_images(rho_ao_t, rho_ao, scale_prev_p, ri_data, qs_env)
1861 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: rho_ao_t
1862 TYPE(dbcsr_p_type), DIMENSION(:, :), INTENT(INOUT) :: rho_ao
1863 REAL(dp), INTENT(IN) :: scale_prev_p
1864 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1865 TYPE(qs_environment_type), POINTER :: qs_env
1866
1867 INTEGER :: cell_j(3), i_img, i_spin, iatom, icol, &
1868 irow, j_img, jatom, mi_img, mj_img, &
1869 nimg, nspins
1870 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1871 LOGICAL :: found
1872 REAL(dp) :: fac
1873 REAL(dp), DIMENSION(:, :), POINTER :: pblock, pblock_desymm
1874 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: matrix_ks, rho_desymm
1875 TYPE(dbt_type) :: tmp
1876 TYPE(dft_control_type), POINTER :: dft_control
1877 TYPE(kpoint_type), POINTER :: kpoints
1879 DIMENSION(:), POINTER :: nl_iterator
1880 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
1881 POINTER :: sab_nl, sab_nl_nosym
1882 TYPE(qs_scf_env_type), POINTER :: scf_env
1883
1884 NULLIFY (rho_desymm, kpoints, sab_nl_nosym, scf_env, matrix_ks, dft_control, &
1885 sab_nl, nl_iterator, cell_to_index, pblock, pblock_desymm)
1886
1887 CALL get_qs_env(qs_env, kpoints=kpoints, scf_env=scf_env, matrix_ks_kp=matrix_ks, dft_control=dft_control)
1888 CALL get_kpoint_info(kpoints, sab_nl_nosym=sab_nl_nosym, cell_to_index=cell_to_index, sab_nl=sab_nl)
1889
1890 IF (dft_control%do_admm) THEN
1891 CALL get_admm_env(qs_env%admm_env, matrix_ks_aux_fit_kp=matrix_ks)
1892 END IF
1893
1894 nspins = SIZE(matrix_ks, 1)
1895 nimg = ri_data%nimg
1896
1897 ALLOCATE (rho_desymm(nspins, nimg))
1898 DO i_img = 1, nimg
1899 DO i_spin = 1, nspins
1900 ALLOCATE (rho_desymm(i_spin, i_img)%matrix)
1901 CALL dbcsr_create(rho_desymm(i_spin, i_img)%matrix, template=matrix_ks(i_spin, i_img)%matrix, &
1902 matrix_type=dbcsr_type_no_symmetry)
1903 CALL cp_dbcsr_alloc_block_from_nbl(rho_desymm(i_spin, i_img)%matrix, sab_nl_nosym)
1904 END DO
1905 END DO
1906 CALL dbt_create(rho_desymm(1, 1)%matrix, tmp)
1907
1908 !We transfor the symmtric typed (but not actually symmetric: P_ab^i = P_ba^-i) real-spaced density
1909 !matrix into proper non-symemtric ones (using the same nl for consistency)
1910 CALL neighbor_list_iterator_create(nl_iterator, sab_nl)
1911 DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
1912 CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, cell=cell_j)
1913 j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
1914 IF (j_img > nimg .OR. j_img < 1) cycle
1915
1916 fac = 1.0_dp
1917 IF (iatom == jatom) fac = 0.5_dp
1918 mj_img = get_opp_index(j_img, qs_env)
1919 !if no opposite image, then no sum of P^j + P^-j => need full diag
1920 IF (mj_img == 0) fac = 1.0_dp
1921
1922 irow = iatom
1923 icol = jatom
1924 IF (iatom > jatom) THEN
1925 !because symmetric nl. Value for atom pair i,j is actually stored in j,i if i > j
1926 irow = jatom
1927 icol = iatom
1928 END IF
1929
1930 DO i_spin = 1, nspins
1931 CALL dbcsr_get_block_p(rho_ao(i_spin, j_img)%matrix, irow, icol, pblock, found)
1932 IF (.NOT. found) cycle
1933
1934 !distribution of symm and non-symm matrix match in that way
1935 CALL dbcsr_get_block_p(rho_desymm(i_spin, j_img)%matrix, iatom, jatom, pblock_desymm, found)
1936 IF (.NOT. found) cycle
1937
1938 IF (iatom > jatom) THEN
1939 pblock_desymm(:, :) = fac*transpose(pblock(:, :))
1940 ELSE
1941 pblock_desymm(:, :) = fac*pblock(:, :)
1942 END IF
1943 END DO
1944 END DO
1945 CALL neighbor_list_iterator_release(nl_iterator)
1946
1947 DO i_img = 1, nimg
1948 DO i_spin = 1, nspins
1949 CALL dbt_scale(rho_ao_t(i_spin, i_img), scale_prev_p)
1950
1951 CALL dbt_copy_matrix_to_tensor(rho_desymm(i_spin, i_img)%matrix, tmp)
1952 CALL dbt_copy(tmp, rho_ao_t(i_spin, i_img), summation=.true., move_data=.true.)
1953
1954 !symmetrize by addin transpose of opp img
1955 mi_img = get_opp_index(i_img, qs_env)
1956 IF (mi_img > 0 .AND. mi_img .LE. nimg) THEN
1957 CALL dbt_copy_matrix_to_tensor(rho_desymm(i_spin, mi_img)%matrix, tmp)
1958 CALL dbt_copy(tmp, rho_ao_t(i_spin, i_img), order=[2, 1], summation=.true., move_data=.true.)
1959 END IF
1960 CALL dbt_filter(rho_ao_t(i_spin, i_img), ri_data%filter_eps)
1961 END DO
1962 END DO
1963
1964 DO i_img = 1, nimg
1965 DO i_spin = 1, nspins
1966 CALL dbcsr_release(rho_desymm(i_spin, i_img)%matrix)
1967 DEALLOCATE (rho_desymm(i_spin, i_img)%matrix)
1968 END DO
1969 END DO
1970
1971 CALL dbt_destroy(tmp)
1972 DEALLOCATE (rho_desymm)
1973
1974 END SUBROUTINE get_pmat_images
1975
1976! **************************************************************************************************
1977!> \brief A routine that, given a cell index b and atom indices ij, returns a 2c tensor with the HFX
1978!> potential (P_i^0|Q_j^b), within the extended RI basis
1979!> \param t_2c_pot ...
1980!> \param mat_orig ...
1981!> \param atom_i ...
1982!> \param atom_j ...
1983!> \param img_b ...
1984!> \param ri_data ...
1985!> \param qs_env ...
1986!> \param do_inverse ...
1987!> \param para_env_ext ...
1988!> \param blacs_env_ext ...
1989!> \param dbcsr_template ...
1990!> \param off_diagonal ...
1991!> \param skip_inverse ...
1992! **************************************************************************************************
1993 SUBROUTINE get_ext_2c_int(t_2c_pot, mat_orig, atom_i, atom_j, img_b, ri_data, qs_env, do_inverse, &
1994 para_env_ext, blacs_env_ext, dbcsr_template, off_diagonal, skip_inverse)
1995 TYPE(dbt_type), INTENT(INOUT) :: t_2c_pot
1996 TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: mat_orig
1997 INTEGER, INTENT(IN) :: atom_i, atom_j, img_b
1998 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1999 TYPE(qs_environment_type), POINTER :: qs_env
2000 LOGICAL, INTENT(IN), OPTIONAL :: do_inverse
2001 TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env_ext
2002 TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: blacs_env_ext
2003 TYPE(dbcsr_type), OPTIONAL, POINTER :: dbcsr_template
2004 LOGICAL, INTENT(IN), OPTIONAL :: off_diagonal, skip_inverse
2005
2006 CHARACTER(LEN=*), PARAMETER :: routinen = 'get_ext_2c_int'
2007
2008 INTEGER :: group, handle, handle2, i_img, i_ri, iatom, iblk, ikind, img_tot, j_img, j_ri, &
2009 jatom, jblk, jkind, n_dependent, natom, nblks_ri, nimg, nkind
2010 INTEGER, ALLOCATABLE, DIMENSION(:) :: dist1, dist2
2011 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: present_atoms_i, present_atoms_j
2012 INTEGER, DIMENSION(3) :: cell_b, cell_i, cell_j, cell_tot
2013 INTEGER, DIMENSION(:), POINTER :: col_dist, col_dist_ext, ri_blk_size_ext, &
2014 row_dist, row_dist_ext
2015 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell, pgrid
2016 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
2017 LOGICAL :: do_inverse_prv, found, my_offd, &
2018 skip_inverse_prv, use_template
2019 REAL(dp) :: bfac, dij, r0, r1, threshold
2020 REAL(dp), DIMENSION(3) :: ri, rij, rj, rref, scoord
2021 REAL(dp), DIMENSION(:, :), POINTER :: pblock
2022 TYPE(cell_type), POINTER :: cell
2023 TYPE(cp_blacs_env_type), POINTER :: blacs_env
2024 TYPE(dbcsr_distribution_type) :: dbcsr_dist, dbcsr_dist_ext
2025 TYPE(dbcsr_iterator_type) :: dbcsr_iter
2026 TYPE(dbcsr_type) :: work, work_tight, work_tight_inv
2027 TYPE(dbt_type) :: t_2c_tmp
2028 TYPE(distribution_2d_type), POINTER :: dist_2d
2029 TYPE(gto_basis_set_p_type), ALLOCATABLE, &
2030 DIMENSION(:), TARGET :: basis_set_ri
2031 TYPE(kpoint_type), POINTER :: kpoints
2032 TYPE(mp_para_env_type), POINTER :: para_env
2034 DIMENSION(:), POINTER :: nl_iterator
2035 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
2036 POINTER :: nl_2c
2037 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
2038 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
2039
2040 NULLIFY (qs_kind_set, nl_2c, nl_iterator, cell, kpoints, cell_to_index, index_to_cell, dist_2d, &
2041 para_env, pblock, blacs_env, particle_set, col_dist, row_dist, pgrid, &
2042 col_dist_ext, row_dist_ext)
2043
2044 CALL timeset(routinen, handle)
2045
2046 !Idea: run over the neighbor list once for i and once for j, and record in which cell the MIC
2047 ! atoms are. Then loop over the atoms and only take the pairs the we need
2048
2049 CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, cell=cell, &
2050 kpoints=kpoints, para_env=para_env, blacs_env=blacs_env, particle_set=particle_set)
2051 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
2052
2053 do_inverse_prv = .false.
2054 IF (PRESENT(do_inverse)) do_inverse_prv = do_inverse
2055 IF (do_inverse_prv) THEN
2056 cpassert(atom_i == atom_j)
2057 END IF
2058
2059 skip_inverse_prv = .false.
2060 IF (PRESENT(skip_inverse)) skip_inverse_prv = skip_inverse
2061
2062 my_offd = .false.
2063 IF (PRESENT(off_diagonal)) my_offd = off_diagonal
2064
2065 IF (PRESENT(para_env_ext)) para_env => para_env_ext
2066 IF (PRESENT(blacs_env_ext)) blacs_env => blacs_env_ext
2067
2068 nimg = SIZE(mat_orig)
2069
2070 CALL timeset(routinen//"_nl_iter", handle2)
2071
2072 !create our own dist_2d in the subgroup
2073 ALLOCATE (dist1(natom), dist2(natom))
2074 DO iatom = 1, natom
2075 dist1(iatom) = mod(iatom, blacs_env%num_pe(1))
2076 dist2(iatom) = mod(iatom, blacs_env%num_pe(2))
2077 END DO
2078 CALL distribution_2d_create(dist_2d, dist1, dist2, nkind, particle_set, blacs_env_ext=blacs_env)
2079
2080 ALLOCATE (basis_set_ri(nkind))
2081 CALL basis_set_list_setup(basis_set_ri, ri_data%ri_basis_type, qs_kind_set)
2082
2083 CALL build_2c_neighbor_lists(nl_2c, basis_set_ri, basis_set_ri, ri_data%ri_metric, &
2084 "HFX_2c_nl_RI", qs_env, sym_ij=.false., dist_2d=dist_2d)
2085
2086 ALLOCATE (present_atoms_i(natom, nimg), present_atoms_j(natom, nimg))
2087 present_atoms_i = 0
2088 present_atoms_j = 0
2089
2090 CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
2091 DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
2092 CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, r=rij, cell=cell_j, &
2093 ikind=ikind, jkind=jkind)
2094
2095 dij = norm2(rij)
2096
2097 j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
2098 IF (j_img > nimg .OR. j_img < 1) cycle
2099
2100 IF (iatom == atom_i .AND. dij .LE. ri_data%kp_RI_range) present_atoms_i(jatom, j_img) = 1
2101 IF (iatom == atom_j .AND. dij .LE. ri_data%kp_RI_range) present_atoms_j(jatom, j_img) = 1
2102 END DO
2103 CALL neighbor_list_iterator_release(nl_iterator)
2104 CALL release_neighbor_list_sets(nl_2c)
2105 CALL distribution_2d_release(dist_2d)
2106 CALL timestop(handle2)
2107
2108 CALL para_env%sum(present_atoms_i)
2109 CALL para_env%sum(present_atoms_j)
2110
2111 !Need to build a work matrix with matching distribution to mat_orig
2112 !If template is provided, use it. If not, we create it.
2113 use_template = .false.
2114 IF (PRESENT(dbcsr_template)) THEN
2115 IF (ASSOCIATED(dbcsr_template)) use_template = .true.
2116 END IF
2117
2118 IF (use_template) THEN
2119 CALL dbcsr_create(work, template=dbcsr_template)
2120 ELSE
2121 CALL dbcsr_get_info(mat_orig(1), distribution=dbcsr_dist)
2122 CALL dbcsr_distribution_get(dbcsr_dist, row_dist=row_dist, col_dist=col_dist, group=group, pgrid=pgrid)
2123 ALLOCATE (row_dist_ext(ri_data%ncell_RI*natom), col_dist_ext(ri_data%ncell_RI*natom))
2124 ALLOCATE (ri_blk_size_ext(ri_data%ncell_RI*natom))
2125 DO i_ri = 1, ri_data%ncell_RI
2126 row_dist_ext((i_ri - 1)*natom + 1:i_ri*natom) = row_dist(:)
2127 col_dist_ext((i_ri - 1)*natom + 1:i_ri*natom) = col_dist(:)
2128 ri_blk_size_ext((i_ri - 1)*natom + 1:i_ri*natom) = ri_data%bsizes_RI(:)
2129 END DO
2130
2131 CALL dbcsr_distribution_new(dbcsr_dist_ext, group=group, pgrid=pgrid, &
2132 row_dist=row_dist_ext, col_dist=col_dist_ext)
2133 CALL dbcsr_create(work, dist=dbcsr_dist_ext, name="RI_ext", matrix_type=dbcsr_type_no_symmetry, &
2134 row_blk_size=ri_blk_size_ext, col_blk_size=ri_blk_size_ext)
2135 CALL dbcsr_distribution_release(dbcsr_dist_ext)
2136 DEALLOCATE (col_dist_ext, row_dist_ext, ri_blk_size_ext)
2137
2138 IF (PRESENT(dbcsr_template)) THEN
2139 ALLOCATE (dbcsr_template)
2140 CALL dbcsr_create(dbcsr_template, template=work)
2141 END IF
2142 END IF !use_template
2143
2144 cell_b(:) = index_to_cell(:, img_b)
2145 DO i_img = 1, nimg
2146 i_ri = ri_data%img_to_RI_cell(i_img)
2147 IF (i_ri == 0) cycle
2148 cell_i(:) = index_to_cell(:, i_img)
2149 DO j_img = 1, nimg
2150 j_ri = ri_data%img_to_RI_cell(j_img)
2151 IF (j_ri == 0) cycle
2152 cell_j(:) = index_to_cell(:, j_img)
2153 cell_tot = cell_j - cell_i + cell_b
2154
2155 IF (any([cell_tot(1), cell_tot(2), cell_tot(3)] < lbound(cell_to_index)) .OR. &
2156 any([cell_tot(1), cell_tot(2), cell_tot(3)] > ubound(cell_to_index))) cycle
2157 img_tot = cell_to_index(cell_tot(1), cell_tot(2), cell_tot(3))
2158 IF (img_tot > nimg .OR. img_tot < 1) cycle
2159
2160 CALL dbcsr_iterator_start(dbcsr_iter, mat_orig(img_tot))
2161 DO WHILE (dbcsr_iterator_blocks_left(dbcsr_iter))
2162 CALL dbcsr_iterator_next_block(dbcsr_iter, row=iatom, column=jatom)
2163 IF (present_atoms_i(iatom, i_img) == 0) cycle
2164 IF (present_atoms_j(jatom, j_img) == 0) cycle
2165 IF (my_offd .AND. (i_ri - 1)*natom + iatom == (j_ri - 1)*natom + jatom) cycle
2166
2167 CALL dbcsr_get_block_p(mat_orig(img_tot), iatom, jatom, pblock, found)
2168 IF (.NOT. found) cycle
2169
2170 CALL dbcsr_put_block(work, (i_ri - 1)*natom + iatom, (j_ri - 1)*natom + jatom, pblock)
2171
2172 END DO
2173 CALL dbcsr_iterator_stop(dbcsr_iter)
2174
2175 END DO !j_img
2176 END DO !i_img
2177 CALL dbcsr_finalize(work)
2178
2179 IF (do_inverse_prv) THEN
2180
2181 r1 = ri_data%kp_RI_range
2182 r0 = ri_data%kp_bump_rad
2183
2184 !Because there are a lot of empty rows/cols in work, we need to get rid of them for inversion
2185 nblks_ri = sum(present_atoms_i)
2186 ALLOCATE (col_dist_ext(nblks_ri), row_dist_ext(nblks_ri), ri_blk_size_ext(nblks_ri))
2187 iblk = 0
2188 DO i_img = 1, nimg
2189 i_ri = ri_data%img_to_RI_cell(i_img)
2190 IF (i_ri == 0) cycle
2191 DO iatom = 1, natom
2192 IF (present_atoms_i(iatom, i_img) == 0) cycle
2193 iblk = iblk + 1
2194 col_dist_ext(iblk) = col_dist(iatom)
2195 row_dist_ext(iblk) = row_dist(iatom)
2196 ri_blk_size_ext(iblk) = ri_data%bsizes_RI(iatom)
2197 END DO
2198 END DO
2199
2200 CALL dbcsr_distribution_new(dbcsr_dist_ext, group=group, pgrid=pgrid, &
2201 row_dist=row_dist_ext, col_dist=col_dist_ext)
2202 CALL dbcsr_create(work_tight, dist=dbcsr_dist_ext, name="RI_ext", matrix_type=dbcsr_type_no_symmetry, &
2203 row_blk_size=ri_blk_size_ext, col_blk_size=ri_blk_size_ext)
2204 CALL dbcsr_create(work_tight_inv, dist=dbcsr_dist_ext, name="RI_ext", matrix_type=dbcsr_type_no_symmetry, &
2205 row_blk_size=ri_blk_size_ext, col_blk_size=ri_blk_size_ext)
2206 CALL dbcsr_distribution_release(dbcsr_dist_ext)
2207 DEALLOCATE (col_dist_ext, row_dist_ext, ri_blk_size_ext)
2208
2209 !We apply a bump function to the RI metric inverse for smooth RI basis extension:
2210 ! S^-1 = B * ((P|Q)_D + B*(P|Q)_OD*B)^-1 * B, with D block-diagonal blocks and OD off-diagonal
2211 rref = pbc(particle_set(atom_i)%r, cell)
2212
2213 iblk = 0
2214 DO i_img = 1, nimg
2215 i_ri = ri_data%img_to_RI_cell(i_img)
2216 IF (i_ri == 0) cycle
2217 DO iatom = 1, natom
2218 IF (present_atoms_i(iatom, i_img) == 0) cycle
2219 iblk = iblk + 1
2220
2221 CALL real_to_scaled(scoord, pbc(particle_set(iatom)%r, cell), cell)
2222 CALL scaled_to_real(ri, scoord(:) + index_to_cell(:, i_img), cell)
2223
2224 jblk = 0
2225 DO j_img = 1, nimg
2226 j_ri = ri_data%img_to_RI_cell(j_img)
2227 IF (j_ri == 0) cycle
2228 DO jatom = 1, natom
2229 IF (present_atoms_j(jatom, j_img) == 0) cycle
2230 jblk = jblk + 1
2231
2232 CALL real_to_scaled(scoord, pbc(particle_set(jatom)%r, cell), cell)
2233 CALL scaled_to_real(rj, scoord(:) + index_to_cell(:, j_img), cell)
2234
2235 CALL dbcsr_get_block_p(work, (i_ri - 1)*natom + iatom, (j_ri - 1)*natom + jatom, pblock, found)
2236 IF (.NOT. found) cycle
2237
2238 bfac = 1.0_dp
2239 IF (iblk .NE. jblk) bfac = bump(norm2(ri - rref), r0, r1)*bump(norm2(rj - rref), r0, r1)
2240 CALL dbcsr_put_block(work_tight, iblk, jblk, bfac*pblock(:, :))
2241 END DO
2242 END DO
2243 END DO
2244 END DO
2245 CALL dbcsr_finalize(work_tight)
2246 CALL dbcsr_clear(work)
2247
2248 IF (.NOT. skip_inverse_prv) THEN
2249 SELECT CASE (ri_data%t2c_method)
2250 CASE (hfx_ri_do_2c_iter)
2251 threshold = max(ri_data%filter_eps, 1.0e-12_dp)
2252 CALL invert_hotelling(work_tight_inv, work_tight, threshold=threshold, silent=.false.)
2254 CALL dbcsr_copy(work_tight_inv, work_tight)
2255 CALL cp_dbcsr_cholesky_decompose(work_tight_inv, para_env=para_env, blacs_env=blacs_env)
2256 CALL cp_dbcsr_cholesky_invert(work_tight_inv, para_env=para_env, blacs_env=blacs_env, &
2257 uplo_to_full=.true.)
2258 CASE (hfx_ri_do_2c_diag)
2259 CALL dbcsr_copy(work_tight_inv, work_tight)
2260 CALL cp_dbcsr_power(work_tight_inv, -1.0_dp, ri_data%eps_eigval, n_dependent, &
2261 para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
2262 END SELECT
2263 ELSE
2264 CALL dbcsr_copy(work_tight_inv, work_tight)
2265 END IF
2266
2267 !move back data to standard extended RI pattern
2268 !Note: we apply the external bump to ((P|Q)_D + B*(P|Q)_OD*B)^-1 later, because this matrix
2269 ! is required for forces
2270 iblk = 0
2271 DO i_img = 1, nimg
2272 i_ri = ri_data%img_to_RI_cell(i_img)
2273 IF (i_ri == 0) cycle
2274 DO iatom = 1, natom
2275 IF (present_atoms_i(iatom, i_img) == 0) cycle
2276 iblk = iblk + 1
2277
2278 jblk = 0
2279 DO j_img = 1, nimg
2280 j_ri = ri_data%img_to_RI_cell(j_img)
2281 IF (j_ri == 0) cycle
2282 DO jatom = 1, natom
2283 IF (present_atoms_j(jatom, j_img) == 0) cycle
2284 jblk = jblk + 1
2285
2286 CALL dbcsr_get_block_p(work_tight_inv, iblk, jblk, pblock, found)
2287 IF (.NOT. found) cycle
2288
2289 CALL dbcsr_put_block(work, (i_ri - 1)*natom + iatom, (j_ri - 1)*natom + jatom, pblock)
2290 END DO
2291 END DO
2292 END DO
2293 END DO
2294 CALL dbcsr_finalize(work)
2295
2296 CALL dbcsr_release(work_tight)
2297 CALL dbcsr_release(work_tight_inv)
2298 END IF
2299
2300 CALL dbt_create(work, t_2c_tmp)
2301 CALL dbt_copy_matrix_to_tensor(work, t_2c_tmp)
2302 CALL dbt_copy(t_2c_tmp, t_2c_pot, move_data=.true.)
2303 CALL dbt_filter(t_2c_pot, ri_data%filter_eps)
2304
2305 CALL dbt_destroy(t_2c_tmp)
2306 CALL dbcsr_release(work)
2307
2308 CALL timestop(handle)
2309
2310 END SUBROUTINE get_ext_2c_int
2311
2312! **************************************************************************************************
2313!> \brief Pre-contract the density matrices with the 3-center integrals:
2314!> P_sigma^a,lambda^a+c (mu^0 sigma^a| P^0)
2315!> \param t_3c_apc ...
2316!> \param rho_ao_t ...
2317!> \param ri_data ...
2318!> \param qs_env ...
2319! **************************************************************************************************
2320 SUBROUTINE contract_pmat_3c(t_3c_apc, rho_ao_t, ri_data, qs_env)
2321 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_apc, rho_ao_t
2322 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2323 TYPE(qs_environment_type), POINTER :: qs_env
2324
2325 CHARACTER(len=*), PARAMETER :: routinen = 'contract_pmat_3c'
2326
2327 INTEGER :: apc_img, batch_size, handle, i_batch, &
2328 i_img, i_spin, j_batch, n_batch_img, &
2329 n_batch_nze, nimg, nimg_nze, nspins
2330 INTEGER(int_8) :: nflop, nze
2331 INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_img, batch_ranges_nze, &
2332 int_indices
2333 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ac_pairs
2334 REAL(dp) :: occ, t1, t2
2335 TYPE(dbt_type) :: t_3c_tmp
2336 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: ints_stack, res_stack, rho_stack
2337 TYPE(dft_control_type), POINTER :: dft_control
2338
2339 CALL timeset(routinen, handle)
2340
2341 CALL get_qs_env(qs_env, dft_control=dft_control)
2342
2343 nimg = ri_data%nimg
2344 nimg_nze = ri_data%nimg_nze
2345 nspins = dft_control%nspins
2346
2347 CALL dbt_create(t_3c_apc(1, 1), t_3c_tmp)
2348
2349 batch_size = nimg/ri_data%n_mem
2350
2351 !batching over all images
2352 n_batch_img = nimg/batch_size
2353 IF (modulo(nimg, batch_size) .NE. 0) n_batch_img = n_batch_img + 1
2354 ALLOCATE (batch_ranges_img(n_batch_img + 1))
2355 DO i_batch = 1, n_batch_img
2356 batch_ranges_img(i_batch) = (i_batch - 1)*batch_size + 1
2357 END DO
2358 batch_ranges_img(n_batch_img + 1) = nimg + 1
2359
2360 !batching over images with non-zero 3c integrals
2361 n_batch_nze = nimg_nze/batch_size
2362 IF (modulo(nimg_nze, batch_size) .NE. 0) n_batch_nze = n_batch_nze + 1
2363 ALLOCATE (batch_ranges_nze(n_batch_nze + 1))
2364 DO i_batch = 1, n_batch_nze
2365 batch_ranges_nze(i_batch) = (i_batch - 1)*batch_size + 1
2366 END DO
2367 batch_ranges_nze(n_batch_nze + 1) = nimg_nze + 1
2368
2369 !Create the stack tensors in the approriate distribution
2370 ALLOCATE (rho_stack(2), ints_stack(2), res_stack(2))
2371 CALL get_stack_tensors(res_stack, rho_stack, ints_stack, rho_ao_t(1, 1), &
2372 ri_data%t_3c_int_ctr_1(1, 1), batch_size, ri_data, qs_env)
2373
2374 ALLOCATE (ac_pairs(nimg, 2), int_indices(nimg_nze))
2375 DO i_img = 1, nimg_nze
2376 int_indices(i_img) = i_img
2377 END DO
2378
2379 t1 = m_walltime()
2380 DO j_batch = 1, n_batch_nze
2381 !First batch is over the integrals. They are always in the same order, consistent with get_ac_pairs
2382 CALL fill_3c_stack(ints_stack(1), ri_data%t_3c_int_ctr_1(1, :), int_indices, 3, ri_data, &
2383 img_bounds=[batch_ranges_nze(j_batch), batch_ranges_nze(j_batch + 1)])
2384 CALL dbt_copy(ints_stack(1), ints_stack(2), move_data=.true.)
2385
2386 DO i_spin = 1, nspins
2387 DO i_batch = 1, n_batch_img
2388 !Second batch is over the P matrix. Here we fill the stacked rho tensors col by col
2389 DO apc_img = batch_ranges_img(i_batch), batch_ranges_img(i_batch + 1) - 1
2390 CALL get_ac_pairs(ac_pairs, apc_img, ri_data, qs_env)
2391 CALL fill_2c_stack(rho_stack(1), rho_ao_t(i_spin, :), ac_pairs(:, 2), 1, ri_data, &
2392 img_bounds=[batch_ranges_nze(j_batch), batch_ranges_nze(j_batch + 1)], &
2393 shift=apc_img - batch_ranges_img(i_batch) + 1)
2394
2395 END DO !apc_img
2396 CALL get_tensor_occupancy(rho_stack(1), nze, occ)
2397 IF (nze == 0) cycle
2398 CALL dbt_copy(rho_stack(1), rho_stack(2), move_data=.true.)
2399
2400 !The actual contraction
2401 CALL dbt_batched_contract_init(rho_stack(2))
2402 CALL dbt_contract(1.0_dp, ints_stack(2), rho_stack(2), &
2403 0.0_dp, res_stack(2), map_1=[1, 2], map_2=[3], &
2404 contract_1=[3], notcontract_1=[1, 2], &
2405 contract_2=[1], notcontract_2=[2], &
2406 filter_eps=ri_data%filter_eps, flop=nflop)
2407 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2408 CALL dbt_batched_contract_finalize(rho_stack(2))
2409 CALL dbt_copy(res_stack(2), res_stack(1), move_data=.true.)
2410
2411 DO apc_img = batch_ranges_img(i_batch), batch_ranges_img(i_batch + 1) - 1
2412 !Destack the resulting tensor and put it in t_3c_apc with correct apc_img
2413 CALL unstack_t_3c_apc(t_3c_tmp, res_stack(1), apc_img - batch_ranges_img(i_batch) + 1)
2414 CALL dbt_copy(t_3c_tmp, t_3c_apc(i_spin, apc_img), summation=.true., move_data=.true.)
2415 END DO
2416
2417 END DO !i_batch
2418 END DO !i_spin
2419 END DO !j_batch
2420 DEALLOCATE (batch_ranges_img)
2421 DEALLOCATE (batch_ranges_nze)
2422 t2 = m_walltime()
2423 ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
2424
2425 CALL dbt_destroy(rho_stack(1))
2426 CALL dbt_destroy(rho_stack(2))
2427 CALL dbt_destroy(ints_stack(1))
2428 CALL dbt_destroy(ints_stack(2))
2429 CALL dbt_destroy(res_stack(1))
2430 CALL dbt_destroy(res_stack(2))
2431 CALL dbt_destroy(t_3c_tmp)
2432
2433 CALL timestop(handle)
2434
2435 END SUBROUTINE contract_pmat_3c
2436
2437! **************************************************************************************************
2438!> \brief Pre-contract 3-center integrals with the bumped invrse RI metric, for each atom
2439!> \param t_3c_int ...
2440!> \param ri_data ...
2441!> \param qs_env ...
2442! **************************************************************************************************
2443 SUBROUTINE precontract_3c_ints(t_3c_int, ri_data, qs_env)
2444 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_int
2445 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2446 TYPE(qs_environment_type), POINTER :: qs_env
2447
2448 CHARACTER(len=*), PARAMETER :: routinen = 'precontract_3c_ints'
2449
2450 INTEGER :: batch_size, handle, i_batch, i_img, &
2451 i_ri, iatom, is, n_batch, natom, &
2452 nblks, nblks_3c(3), nimg
2453 INTEGER(int_8) :: nflop
2454 INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges, bsizes_ri_ext, bsizes_ri_ext_split, &
2455 bsizes_stack, dist1, dist2, dist3, dist_stack3, idx_to_at_ao, int_indices
2456 TYPE(dbt_distribution_type) :: t_dist
2457 TYPE(dbt_type) :: t_2c_ri_tmp(2), t_3c_tmp(3)
2458
2459 CALL timeset(routinen, handle)
2460
2461 CALL get_qs_env(qs_env, natom=natom)
2462
2463 nimg = ri_data%nimg
2464 ALLOCATE (int_indices(nimg))
2465 DO i_img = 1, nimg
2466 int_indices(i_img) = i_img
2467 END DO
2468
2469 ALLOCATE (idx_to_at_ao(SIZE(ri_data%bsizes_AO_split)))
2470 CALL get_idx_to_atom(idx_to_at_ao, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
2471
2472 nblks = SIZE(ri_data%bsizes_RI_split)
2473 ALLOCATE (bsizes_ri_ext(ri_data%ncell_RI*natom))
2474 ALLOCATE (bsizes_ri_ext_split(ri_data%ncell_RI*nblks))
2475 DO i_ri = 1, ri_data%ncell_RI
2476 bsizes_ri_ext((i_ri - 1)*natom + 1:i_ri*natom) = ri_data%bsizes_RI(:)
2477 bsizes_ri_ext_split((i_ri - 1)*nblks + 1:i_ri*nblks) = ri_data%bsizes_RI_split(:)
2478 END DO
2479 CALL create_2c_tensor(t_2c_ri_tmp(1), dist1, dist2, ri_data%pgrid_2d, &
2480 bsizes_ri_ext, bsizes_ri_ext, &
2481 name="(RI | RI)")
2482 DEALLOCATE (dist1, dist2)
2483 CALL create_2c_tensor(t_2c_ri_tmp(2), dist1, dist2, ri_data%pgrid_2d, &
2484 bsizes_ri_ext_split, bsizes_ri_ext_split, &
2485 name="(RI | RI)")
2486 DEALLOCATE (dist1, dist2)
2487
2488 !For more efficiency, we stack multiple images of the 3-center integrals into a single tensor
2489 batch_size = nimg/ri_data%n_mem
2490 n_batch = nimg/batch_size
2491 IF (modulo(nimg, batch_size) .NE. 0) n_batch = n_batch + 1
2492 ALLOCATE (batch_ranges(n_batch + 1))
2493 DO i_batch = 1, n_batch
2494 batch_ranges(i_batch) = (i_batch - 1)*batch_size + 1
2495 END DO
2496 batch_ranges(n_batch + 1) = nimg + 1
2497
2498 nblks = SIZE(ri_data%bsizes_AO_split)
2499 ALLOCATE (bsizes_stack(batch_size*nblks))
2500 DO is = 1, batch_size
2501 bsizes_stack((is - 1)*nblks + 1:is*nblks) = ri_data%bsizes_AO_split(:)
2502 END DO
2503
2504 CALL dbt_get_info(t_3c_int(1, 1), nblks_total=nblks_3c)
2505 ALLOCATE (dist1(nblks_3c(1)), dist2(nblks_3c(2)), dist3(nblks_3c(3)), dist_stack3(batch_size*nblks_3c(3)))
2506 CALL dbt_get_info(t_3c_int(1, 1), proc_dist_1=dist1, proc_dist_2=dist2, proc_dist_3=dist3)
2507 DO is = 1, batch_size
2508 dist_stack3((is - 1)*nblks_3c(3) + 1:is*nblks_3c(3)) = dist3(:)
2509 END DO
2510
2511 CALL dbt_distribution_new(t_dist, ri_data%pgrid, dist1, dist2, dist_stack3)
2512 CALL dbt_create(t_3c_tmp(1), "ints_stack", t_dist, [1], [2, 3], bsizes_ri_ext_split, &
2513 ri_data%bsizes_AO_split, bsizes_stack)
2514 CALL dbt_distribution_destroy(t_dist)
2515 DEALLOCATE (dist1, dist2, dist3, dist_stack3)
2516
2517 CALL dbt_create(t_3c_tmp(1), t_3c_tmp(2))
2518 CALL dbt_create(t_3c_int(1, 1), t_3c_tmp(3))
2519
2520 DO iatom = 1, natom
2521 CALL dbt_copy(ri_data%t_2c_inv(1, iatom), t_2c_ri_tmp(1))
2522 CALL apply_bump(t_2c_ri_tmp(1), iatom, ri_data, qs_env, from_left=.true., from_right=.true.)
2523 CALL dbt_copy(t_2c_ri_tmp(1), t_2c_ri_tmp(2), move_data=.true.)
2524
2525 CALL dbt_batched_contract_init(t_2c_ri_tmp(2))
2526 DO i_batch = 1, n_batch
2527
2528 CALL fill_3c_stack(t_3c_tmp(1), t_3c_int(1, :), int_indices, 3, ri_data, &
2529 img_bounds=[batch_ranges(i_batch), batch_ranges(i_batch + 1)], &
2530 filter_at=iatom, filter_dim=2, idx_to_at=idx_to_at_ao)
2531
2532 CALL dbt_contract(1.0_dp, t_2c_ri_tmp(2), t_3c_tmp(1), &
2533 0.0_dp, t_3c_tmp(2), map_1=[1], map_2=[2, 3], &
2534 contract_1=[2], notcontract_1=[1], &
2535 contract_2=[1], notcontract_2=[2, 3], &
2536 filter_eps=ri_data%filter_eps, flop=nflop)
2537 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2538
2539 DO i_img = batch_ranges(i_batch), batch_ranges(i_batch + 1) - 1
2540 CALL unstack_t_3c_apc(t_3c_tmp(3), t_3c_tmp(2), i_img - batch_ranges(i_batch) + 1)
2541 CALL dbt_copy(t_3c_tmp(3), ri_data%t_3c_int_ctr_1(1, i_img), summation=.true., &
2542 order=[2, 1, 3], move_data=.true.)
2543 END DO
2544 CALL dbt_clear(t_3c_tmp(1))
2545 END DO
2546 CALL dbt_batched_contract_finalize(t_2c_ri_tmp(2))
2547
2548 END DO
2549 CALL dbt_destroy(t_2c_ri_tmp(1))
2550 CALL dbt_destroy(t_2c_ri_tmp(2))
2551 CALL dbt_destroy(t_3c_tmp(1))
2552 CALL dbt_destroy(t_3c_tmp(2))
2553 CALL dbt_destroy(t_3c_tmp(3))
2554
2555 DO i_img = 1, nimg
2556 CALL dbt_destroy(t_3c_int(1, i_img))
2557 END DO
2558
2559 CALL timestop(handle)
2560
2561 END SUBROUTINE precontract_3c_ints
2562
2563! **************************************************************************************************
2564!> \brief Copy the data of a 2D tensor living in the main MPI group to a sub-group, given the proc
2565!> mapping from one to the other (e.g. for a proc idx in the subgroup, we get the idx in the main)
2566!> \param t2c_sub ...
2567!> \param t2c_main ...
2568!> \param group_size ...
2569!> \param ngroups ...
2570!> \param para_env ...
2571! **************************************************************************************************
2572 SUBROUTINE copy_2c_to_subgroup(t2c_sub, t2c_main, group_size, ngroups, para_env)
2573 TYPE(dbt_type), INTENT(INOUT) :: t2c_sub, t2c_main
2574 INTEGER, INTENT(IN) :: group_size, ngroups
2575 TYPE(mp_para_env_type), POINTER :: para_env
2576
2577 INTEGER :: batch_size, i, i_batch, i_msg, iblk, &
2578 igroup, iproc, ir, is, jblk, n_batch, &
2579 nocc, tag
2580 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes1, bsizes2
2581 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: block_dest, block_source
2582 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: current_dest
2583 INTEGER, DIMENSION(2) :: ind, nblks
2584 LOGICAL :: found
2585 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
2586 TYPE(cp_2d_r_p_type), ALLOCATABLE, DIMENSION(:) :: recv_buff, send_buff
2587 TYPE(dbt_iterator_type) :: iter
2588 TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_req, send_req
2589
2590 !Stategy: we loop over the main tensor, and send all the data. Then we loop over the sub tensor
2591 ! and receive it. We do all of it with async MPI communication. The sub tensor needs
2592 ! to have blocks pre-reserved though
2593
2594 CALL dbt_get_info(t2c_main, nblks_total=nblks)
2595
2596 !Loop over the main tensor, count how many blocks are there, which ones, and on which proc
2597 ALLOCATE (block_source(nblks(1), nblks(2)))
2598 block_source = -1
2599 nocc = 0
2600!$OMP PARALLEL DEFAULT(NONE) SHARED(t2c_main,para_env,nocc,block_source) PRIVATE(iter,ind,blk,found)
2601 CALL dbt_iterator_start(iter, t2c_main)
2602 DO WHILE (dbt_iterator_blocks_left(iter))
2603 CALL dbt_iterator_next_block(iter, ind)
2604 CALL dbt_get_block(t2c_main, ind, blk, found)
2605 IF (.NOT. found) cycle
2606
2607 block_source(ind(1), ind(2)) = para_env%mepos
2608!$OMP ATOMIC
2609 nocc = nocc + 1
2610 DEALLOCATE (blk)
2611 END DO
2612 CALL dbt_iterator_stop(iter)
2613!$OMP END PARALLEL
2614
2615 CALL para_env%sum(nocc)
2616 CALL para_env%sum(block_source)
2617 block_source = block_source + para_env%num_pe - 1
2618 IF (nocc == 0) RETURN
2619
2620 !Loop over the sub tensor, get the block destination
2621 igroup = para_env%mepos/group_size
2622 ALLOCATE (block_dest(nblks(1), nblks(2)))
2623 block_dest = -1
2624 DO jblk = 1, nblks(2)
2625 DO iblk = 1, nblks(1)
2626 IF (block_source(iblk, jblk) == -1) cycle
2627
2628 CALL dbt_get_stored_coordinates(t2c_sub, [iblk, jblk], iproc)
2629 block_dest(iblk, jblk) = igroup*group_size + iproc !mapping of iproc in subgroup to main group idx
2630 END DO
2631 END DO
2632
2633 ALLOCATE (bsizes1(nblks(1)), bsizes2(nblks(2)))
2634 CALL dbt_get_info(t2c_main, blk_size_1=bsizes1, blk_size_2=bsizes2)
2635
2636 ALLOCATE (current_dest(nblks(1), nblks(2), 0:ngroups - 1))
2637 DO igroup = 0, ngroups - 1
2638 !for a given subgroup, need to make the destination available to everyone in the main group
2639 current_dest(:, :, igroup) = block_dest(:, :)
2640 CALL para_env%bcast(current_dest(:, :, igroup), source=igroup*group_size) !bcast from first proc in sub-group
2641 END DO
2642
2643 !We go by batches, which cannot be larger than the maximum MPI tag value
2644 batch_size = min(para_env%get_tag_ub(), 128000, nocc*ngroups)
2645 n_batch = (nocc*ngroups)/batch_size
2646 IF (modulo(nocc*ngroups, batch_size) .NE. 0) n_batch = n_batch + 1
2647
2648 DO i_batch = 1, n_batch
2649 !Loop over groups, blocks and send/receive
2650 ALLOCATE (send_buff(batch_size), recv_buff(batch_size))
2651 ALLOCATE (send_req(batch_size), recv_req(batch_size))
2652 ir = 0
2653 is = 0
2654 i_msg = 0
2655 DO jblk = 1, nblks(2)
2656 DO iblk = 1, nblks(1)
2657 DO igroup = 0, ngroups - 1
2658 IF (block_source(iblk, jblk) == -1) cycle
2659
2660 i_msg = i_msg + 1
2661 IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) cycle
2662
2663 !a unique tag per block, within this batch
2664 tag = i_msg - (i_batch - 1)*batch_size
2665
2666 found = .false.
2667 IF (para_env%mepos == block_source(iblk, jblk)) THEN
2668 CALL dbt_get_block(t2c_main, [iblk, jblk], blk, found)
2669 END IF
2670
2671 !If blocks live on same proc, simply copy. Else MPI send/recv
2672 IF (block_source(iblk, jblk) == current_dest(iblk, jblk, igroup)) THEN
2673 IF (found) CALL dbt_put_block(t2c_sub, [iblk, jblk], shape(blk), blk)
2674 ELSE
2675 IF (para_env%mepos == block_source(iblk, jblk) .AND. found) THEN
2676 ALLOCATE (send_buff(tag)%array(bsizes1(iblk), bsizes2(jblk)))
2677 send_buff(tag)%array(:, :) = blk(:, :)
2678 is = is + 1
2679 CALL para_env%isend(msgin=send_buff(tag)%array, dest=current_dest(iblk, jblk, igroup), &
2680 request=send_req(is), tag=tag)
2681 END IF
2682
2683 IF (para_env%mepos == current_dest(iblk, jblk, igroup)) THEN
2684 ALLOCATE (recv_buff(tag)%array(bsizes1(iblk), bsizes2(jblk)))
2685 ir = ir + 1
2686 CALL para_env%irecv(msgout=recv_buff(tag)%array, source=block_source(iblk, jblk), &
2687 request=recv_req(ir), tag=tag)
2688 END IF
2689 END IF
2690
2691 IF (found) DEALLOCATE (blk)
2692 END DO
2693 END DO
2694 END DO
2695
2696 CALL mp_waitall(send_req(1:is))
2697 CALL mp_waitall(recv_req(1:ir))
2698 !clean-up
2699 DO i = 1, batch_size
2700 IF (ASSOCIATED(send_buff(i)%array)) DEALLOCATE (send_buff(i)%array)
2701 END DO
2702
2703 !Finally copy the data from the buffer to the sub-tensor
2704 i_msg = 0
2705 DO jblk = 1, nblks(2)
2706 DO iblk = 1, nblks(1)
2707 DO igroup = 0, ngroups - 1
2708 IF (block_source(iblk, jblk) == -1) cycle
2709
2710 i_msg = i_msg + 1
2711 IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) cycle
2712
2713 !a unique tag per block, within this batch
2714 tag = i_msg - (i_batch - 1)*batch_size
2715
2716 IF (para_env%mepos == current_dest(iblk, jblk, igroup) .AND. &
2717 block_source(iblk, jblk) .NE. current_dest(iblk, jblk, igroup)) THEN
2718
2719 ALLOCATE (blk(bsizes1(iblk), bsizes2(jblk)))
2720 blk(:, :) = recv_buff(tag)%array(:, :)
2721 CALL dbt_put_block(t2c_sub, [iblk, jblk], shape(blk), blk)
2722 DEALLOCATE (blk)
2723 END IF
2724 END DO
2725 END DO
2726 END DO
2727
2728 !clean-up
2729 DO i = 1, batch_size
2730 IF (ASSOCIATED(recv_buff(i)%array)) DEALLOCATE (recv_buff(i)%array)
2731 END DO
2732 DEALLOCATE (send_buff, recv_buff, send_req, recv_req)
2733 END DO !i_batch
2734 CALL dbt_finalize(t2c_sub)
2735
2736 END SUBROUTINE copy_2c_to_subgroup
2737
2738! **************************************************************************************************
2739!> \brief Copy the data of a 3D tensor living in the main MPI group to a sub-group, given the proc
2740!> mapping from one to the other (e.g. for a proc idx in the subgroup, we get the idx in the main)
2741!> \param t3c_sub ...
2742!> \param t3c_main ...
2743!> \param group_size ...
2744!> \param ngroups ...
2745!> \param para_env ...
2746!> \param iatom_to_subgroup ...
2747!> \param dim_at ...
2748!> \param idx_to_at ...
2749! **************************************************************************************************
2750 SUBROUTINE copy_3c_to_subgroup(t3c_sub, t3c_main, group_size, ngroups, para_env, iatom_to_subgroup, &
2751 dim_at, idx_to_at)
2752 TYPE(dbt_type), INTENT(INOUT) :: t3c_sub, t3c_main
2753 INTEGER, INTENT(IN) :: group_size, ngroups
2754 TYPE(mp_para_env_type), POINTER :: para_env
2755 TYPE(cp_1d_logical_p_type), DIMENSION(:), &
2756 INTENT(INOUT), OPTIONAL :: iatom_to_subgroup
2757 INTEGER, INTENT(IN), OPTIONAL :: dim_at
2758 INTEGER, DIMENSION(:), OPTIONAL :: idx_to_at
2759
2760 INTEGER :: batch_size, i, i_batch, i_msg, iatom, &
2761 iblk, igroup, iproc, ir, is, jblk, &
2762 kblk, n_batch, nocc, tag
2763 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes1, bsizes2, bsizes3
2764 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: block_dest, block_source
2765 INTEGER, ALLOCATABLE, DIMENSION(:, :, :, :) :: current_dest
2766 INTEGER, DIMENSION(3) :: ind, nblks
2767 LOGICAL :: filter_at, found
2768 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
2769 TYPE(cp_3d_r_p_type), ALLOCATABLE, DIMENSION(:) :: recv_buff, send_buff
2770 TYPE(dbt_iterator_type) :: iter
2771 TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_req, send_req
2772
2773 !Stategy: we loop over the main tensor, and send all the data. Then we loop over the sub tensor
2774 ! and receive it. We do all of it with async MPI communication. The sub tensor needs
2775 ! to have blocks pre-reserved though
2776
2777 CALL dbt_get_info(t3c_main, nblks_total=nblks)
2778
2779 !in some cases, only copy a fraction of the 3c tensor to a given subgroup (corresponding to some atoms)
2780 filter_at = .false.
2781 IF (PRESENT(iatom_to_subgroup) .AND. PRESENT(dim_at) .AND. PRESENT(idx_to_at)) THEN
2782 filter_at = .true.
2783 cpassert(nblks(dim_at) == SIZE(idx_to_at))
2784 END IF
2785
2786 !Loop over the main tensor, count how many blocks are there, which ones, and on which proc
2787 ALLOCATE (block_source(nblks(1), nblks(2), nblks(3)))
2788 block_source = -1
2789 nocc = 0
2790!$OMP PARALLEL DEFAULT(NONE) SHARED(t3c_main,para_env,nocc,block_source) PRIVATE(iter,ind,blk,found)
2791 CALL dbt_iterator_start(iter, t3c_main)
2792 DO WHILE (dbt_iterator_blocks_left(iter))
2793 CALL dbt_iterator_next_block(iter, ind)
2794 CALL dbt_get_block(t3c_main, ind, blk, found)
2795 IF (.NOT. found) cycle
2796
2797 block_source(ind(1), ind(2), ind(3)) = para_env%mepos
2798!$OMP ATOMIC
2799 nocc = nocc + 1
2800 DEALLOCATE (blk)
2801 END DO
2802 CALL dbt_iterator_stop(iter)
2803!$OMP END PARALLEL
2804
2805 CALL para_env%sum(nocc)
2806 CALL para_env%sum(block_source)
2807 block_source = block_source + para_env%num_pe - 1
2808 IF (nocc == 0) RETURN
2809
2810 !Loop over the sub tensor, get the block destination
2811 igroup = para_env%mepos/group_size
2812 ALLOCATE (block_dest(nblks(1), nblks(2), nblks(3)))
2813 block_dest = -1
2814 DO kblk = 1, nblks(3)
2815 DO jblk = 1, nblks(2)
2816 DO iblk = 1, nblks(1)
2817 IF (block_source(iblk, jblk, kblk) == -1) cycle
2818
2819 CALL dbt_get_stored_coordinates(t3c_sub, [iblk, jblk, kblk], iproc)
2820 block_dest(iblk, jblk, kblk) = igroup*group_size + iproc !mapping of iproc in subgroup to main group idx
2821 END DO
2822 END DO
2823 END DO
2824
2825 ALLOCATE (bsizes1(nblks(1)), bsizes2(nblks(2)), bsizes3(nblks(3)))
2826 CALL dbt_get_info(t3c_main, blk_size_1=bsizes1, blk_size_2=bsizes2, blk_size_3=bsizes3)
2827
2828 ALLOCATE (current_dest(nblks(1), nblks(2), nblks(3), 0:ngroups - 1))
2829 DO igroup = 0, ngroups - 1
2830 !for a given subgroup, need to make the destination available to everyone in the main group
2831 current_dest(:, :, :, igroup) = block_dest(:, :, :)
2832 CALL para_env%bcast(current_dest(:, :, :, igroup), source=igroup*group_size) !bcast from first proc in subgroup
2833 END DO
2834
2835 !We go by batches, which cannot be larger than the maximum MPI tag value
2836 batch_size = min(para_env%get_tag_ub(), 128000, nocc*ngroups)
2837 n_batch = (nocc*ngroups)/batch_size
2838 IF (modulo(nocc*ngroups, batch_size) .NE. 0) n_batch = n_batch + 1
2839
2840 DO i_batch = 1, n_batch
2841 !Loop over groups, blocks and send/receive
2842 ALLOCATE (send_buff(batch_size), recv_buff(batch_size))
2843 ALLOCATE (send_req(batch_size), recv_req(batch_size))
2844 ir = 0
2845 is = 0
2846 i_msg = 0
2847 DO kblk = 1, nblks(3)
2848 DO jblk = 1, nblks(2)
2849 DO iblk = 1, nblks(1)
2850 DO igroup = 0, ngroups - 1
2851 IF (block_source(iblk, jblk, kblk) == -1) cycle
2852
2853 i_msg = i_msg + 1
2854 IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) cycle
2855
2856 !a unique tag per block, within this batch
2857 tag = i_msg - (i_batch - 1)*batch_size
2858
2859 IF (filter_at) THEN
2860 ind(:) = [iblk, jblk, kblk]
2861 iatom = idx_to_at(ind(dim_at))
2862 IF (.NOT. iatom_to_subgroup(iatom)%array(igroup + 1)) cycle
2863 END IF
2864
2865 found = .false.
2866 IF (para_env%mepos == block_source(iblk, jblk, kblk)) THEN
2867 CALL dbt_get_block(t3c_main, [iblk, jblk, kblk], blk, found)
2868 END IF
2869
2870 !If blocks live on same proc, simply copy. Else MPI send/recv
2871 IF (block_source(iblk, jblk, kblk) == current_dest(iblk, jblk, kblk, igroup)) THEN
2872 IF (found) CALL dbt_put_block(t3c_sub, [iblk, jblk, kblk], shape(blk), blk)
2873 ELSE
2874 IF (para_env%mepos == block_source(iblk, jblk, kblk) .AND. found) THEN
2875 ALLOCATE (send_buff(tag)%array(bsizes1(iblk), bsizes2(jblk), bsizes3(kblk)))
2876 send_buff(tag)%array(:, :, :) = blk(:, :, :)
2877 is = is + 1
2878 CALL para_env%isend(msgin=send_buff(tag)%array, &
2879 dest=current_dest(iblk, jblk, kblk, igroup), &
2880 request=send_req(is), tag=tag)
2881 END IF
2882
2883 IF (para_env%mepos == current_dest(iblk, jblk, kblk, igroup)) THEN
2884 ALLOCATE (recv_buff(tag)%array(bsizes1(iblk), bsizes2(jblk), bsizes3(kblk)))
2885 ir = ir + 1
2886 CALL para_env%irecv(msgout=recv_buff(tag)%array, source=block_source(iblk, jblk, kblk), &
2887 request=recv_req(ir), tag=tag)
2888 END IF
2889 END IF
2890
2891 IF (found) DEALLOCATE (blk)
2892 END DO
2893 END DO
2894 END DO
2895 END DO
2896
2897 CALL mp_waitall(send_req(1:is))
2898 CALL mp_waitall(recv_req(1:ir))
2899 !clean-up
2900 DO i = 1, batch_size
2901 IF (ASSOCIATED(send_buff(i)%array)) DEALLOCATE (send_buff(i)%array)
2902 END DO
2903
2904 !Finally copy the data from the buffer to the sub-tensor
2905 i_msg = 0
2906 DO kblk = 1, nblks(3)
2907 DO jblk = 1, nblks(2)
2908 DO iblk = 1, nblks(1)
2909 DO igroup = 0, ngroups - 1
2910 IF (block_source(iblk, jblk, kblk) == -1) cycle
2911
2912 i_msg = i_msg + 1
2913 IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) cycle
2914
2915 !a unique tag per block, within this batch
2916 tag = i_msg - (i_batch - 1)*batch_size
2917
2918 IF (filter_at) THEN
2919 ind(:) = [iblk, jblk, kblk]
2920 iatom = idx_to_at(ind(dim_at))
2921 IF (.NOT. iatom_to_subgroup(iatom)%array(igroup + 1)) cycle
2922 END IF
2923
2924 IF (para_env%mepos == current_dest(iblk, jblk, kblk, igroup) .AND. &
2925 block_source(iblk, jblk, kblk) .NE. current_dest(iblk, jblk, kblk, igroup)) THEN
2926
2927 ALLOCATE (blk(bsizes1(iblk), bsizes2(jblk), bsizes3(kblk)))
2928 blk(:, :, :) = recv_buff(tag)%array(:, :, :)
2929 CALL dbt_put_block(t3c_sub, [iblk, jblk, kblk], shape(blk), blk)
2930 DEALLOCATE (blk)
2931 END IF
2932 END DO
2933 END DO
2934 END DO
2935 END DO
2936
2937 !clean-up
2938 DO i = 1, batch_size
2939 IF (ASSOCIATED(recv_buff(i)%array)) DEALLOCATE (recv_buff(i)%array)
2940 END DO
2941 DEALLOCATE (send_buff, recv_buff, send_req, recv_req)
2942 END DO !i_batch
2943 CALL dbt_finalize(t3c_sub)
2944
2945 END SUBROUTINE copy_3c_to_subgroup
2946
2947! **************************************************************************************************
2948!> \brief A routine that gather the pieces of the KS matrix accross the subgroup and puts it in the
2949!> main group. Each b_img, iatom, jatom tuple is one a single CPU
2950!> \param ks_t ...
2951!> \param ks_t_sub ...
2952!> \param group_size ...
2953!> \param sparsity_pattern ...
2954!> \param para_env ...
2955!> \param ri_data ...
2956! **************************************************************************************************
2957 SUBROUTINE gather_ks_matrix(ks_t, ks_t_sub, group_size, sparsity_pattern, para_env, ri_data)
2958 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: ks_t, ks_t_sub
2959 INTEGER, INTENT(IN) :: group_size
2960 INTEGER, DIMENSION(:, :, :), INTENT(IN) :: sparsity_pattern
2961 TYPE(mp_para_env_type), POINTER :: para_env
2962 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2963
2964 CHARACTER(len=*), PARAMETER :: routinen = 'gather_ks_matrix'
2965
2966 INTEGER :: b_img, dest, handle, i, i_spin, iatom, &
2967 igroup, ir, is, jatom, n_mess, natom, &
2968 nimg, nspins, source, tag
2969 LOGICAL :: found
2970 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
2971 TYPE(cp_2d_r_p_type), ALLOCATABLE, DIMENSION(:) :: recv_buff, send_buff
2972 TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_req, send_req
2973
2974 CALL timeset(routinen, handle)
2975
2976 nimg = SIZE(sparsity_pattern, 3)
2977 natom = SIZE(sparsity_pattern, 2)
2978 nspins = SIZE(ks_t, 1)
2979
2980 DO b_img = 1, nimg
2981 n_mess = 0
2982 DO i_spin = 1, nspins
2983 DO jatom = 1, natom
2984 DO iatom = 1, natom
2985 IF (sparsity_pattern(iatom, jatom, b_img) > -1) n_mess = n_mess + 1
2986 END DO
2987 END DO
2988 END DO
2989
2990 ALLOCATE (send_buff(n_mess), recv_buff(n_mess))
2991 ALLOCATE (send_req(n_mess), recv_req(n_mess))
2992 ir = 0
2993 is = 0
2994 n_mess = 0
2995 tag = 0
2996
2997 DO i_spin = 1, nspins
2998 DO jatom = 1, natom
2999 DO iatom = 1, natom
3000 IF (sparsity_pattern(iatom, jatom, b_img) < 0) cycle
3001 n_mess = n_mess + 1
3002 tag = tag + 1
3003
3004 !sending the message
3005 CALL dbt_get_stored_coordinates(ks_t(i_spin, b_img), [iatom, jatom], dest)
3006 CALL dbt_get_stored_coordinates(ks_t_sub(i_spin, b_img), [iatom, jatom], source) !source within sub
3007 igroup = sparsity_pattern(iatom, jatom, b_img)
3008 source = source + igroup*group_size
3009 IF (para_env%mepos == source) THEN
3010 CALL dbt_get_block(ks_t_sub(i_spin, b_img), [iatom, jatom], blk, found)
3011 IF (source == dest) THEN
3012 IF (found) CALL dbt_put_block(ks_t(i_spin, b_img), [iatom, jatom], shape(blk), blk)
3013 ELSE
3014 ALLOCATE (send_buff(n_mess)%array(ri_data%bsizes_AO(iatom), ri_data%bsizes_AO(jatom)))
3015 send_buff(n_mess)%array(:, :) = 0.0_dp
3016 IF (found) THEN
3017 send_buff(n_mess)%array(:, :) = blk(:, :)
3018 END IF
3019 is = is + 1
3020 CALL para_env%isend(msgin=send_buff(n_mess)%array, dest=dest, &
3021 request=send_req(is), tag=tag)
3022 END IF
3023 DEALLOCATE (blk)
3024 END IF
3025
3026 !receiving the message
3027 IF (para_env%mepos == dest .AND. source .NE. dest) THEN
3028 ALLOCATE (recv_buff(n_mess)%array(ri_data%bsizes_AO(iatom), ri_data%bsizes_AO(jatom)))
3029 ir = ir + 1
3030 CALL para_env%irecv(msgout=recv_buff(n_mess)%array, source=source, &
3031 request=recv_req(ir), tag=tag)
3032 END IF
3033 END DO !iatom
3034 END DO !jatom
3035 END DO !ispin
3036
3037 CALL mp_waitall(send_req(1:is))
3038 CALL mp_waitall(recv_req(1:ir))
3039
3040 !Copy the messages received into the KS matrix
3041 n_mess = 0
3042 DO i_spin = 1, nspins
3043 DO jatom = 1, natom
3044 DO iatom = 1, natom
3045 IF (sparsity_pattern(iatom, jatom, b_img) < 0) cycle
3046 n_mess = n_mess + 1
3047
3048 CALL dbt_get_stored_coordinates(ks_t(i_spin, b_img), [iatom, jatom], dest)
3049 IF (para_env%mepos == dest) THEN
3050 IF (.NOT. ASSOCIATED(recv_buff(n_mess)%array)) cycle
3051 ALLOCATE (blk(ri_data%bsizes_AO(iatom), ri_data%bsizes_AO(jatom)))
3052 blk(:, :) = recv_buff(n_mess)%array(:, :)
3053 CALL dbt_put_block(ks_t(i_spin, b_img), [iatom, jatom], shape(blk), blk)
3054 DEALLOCATE (blk)
3055 END IF
3056 END DO
3057 END DO
3058 END DO
3059
3060 !clean-up
3061 DO i = 1, n_mess
3062 IF (ASSOCIATED(send_buff(i)%array)) DEALLOCATE (send_buff(i)%array)
3063 IF (ASSOCIATED(recv_buff(i)%array)) DEALLOCATE (recv_buff(i)%array)
3064 END DO
3065 DEALLOCATE (send_buff, recv_buff, send_req, recv_req)
3066 END DO !b_img
3067
3068 CALL timestop(handle)
3069
3070 END SUBROUTINE gather_ks_matrix
3071
3072! **************************************************************************************************
3073!> \brief copy all required 2c tensors from the main MPI group to the subgroups
3074!> \param mat_2c_pot ...
3075!> \param t_2c_work ...
3076!> \param t_2c_ao_tmp ...
3077!> \param ks_t_split ...
3078!> \param ks_t_sub ...
3079!> \param group_size ...
3080!> \param ngroups ...
3081!> \param para_env ...
3082!> \param para_env_sub ...
3083!> \param ri_data ...
3084! **************************************************************************************************
3085 SUBROUTINE get_subgroup_2c_tensors(mat_2c_pot, t_2c_work, t_2c_ao_tmp, ks_t_split, ks_t_sub, &
3086 group_size, ngroups, para_env, para_env_sub, ri_data)
3087 TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: mat_2c_pot
3088 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_work, t_2c_ao_tmp, ks_t_split
3089 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: ks_t_sub
3090 INTEGER, INTENT(IN) :: group_size, ngroups
3091 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3092 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3093
3094 CHARACTER(len=*), PARAMETER :: routinen = 'get_subgroup_2c_tensors'
3095
3096 INTEGER :: handle, i, i_img, i_ri, i_spin, iproc, &
3097 j, natom, nblks, nimg, nspins
3098 INTEGER(int_8) :: nze
3099 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_ri_ext, bsizes_ri_ext_split, &
3100 dist1, dist2
3101 INTEGER, DIMENSION(2) :: pdims_2d
3102 INTEGER, DIMENSION(:), POINTER :: col_dist, ri_blk_size, row_dist
3103 INTEGER, DIMENSION(:, :), POINTER :: dbcsr_pgrid
3104 REAL(dp) :: occ
3105 TYPE(dbcsr_distribution_type) :: dbcsr_dist_sub
3106 TYPE(dbt_pgrid_type) :: pgrid_2d
3107 TYPE(dbt_type) :: work, work_sub
3108
3109 CALL timeset(routinen, handle)
3110
3111 !Create the 2d pgrid
3112 pdims_2d = 0
3113 CALL dbt_pgrid_create(para_env_sub, pdims_2d, pgrid_2d)
3114
3115 natom = SIZE(ri_data%bsizes_RI)
3116 nblks = SIZE(ri_data%bsizes_RI_split)
3117 ALLOCATE (bsizes_ri_ext(ri_data%ncell_RI*natom))
3118 ALLOCATE (bsizes_ri_ext_split(ri_data%ncell_RI*nblks))
3119 DO i_ri = 1, ri_data%ncell_RI
3120 bsizes_ri_ext((i_ri - 1)*natom + 1:i_ri*natom) = ri_data%bsizes_RI(:)
3121 bsizes_ri_ext_split((i_ri - 1)*nblks + 1:i_ri*nblks) = ri_data%bsizes_RI_split(:)
3122 END DO
3123
3124 !nRI x nRI 2c tensors
3125 CALL create_2c_tensor(t_2c_work(1), dist1, dist2, pgrid_2d, &
3126 bsizes_ri_ext, bsizes_ri_ext, &
3127 name="(RI | RI)")
3128 DEALLOCATE (dist1, dist2)
3129
3130 CALL create_2c_tensor(t_2c_work(2), dist1, dist2, pgrid_2d, &
3131 bsizes_ri_ext_split, bsizes_ri_ext_split, &
3132 name="(RI | RI)")
3133 DEALLOCATE (dist1, dist2)
3134
3135 !the AO based tensors
3136 CALL create_2c_tensor(ks_t_split(1), dist1, dist2, pgrid_2d, &
3137 ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
3138 name="(AO | AO)")
3139 DEALLOCATE (dist1, dist2)
3140 CALL dbt_create(ks_t_split(1), ks_t_split(2))
3141
3142 CALL create_2c_tensor(t_2c_ao_tmp(1), dist1, dist2, pgrid_2d, &
3143 ri_data%bsizes_AO, ri_data%bsizes_AO, &
3144 name="(AO | AO)")
3145 DEALLOCATE (dist1, dist2)
3146
3147 nspins = SIZE(ks_t_sub, 1)
3148 nimg = SIZE(ks_t_sub, 2)
3149 DO i_img = 1, nimg
3150 DO i_spin = 1, nspins
3151 CALL dbt_create(t_2c_ao_tmp(1), ks_t_sub(i_spin, i_img))
3152 END DO
3153 END DO
3154
3155 !Finally the HFX potential matrices
3156 !For now, we do a convoluted things where we go to tensors first, then back to matrices.
3157 CALL create_2c_tensor(work_sub, dist1, dist2, pgrid_2d, &
3158 ri_data%bsizes_RI, ri_data%bsizes_RI, &
3159 name="(RI | RI)")
3160 CALL dbt_create(ri_data%kp_mat_2c_pot(1, 1), work)
3161
3162 ALLOCATE (dbcsr_pgrid(0:pdims_2d(1) - 1, 0:pdims_2d(2) - 1))
3163 iproc = 0
3164 DO i = 0, pdims_2d(1) - 1
3165 DO j = 0, pdims_2d(2) - 1
3166 dbcsr_pgrid(i, j) = iproc
3167 iproc = iproc + 1
3168 END DO
3169 END DO
3170
3171 !We need to have the same exact 2d block dist as the tensors
3172 ALLOCATE (col_dist(natom), row_dist(natom))
3173 row_dist(:) = dist1(:)
3174 col_dist(:) = dist2(:)
3175
3176 ALLOCATE (ri_blk_size(natom))
3177 ri_blk_size(:) = ri_data%bsizes_RI(:)
3178
3179 CALL dbcsr_distribution_new(dbcsr_dist_sub, group=para_env_sub%get_handle(), pgrid=dbcsr_pgrid, &
3180 row_dist=row_dist, col_dist=col_dist)
3181 CALL dbcsr_create(mat_2c_pot(1), dist=dbcsr_dist_sub, name="sub", matrix_type=dbcsr_type_no_symmetry, &
3182 row_blk_size=ri_blk_size, col_blk_size=ri_blk_size)
3183
3184 DO i_img = 1, nimg
3185 IF (i_img > 1) CALL dbcsr_create(mat_2c_pot(i_img), template=mat_2c_pot(1))
3186 CALL dbt_copy_matrix_to_tensor(ri_data%kp_mat_2c_pot(1, i_img), work)
3187 CALL get_tensor_occupancy(work, nze, occ)
3188 IF (nze == 0) cycle
3189
3190 CALL copy_2c_to_subgroup(work_sub, work, group_size, ngroups, para_env)
3191 CALL dbt_copy_tensor_to_matrix(work_sub, mat_2c_pot(i_img))
3192 CALL dbcsr_filter(mat_2c_pot(i_img), ri_data%filter_eps)
3193 CALL dbt_clear(work_sub)
3194 END DO
3195
3196 CALL dbt_destroy(work)
3197 CALL dbt_destroy(work_sub)
3198 CALL dbt_pgrid_destroy(pgrid_2d)
3199 CALL dbcsr_distribution_release(dbcsr_dist_sub)
3200 DEALLOCATE (col_dist, row_dist, ri_blk_size, dbcsr_pgrid)
3201 CALL timestop(handle)
3202
3203 END SUBROUTINE get_subgroup_2c_tensors
3204
3205! **************************************************************************************************
3206!> \brief copy all required 3c tensors from the main MPI group to the subgroups
3207!> \param t_3c_int ...
3208!> \param t_3c_work_2 ...
3209!> \param t_3c_work_3 ...
3210!> \param t_3c_apc ...
3211!> \param t_3c_apc_sub ...
3212!> \param group_size ...
3213!> \param ngroups ...
3214!> \param para_env ...
3215!> \param para_env_sub ...
3216!> \param ri_data ...
3217! **************************************************************************************************
3218 SUBROUTINE get_subgroup_3c_tensors(t_3c_int, t_3c_work_2, t_3c_work_3, t_3c_apc, t_3c_apc_sub, &
3219 group_size, ngroups, para_env, para_env_sub, ri_data)
3220 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_int, t_3c_work_2, t_3c_work_3
3221 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_apc, t_3c_apc_sub
3222 INTEGER, INTENT(IN) :: group_size, ngroups
3223 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3224 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3225
3226 CHARACTER(len=*), PARAMETER :: routinen = 'get_subgroup_3c_tensors'
3227
3228 INTEGER :: batch_size, bfac, bo(2), handle, &
3229 handle2, i_blk, i_img, i_ri, i_spin, &
3230 ib, natom, nblks_ao, nblks_ri, nimg, &
3231 nspins
3232 INTEGER(int_8) :: nze
3233 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_ri_ext, bsizes_ri_ext_split, &
3234 bsizes_stack, bsizes_tmp, dist1, &
3235 dist2, dist3, dist_stack, idx_to_at
3236 INTEGER, DIMENSION(3) :: pdims
3237 REAL(dp) :: occ
3238 TYPE(dbt_distribution_type) :: t_dist
3239 TYPE(dbt_pgrid_type) :: pgrid
3240 TYPE(dbt_type) :: tmp, work_atom_block, work_atom_block_sub
3241
3242 CALL timeset(routinen, handle)
3243
3244 nblks_ri = SIZE(ri_data%bsizes_RI_split)
3245 ALLOCATE (bsizes_ri_ext_split(ri_data%ncell_RI*nblks_ri))
3246 DO i_ri = 1, ri_data%ncell_RI
3247 bsizes_ri_ext_split((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = ri_data%bsizes_RI_split(:)
3248 END DO
3249
3250 !Preparing larger block sizes for efficient communication (less, bigger messages)
3251 !we put 2 atoms per RI block
3252 bfac = 2
3253 natom = SIZE(ri_data%bsizes_RI)
3254 nblks_ri = max(1, natom/bfac)
3255 ALLOCATE (bsizes_tmp(nblks_ri))
3256 DO i_blk = 1, nblks_ri
3257 bo = get_limit(natom, nblks_ri, i_blk - 1)
3258 bsizes_tmp(i_blk) = sum(ri_data%bsizes_RI(bo(1):bo(2)))
3259 END DO
3260 ALLOCATE (bsizes_ri_ext(ri_data%ncell_RI*nblks_ri))
3261 DO i_ri = 1, ri_data%ncell_RI
3262 bsizes_ri_ext((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = bsizes_tmp(:)
3263 END DO
3264
3265 batch_size = ri_data%kp_stack_size
3266 nblks_ao = SIZE(ri_data%bsizes_AO_split)
3267 ALLOCATE (bsizes_stack(batch_size*nblks_ao))
3268 DO ib = 1, batch_size
3269 bsizes_stack((ib - 1)*nblks_ao + 1:ib*nblks_ao) = ri_data%bsizes_AO_split(:)
3270 END DO
3271
3272 !Create the pgrid for the configuration correspoinding to ri_data%t_3c_int_ctr_3
3273 natom = SIZE(ri_data%bsizes_RI)
3274 pdims = 0
3275 CALL dbt_pgrid_create(para_env_sub, pdims, pgrid, &
3276 tensor_dims=[SIZE(bsizes_ri_ext_split), 1, batch_size*SIZE(ri_data%bsizes_AO_split)])
3277
3278 !Create all required 3c tensors in that configuration
3279 CALL create_3c_tensor(t_3c_int(1), dist1, dist2, dist3, &
3280 pgrid, bsizes_ri_ext_split, ri_data%bsizes_AO_split, &
3281 ri_data%bsizes_AO_split, [1], [2, 3], name="(RI | AO AO)")
3282 nimg = SIZE(t_3c_int)
3283 DO i_img = 2, nimg
3284 CALL dbt_create(t_3c_int(1), t_3c_int(i_img))
3285 END DO
3286
3287 !The stacked work tensors, in a distribution that matches that of t_3c_int
3288 ALLOCATE (dist_stack(batch_size*nblks_ao))
3289 DO ib = 1, batch_size
3290 dist_stack((ib - 1)*nblks_ao + 1:ib*nblks_ao) = dist3(:)
3291 END DO
3292
3293 CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3294 CALL dbt_create(t_3c_work_3(1), "work_3_stack", t_dist, [1], [2, 3], &
3295 bsizes_ri_ext_split, ri_data%bsizes_AO_split, bsizes_stack)
3296 CALL dbt_create(t_3c_work_3(1), t_3c_work_3(2))
3297 CALL dbt_create(t_3c_work_3(1), t_3c_work_3(3))
3298 CALL dbt_distribution_destroy(t_dist)
3299 DEALLOCATE (dist1, dist2, dist3, dist_stack)
3300
3301 !For more efficient communication, we use intermediate tensors with larger block size
3302 CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3303 pgrid, bsizes_ri_ext, ri_data%bsizes_AO, &
3304 ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3305 DEALLOCATE (dist1, dist2, dist3)
3306
3307 CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3308 ri_data%pgrid, bsizes_ri_ext, ri_data%bsizes_AO, &
3309 ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3310 DEALLOCATE (dist1, dist2, dist3)
3311
3312 !Finally copy the integrals into the subgroups (if not there already)
3313 CALL timeset(routinen//"_ints", handle2)
3314 IF (ALLOCATED(ri_data%kp_t_3c_int)) THEN
3315 DO i_img = 1, nimg
3316 CALL dbt_copy(ri_data%kp_t_3c_int(i_img), t_3c_int(i_img), move_data=.true.)
3317 END DO
3318 ELSE
3319 ALLOCATE (ri_data%kp_t_3c_int(nimg))
3320 DO i_img = 1, nimg
3321 CALL dbt_create(t_3c_int(i_img), ri_data%kp_t_3c_int(i_img))
3322 CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, i_img), nze, occ)
3323 IF (nze == 0) cycle
3324 CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, i_img), work_atom_block, order=[2, 1, 3])
3325 CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, group_size, ngroups, para_env)
3326 CALL dbt_copy(work_atom_block_sub, t_3c_int(i_img), move_data=.true.)
3327 END DO
3328 END IF
3329 CALL timestop(handle2)
3330 CALL dbt_pgrid_destroy(pgrid)
3331 CALL dbt_destroy(work_atom_block)
3332 CALL dbt_destroy(work_atom_block_sub)
3333
3334 !Do the same for the t_3c_ctr_2 configuration
3335 pdims = 0
3336 CALL dbt_pgrid_create(para_env_sub, pdims, pgrid, &
3337 tensor_dims=[1, SIZE(bsizes_ri_ext_split), batch_size*SIZE(ri_data%bsizes_AO_split)])
3338
3339 !For more efficient communication, we use intermediate tensors with larger block size
3340 CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3341 pgrid, ri_data%bsizes_AO, bsizes_ri_ext, &
3342 ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3343 DEALLOCATE (dist1, dist2, dist3)
3344
3345 CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3346 ri_data%pgrid_1, ri_data%bsizes_AO, bsizes_ri_ext, &
3347 ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3348 DEALLOCATE (dist1, dist2, dist3)
3349
3350 !template for t_3c_apc_sub
3351 CALL create_3c_tensor(tmp, dist1, dist2, dist3, &
3352 pgrid, ri_data%bsizes_AO_split, bsizes_ri_ext_split, &
3353 ri_data%bsizes_AO_split, [1], [2, 3], name="(AO RI | AO)")
3354
3355 !create t_3c_work_2 tensors in a distribution that matches the above
3356 ALLOCATE (dist_stack(batch_size*nblks_ao))
3357 DO ib = 1, batch_size
3358 dist_stack((ib - 1)*nblks_ao + 1:ib*nblks_ao) = dist3(:)
3359 END DO
3360
3361 CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3362 CALL dbt_create(t_3c_work_2(1), "work_2_stack", t_dist, [1], [2, 3], &
3363 ri_data%bsizes_AO_split, bsizes_ri_ext_split, bsizes_stack)
3364 CALL dbt_create(t_3c_work_2(1), t_3c_work_2(2))
3365 CALL dbt_create(t_3c_work_2(1), t_3c_work_2(3))
3366 CALL dbt_distribution_destroy(t_dist)
3367 DEALLOCATE (dist1, dist2, dist3, dist_stack)
3368
3369 !Finally copy data from t_3c_apc to the subgroups
3370 ALLOCATE (idx_to_at(SIZE(ri_data%bsizes_AO)))
3371 CALL get_idx_to_atom(idx_to_at, ri_data%bsizes_AO, ri_data%bsizes_AO)
3372 nspins = SIZE(t_3c_apc, 1)
3373 CALL timeset(routinen//"_apc", handle2)
3374 DO i_img = 1, nimg
3375 DO i_spin = 1, nspins
3376 CALL dbt_create(tmp, t_3c_apc_sub(i_spin, i_img))
3377 CALL get_tensor_occupancy(t_3c_apc(i_spin, i_img), nze, occ)
3378 IF (nze == 0) cycle
3379 CALL dbt_copy(t_3c_apc(i_spin, i_img), work_atom_block, move_data=.true.)
3380 CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, group_size, &
3381 ngroups, para_env, ri_data%iatom_to_subgroup, 1, idx_to_at)
3382 CALL dbt_copy(work_atom_block_sub, t_3c_apc_sub(i_spin, i_img), move_data=.true.)
3383 END DO
3384 DO i_spin = 1, nspins
3385 CALL dbt_destroy(t_3c_apc(i_spin, i_img))
3386 END DO
3387 END DO
3388 CALL timestop(handle2)
3389 CALL dbt_pgrid_destroy(pgrid)
3390 CALL dbt_destroy(tmp)
3391 CALL dbt_destroy(work_atom_block)
3392 CALL dbt_destroy(work_atom_block_sub)
3393
3394 CALL timestop(handle)
3395
3396 END SUBROUTINE get_subgroup_3c_tensors
3397
3398! **************************************************************************************************
3399!> \brief copy all required 2c force tensors from the main MPI group to the subgroups
3400!> \param t_2c_inv ...
3401!> \param t_2c_bint ...
3402!> \param t_2c_metric ...
3403!> \param mat_2c_pot ...
3404!> \param t_2c_work ...
3405!> \param rho_ao_t ...
3406!> \param rho_ao_t_sub ...
3407!> \param t_2c_der_metric ...
3408!> \param t_2c_der_metric_sub ...
3409!> \param mat_der_pot ...
3410!> \param mat_der_pot_sub ...
3411!> \param group_size ...
3412!> \param ngroups ...
3413!> \param para_env ...
3414!> \param para_env_sub ...
3415!> \param ri_data ...
3416!> \note Main MPI group tensors are deleted within this routine, for memory optimization
3417! **************************************************************************************************
3418 SUBROUTINE get_subgroup_2c_derivs(t_2c_inv, t_2c_bint, t_2c_metric, mat_2c_pot, t_2c_work, rho_ao_t, &
3419 rho_ao_t_sub, t_2c_der_metric, t_2c_der_metric_sub, mat_der_pot, &
3420 mat_der_pot_sub, group_size, ngroups, para_env, para_env_sub, ri_data)
3421 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_inv, t_2c_bint, t_2c_metric
3422 TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: mat_2c_pot
3423 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_work
3424 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: rho_ao_t, rho_ao_t_sub, t_2c_der_metric, &
3425 t_2c_der_metric_sub
3426 TYPE(dbcsr_type), DIMENSION(:, :), INTENT(INOUT) :: mat_der_pot, mat_der_pot_sub
3427 INTEGER, INTENT(IN) :: group_size, ngroups
3428 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3429 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3430
3431 CHARACTER(len=*), PARAMETER :: routinen = 'get_subgroup_2c_derivs'
3432
3433 INTEGER :: handle, i, i_img, i_ri, i_spin, i_xyz, &
3434 iatom, iproc, j, natom, nblks, nimg, &
3435 nspins
3436 INTEGER(int_8) :: nze
3437 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_ri_ext, bsizes_ri_ext_split, &
3438 dist1, dist2
3439 INTEGER, DIMENSION(2) :: pdims_2d
3440 INTEGER, DIMENSION(:), POINTER :: col_dist, ri_blk_size, row_dist
3441 INTEGER, DIMENSION(:, :), POINTER :: dbcsr_pgrid
3442 REAL(dp) :: occ
3443 TYPE(dbcsr_distribution_type) :: dbcsr_dist_sub
3444 TYPE(dbt_pgrid_type) :: pgrid_2d
3445 TYPE(dbt_type) :: work, work_sub
3446
3447 CALL timeset(routinen, handle)
3448
3449 !Note: a fair portion of this routine is copied from the energy version of it
3450 !Create the 2d pgrid
3451 pdims_2d = 0
3452 CALL dbt_pgrid_create(para_env_sub, pdims_2d, pgrid_2d)
3453
3454 natom = SIZE(ri_data%bsizes_RI)
3455 nblks = SIZE(ri_data%bsizes_RI_split)
3456 ALLOCATE (bsizes_ri_ext(ri_data%ncell_RI*natom))
3457 ALLOCATE (bsizes_ri_ext_split(ri_data%ncell_RI*nblks))
3458 DO i_ri = 1, ri_data%ncell_RI
3459 bsizes_ri_ext((i_ri - 1)*natom + 1:i_ri*natom) = ri_data%bsizes_RI(:)
3460 bsizes_ri_ext_split((i_ri - 1)*nblks + 1:i_ri*nblks) = ri_data%bsizes_RI_split(:)
3461 END DO
3462
3463 !nRI x nRI 2c tensors
3464 CALL create_2c_tensor(t_2c_inv(1), dist1, dist2, pgrid_2d, &
3465 bsizes_ri_ext, bsizes_ri_ext, &
3466 name="(RI | RI)")
3467 DEALLOCATE (dist1, dist2)
3468
3469 CALL dbt_create(t_2c_inv(1), t_2c_bint(1))
3470 CALL dbt_create(t_2c_inv(1), t_2c_metric(1))
3471 DO iatom = 2, natom
3472 CALL dbt_create(t_2c_inv(1), t_2c_inv(iatom))
3473 CALL dbt_create(t_2c_inv(1), t_2c_bint(iatom))
3474 CALL dbt_create(t_2c_inv(1), t_2c_metric(iatom))
3475 END DO
3476 CALL dbt_create(t_2c_inv(1), t_2c_work(1))
3477 CALL dbt_create(t_2c_inv(1), t_2c_work(2))
3478 CALL dbt_create(t_2c_inv(1), t_2c_work(3))
3479 CALL dbt_create(t_2c_inv(1), t_2c_work(4))
3480
3481 CALL create_2c_tensor(t_2c_work(5), dist1, dist2, pgrid_2d, &
3482 bsizes_ri_ext_split, bsizes_ri_ext_split, &
3483 name="(RI | RI)")
3484 DEALLOCATE (dist1, dist2)
3485
3486 !copy the data from the main group.
3487 DO iatom = 1, natom
3488 CALL copy_2c_to_subgroup(t_2c_inv(iatom), ri_data%t_2c_inv(1, iatom), group_size, ngroups, para_env)
3489 CALL copy_2c_to_subgroup(t_2c_bint(iatom), ri_data%t_2c_int(1, iatom), group_size, ngroups, para_env)
3490 CALL copy_2c_to_subgroup(t_2c_metric(iatom), ri_data%t_2c_pot(1, iatom), group_size, ngroups, para_env)
3491 END DO
3492
3493 !This includes the derivatives of the RI metric, for which there is one per atom
3494 DO i_xyz = 1, 3
3495 DO iatom = 1, natom
3496 CALL dbt_create(t_2c_inv(1), t_2c_der_metric_sub(iatom, i_xyz))
3497 CALL copy_2c_to_subgroup(t_2c_der_metric_sub(iatom, i_xyz), t_2c_der_metric(iatom, i_xyz), &
3498 group_size, ngroups, para_env)
3499 CALL dbt_destroy(t_2c_der_metric(iatom, i_xyz))
3500 END DO
3501 END DO
3502
3503 !AO x AO 2c tensors
3504 CALL create_2c_tensor(rho_ao_t_sub(1, 1), dist1, dist2, pgrid_2d, &
3505 ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
3506 name="(AO | AO)")
3507 DEALLOCATE (dist1, dist2)
3508 nspins = SIZE(rho_ao_t, 1)
3509 nimg = SIZE(rho_ao_t, 2)
3510
3511 DO i_img = 1, nimg
3512 DO i_spin = 1, nspins
3513 IF (.NOT. (i_img == 1 .AND. i_spin == 1)) &
3514 CALL dbt_create(rho_ao_t_sub(1, 1), rho_ao_t_sub(i_spin, i_img))
3515 CALL copy_2c_to_subgroup(rho_ao_t_sub(i_spin, i_img), rho_ao_t(i_spin, i_img), &
3516 group_size, ngroups, para_env)
3517 CALL dbt_destroy(rho_ao_t(i_spin, i_img))
3518 END DO
3519 END DO
3520
3521 !The RIxRI matrices, going through tensors
3522 CALL create_2c_tensor(work_sub, dist1, dist2, pgrid_2d, &
3523 ri_data%bsizes_RI, ri_data%bsizes_RI, &
3524 name="(RI | RI)")
3525 CALL dbt_create(ri_data%kp_mat_2c_pot(1, 1), work)
3526
3527 ALLOCATE (dbcsr_pgrid(0:pdims_2d(1) - 1, 0:pdims_2d(2) - 1))
3528 iproc = 0
3529 DO i = 0, pdims_2d(1) - 1
3530 DO j = 0, pdims_2d(2) - 1
3531 dbcsr_pgrid(i, j) = iproc
3532 iproc = iproc + 1
3533 END DO
3534 END DO
3535
3536 !We need to have the same exact 2d block dist as the tensors
3537 ALLOCATE (col_dist(natom), row_dist(natom))
3538 row_dist(:) = dist1(:)
3539 col_dist(:) = dist2(:)
3540
3541 ALLOCATE (ri_blk_size(natom))
3542 ri_blk_size(:) = ri_data%bsizes_RI(:)
3543
3544 CALL dbcsr_distribution_new(dbcsr_dist_sub, group=para_env_sub%get_handle(), pgrid=dbcsr_pgrid, &
3545 row_dist=row_dist, col_dist=col_dist)
3546 CALL dbcsr_create(mat_2c_pot(1), dist=dbcsr_dist_sub, name="sub", matrix_type=dbcsr_type_no_symmetry, &
3547 row_blk_size=ri_blk_size, col_blk_size=ri_blk_size)
3548
3549 !The HFX potential
3550 DO i_img = 1, nimg
3551 IF (i_img > 1) CALL dbcsr_create(mat_2c_pot(i_img), template=mat_2c_pot(1))
3552 CALL dbt_copy_matrix_to_tensor(ri_data%kp_mat_2c_pot(1, i_img), work)
3553 CALL get_tensor_occupancy(work, nze, occ)
3554 IF (nze == 0) cycle
3555
3556 CALL copy_2c_to_subgroup(work_sub, work, group_size, ngroups, para_env)
3557 CALL dbt_copy_tensor_to_matrix(work_sub, mat_2c_pot(i_img))
3558 CALL dbcsr_filter(mat_2c_pot(i_img), ri_data%filter_eps)
3559 CALL dbt_clear(work_sub)
3560 END DO
3561
3562 !The derivatives of the HFX potential
3563 DO i_xyz = 1, 3
3564 DO i_img = 1, nimg
3565 CALL dbcsr_create(mat_der_pot_sub(i_img, i_xyz), template=mat_2c_pot(1))
3566 CALL dbt_copy_matrix_to_tensor(mat_der_pot(i_img, i_xyz), work)
3567 CALL dbcsr_release(mat_der_pot(i_img, i_xyz))
3568 CALL get_tensor_occupancy(work, nze, occ)
3569 IF (nze == 0) cycle
3570
3571 CALL copy_2c_to_subgroup(work_sub, work, group_size, ngroups, para_env)
3572 CALL dbt_copy_tensor_to_matrix(work_sub, mat_der_pot_sub(i_img, i_xyz))
3573 CALL dbcsr_filter(mat_der_pot_sub(i_img, i_xyz), ri_data%filter_eps)
3574 CALL dbt_clear(work_sub)
3575 END DO
3576 END DO
3577
3578 CALL dbt_destroy(work)
3579 CALL dbt_destroy(work_sub)
3580 CALL dbt_pgrid_destroy(pgrid_2d)
3581 CALL dbcsr_distribution_release(dbcsr_dist_sub)
3582 DEALLOCATE (col_dist, row_dist, ri_blk_size, dbcsr_pgrid)
3583
3584 CALL timestop(handle)
3585
3586 END SUBROUTINE get_subgroup_2c_derivs
3587
3588! **************************************************************************************************
3589!> \brief copy all required 3c derivative tensors from the main MPI group to the subgroups
3590!> \param t_3c_work_2 ...
3591!> \param t_3c_work_3 ...
3592!> \param t_3c_der_AO ...
3593!> \param t_3c_der_AO_sub ...
3594!> \param t_3c_der_RI ...
3595!> \param t_3c_der_RI_sub ...
3596!> \param t_3c_apc ...
3597!> \param t_3c_apc_sub ...
3598!> \param t_3c_der_stack ...
3599!> \param group_size ...
3600!> \param ngroups ...
3601!> \param para_env ...
3602!> \param para_env_sub ...
3603!> \param ri_data ...
3604!> \note the tensor containing the derivatives in the main MPI group are deleted for memory
3605! **************************************************************************************************
3606 SUBROUTINE get_subgroup_3c_derivs(t_3c_work_2, t_3c_work_3, t_3c_der_AO, t_3c_der_AO_sub, &
3607 t_3c_der_RI, t_3c_der_RI_sub, t_3c_apc, t_3c_apc_sub, &
3608 t_3c_der_stack, group_size, ngroups, para_env, para_env_sub, &
3609 ri_data)
3610 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_work_2, t_3c_work_3
3611 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_der_ao, t_3c_der_ao_sub, &
3612 t_3c_der_ri, t_3c_der_ri_sub, &
3613 t_3c_apc, t_3c_apc_sub
3614 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_der_stack
3615 INTEGER, INTENT(IN) :: group_size, ngroups
3616 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3617 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3618
3619 CHARACTER(len=*), PARAMETER :: routinen = 'get_subgroup_3c_derivs'
3620
3621 INTEGER :: batch_size, handle, i_img, i_ri, i_spin, &
3622 i_xyz, ib, nblks_ao, nblks_ri, nimg, &
3623 nspins, pdims(3)
3624 INTEGER(int_8) :: nze
3625 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_ri_ext, bsizes_ri_ext_split, &
3626 bsizes_stack, dist1, dist2, dist3, &
3627 dist_stack, idx_to_at
3628 REAL(dp) :: occ
3629 TYPE(dbt_distribution_type) :: t_dist
3630 TYPE(dbt_pgrid_type) :: pgrid
3631 TYPE(dbt_type) :: tmp, work_atom_block, work_atom_block_sub
3632
3633 CALL timeset(routinen, handle)
3634
3635 !We use intermediate tensors with larger block size for more optimized communication
3636 nblks_ri = SIZE(ri_data%bsizes_RI)
3637 ALLOCATE (bsizes_ri_ext(ri_data%ncell_RI*nblks_ri))
3638 DO i_ri = 1, ri_data%ncell_RI
3639 bsizes_ri_ext((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = ri_data%bsizes_RI(:)
3640 END DO
3641
3642 CALL dbt_get_info(ri_data%kp_t_3c_int(1), pdims=pdims)
3643 CALL dbt_pgrid_create(para_env_sub, pdims, pgrid)
3644
3645 CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3646 pgrid, bsizes_ri_ext, ri_data%bsizes_AO, &
3647 ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3648 DEALLOCATE (dist1, dist2, dist3)
3649
3650 CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3651 ri_data%pgrid_2, bsizes_ri_ext, ri_data%bsizes_AO, &
3652 ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3653 DEALLOCATE (dist1, dist2, dist3)
3654 CALL dbt_pgrid_destroy(pgrid)
3655
3656 !We use the 3c integrals on the subgroup as template for the derivatives
3657 nimg = ri_data%nimg
3658 DO i_xyz = 1, 3
3659 DO i_img = 1, nimg
3660 CALL dbt_create(ri_data%kp_t_3c_int(1), t_3c_der_ao_sub(i_img, i_xyz))
3661 CALL get_tensor_occupancy(t_3c_der_ao(i_img, i_xyz), nze, occ)
3662 IF (nze == 0) cycle
3663
3664 CALL dbt_copy(t_3c_der_ao(i_img, i_xyz), work_atom_block, move_data=.true.)
3665 CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, &
3666 group_size, ngroups, para_env)
3667 CALL dbt_copy(work_atom_block_sub, t_3c_der_ao_sub(i_img, i_xyz), move_data=.true.)
3668 END DO
3669
3670 DO i_img = 1, nimg
3671 CALL dbt_create(ri_data%kp_t_3c_int(1), t_3c_der_ri_sub(i_img, i_xyz))
3672 CALL get_tensor_occupancy(t_3c_der_ri(i_img, i_xyz), nze, occ)
3673 IF (nze == 0) cycle
3674
3675 CALL dbt_copy(t_3c_der_ri(i_img, i_xyz), work_atom_block, move_data=.true.)
3676 CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, &
3677 group_size, ngroups, para_env)
3678 CALL dbt_copy(work_atom_block_sub, t_3c_der_ri_sub(i_img, i_xyz), move_data=.true.)
3679 END DO
3680
3681 DO i_img = 1, nimg
3682 CALL dbt_destroy(t_3c_der_ri(i_img, i_xyz))
3683 CALL dbt_destroy(t_3c_der_ao(i_img, i_xyz))
3684 END DO
3685 END DO
3686 CALL dbt_destroy(work_atom_block_sub)
3687 CALL dbt_destroy(work_atom_block)
3688
3689 !Deal with t_3c_apc
3690 nblks_ri = SIZE(ri_data%bsizes_RI_split)
3691 ALLOCATE (bsizes_ri_ext_split(ri_data%ncell_RI*nblks_ri))
3692 DO i_ri = 1, ri_data%ncell_RI
3693 bsizes_ri_ext_split((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = ri_data%bsizes_RI_split(:)
3694 END DO
3695
3696 pdims = 0
3697 CALL dbt_pgrid_create(para_env_sub, pdims, pgrid, &
3698 tensor_dims=[1, SIZE(bsizes_ri_ext_split), batch_size*SIZE(ri_data%bsizes_AO_split)])
3699
3700 CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3701 pgrid, ri_data%bsizes_AO, bsizes_ri_ext, &
3702 ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3703 DEALLOCATE (dist1, dist2, dist3)
3704
3705 CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3706 ri_data%pgrid_1, ri_data%bsizes_AO, bsizes_ri_ext, &
3707 ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3708 DEALLOCATE (dist1, dist2, dist3)
3709
3710 CALL create_3c_tensor(tmp, dist1, dist2, dist3, &
3711 pgrid, ri_data%bsizes_AO_split, bsizes_ri_ext_split, &
3712 ri_data%bsizes_AO_split, [1], [2, 3], name="(AO RI | AO)")
3713 DEALLOCATE (dist1, dist2, dist3)
3714
3715 ALLOCATE (idx_to_at(SIZE(ri_data%bsizes_AO)))
3716 CALL get_idx_to_atom(idx_to_at, ri_data%bsizes_AO, ri_data%bsizes_AO)
3717 nspins = SIZE(t_3c_apc, 1)
3718 DO i_img = 1, nimg
3719 DO i_spin = 1, nspins
3720 CALL dbt_create(tmp, t_3c_apc_sub(i_spin, i_img))
3721 CALL get_tensor_occupancy(t_3c_apc(i_spin, i_img), nze, occ)
3722 IF (nze == 0) cycle
3723 CALL dbt_copy(t_3c_apc(i_spin, i_img), work_atom_block, move_data=.true.)
3724 CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, group_size, &
3725 ngroups, para_env, ri_data%iatom_to_subgroup, 1, idx_to_at)
3726 CALL dbt_copy(work_atom_block_sub, t_3c_apc_sub(i_spin, i_img), move_data=.true.)
3727 END DO
3728 DO i_spin = 1, nspins
3729 CALL dbt_destroy(t_3c_apc(i_spin, i_img))
3730 END DO
3731 END DO
3732 CALL dbt_destroy(tmp)
3733 CALL dbt_destroy(work_atom_block)
3734 CALL dbt_destroy(work_atom_block_sub)
3735 CALL dbt_pgrid_destroy(pgrid)
3736
3737 !t_3c_work_3 based on structure of 3c integrals/derivs
3738 batch_size = ri_data%kp_stack_size
3739 nblks_ao = SIZE(ri_data%bsizes_AO_split)
3740 ALLOCATE (bsizes_stack(batch_size*nblks_ao))
3741 DO ib = 1, batch_size
3742 bsizes_stack((ib - 1)*nblks_ao + 1:ib*nblks_ao) = ri_data%bsizes_AO_split(:)
3743 END DO
3744
3745 ALLOCATE (dist1(ri_data%ncell_RI*nblks_ri), dist2(nblks_ao), dist3(nblks_ao))
3746 CALL dbt_get_info(ri_data%kp_t_3c_int(1), proc_dist_1=dist1, proc_dist_2=dist2, &
3747 proc_dist_3=dist3, pdims=pdims)
3748
3749 ALLOCATE (dist_stack(batch_size*nblks_ao))
3750 DO ib = 1, batch_size
3751 dist_stack((ib - 1)*nblks_ao + 1:ib*nblks_ao) = dist3(:)
3752 END DO
3753
3754 CALL dbt_pgrid_create(para_env_sub, pdims, pgrid)
3755 CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3756 CALL dbt_create(t_3c_work_3(1), "work_3_stack", t_dist, [1], [2, 3], &
3757 bsizes_ri_ext_split, ri_data%bsizes_AO_split, bsizes_stack)
3758 CALL dbt_create(t_3c_work_3(1), t_3c_work_3(2))
3759 CALL dbt_create(t_3c_work_3(1), t_3c_work_3(3))
3760 CALL dbt_create(t_3c_work_3(1), t_3c_work_3(4))
3761 CALL dbt_distribution_destroy(t_dist)
3762 CALL dbt_pgrid_destroy(pgrid)
3763 DEALLOCATE (dist1, dist2, dist3, dist_stack)
3764
3765 !the derivatives are stacked in the same way
3766 CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(1))
3767 CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(2))
3768 CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(3))
3769 CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(4))
3770 CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(5))
3771 CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(6))
3772
3773 !t_3c_work_2 based on structure of t_3c_apc
3774 ALLOCATE (dist1(nblks_ao), dist2(ri_data%ncell_RI*nblks_ri), dist3(nblks_ao))
3775 CALL dbt_get_info(t_3c_apc_sub(1, 1), proc_dist_1=dist1, proc_dist_2=dist2, &
3776 proc_dist_3=dist3, pdims=pdims)
3777
3778 ALLOCATE (dist_stack(batch_size*nblks_ao))
3779 DO ib = 1, batch_size
3780 dist_stack((ib - 1)*nblks_ao + 1:ib*nblks_ao) = dist3(:)
3781 END DO
3782
3783 CALL dbt_pgrid_create(para_env_sub, pdims, pgrid)
3784 CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3785 CALL dbt_create(t_3c_work_2(1), "work_3_stack", t_dist, [1], [2, 3], &
3786 ri_data%bsizes_AO_split, bsizes_ri_ext_split, bsizes_stack)
3787 CALL dbt_create(t_3c_work_2(1), t_3c_work_2(2))
3788 CALL dbt_create(t_3c_work_2(1), t_3c_work_2(3))
3789 CALL dbt_distribution_destroy(t_dist)
3790 CALL dbt_pgrid_destroy(pgrid)
3791 DEALLOCATE (dist1, dist2, dist3, dist_stack)
3792
3793 CALL timestop(handle)
3794
3795 END SUBROUTINE get_subgroup_3c_derivs
3796
3797! **************************************************************************************************
3798!> \brief A routine that reorders the t_3c_int tensors such that all items which are fully empty
3799!> are bunched together. This way, we can get much more efficient screening based on NZE
3800!> \param t_3c_ints ...
3801!> \param ri_data ...
3802! **************************************************************************************************
3803 SUBROUTINE reorder_3c_ints(t_3c_ints, ri_data)
3804 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_ints
3805 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3806
3807 CHARACTER(LEN=*), PARAMETER :: routinen = 'reorder_3c_ints'
3808
3809 INTEGER :: handle, i_img, idx, idx_empty, idx_full, &
3810 nimg
3811 INTEGER(int_8) :: nze
3812 REAL(dp) :: occ
3813 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_3c_tmp
3814
3815 CALL timeset(routinen, handle)
3816
3817 nimg = ri_data%nimg
3818 ALLOCATE (t_3c_tmp(nimg))
3819 DO i_img = 1, nimg
3820 CALL dbt_create(t_3c_ints(i_img), t_3c_tmp(i_img))
3821 CALL dbt_copy(t_3c_ints(i_img), t_3c_tmp(i_img), move_data=.true.)
3822 END DO
3823
3824 !Loop over the images, check if ints have NZE == 0, and put them at the start or end of the
3825 !initial tensor array. Keep the mapping in an array
3826 ALLOCATE (ri_data%idx_to_img(nimg))
3827 idx_full = 0
3828 idx_empty = nimg + 1
3829
3830 DO i_img = 1, nimg
3831 CALL get_tensor_occupancy(t_3c_tmp(i_img), nze, occ)
3832 IF (nze == 0) THEN
3833 idx_empty = idx_empty - 1
3834 CALL dbt_copy(t_3c_tmp(i_img), t_3c_ints(idx_empty), move_data=.true.)
3835 ri_data%idx_to_img(idx_empty) = i_img
3836 ELSE
3837 idx_full = idx_full + 1
3838 CALL dbt_copy(t_3c_tmp(i_img), t_3c_ints(idx_full), move_data=.true.)
3839 ri_data%idx_to_img(idx_full) = i_img
3840 END IF
3841 CALL dbt_destroy(t_3c_tmp(i_img))
3842 END DO
3843
3844 !store the highest image index with non-zero integrals
3845 ri_data%nimg_nze = idx_full
3846
3847 ALLOCATE (ri_data%img_to_idx(nimg))
3848 DO idx = 1, nimg
3849 ri_data%img_to_idx(ri_data%idx_to_img(idx)) = idx
3850 END DO
3851
3852 CALL timestop(handle)
3853
3854 END SUBROUTINE reorder_3c_ints
3855
3856! **************************************************************************************************
3857!> \brief A routine that reorders the 3c derivatives, the same way that the integrals are, also to
3858!> increase efficiency of screening
3859!> \param t_3c_derivs ...
3860!> \param ri_data ...
3861! **************************************************************************************************
3862 SUBROUTINE reorder_3c_derivs(t_3c_derivs, ri_data)
3863 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_derivs
3864 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3865
3866 CHARACTER(LEN=*), PARAMETER :: routinen = 'reorder_3c_derivs'
3867
3868 INTEGER :: handle, i_img, i_xyz, idx, nimg
3869 INTEGER(int_8) :: nze
3870 REAL(dp) :: occ
3871 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_3c_tmp
3872
3873 CALL timeset(routinen, handle)
3874
3875 nimg = ri_data%nimg
3876 ALLOCATE (t_3c_tmp(nimg))
3877 DO i_img = 1, nimg
3878 CALL dbt_create(t_3c_derivs(1, 1), t_3c_tmp(i_img))
3879 END DO
3880
3881 DO i_xyz = 1, 3
3882 DO i_img = 1, nimg
3883 CALL dbt_copy(t_3c_derivs(i_img, i_xyz), t_3c_tmp(i_img), move_data=.true.)
3884 END DO
3885 DO i_img = 1, nimg
3886 idx = ri_data%img_to_idx(i_img)
3887 CALL dbt_copy(t_3c_tmp(i_img), t_3c_derivs(idx, i_xyz), move_data=.true.)
3888 CALL get_tensor_occupancy(t_3c_derivs(idx, i_xyz), nze, occ)
3889 IF (nze > 0) ri_data%nimg_nze = max(idx, ri_data%nimg_nze)
3890 END DO
3891 END DO
3892
3893 DO i_img = 1, nimg
3894 CALL dbt_destroy(t_3c_tmp(i_img))
3895 END DO
3896
3897 CALL timestop(handle)
3898
3899 END SUBROUTINE reorder_3c_derivs
3900
3901! **************************************************************************************************
3902!> \brief Get the sparsity pattern related to the non-symmetric AO basis overlap neighbor list
3903!> \param pattern ...
3904!> \param ri_data ...
3905!> \param qs_env ...
3906! **************************************************************************************************
3907 SUBROUTINE get_sparsity_pattern(pattern, ri_data, qs_env)
3908 INTEGER, DIMENSION(:, :, :), INTENT(INOUT) :: pattern
3909 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3910 TYPE(qs_environment_type), POINTER :: qs_env
3911
3912 INTEGER :: iatom, j_img, jatom, mj_img, natom, nimg
3913 INTEGER, ALLOCATABLE, DIMENSION(:) :: bins
3914 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: tmp_pattern
3915 INTEGER, DIMENSION(3) :: cell_j
3916 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
3917 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
3918 TYPE(dft_control_type), POINTER :: dft_control
3919 TYPE(kpoint_type), POINTER :: kpoints
3920 TYPE(mp_para_env_type), POINTER :: para_env
3922 DIMENSION(:), POINTER :: nl_iterator
3923 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
3924 POINTER :: nl_2c
3925
3926 NULLIFY (nl_2c, nl_iterator, kpoints, cell_to_index, dft_control, index_to_cell, para_env)
3927
3928 CALL get_qs_env(qs_env, kpoints=kpoints, dft_control=dft_control, para_env=para_env, natom=natom)
3929 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell, sab_nl=nl_2c)
3930
3931 nimg = ri_data%nimg
3932 pattern(:, :, :) = 0
3933
3934 !We use the symmetric nl for all images that have an opposite cell
3935 CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
3936 DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
3937 CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, cell=cell_j)
3938
3939 j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
3940 IF (j_img > nimg .OR. j_img < 1) cycle
3941
3942 mj_img = get_opp_index(j_img, qs_env)
3943 IF (mj_img > nimg .OR. mj_img < 1) cycle
3944
3945 IF (ri_data%present_images(j_img) == 0) cycle
3946
3947 pattern(iatom, jatom, j_img) = 1
3948 END DO
3949 CALL neighbor_list_iterator_release(nl_iterator)
3950
3951 !If there is no opposite cell present, then we take into account the non-symmetric nl
3952 CALL get_kpoint_info(kpoints, sab_nl_nosym=nl_2c)
3953
3954 CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
3955 DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
3956 CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, cell=cell_j)
3957
3958 j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
3959 IF (j_img > nimg .OR. j_img < 1) cycle
3960
3961 mj_img = get_opp_index(j_img, qs_env)
3962 IF (mj_img .LE. nimg .AND. mj_img > 0) cycle
3963
3964 IF (ri_data%present_images(j_img) == 0) cycle
3965
3966 pattern(iatom, jatom, j_img) = 1
3967 END DO
3968 CALL neighbor_list_iterator_release(nl_iterator)
3969
3970 CALL para_env%sum(pattern)
3971
3972 !If the opposite image is considered, then there is no need to compute diagonal twice
3973 DO j_img = 2, nimg
3974 DO iatom = 1, natom
3975 IF (pattern(iatom, iatom, j_img) .NE. 0) THEN
3976 mj_img = get_opp_index(j_img, qs_env)
3977 IF (mj_img > nimg .OR. mj_img < 1) cycle
3978 pattern(iatom, iatom, mj_img) = 0
3979 END IF
3980 END DO
3981 END DO
3982
3983 ! We want to equilibrate the sparsity pattern such that there are same amount of blocks
3984 ! for each atom i of i,j pairs
3985 ALLOCATE (bins(natom))
3986 bins(:) = 0
3987
3988 ALLOCATE (tmp_pattern(natom, natom, nimg))
3989 tmp_pattern(:, :, :) = 0
3990 DO j_img = 1, nimg
3991 DO jatom = 1, natom
3992 DO iatom = 1, natom
3993 IF (pattern(iatom, jatom, j_img) == 0) cycle
3994 mj_img = get_opp_index(j_img, qs_env)
3995
3996 !Should we take the i,j,b or th j,i,-b atomic block?
3997 IF (mj_img > nimg .OR. mj_img < 1) THEN
3998 !No opposite image, no choice
3999 bins(iatom) = bins(iatom) + 1
4000 tmp_pattern(iatom, jatom, j_img) = 1
4001 ELSE
4002
4003 IF (bins(iatom) > bins(jatom)) THEN
4004 bins(jatom) = bins(jatom) + 1
4005 tmp_pattern(jatom, iatom, mj_img) = 1
4006 ELSE
4007 bins(iatom) = bins(iatom) + 1
4008 tmp_pattern(iatom, jatom, j_img) = 1
4009 END IF
4010 END IF
4011 END DO
4012 END DO
4013 END DO
4014
4015 ! -1 => unoccupied, 0 => occupied
4016 pattern(:, :, :) = tmp_pattern(:, :, :) - 1
4017
4018 END SUBROUTINE get_sparsity_pattern
4019
4020! **************************************************************************************************
4021!> \brief Distribute the iatom, jatom, b_img triplet over the subgroupd to spread the load
4022!> the group id for each triplet is passed as the value of sparsity_pattern(i, j, b),
4023!> with -1 being an unoccupied block
4024!> \param sparsity_pattern ...
4025!> \param ngroups ...
4026!> \param ri_data ...
4027! **************************************************************************************************
4028 SUBROUTINE get_sub_dist(sparsity_pattern, ngroups, ri_data)
4029 INTEGER, DIMENSION(:, :, :), INTENT(INOUT) :: sparsity_pattern
4030 INTEGER, INTENT(IN) :: ngroups
4031 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4032
4033 INTEGER :: b_img, ctr, iat, iatom, igroup, jatom, &
4034 natom, nimg, ub
4035 INTEGER, ALLOCATABLE, DIMENSION(:) :: max_at_per_group
4036 REAL(dp) :: cost
4037 REAL(dp), ALLOCATABLE, DIMENSION(:) :: bins
4038
4039 natom = SIZE(sparsity_pattern, 2)
4040 nimg = SIZE(sparsity_pattern, 3)
4041
4042 !To avoid unnecessary data replication accross the subgroups, we want to have a limited number
4043 !of subgroup with the data of a given iatom. At the minimum, all groups have 1 atom
4044 !We assume that the cost associated to each iatom is roughly the same
4045 IF (.NOT. ALLOCATED(ri_data%iatom_to_subgroup)) THEN
4046 ALLOCATE (ri_data%iatom_to_subgroup(natom), max_at_per_group(ngroups))
4047 DO iatom = 1, natom
4048 NULLIFY (ri_data%iatom_to_subgroup(iatom)%array)
4049 ALLOCATE (ri_data%iatom_to_subgroup(iatom)%array(ngroups))
4050 ri_data%iatom_to_subgroup(iatom)%array(:) = .false.
4051 END DO
4052
4053 ub = natom/ngroups
4054 IF (ub*ngroups < natom) ub = ub + 1
4055 max_at_per_group(:) = max(1, ub)
4056
4057 !We want each atom to be present the same amount of times. Some groups might have more atoms
4058 !than other to achieve this.
4059 ctr = 0
4060 DO WHILE (modulo(sum(max_at_per_group), natom) .NE. 0)
4061 igroup = modulo(ctr, ngroups) + 1
4062 max_at_per_group(igroup) = max_at_per_group(igroup) + 1
4063 ctr = ctr + 1
4064 END DO
4065
4066 ctr = 0
4067 DO igroup = 1, ngroups
4068 DO iat = 1, max_at_per_group(igroup)
4069 iatom = modulo(ctr, natom) + 1
4070 ri_data%iatom_to_subgroup(iatom)%array(igroup) = .true.
4071 ctr = ctr + 1
4072 END DO
4073 END DO
4074 END IF
4075
4076 ALLOCATE (bins(ngroups))
4077 bins = 0.0_dp
4078 DO b_img = 1, nimg
4079 DO jatom = 1, natom
4080 DO iatom = 1, natom
4081 IF (sparsity_pattern(iatom, jatom, b_img) == -1) cycle
4082 igroup = minloc(bins, 1, mask=ri_data%iatom_to_subgroup(iatom)%array) - 1
4083
4084 !Use cost information from previous SCF if available
4085 IF (any(ri_data%kp_cost > epsilon(0.0_dp))) THEN
4086 cost = ri_data%kp_cost(iatom, jatom, b_img)
4087 ELSE
4088 cost = real(ri_data%bsizes_AO(iatom)*ri_data%bsizes_AO(jatom), dp)
4089 END IF
4090 bins(igroup + 1) = bins(igroup + 1) + cost
4091 sparsity_pattern(iatom, jatom, b_img) = igroup
4092 END DO
4093 END DO
4094 END DO
4095
4096 END SUBROUTINE get_sub_dist
4097
4098! **************************************************************************************************
4099!> \brief A rouine that updates the sparsity pattern for force calculation, where all i,j,b combinations
4100!> are visited.
4101!> \param force_pattern ...
4102!> \param scf_pattern ...
4103!> \param ngroups ...
4104!> \param ri_data ...
4105!> \param qs_env ...
4106! **************************************************************************************************
4107 SUBROUTINE update_pattern_to_forces(force_pattern, scf_pattern, ngroups, ri_data, qs_env)
4108 INTEGER, DIMENSION(:, :, :), INTENT(INOUT) :: force_pattern, scf_pattern
4109 INTEGER, INTENT(IN) :: ngroups
4110 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4111 TYPE(qs_environment_type), POINTER :: qs_env
4112
4113 INTEGER :: b_img, iatom, igroup, jatom, mb_img, &
4114 natom, nimg
4115 REAL(dp), ALLOCATABLE, DIMENSION(:) :: bins
4116
4117 natom = SIZE(scf_pattern, 2)
4118 nimg = SIZE(scf_pattern, 3)
4119
4120 ALLOCATE (bins(ngroups))
4121 bins = 0.0_dp
4122
4123 DO b_img = 1, nimg
4124 mb_img = get_opp_index(b_img, qs_env)
4125 DO jatom = 1, natom
4126 DO iatom = 1, natom
4127 !Important: same distribution as KS matrix, because reuse t_3c_apc
4128 igroup = minloc(bins, 1, mask=ri_data%iatom_to_subgroup(iatom)%array) - 1
4129
4130 !check that block not already treated
4131 IF (scf_pattern(iatom, jatom, b_img) > -1) cycle
4132
4133 !If not, take the cost of block j, i, -b (same energy contribution)
4134 IF (mb_img > 0 .AND. mb_img .LE. nimg) THEN
4135 IF (scf_pattern(jatom, iatom, mb_img) == -1) cycle
4136 bins(igroup + 1) = bins(igroup + 1) + ri_data%kp_cost(jatom, iatom, mb_img)
4137 force_pattern(iatom, jatom, b_img) = igroup
4138 END IF
4139 END DO
4140 END DO
4141 END DO
4142
4143 END SUBROUTINE update_pattern_to_forces
4144
4145! **************************************************************************************************
4146!> \brief A routine that determines the extend of the KP RI-HFX periodic images, including for the
4147!> extension of the RI basis
4148!> \param ri_data ...
4149!> \param qs_env ...
4150! **************************************************************************************************
4151 SUBROUTINE get_kp_and_ri_images(ri_data, qs_env)
4152 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4153 TYPE(qs_environment_type), POINTER :: qs_env
4154
4155 CHARACTER(LEN=*), PARAMETER :: routinen = 'get_kp_and_ri_images'
4156
4157 INTEGER :: cell_j(3), cell_k(3), handle, i_img, iatom, ikind, j_img, jatom, jcell, katom, &
4158 kcell, kp_index_lbounds(3), kp_index_ubounds(3), natom, ngroups, nimg, nkind, pcoord(3), &
4159 pdims(3)
4160 INTEGER, ALLOCATABLE, DIMENSION(:) :: dist_ao_1, dist_ao_2, dist_ri, &
4161 nri_per_atom, present_img, ri_cells
4162 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
4163 REAL(dp) :: bump_fact, dij, dik, image_range, &
4164 ri_range, rij(3), rik(3)
4165 TYPE(dbt_type) :: t_dummy
4166 TYPE(dft_control_type), POINTER :: dft_control
4167 TYPE(distribution_2d_type), POINTER :: dist_2d
4168 TYPE(distribution_3d_type) :: dist_3d
4169 TYPE(gto_basis_set_p_type), ALLOCATABLE, &
4170 DIMENSION(:), TARGET :: basis_set_ao, basis_set_ri
4171 TYPE(kpoint_type), POINTER :: kpoints
4172 TYPE(mp_cart_type) :: mp_comm_t3c
4173 TYPE(mp_para_env_type), POINTER :: para_env
4174 TYPE(neighbor_list_3c_iterator_type) :: nl_3c_iter
4175 TYPE(neighbor_list_3c_type) :: nl_3c
4177 DIMENSION(:), POINTER :: nl_iterator
4178 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
4179 POINTER :: nl_2c
4180 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
4181 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
4182 TYPE(section_vals_type), POINTER :: hfx_section
4183
4184 NULLIFY (qs_kind_set, dist_2d, nl_2c, nl_iterator, dft_control, &
4185 particle_set, kpoints, para_env, cell_to_index, hfx_section)
4186
4187 CALL timeset(routinen, handle)
4188
4189 CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, distribution_2d=dist_2d, &
4190 dft_control=dft_control, particle_set=particle_set, kpoints=kpoints, &
4191 para_env=para_env, natom=natom)
4192 nimg = dft_control%nimages
4193 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index)
4194 kp_index_lbounds = lbound(cell_to_index)
4195 kp_index_ubounds = ubound(cell_to_index)
4196
4197 hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
4198 CALL section_vals_val_get(hfx_section, "KP_NGROUPS", i_val=ngroups)
4199
4200 ALLOCATE (basis_set_ri(nkind), basis_set_ao(nkind))
4201 CALL basis_set_list_setup(basis_set_ri, ri_data%ri_basis_type, qs_kind_set)
4202 CALL basis_set_list_setup(basis_set_ao, ri_data%orb_basis_type, qs_kind_set)
4203
4204 !In case of shortrange HFX potential, it is imprtant to be consistent with the rest of the KP
4205 !code, and use EPS_SCHWARZ to determine the range (rather than eps_filter_2c in normal RI-HFX)
4206 IF (ri_data%hfx_pot%potential_type == do_potential_short) THEN
4207 CALL erfc_cutoff(ri_data%eps_schwarz, ri_data%hfx_pot%omega, ri_data%hfx_pot%cutoff_radius)
4208 END IF
4209
4210 !Determine the range for contributing periodic images, and for the RI basis extension
4211 ri_data%kp_RI_range = 0.0_dp
4212 ri_data%kp_image_range = 0.0_dp
4213 DO ikind = 1, nkind
4214
4215 CALL init_interaction_radii_orb_basis(basis_set_ao(ikind)%gto_basis_set, ri_data%eps_pgf_orb)
4216 CALL get_gto_basis_set(basis_set_ao(ikind)%gto_basis_set, kind_radius=ri_range)
4217 ri_data%kp_RI_range = max(ri_range, ri_data%kp_RI_range)
4218
4219 CALL init_interaction_radii_orb_basis(basis_set_ao(ikind)%gto_basis_set, ri_data%eps_pgf_orb)
4220 CALL init_interaction_radii_orb_basis(basis_set_ri(ikind)%gto_basis_set, ri_data%eps_pgf_orb)
4221 CALL get_gto_basis_set(basis_set_ri(ikind)%gto_basis_set, kind_radius=image_range)
4222
4223 image_range = 2.0_dp*image_range + cutoff_screen_factor*ri_data%hfx_pot%cutoff_radius
4224 ri_data%kp_image_range = max(image_range, ri_data%kp_image_range)
4225 END DO
4226
4227 CALL section_vals_val_get(hfx_section, "KP_RI_BUMP_FACTOR", r_val=bump_fact)
4228 ri_data%kp_bump_rad = bump_fact*ri_data%kp_RI_range
4229
4230 !For the extent of the KP RI-HFX images, we are limited by the RI-HFX potential in
4231 !(mu^0 sigma^a|P^0) (P^0|Q^b) (Q^b|nu^b lambda^a+c), if there is no contact between
4232 !any P^0 and Q^b, then image b does not contribute
4233 CALL build_2c_neighbor_lists(nl_2c, basis_set_ri, basis_set_ri, ri_data%hfx_pot, &
4234 "HFX_2c_nl_RI", qs_env, sym_ij=.false., dist_2d=dist_2d)
4235
4236 ALLOCATE (present_img(nimg))
4237 present_img = 0
4238 ri_data%nimg = 0
4239 CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
4240 DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
4241 CALL get_iterator_info(nl_iterator, r=rij, cell=cell_j)
4242
4243 dij = norm2(rij)
4244
4245 j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
4246 IF (j_img > nimg .OR. j_img < 1) cycle
4247
4248 IF (dij > ri_data%kp_image_range) cycle
4249
4250 ri_data%nimg = max(j_img, ri_data%nimg)
4251 present_img(j_img) = 1
4252
4253 END DO
4254 CALL neighbor_list_iterator_release(nl_iterator)
4255 CALL release_neighbor_list_sets(nl_2c)
4256 CALL para_env%max(ri_data%nimg)
4257 IF (ri_data%nimg > nimg) &
4258 cpabort("Make sure the smallest exponent of the RI-HFX basis is larger than that of the ORB basis.")
4259
4260 !Keep track of which images will not contribute, so that can be ignored before calculation
4261 CALL para_env%sum(present_img)
4262 ALLOCATE (ri_data%present_images(ri_data%nimg))
4263 ri_data%present_images = 0
4264 DO i_img = 1, ri_data%nimg
4265 IF (present_img(i_img) > 0) ri_data%present_images(i_img) = 1
4266 END DO
4267
4268 CALL create_3c_tensor(t_dummy, dist_ao_1, dist_ao_2, dist_ri, &
4269 ri_data%pgrid, ri_data%bsizes_AO, ri_data%bsizes_AO, ri_data%bsizes_RI, &
4270 map1=[1, 2], map2=[3], name="(AO AO | RI)")
4271
4272 CALL dbt_mp_environ_pgrid(ri_data%pgrid, pdims, pcoord)
4273 CALL mp_comm_t3c%create(ri_data%pgrid%mp_comm_2d, 3, pdims)
4274 CALL distribution_3d_create(dist_3d, dist_ao_1, dist_ao_2, dist_ri, &
4275 nkind, particle_set, mp_comm_t3c, own_comm=.true.)
4276 DEALLOCATE (dist_ri, dist_ao_1, dist_ao_2)
4277 CALL dbt_destroy(t_dummy)
4278
4279 !For the extension of the RI basis P in (mu^0 sigma^a |P^i), we consider an atom if the distance,
4280 !between mu^0 and P^i if smaller or equal to the kind radius of mu^0
4281 CALL build_3c_neighbor_lists(nl_3c, basis_set_ao, basis_set_ao, basis_set_ri, dist_3d, &
4282 ri_data%ri_metric, "HFX_3c_nl", qs_env, op_pos=2, sym_ij=.false., &
4283 own_dist=.true.)
4284
4285 ALLOCATE (ri_cells(nimg))
4286 ri_cells = 0
4287
4288 ALLOCATE (nri_per_atom(natom))
4289 nri_per_atom = 0
4290
4291 CALL neighbor_list_3c_iterator_create(nl_3c_iter, nl_3c)
4292 DO WHILE (neighbor_list_3c_iterate(nl_3c_iter) == 0)
4293 CALL get_3c_iterator_info(nl_3c_iter, cell_k=cell_k, rik=rik, cell_j=cell_j, &
4294 iatom=iatom, jatom=jatom, katom=katom)
4295 dik = norm2(rik)
4296
4297 IF (any([cell_j(1), cell_j(2), cell_j(3)] < kp_index_lbounds) .OR. &
4298 any([cell_j(1), cell_j(2), cell_j(3)] > kp_index_ubounds)) cycle
4299
4300 jcell = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
4301 IF (jcell > nimg .OR. jcell < 1) cycle
4302
4303 IF (any([cell_k(1), cell_k(2), cell_k(3)] < kp_index_lbounds) .OR. &
4304 any([cell_k(1), cell_k(2), cell_k(3)] > kp_index_ubounds)) cycle
4305
4306 kcell = cell_to_index(cell_k(1), cell_k(2), cell_k(3))
4307 IF (kcell > nimg .OR. kcell < 1) cycle
4308
4309 IF (dik > ri_data%kp_RI_range) cycle
4310 ri_cells(kcell) = 1
4311
4312 IF (jcell == 1 .AND. iatom == jatom) nri_per_atom(iatom) = nri_per_atom(iatom) + ri_data%bsizes_RI(katom)
4313 END DO
4314 CALL neighbor_list_3c_iterator_destroy(nl_3c_iter)
4315 CALL neighbor_list_3c_destroy(nl_3c)
4316 CALL para_env%sum(ri_cells)
4317 CALL para_env%sum(nri_per_atom)
4318
4319 ALLOCATE (ri_data%img_to_RI_cell(nimg))
4320 ri_data%ncell_RI = 0
4321 ri_data%img_to_RI_cell = 0
4322 DO i_img = 1, nimg
4323 IF (ri_cells(i_img) > 0) THEN
4324 ri_data%ncell_RI = ri_data%ncell_RI + 1
4325 ri_data%img_to_RI_cell(i_img) = ri_data%ncell_RI
4326 END IF
4327 END DO
4328
4329 ALLOCATE (ri_data%RI_cell_to_img(ri_data%ncell_RI))
4330 DO i_img = 1, nimg
4331 IF (ri_data%img_to_RI_cell(i_img) > 0) ri_data%RI_cell_to_img(ri_data%img_to_RI_cell(i_img)) = i_img
4332 END DO
4333
4334 !Print some info
4335 IF (ri_data%unit_nr > 0) THEN
4336 WRITE (ri_data%unit_nr, fmt="(/T3,A,I29)") &
4337 "KP-HFX_RI_INFO| Number of RI-KP parallel groups:", ngroups
4338 WRITE (ri_data%unit_nr, fmt="(T3,A,F31.3,A)") &
4339 "KP-HFX_RI_INFO| RI basis extension radius:", ri_data%kp_RI_range*angstrom, " Ang"
4340 WRITE (ri_data%unit_nr, fmt="(T3,A,F12.3,A, F6.3, A)") &
4341 "KP-HFX_RI_INFO| RI basis bump factor and bump radius:", bump_fact, " /", &
4342 ri_data%kp_bump_rad*angstrom, " Ang"
4343 WRITE (ri_data%unit_nr, fmt="(T3,A,I16,A)") &
4344 "KP-HFX_RI_INFO| The extended RI bases cover up to ", ri_data%ncell_RI, " unit cells"
4345 WRITE (ri_data%unit_nr, fmt="(T3,A,I18)") &
4346 "KP-HFX_RI_INFO| Average number of sgf in extended RI bases:", sum(nri_per_atom)/natom
4347 WRITE (ri_data%unit_nr, fmt="(T3,A,F13.3,A)") &
4348 "KP-HFX_RI_INFO| Consider all image cells within a radius of ", ri_data%kp_image_range*angstrom, " Ang"
4349 WRITE (ri_data%unit_nr, fmt="(T3,A,I27/)") &
4350 "KP-HFX_RI_INFO| Number of image cells considered: ", ri_data%nimg
4351 CALL m_flush(ri_data%unit_nr)
4352 END IF
4353
4354 CALL timestop(handle)
4355
4356 END SUBROUTINE get_kp_and_ri_images
4357
4358! **************************************************************************************************
4359!> \brief A routine that creates tensors structure for rho_ao and 3c_ints in a stacked format for
4360!> the efficient contractions of rho_sigma^0,lambda^c * (mu^0 sigam^a | P) => TAS tensors
4361!> \param res_stack ...
4362!> \param rho_stack ...
4363!> \param ints_stack ...
4364!> \param rho_template ...
4365!> \param ints_template ...
4366!> \param stack_size ...
4367!> \param ri_data ...
4368!> \param qs_env ...
4369!> \note The result tensor has the exact same shape and distribution as the integral tensor
4370! **************************************************************************************************
4371 SUBROUTINE get_stack_tensors(res_stack, rho_stack, ints_stack, rho_template, ints_template, &
4372 stack_size, ri_data, qs_env)
4373 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: res_stack, rho_stack, ints_stack
4374 TYPE(dbt_type), INTENT(INOUT) :: rho_template, ints_template
4375 INTEGER, INTENT(IN) :: stack_size
4376 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4377 TYPE(qs_environment_type), POINTER :: qs_env
4378
4379 INTEGER :: is, nblks, nblks_3c(3), pdims_3d(3)
4380 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_ri_ext, bsizes_stack, dist1, &
4381 dist2, dist3, dist_stack1, &
4382 dist_stack2, dist_stack3
4383 TYPE(dbt_distribution_type) :: t_dist
4384 TYPE(dbt_pgrid_type) :: pgrid
4385 TYPE(mp_para_env_type), POINTER :: para_env
4386
4387 NULLIFY (para_env)
4388
4389 CALL get_qs_env(qs_env, para_env=para_env)
4390
4391 nblks = SIZE(ri_data%bsizes_AO_split)
4392 ALLOCATE (bsizes_stack(stack_size*nblks))
4393 DO is = 1, stack_size
4394 bsizes_stack((is - 1)*nblks + 1:is*nblks) = ri_data%bsizes_AO_split(:)
4395 END DO
4396
4397 ALLOCATE (dist1(nblks), dist2(nblks), dist_stack1(stack_size*nblks), dist_stack2(stack_size*nblks))
4398 CALL dbt_get_info(rho_template, proc_dist_1=dist1, proc_dist_2=dist2)
4399 DO is = 1, stack_size
4400 dist_stack1((is - 1)*nblks + 1:is*nblks) = dist1(:)
4401 dist_stack2((is - 1)*nblks + 1:is*nblks) = dist2(:)
4402 END DO
4403
4404 !First 2c tensor matches the distribution of template
4405 !It is stacked in both directions
4406 CALL dbt_distribution_new(t_dist, ri_data%pgrid_2d, dist_stack1, dist_stack2)
4407 CALL dbt_create(rho_stack(1), "RHO_stack", t_dist, [1], [2], bsizes_stack, bsizes_stack)
4408 CALL dbt_distribution_destroy(t_dist)
4409 DEALLOCATE (dist1, dist2, dist_stack1, dist_stack2)
4410
4411 !Second 2c tensor has optimal distribution on the 2d pgrid
4412 CALL create_2c_tensor(rho_stack(2), dist1, dist2, ri_data%pgrid_2d, bsizes_stack, bsizes_stack, name="RHO_stack")
4413 DEALLOCATE (dist1, dist2)
4414
4415 CALL dbt_get_info(ints_template, nblks_total=nblks_3c)
4416 ALLOCATE (dist1(nblks_3c(1)), dist2(nblks_3c(2)), dist3(nblks_3c(3)))
4417 ALLOCATE (dist_stack3(stack_size*nblks_3c(3)), bsizes_ri_ext(nblks_3c(2)))
4418 CALL dbt_get_info(ints_template, proc_dist_1=dist1, proc_dist_2=dist2, &
4419 proc_dist_3=dist3, blk_size_2=bsizes_ri_ext)
4420 DO is = 1, stack_size
4421 dist_stack3((is - 1)*nblks_3c(3) + 1:is*nblks_3c(3)) = dist3(:)
4422 END DO
4423
4424 !First 3c tensor matches the distribution of template
4425 CALL dbt_distribution_new(t_dist, ri_data%pgrid_1, dist1, dist2, dist_stack3)
4426 CALL dbt_create(ints_stack(1), "ints_stack", t_dist, [1, 2], [3], ri_data%bsizes_AO_split, &
4427 bsizes_ri_ext, bsizes_stack)
4428 CALL dbt_distribution_destroy(t_dist)
4429 DEALLOCATE (dist1, dist2, dist3, dist_stack3)
4430
4431 !Second 3c tensor has optimal pgrid
4432 pdims_3d = 0
4433 CALL dbt_pgrid_create(para_env, pdims_3d, pgrid, tensor_dims=[nblks_3c(1), nblks_3c(2), stack_size*nblks_3c(3)])
4434 CALL create_3c_tensor(ints_stack(2), dist1, dist2, dist3, pgrid, ri_data%bsizes_AO_split, &
4435 bsizes_ri_ext, bsizes_stack, [1, 2], [3], name="ints_stack")
4436 DEALLOCATE (dist1, dist2, dist3)
4437 CALL dbt_pgrid_destroy(pgrid)
4438
4439 !The result tensor has the same shape and dist as the integral tensor
4440 CALL dbt_create(ints_stack(1), res_stack(1))
4441 CALL dbt_create(ints_stack(2), res_stack(2))
4442
4443 END SUBROUTINE get_stack_tensors
4444
4445! **************************************************************************************************
4446!> \brief Fill the stack of 3c tensors accrding to the order in the images input
4447!> \param t_3c_stack ...
4448!> \param t_3c_in ...
4449!> \param images ...
4450!> \param stack_dim ...
4451!> \param ri_data ...
4452!> \param filter_at ...
4453!> \param filter_dim ...
4454!> \param idx_to_at ...
4455!> \param img_bounds ...
4456! **************************************************************************************************
4457 SUBROUTINE fill_3c_stack(t_3c_stack, t_3c_in, images, stack_dim, ri_data, filter_at, filter_dim, &
4458 idx_to_at, img_bounds)
4459 TYPE(dbt_type), INTENT(INOUT) :: t_3c_stack
4460 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_in
4461 INTEGER, DIMENSION(:), INTENT(INOUT) :: images
4462 INTEGER, INTENT(IN) :: stack_dim
4463 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4464 INTEGER, INTENT(IN), OPTIONAL :: filter_at, filter_dim
4465 INTEGER, DIMENSION(:), INTENT(INOUT), OPTIONAL :: idx_to_at
4466 INTEGER, INTENT(IN), OPTIONAL :: img_bounds(2)
4467
4468 INTEGER :: dest(3), i_img, idx, ind(3), lb, nblks, &
4469 nimg, offset, ub
4470 LOGICAL :: do_filter, found
4471 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
4472 TYPE(dbt_iterator_type) :: iter
4473
4474 !We loop over the a images from the ac_pairs, then copy the 3c ints to the correct spot in
4475 !in the stack tensor (corresponding to pair index). Distributions match by construction
4476 nimg = ri_data%nimg
4477 nblks = SIZE(ri_data%bsizes_AO_split)
4478
4479 do_filter = .false.
4480 IF (PRESENT(filter_at) .AND. PRESENT(filter_dim) .AND. PRESENT(idx_to_at)) do_filter = .true.
4481
4482 lb = 1
4483 ub = nimg
4484 offset = 0
4485 IF (PRESENT(img_bounds)) THEN
4486 lb = img_bounds(1)
4487 ub = img_bounds(2) - 1
4488 offset = lb - 1
4489 END IF
4490
4491 DO idx = lb, ub
4492 i_img = images(idx)
4493 IF (i_img == 0 .OR. i_img > nimg) cycle
4494
4495!$OMP PARALLEL DEFAULT(NONE) &
4496!$OMP SHARED(idx,i_img,t_3c_in,t_3c_stack,nblks,stack_dim,filter_at,filter_dim,idx_to_at,do_filter,offset) &
4497!$OMP PRIVATE(iter,ind,blk,found,dest)
4498 CALL dbt_iterator_start(iter, t_3c_in(i_img))
4499 DO WHILE (dbt_iterator_blocks_left(iter))
4500 CALL dbt_iterator_next_block(iter, ind)
4501 CALL dbt_get_block(t_3c_in(i_img), ind, blk, found)
4502 IF (.NOT. found) cycle
4503
4504 IF (do_filter) THEN
4505 IF (.NOT. idx_to_at(ind(filter_dim)) == filter_at) cycle
4506 END IF
4507
4508 IF (stack_dim == 1) THEN
4509 dest = [(idx - offset - 1)*nblks + ind(1), ind(2), ind(3)]
4510 ELSE IF (stack_dim == 2) THEN
4511 dest = [ind(1), (idx - offset - 1)*nblks + ind(2), ind(3)]
4512 ELSE
4513 dest = [ind(1), ind(2), (idx - offset - 1)*nblks + ind(3)]
4514 END IF
4515
4516 CALL dbt_put_block(t_3c_stack, dest, shape(blk), blk)
4517 DEALLOCATE (blk)
4518 END DO
4519 CALL dbt_iterator_stop(iter)
4520!$OMP END PARALLEL
4521 END DO !i_img
4522 CALL dbt_finalize(t_3c_stack)
4523
4524 END SUBROUTINE fill_3c_stack
4525
4526! **************************************************************************************************
4527!> \brief Fill the stack of 2c tensors based on the content of images input
4528!> \param t_2c_stack ...
4529!> \param t_2c_in ...
4530!> \param images ...
4531!> \param stack_dim ...
4532!> \param ri_data ...
4533!> \param img_bounds ...
4534!> \param shift ...
4535! **************************************************************************************************
4536 SUBROUTINE fill_2c_stack(t_2c_stack, t_2c_in, images, stack_dim, ri_data, img_bounds, shift)
4537 TYPE(dbt_type), INTENT(INOUT) :: t_2c_stack
4538 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_in
4539 INTEGER, DIMENSION(:), INTENT(INOUT) :: images
4540 INTEGER, INTENT(IN) :: stack_dim
4541 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4542 INTEGER, INTENT(IN), OPTIONAL :: img_bounds(2), shift
4543
4544 INTEGER :: dest(2), i_img, idx, ind(2), lb, &
4545 my_shift, nblks, nimg, offset, ub
4546 LOGICAL :: found
4547 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
4548 TYPE(dbt_iterator_type) :: iter
4549
4550 !We loop over the a images from the ac_pairs, then copy the 3c ints to the correct spot in
4551 !in the stack tensor (corresponding to pair index). Distributions match by construction
4552 nimg = ri_data%nimg
4553 nblks = SIZE(ri_data%bsizes_AO_split)
4554
4555 lb = 1
4556 ub = nimg
4557 offset = 0
4558 IF (PRESENT(img_bounds)) THEN
4559 lb = img_bounds(1)
4560 ub = img_bounds(2) - 1
4561 offset = lb - 1
4562 END IF
4563
4564 my_shift = 1
4565 IF (PRESENT(shift)) my_shift = shift
4566
4567 DO idx = lb, ub
4568 i_img = images(idx)
4569 IF (i_img == 0 .OR. i_img > nimg) cycle
4570
4571!$OMP PARALLEL DEFAULT(NONE) SHARED(idx,i_img,t_2c_in,t_2c_stack,nblks,stack_dim,offset,my_shift) &
4572!$OMP PRIVATE(iter,ind,blk,found,dest)
4573 CALL dbt_iterator_start(iter, t_2c_in(i_img))
4574 DO WHILE (dbt_iterator_blocks_left(iter))
4575 CALL dbt_iterator_next_block(iter, ind)
4576 CALL dbt_get_block(t_2c_in(i_img), ind, blk, found)
4577 IF (.NOT. found) cycle
4578
4579 IF (stack_dim == 1) THEN
4580 dest = [(idx - offset - 1)*nblks + ind(1), (my_shift - 1)*nblks + ind(2)]
4581 ELSE
4582 dest = [(my_shift - 1)*nblks + ind(1), (idx - offset - 1)*nblks + ind(2)]
4583 END IF
4584
4585 CALL dbt_put_block(t_2c_stack, dest, shape(blk), blk)
4586 DEALLOCATE (blk)
4587 END DO
4588 CALL dbt_iterator_stop(iter)
4589!$OMP END PARALLEL
4590 END DO !idx
4591 CALL dbt_finalize(t_2c_stack)
4592
4593 END SUBROUTINE fill_2c_stack
4594
4595! **************************************************************************************************
4596!> \brief Unstacks a stacked 3c tensor containing t_3c_apc
4597!> \param t_3c_apc ...
4598!> \param t_stacked ...
4599!> \param idx ...
4600! **************************************************************************************************
4601 SUBROUTINE unstack_t_3c_apc(t_3c_apc, t_stacked, idx)
4602 TYPE(dbt_type), INTENT(INOUT) :: t_3c_apc, t_stacked
4603 INTEGER, INTENT(IN) :: idx
4604
4605 INTEGER :: current_idx
4606 INTEGER, DIMENSION(3) :: ind, nblks_3c
4607 LOGICAL :: found
4608 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
4609 TYPE(dbt_iterator_type) :: iter
4610
4611 !Note: t_3c_apc and t_stacked must have the same ditribution
4612 CALL dbt_get_info(t_3c_apc, nblks_total=nblks_3c)
4613
4614!$OMP PARALLEL DEFAULT(NONE) SHARED(t_3c_apc,t_stacked,idx,nblks_3c) PRIVATE(iter,ind,blk,found,current_idx)
4615 CALL dbt_iterator_start(iter, t_stacked)
4616 DO WHILE (dbt_iterator_blocks_left(iter))
4617 CALL dbt_iterator_next_block(iter, ind)
4618
4619 !tensor is stacked along the 3rd dimension
4620 current_idx = (ind(3) - 1)/nblks_3c(3) + 1
4621 IF (.NOT. idx == current_idx) cycle
4622
4623 CALL dbt_get_block(t_stacked, ind, blk, found)
4624 IF (.NOT. found) cycle
4625
4626 CALL dbt_put_block(t_3c_apc, [ind(1), ind(2), ind(3) - (idx - 1)*nblks_3c(3)], shape(blk), blk)
4627 DEALLOCATE (blk)
4628 END DO
4629 CALL dbt_iterator_stop(iter)
4630!$OMP END PARALLEL
4631
4632 END SUBROUTINE unstack_t_3c_apc
4633
4634! **************************************************************************************************
4635!> \brief copies the 3c integrals correspoinding to a single atom mu from the general (P^0| mu^0 sigam^a)
4636!> \param t_3c_at ...
4637!> \param t_3c_ints ...
4638!> \param iatom ...
4639!> \param dim_at ...
4640!> \param idx_to_at ...
4641! **************************************************************************************************
4642 SUBROUTINE get_atom_3c_ints(t_3c_at, t_3c_ints, iatom, dim_at, idx_to_at)
4643 TYPE(dbt_type), INTENT(INOUT) :: t_3c_at, t_3c_ints
4644 INTEGER, INTENT(IN) :: iatom, dim_at
4645 INTEGER, DIMENSION(:), INTENT(IN) :: idx_to_at
4646
4647 INTEGER, DIMENSION(3) :: ind
4648 LOGICAL :: found
4649 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
4650 TYPE(dbt_iterator_type) :: iter
4651
4652!$OMP PARALLEL DEFAULT(NONE) SHARED(t_3c_ints,t_3c_at,iatom,idx_to_at,dim_at) PRIVATE(iter,ind,blk,found)
4653 CALL dbt_iterator_start(iter, t_3c_ints)
4654 DO WHILE (dbt_iterator_blocks_left(iter))
4655 CALL dbt_iterator_next_block(iter, ind)
4656 IF (.NOT. idx_to_at(ind(dim_at)) == iatom) cycle
4657
4658 CALL dbt_get_block(t_3c_ints, ind, blk, found)
4659 IF (.NOT. found) cycle
4660
4661 CALL dbt_put_block(t_3c_at, ind, shape(blk), blk)
4662 DEALLOCATE (blk)
4663 END DO
4664 CALL dbt_iterator_stop(iter)
4665!$OMP END PARALLEL
4666 CALL dbt_finalize(t_3c_at)
4667
4668 END SUBROUTINE get_atom_3c_ints
4669
4670! **************************************************************************************************
4671!> \brief Precalculate the 3c and 2c derivatives tensors
4672!> \param t_3c_der_RI ...
4673!> \param t_3c_der_AO ...
4674!> \param mat_der_pot ...
4675!> \param t_2c_der_metric ...
4676!> \param ri_data ...
4677!> \param qs_env ...
4678! **************************************************************************************************
4679 SUBROUTINE precalc_derivatives(t_3c_der_RI, t_3c_der_AO, mat_der_pot, t_2c_der_metric, ri_data, qs_env)
4680 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_der_ri, t_3c_der_ao
4681 TYPE(dbcsr_type), DIMENSION(:, :), INTENT(INOUT) :: mat_der_pot
4682 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_2c_der_metric
4683 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4684 TYPE(qs_environment_type), POINTER :: qs_env
4685
4686 CHARACTER(LEN=*), PARAMETER :: routinen = 'precalc_derivatives'
4687
4688 INTEGER :: handle, handle2, i_img, i_mem, i_ri, &
4689 i_xyz, iatom, n_mem, natom, nblks_ri, &
4690 ncell_ri, nimg, nkind, nthreads
4691 INTEGER(int_8) :: nze
4692 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_ri_ext, bsizes_ri_ext_split, dist_ao_1, &
4693 dist_ao_2, dist_ri, dist_ri_ext, dummy_end, dummy_start, end_blocks, start_blocks
4694 INTEGER, DIMENSION(3) :: pcoord, pdims
4695 INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
4696 REAL(dp) :: occ
4697 TYPE(dbcsr_distribution_type) :: dbcsr_dist
4698 TYPE(dbcsr_type) :: dbcsr_template
4699 TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:, :) :: mat_der_metric
4700 TYPE(dbt_distribution_type) :: t_dist
4701 TYPE(dbt_pgrid_type) :: pgrid
4702 TYPE(dbt_type) :: t_3c_template
4703 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :, :) :: t_3c_der_ao_prv, t_3c_der_ri_prv
4704 TYPE(dft_control_type), POINTER :: dft_control
4705 TYPE(distribution_2d_type), POINTER :: dist_2d
4706 TYPE(distribution_3d_type) :: dist_3d
4707 TYPE(gto_basis_set_p_type), ALLOCATABLE, &
4708 DIMENSION(:), TARGET :: basis_set_ao, basis_set_ri
4709 TYPE(mp_cart_type) :: mp_comm_t3c
4710 TYPE(mp_para_env_type), POINTER :: para_env
4711 TYPE(neighbor_list_3c_type) :: nl_3c
4712 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
4713 POINTER :: nl_2c
4714 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
4715 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
4716
4717 NULLIFY (qs_kind_set, dist_2d, nl_2c, particle_set, dft_control, para_env, row_bsize, col_bsize)
4718
4719 CALL timeset(routinen, handle)
4720
4721 CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, distribution_2d=dist_2d, natom=natom, &
4722 particle_set=particle_set, dft_control=dft_control, para_env=para_env)
4723
4724 nimg = ri_data%nimg
4725 ncell_ri = ri_data%ncell_RI
4726
4727 ALLOCATE (basis_set_ri(nkind), basis_set_ao(nkind))
4728 CALL basis_set_list_setup(basis_set_ri, ri_data%ri_basis_type, qs_kind_set)
4729 CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_ri)
4730 CALL basis_set_list_setup(basis_set_ao, ri_data%orb_basis_type, qs_kind_set)
4731 CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_ao)
4732
4733 !Dealing with the 3c derivatives
4734 nthreads = 1
4735!$ nthreads = omp_get_num_threads()
4736 pdims = 0
4737 CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[max(1, natom/(ri_data%n_mem*nthreads)), natom, natom])
4738
4739 CALL create_3c_tensor(t_3c_template, dist_ao_1, dist_ao_2, dist_ri, pgrid, &
4740 ri_data%bsizes_AO, ri_data%bsizes_AO, ri_data%bsizes_RI, &
4741 map1=[1, 2], map2=[3], name="tmp")
4742 CALL dbt_destroy(t_3c_template)
4743
4744 !We stack the RI basis images. Keep consistent distribution
4745 nblks_ri = SIZE(ri_data%bsizes_RI_split)
4746 ALLOCATE (dist_ri_ext(natom*ncell_ri))
4747 ALLOCATE (bsizes_ri_ext(natom*ncell_ri))
4748 ALLOCATE (bsizes_ri_ext_split(nblks_ri*ncell_ri))
4749 DO i_ri = 1, ncell_ri
4750 bsizes_ri_ext((i_ri - 1)*natom + 1:i_ri*natom) = ri_data%bsizes_RI(:)
4751 dist_ri_ext((i_ri - 1)*natom + 1:i_ri*natom) = dist_ri(:)
4752 bsizes_ri_ext_split((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = ri_data%bsizes_RI_split(:)
4753 END DO
4754
4755 CALL dbt_distribution_new(t_dist, pgrid, dist_ao_1, dist_ao_2, dist_ri_ext)
4756 CALL dbt_create(t_3c_template, "KP_3c_der", t_dist, [1, 2], [3], &
4757 ri_data%bsizes_AO, ri_data%bsizes_AO, bsizes_ri_ext)
4758 CALL dbt_distribution_destroy(t_dist)
4759
4760 ALLOCATE (t_3c_der_ri_prv(nimg, 1, 3), t_3c_der_ao_prv(nimg, 1, 3))
4761 DO i_xyz = 1, 3
4762 DO i_img = 1, nimg
4763 CALL dbt_create(t_3c_template, t_3c_der_ri_prv(i_img, 1, i_xyz))
4764 CALL dbt_create(t_3c_template, t_3c_der_ao_prv(i_img, 1, i_xyz))
4765 END DO
4766 END DO
4767 CALL dbt_destroy(t_3c_template)
4768
4769 CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
4770 CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
4771 CALL distribution_3d_create(dist_3d, dist_ao_1, dist_ao_2, dist_ri, &
4772 nkind, particle_set, mp_comm_t3c, own_comm=.true.)
4773 DEALLOCATE (dist_ri, dist_ao_1, dist_ao_2)
4774 CALL dbt_pgrid_destroy(pgrid)
4775
4776 CALL build_3c_neighbor_lists(nl_3c, basis_set_ao, basis_set_ao, basis_set_ri, dist_3d, ri_data%ri_metric, &
4777 "HFX_3c_nl", qs_env, op_pos=2, sym_jk=.false., own_dist=.true.)
4778
4779 n_mem = ri_data%n_mem
4780 CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy_start, dummy_end, &
4781 start_blocks, end_blocks)
4782 DEALLOCATE (dummy_start, dummy_end)
4783
4784 CALL create_3c_tensor(t_3c_template, dist_ri, dist_ao_1, dist_ao_2, ri_data%pgrid_2, &
4785 bsizes_ri_ext_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
4786 map1=[1], map2=[2, 3], name="der (RI | AO AO)")
4787 DO i_xyz = 1, 3
4788 DO i_img = 1, nimg
4789 CALL dbt_create(t_3c_template, t_3c_der_ri(i_img, i_xyz))
4790 CALL dbt_create(t_3c_template, t_3c_der_ao(i_img, i_xyz))
4791 END DO
4792 END DO
4793
4794 DO i_mem = 1, n_mem
4795 CALL build_3c_derivatives(t_3c_der_ao_prv, t_3c_der_ri_prv, ri_data%filter_eps, qs_env, &
4796 nl_3c, basis_set_ao, basis_set_ao, basis_set_ri, &
4797 ri_data%ri_metric, der_eps=ri_data%eps_schwarz_forces, op_pos=2, &
4798 do_kpoints=.true., do_hfx_kpoints=.true., &
4799 bounds_k=[start_blocks(i_mem), end_blocks(i_mem)], &
4800 ri_range=ri_data%kp_RI_range, img_to_ri_cell=ri_data%img_to_RI_cell)
4801
4802 CALL timeset(routinen//"_cpy", handle2)
4803 !We go from (mu^0 sigma^i | P^j) to (P^i| sigma^j mu^0) and finally to (P^i| mu^0 sigma^j)
4804 DO i_img = 1, nimg
4805 DO i_xyz = 1, 3
4806 !derivative wrt to mu^0
4807 CALL get_tensor_occupancy(t_3c_der_ao_prv(i_img, 1, i_xyz), nze, occ)
4808 IF (nze > 0) THEN
4809 CALL dbt_copy(t_3c_der_ao_prv(i_img, 1, i_xyz), t_3c_template, &
4810 order=[3, 2, 1], move_data=.true.)
4811 CALL dbt_filter(t_3c_template, ri_data%filter_eps)
4812 CALL dbt_copy(t_3c_template, t_3c_der_ao(i_img, i_xyz), &
4813 order=[1, 3, 2], move_data=.true., summation=.true.)
4814 END IF
4815
4816 !derivative wrt to P^i
4817 CALL get_tensor_occupancy(t_3c_der_ri_prv(i_img, 1, i_xyz), nze, occ)
4818 IF (nze > 0) THEN
4819 CALL dbt_copy(t_3c_der_ri_prv(i_img, 1, i_xyz), t_3c_template, &
4820 order=[3, 2, 1], move_data=.true.)
4821 CALL dbt_filter(t_3c_template, ri_data%filter_eps)
4822 CALL dbt_copy(t_3c_template, t_3c_der_ri(i_img, i_xyz), &
4823 order=[1, 3, 2], move_data=.true., summation=.true.)
4824 END IF
4825 END DO
4826 END DO
4827 CALL timestop(handle2)
4828 END DO
4829 CALL dbt_destroy(t_3c_template)
4830
4831 CALL neighbor_list_3c_destroy(nl_3c)
4832 DO i_xyz = 1, 3
4833 DO i_img = 1, nimg
4834 CALL dbt_destroy(t_3c_der_ri_prv(i_img, 1, i_xyz))
4835 CALL dbt_destroy(t_3c_der_ao_prv(i_img, 1, i_xyz))
4836 END DO
4837 END DO
4838 DEALLOCATE (t_3c_der_ri_prv, t_3c_der_ao_prv)
4839
4840 !Reorder 3c derivatives to be consistant with ints
4841 CALL reorder_3c_derivs(t_3c_der_ri, ri_data)
4842 CALL reorder_3c_derivs(t_3c_der_ao, ri_data)
4843
4844 CALL timeset(routinen//"_2c", handle2)
4845 !The 2-center derivatives
4846 CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
4847 ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
4848 ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
4849 row_bsize(:) = ri_data%bsizes_RI
4850 col_bsize(:) = ri_data%bsizes_RI
4851
4852 CALL dbcsr_create(dbcsr_template, "2c_der", dbcsr_dist, dbcsr_type_no_symmetry, &
4853 row_bsize, col_bsize)
4854 CALL dbcsr_distribution_release(dbcsr_dist)
4855 DEALLOCATE (col_bsize, row_bsize)
4856
4857 ALLOCATE (mat_der_metric(nimg, 3))
4858 DO i_xyz = 1, 3
4859 DO i_img = 1, nimg
4860 CALL dbcsr_create(mat_der_pot(i_img, i_xyz), template=dbcsr_template)
4861 CALL dbcsr_create(mat_der_metric(i_img, i_xyz), template=dbcsr_template)
4862 END DO
4863 END DO
4864 CALL dbcsr_release(dbcsr_template)
4865
4866 !HFX potential derivatives
4867 CALL build_2c_neighbor_lists(nl_2c, basis_set_ri, basis_set_ri, ri_data%hfx_pot, &
4868 "HFX_2c_nl_pot", qs_env, sym_ij=.false., dist_2d=dist_2d)
4869 CALL build_2c_derivatives(mat_der_pot, ri_data%filter_eps_2c, qs_env, nl_2c, &
4870 basis_set_ri, basis_set_ri, ri_data%hfx_pot, do_kpoints=.true.)
4871 CALL release_neighbor_list_sets(nl_2c)
4872
4873 !RI metric derivatives
4874 CALL build_2c_neighbor_lists(nl_2c, basis_set_ri, basis_set_ri, ri_data%ri_metric, &
4875 "HFX_2c_nl_pot", qs_env, sym_ij=.false., dist_2d=dist_2d)
4876 CALL build_2c_derivatives(mat_der_metric, ri_data%filter_eps_2c, qs_env, nl_2c, &
4877 basis_set_ri, basis_set_ri, ri_data%ri_metric, do_kpoints=.true.)
4878 CALL release_neighbor_list_sets(nl_2c)
4879
4880 !Get into extended RI basis and tensor format
4881 DO i_xyz = 1, 3
4882 DO iatom = 1, natom
4883 CALL dbt_create(ri_data%t_2c_inv(1, 1), t_2c_der_metric(iatom, i_xyz))
4884 CALL get_ext_2c_int(t_2c_der_metric(iatom, i_xyz), mat_der_metric(:, i_xyz), &
4885 iatom, iatom, 1, ri_data, qs_env)
4886 END DO
4887 DO i_img = 1, nimg
4888 CALL dbcsr_release(mat_der_metric(i_img, i_xyz))
4889 END DO
4890 END DO
4891 CALL timestop(handle2)
4892
4893 CALL timestop(handle)
4894
4895 END SUBROUTINE precalc_derivatives
4896
4897! **************************************************************************************************
4898!> \brief Update the forces due to the derivative of the a 2-center product d/dR (Q|R)
4899!> \param force ...
4900!> \param t_2c_contr A precontracted tensor containing sum_abcdPS (ab|P)(P|Q)^-1 (R|S)^-1 (S|cd) P_ac P_bd
4901!> \param t_2c_der the d/dR (Q|R) tensor, in all 3 cartesian directions
4902!> \param atom_of_kind ...
4903!> \param kind_of ...
4904!> \param img in which periodic image the second center of the tensor is
4905!> \param pref ...
4906!> \param ri_data ...
4907!> \param qs_env ...
4908!> \param work_virial ...
4909!> \param cell ...
4910!> \param particle_set ...
4911!> \param diag ...
4912!> \param offdiag ...
4913!> \note IMPORTANT: t_tc_contr and t_2c_der need to have the same distribution. Atomic block sizes are
4914!> assumed
4915! **************************************************************************************************
4916 SUBROUTINE get_2c_der_force(force, t_2c_contr, t_2c_der, atom_of_kind, kind_of, img, pref, &
4917 ri_data, qs_env, work_virial, cell, particle_set, diag, offdiag)
4918
4919 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
4920 TYPE(dbt_type), INTENT(INOUT) :: t_2c_contr
4921 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_der
4922 INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of
4923 INTEGER, INTENT(IN) :: img
4924 REAL(dp), INTENT(IN) :: pref
4925 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4926 TYPE(qs_environment_type), POINTER :: qs_env
4927 REAL(dp), DIMENSION(3, 3), INTENT(INOUT), OPTIONAL :: work_virial
4928 TYPE(cell_type), OPTIONAL, POINTER :: cell
4929 TYPE(particle_type), DIMENSION(:), OPTIONAL, &
4930 POINTER :: particle_set
4931 LOGICAL, INTENT(IN), OPTIONAL :: diag, offdiag
4932
4933 CHARACTER(LEN=*), PARAMETER :: routinen = 'get_2c_der_force'
4934
4935 INTEGER :: handle, i_img, i_ri, i_xyz, iat, &
4936 iat_of_kind, ikind, j_img, j_ri, &
4937 j_xyz, jat, jat_of_kind, jkind, natom
4938 INTEGER, DIMENSION(2) :: ind
4939 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
4940 LOGICAL :: found, my_diag, my_offdiag, use_virial
4941 REAL(dp) :: new_force
4942 REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET :: contr_blk, der_blk
4943 REAL(dp), DIMENSION(3) :: scoord
4944 TYPE(dbt_iterator_type) :: iter
4945 TYPE(kpoint_type), POINTER :: kpoints
4946
4947 NULLIFY (kpoints, index_to_cell)
4948
4949 !Loop over the blocks of d/dR (Q|R), contract with the corresponding block of t_2c_contr and
4950 !update the relevant force
4951
4952 CALL timeset(routinen, handle)
4953
4954 use_virial = .false.
4955 IF (PRESENT(work_virial) .AND. PRESENT(cell) .AND. PRESENT(particle_set)) use_virial = .true.
4956
4957 my_diag = .false.
4958 IF (PRESENT(diag)) my_diag = diag
4959
4960 my_offdiag = .false.
4961 IF (PRESENT(diag)) my_offdiag = offdiag
4962
4963 CALL get_qs_env(qs_env, kpoints=kpoints, natom=natom)
4964 CALL get_kpoint_info(kpoints, index_to_cell=index_to_cell)
4965
4966!$OMP PARALLEL DEFAULT(NONE) &
4967!$OMP SHARED(t_2c_der,t_2c_contr,work_virial,force,use_virial,natom,index_to_cell,ri_data,img) &
4968!$OMP SHARED(pref,atom_of_kind,kind_of,particle_set,cell,my_diag,my_offdiag) &
4969!$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force,i_RI,i_img,j_RI,j_img) &
4970!$OMP PRIVATE(iat,jat,iat_of_kind,jat_of_kind,ikind,jkind,scoord)
4971 DO i_xyz = 1, 3
4972 CALL dbt_iterator_start(iter, t_2c_der(i_xyz))
4973 DO WHILE (dbt_iterator_blocks_left(iter))
4974 CALL dbt_iterator_next_block(iter, ind)
4975
4976 !Only take forecs due to block diagonal or block off-diagonal, depending on arguments
4977 IF ((my_diag .AND. .NOT. my_offdiag) .OR. (.NOT. my_diag .AND. my_offdiag)) THEN
4978 IF (my_diag .AND. (ind(1) .NE. ind(2))) cycle
4979 IF (my_offdiag .AND. (ind(1) == ind(2))) cycle
4980 END IF
4981
4982 CALL dbt_get_block(t_2c_der(i_xyz), ind, der_blk, found)
4983 cpassert(found)
4984 CALL dbt_get_block(t_2c_contr, ind, contr_blk, found)
4985
4986 IF (found) THEN
4987
4988 !an element of d/dR (Q|R) corresponds to 2 things because of translational invariance
4989 !(Q'| R) = - (Q| R'), once wrt the center on Q, and once on R
4990 new_force = pref*sum(der_blk(:, :)*contr_blk(:, :))
4991
4992 i_ri = (ind(1) - 1)/natom + 1
4993 i_img = ri_data%RI_cell_to_img(i_ri)
4994 iat = ind(1) - (i_ri - 1)*natom
4995 iat_of_kind = atom_of_kind(iat)
4996 ikind = kind_of(iat)
4997
4998 j_ri = (ind(2) - 1)/natom + 1
4999 j_img = ri_data%RI_cell_to_img(j_ri)
5000 jat = ind(2) - (j_ri - 1)*natom
5001 jat_of_kind = atom_of_kind(jat)
5002 jkind = kind_of(jat)
5003
5004 !Force on iatom (first center)
5005!$OMP ATOMIC
5006 force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
5007 + new_force
5008
5009 IF (use_virial) THEN
5010
5011 CALL real_to_scaled(scoord, pbc(particle_set(iat)%r, cell), cell)
5012 scoord(:) = scoord(:) + real(index_to_cell(:, i_img), dp)
5013
5014 DO j_xyz = 1, 3
5015!$OMP ATOMIC
5016 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5017 END DO
5018 END IF
5019
5020 !Force on jatom (second center)
5021!$OMP ATOMIC
5022 force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) &
5023 - new_force
5024
5025 IF (use_virial) THEN
5026
5027 CALL real_to_scaled(scoord, pbc(particle_set(jat)%r, cell), cell)
5028 scoord(:) = scoord(:) + real(index_to_cell(:, j_img) + index_to_cell(:, img), dp)
5029
5030 DO j_xyz = 1, 3
5031!$OMP ATOMIC
5032 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) - new_force*scoord(j_xyz)
5033 END DO
5034 END IF
5035
5036 DEALLOCATE (contr_blk)
5037 END IF
5038
5039 DEALLOCATE (der_blk)
5040 END DO !iter
5041 CALL dbt_iterator_stop(iter)
5042
5043 END DO !i_xyz
5044!$OMP END PARALLEL
5045 CALL timestop(handle)
5046
5047 END SUBROUTINE get_2c_der_force
5048
5049! **************************************************************************************************
5050!> \brief This routines calculates the force contribution from a trace over 3D tensors, i.e.
5051!> force = sum_ijk A_ijk B_ijk., the B tensor is (P^0| sigma^0 lambda^img), with P in the
5052!> extended RI basis. Note that all tensors are stacked along the 3rd dimension
5053!> \param force ...
5054!> \param t_3c_contr ...
5055!> \param t_3c_der_1 ...
5056!> \param t_3c_der_2 ...
5057!> \param atom_of_kind ...
5058!> \param kind_of ...
5059!> \param idx_to_at_RI ...
5060!> \param idx_to_at_AO ...
5061!> \param i_images ...
5062!> \param lb_img ...
5063!> \param pref ...
5064!> \param ri_data ...
5065!> \param qs_env ...
5066!> \param work_virial ...
5067!> \param cell ...
5068!> \param particle_set ...
5069! **************************************************************************************************
5070 SUBROUTINE get_force_from_3c_trace(force, t_3c_contr, t_3c_der_1, t_3c_der_2, atom_of_kind, kind_of, &
5071 idx_to_at_RI, idx_to_at_AO, i_images, lb_img, pref, &
5072 ri_data, qs_env, work_virial, cell, particle_set)
5073
5074 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
5075 TYPE(dbt_type), INTENT(INOUT) :: t_3c_contr
5076 TYPE(dbt_type), DIMENSION(3), INTENT(INOUT) :: t_3c_der_1, t_3c_der_2
5077 INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at_ri, &
5078 idx_to_at_ao, i_images
5079 INTEGER, INTENT(IN) :: lb_img
5080 REAL(dp), INTENT(IN) :: pref
5081 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
5082 TYPE(qs_environment_type), POINTER :: qs_env
5083 REAL(dp), DIMENSION(3, 3), INTENT(INOUT), OPTIONAL :: work_virial
5084 TYPE(cell_type), OPTIONAL, POINTER :: cell
5085 TYPE(particle_type), DIMENSION(:), OPTIONAL, &
5086 POINTER :: particle_set
5087
5088 CHARACTER(LEN=*), PARAMETER :: routinen = 'get_force_from_3c_trace'
5089
5090 INTEGER :: handle, i_ri, i_xyz, iat, iat_of_kind, idx, ikind, j_xyz, jat, jat_of_kind, &
5091 jkind, kat, kat_of_kind, kkind, nblks_ao, nblks_ri, ri_img
5092 INTEGER, DIMENSION(3) :: ind
5093 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
5094 LOGICAL :: found, found_1, found_2, use_virial
5095 REAL(dp) :: new_force
5096 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), TARGET :: contr_blk, der_blk_1, der_blk_2, &
5097 der_blk_3
5098 REAL(dp), DIMENSION(3) :: scoord
5099 TYPE(dbt_iterator_type) :: iter
5100 TYPE(kpoint_type), POINTER :: kpoints
5101
5102 NULLIFY (kpoints, index_to_cell)
5103
5104 CALL timeset(routinen, handle)
5105
5106 CALL get_qs_env(qs_env, kpoints=kpoints)
5107 CALL get_kpoint_info(kpoints, index_to_cell=index_to_cell)
5108
5109 nblks_ri = SIZE(ri_data%bsizes_RI_split)
5110 nblks_ao = SIZE(ri_data%bsizes_AO_split)
5111
5112 use_virial = .false.
5113 IF (PRESENT(work_virial) .AND. PRESENT(cell) .AND. PRESENT(particle_set)) use_virial = .true.
5114
5115!$OMP PARALLEL DEFAULT(NONE) &
5116!$OMP SHARED(t_3c_der_1, t_3c_der_2,t_3c_contr,work_virial,force,use_virial,index_to_cell,i_images,lb_img) &
5117!$OMP SHARED(pref,idx_to_at_AO,atom_of_kind,kind_of,particle_set,cell,idx_to_at_RI,ri_data,nblks_RI,nblks_AO) &
5118!$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk_1,contr_blk,found,new_force,iat,iat_of_kind,ikind,scoord) &
5119!$OMP PRIVATE(jat,kat,jat_of_kind,kat_of_kind,jkind,kkind,i_RI,RI_img,der_blk_2,der_blk_3,found_1,found_2,idx)
5120 CALL dbt_iterator_start(iter, t_3c_contr)
5121 DO WHILE (dbt_iterator_blocks_left(iter))
5122 CALL dbt_iterator_next_block(iter, ind)
5123
5124 CALL dbt_get_block(t_3c_contr, ind, contr_blk, found)
5125 IF (found) THEN
5126
5127 DO i_xyz = 1, 3
5128 CALL dbt_get_block(t_3c_der_1(i_xyz), ind, der_blk_1, found_1)
5129 IF (.NOT. found_1) THEN
5130 DEALLOCATE (der_blk_1)
5131 ALLOCATE (der_blk_1(SIZE(contr_blk, 1), SIZE(contr_blk, 2), SIZE(contr_blk, 3)))
5132 der_blk_1(:, :, :) = 0.0_dp
5133 END IF
5134 CALL dbt_get_block(t_3c_der_2(i_xyz), ind, der_blk_2, found_2)
5135 IF (.NOT. found_2) THEN
5136 DEALLOCATE (der_blk_2)
5137 ALLOCATE (der_blk_2(SIZE(contr_blk, 1), SIZE(contr_blk, 2), SIZE(contr_blk, 3)))
5138 der_blk_2(:, :, :) = 0.0_dp
5139 END IF
5140
5141 ALLOCATE (der_blk_3(SIZE(contr_blk, 1), SIZE(contr_blk, 2), SIZE(contr_blk, 3)))
5142 der_blk_3(:, :, :) = -(der_blk_1(:, :, :) + der_blk_2(:, :, :))
5143
5144 !We assume the tensors are in the format (P^0| sigma^0 mu^a+c-b), with P a member of the
5145 !extended RI basis set
5146
5147 !Force for the first center (RI extended basis, zero cell)
5148 new_force = pref*sum(der_blk_1(:, :, :)*contr_blk(:, :, :))
5149
5150 i_ri = (ind(1) - 1)/nblks_ri + 1
5151 ri_img = ri_data%RI_cell_to_img(i_ri)
5152 iat = idx_to_at_ri(ind(1) - (i_ri - 1)*nblks_ri)
5153 iat_of_kind = atom_of_kind(iat)
5154 ikind = kind_of(iat)
5155
5156!$OMP ATOMIC
5157 force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
5158 + new_force
5159
5160 IF (use_virial) THEN
5161
5162 CALL real_to_scaled(scoord, pbc(particle_set(iat)%r, cell), cell)
5163 scoord(:) = scoord(:) + real(index_to_cell(:, ri_img), dp)
5164
5165 DO j_xyz = 1, 3
5166!$OMP ATOMIC
5167 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5168 END DO
5169 END IF
5170
5171 !Force with respect to the second center (AO basis, zero cell)
5172 new_force = pref*sum(der_blk_2(:, :, :)*contr_blk(:, :, :))
5173 jat = idx_to_at_ao(ind(2))
5174 jat_of_kind = atom_of_kind(jat)
5175 jkind = kind_of(jat)
5176
5177!$OMP ATOMIC
5178 force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) &
5179 + new_force
5180
5181 IF (use_virial) THEN
5182
5183 CALL real_to_scaled(scoord, pbc(particle_set(jat)%r, cell), cell)
5184
5185 DO j_xyz = 1, 3
5186!$OMP ATOMIC
5187 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5188 END DO
5189 END IF
5190
5191 !Force with respect to the third center (AO basis, apc_img - b_img)
5192 !Note: tensors are stacked along the 3rd direction
5193 new_force = pref*sum(der_blk_3(:, :, :)*contr_blk(:, :, :))
5194 idx = (ind(3) - 1)/nblks_ao + 1
5195 kat = idx_to_at_ao(ind(3) - (idx - 1)*nblks_ao)
5196 kat_of_kind = atom_of_kind(kat)
5197 kkind = kind_of(kat)
5198
5199!$OMP ATOMIC
5200 force(kkind)%fock_4c(i_xyz, kat_of_kind) = force(kkind)%fock_4c(i_xyz, kat_of_kind) &
5201 + new_force
5202
5203 IF (use_virial) THEN
5204 CALL real_to_scaled(scoord, pbc(particle_set(kat)%r, cell), cell)
5205 scoord(:) = scoord(:) + real(index_to_cell(:, i_images(lb_img - 1 + idx)), dp)
5206
5207 DO j_xyz = 1, 3
5208!$OMP ATOMIC
5209 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5210 END DO
5211 END IF
5212
5213 DEALLOCATE (der_blk_1, der_blk_2, der_blk_3)
5214 END DO !i_xyz
5215 DEALLOCATE (contr_blk)
5216 END IF !found
5217 END DO !iter
5218 CALL dbt_iterator_stop(iter)
5219!$OMP END PARALLEL
5220 CALL timestop(handle)
5221
5222 END SUBROUTINE get_force_from_3c_trace
5223
5224END MODULE
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static GRID_HOST_DEVICE double fac(const int i)
Factorial function, e.g. fac(5) = 5! = 120.
Definition grid_common.h:51
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
Types and set/get functions for auxiliary density matrix methods.
Definition admm_types.F:15
subroutine, public get_admm_env(admm_env, mo_derivs_aux_fit, mos_aux_fit, sab_aux_fit, sab_aux_fit_asymm, sab_aux_fit_vs_orb, matrix_s_aux_fit, matrix_s_aux_fit_kp, matrix_s_aux_fit_vs_orb, matrix_s_aux_fit_vs_orb_kp, task_list_aux_fit, matrix_ks_aux_fit, matrix_ks_aux_fit_kp, matrix_ks_aux_fit_im, matrix_ks_aux_fit_dft, matrix_ks_aux_fit_hfx, matrix_ks_aux_fit_dft_kp, matrix_ks_aux_fit_hfx_kp, rho_aux_fit, rho_aux_fit_buffer, admm_dm)
Get routine for the ADMM env.
Definition admm_types.F:593
Define the atomic kind types and their sub types.
subroutine, public get_atomic_kind_set(atomic_kind_set, atom_of_kind, kind_of, natom_of_kind, maxatom, natom, nshell, fist_potential_present, shell_present, shell_adiabatic, shell_check_distance, damping_present)
Get attributes of an atomic kind set.
subroutine, public get_gto_basis_set(gto_basis_set, name, aliases, norm_type, kind_radius, ncgf, nset, nsgf, cgf_symbol, sgf_symbol, norm_cgf, set_radius, lmax, lmin, lx, ly, lz, m, ncgf_set, npgf, nsgf_set, nshell, cphi, pgf_radius, sphi, scon, zet, first_cgf, first_sgf, l, last_cgf, last_sgf, n, gcc, maxco, maxl, maxpgf, maxsgf_set, maxshell, maxso, nco_sum, npgf_sum, nshell_sum, maxder, short_kind_radius, npgf_seg_sum)
...
collects all references to literature in CP2K as new algorithms / method are included from literature...
integer, save, public bussy2024
Handles all functions related to the CELL.
Definition cell_types.F:15
subroutine, public scaled_to_real(r, s, cell)
Transform scaled cell coordinates real coordinates. r=h*s.
Definition cell_types.F:516
subroutine, public real_to_scaled(s, r, cell)
Transform real to scaled cell coordinates. s=h_inv*r.
Definition cell_types.F:486
various utilities that regard array of different kinds: output, allocation,... maybe it is not a good...
methods related to the blacs parallel environment
subroutine, public cp_blacs_env_release(blacs_env)
releases the given blacs_env
subroutine, public cp_blacs_env_create(blacs_env, para_env, blacs_grid_layout, blacs_repeatable, row_major, grid_2d)
allocates and initializes a type that represent a blacs context
Defines control structures, which contain the parameters and the settings for the DFT-based calculati...
subroutine, public dbcsr_distribution_release(dist)
...
subroutine, public dbcsr_distribution_new(dist, template, group, pgrid, row_dist, col_dist, reuse_arrays)
...
subroutine, public dbcsr_iterator_next_block(iterator, row, column, block, block_number_argument_has_been_removed, row_size, col_size, row_offset, col_offset)
...
logical function, public dbcsr_iterator_blocks_left(iterator)
...
subroutine, public dbcsr_iterator_stop(iterator)
...
subroutine, public dbcsr_copy(matrix_b, matrix_a, name, keep_sparsity, keep_imaginary)
...
subroutine, public dbcsr_get_block_p(matrix, row, col, block, found, row_size, col_size)
...
subroutine, public dbcsr_get_info(matrix, nblkrows_total, nblkcols_total, nfullrows_total, nfullcols_total, nblkrows_local, nblkcols_local, nfullrows_local, nfullcols_local, my_prow, my_pcol, local_rows, local_cols, proc_row_dist, proc_col_dist, row_blk_size, col_blk_size, row_blk_offset, col_blk_offset, distribution, name, matrix_type, group)
...
subroutine, public dbcsr_filter(matrix, eps)
...
subroutine, public dbcsr_finalize(matrix)
...
subroutine, public dbcsr_iterator_start(iterator, matrix, shared, dynamic, dynamic_byrows)
...
subroutine, public dbcsr_release(matrix)
...
subroutine, public dbcsr_clear(matrix)
...
subroutine, public dbcsr_put_block(matrix, row, col, block, summation)
...
subroutine, public dbcsr_add(matrix_a, matrix_b, alpha_scalar, beta_scalar)
...
subroutine, public dbcsr_distribution_get(dist, row_dist, col_dist, nrows, ncols, has_threads, group, mynode, numnodes, nprows, npcols, myprow, mypcol, pgrid, subgroups_defined, prow_group, pcol_group)
...
Interface to (sca)lapack for the Cholesky based procedures.
subroutine, public cp_dbcsr_cholesky_decompose(matrix, n, para_env, blacs_env)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
subroutine, public cp_dbcsr_cholesky_invert(matrix, n, para_env, blacs_env, uplo_to_full)
used to replace the cholesky decomposition by the inverse
subroutine, public dbcsr_dot(matrix_a, matrix_b, trace)
Computes the dot product of two matrices, also known as the trace of their matrix product.
Interface to (sca)lapack for the Cholesky based procedures.
subroutine, public cp_dbcsr_power(matrix, exponent, threshold, n_dependent, para_env, blacs_env, verbose, eigenvectors, eigenvalues)
...
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_dist2d_to_dist(dist2d, dist)
Creates a DBCSR distribution from a distribution_2d.
This is the start of a dbt_api, all publically needed functions are exported here....
Definition dbt_api.F:17
stores a mapping of 2D info (e.g. matrix) on a 2D processor distribution (i.e. blacs grid) where cpus...
subroutine, public distribution_2d_create(distribution_2d, blacs_env, local_rows_ptr, n_local_rows, local_cols_ptr, row_distribution_ptr, col_distribution_ptr, n_local_cols, n_row_distribution, n_col_distribution)
initializes the distribution_2d
subroutine, public distribution_2d_release(distribution_2d)
...
RI-methods for HFX and K-points. \auhtor Augustin Bussy (01.2023)
Definition hfx_ri_kp.F:13
subroutine, public hfx_ri_update_forces_kp(qs_env, ri_data, nspins, hf_fraction, rho_ao, use_virial)
Update the K-points RI-HFX forces.
Definition hfx_ri_kp.F:777
subroutine, public hfx_ri_update_ks_kp(qs_env, ri_data, ks_matrix, ehfx, rho_ao, geometry_did_change, nspins, hf_fraction)
Update the KS matrices for each real-space image.
Definition hfx_ri_kp.F:423
RI-methods for HFX.
Definition hfx_ri.F:12
subroutine, public get_force_from_3c_trace(force, t_3c_contr, t_3c_der, atom_of_kind, kind_of, idx_to_at, pref, do_mp2, deriv_dim)
This routines calculates the force contribution from a trace over 3D tensors, i.e....
Definition hfx_ri.F:3363
subroutine, public get_idx_to_atom(idx_to_at, bsizes_split, bsizes_orig)
a small utility function that returns the atom corresponding to a block of a split tensor
Definition hfx_ri.F:4109
subroutine, public hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_int_ri, t_2c_int_pot, t_3c_int, do_kpoints)
Calculate 2-center and 3-center integrals.
Definition hfx_ri.F:444
subroutine, public get_2c_der_force(force, t_2c_contr, t_2c_der, atom_of_kind, kind_of, idx_to_at, pref, do_mp2, do_ovlp)
Update the forces due to the derivative of the a 2-center product d/dR (Q|R)
Definition hfx_ri.F:3449
Types and set/get functions for HFX.
Definition hfx_types.F:15
collects all constants needed in input so that they can be used without circular dependencies
integer, parameter, public hfx_ri_do_2c_cholesky
integer, parameter, public hfx_ri_do_2c_diag
integer, parameter, public hfx_ri_do_2c_iter
integer, parameter, public do_potential_short
function that builds the hartree fock exchange section of the input
integer, parameter, public ri_pmat
objects that represent the structure of input sections and the data contained in an input section
subroutine, public section_vals_val_set(section_vals, keyword_name, i_rep_section, i_rep_val, val, l_val, i_val, r_val, c_val, l_vals_ptr, i_vals_ptr, r_vals_ptr, c_vals_ptr)
sets the requested value
recursive type(section_vals_type) function, pointer, public section_vals_get_subs_vals(section_vals, subsection_name, i_rep_section, can_return_null)
returns the values of the requested subsection
subroutine, public section_vals_val_get(section_vals, keyword_name, i_rep_section, i_rep_val, n_rep_val, val, l_val, i_val, r_val, c_val, l_vals, i_vals, r_vals, c_vals, explicit)
returns the requested value
Routines useful for iterative matrix calculations.
subroutine, public invert_hotelling(matrix_inverse, matrix, threshold, use_inv_as_guess, norm_convergence, filter_eps, accelerator_order, max_iter_lanczos, eps_lanczos, silent)
invert a symmetric positive definite matrix by Hotelling's method explicit symmetrization makes this ...
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public int_8
Definition kinds.F:54
integer, parameter, public dp
Definition kinds.F:34
Types and basic routines needed for a kpoint calculation.
subroutine, public get_kpoint_info(kpoint, kp_scheme, nkp_grid, kp_shift, symmetry, verbose, full_grid, use_real_wfn, eps_geo, parallel_group_size, kp_range, nkp, xkp, wkp, para_env, blacs_env_all, para_env_kp, para_env_inter_kp, blacs_env, kp_env, kp_aux_env, mpools, iogrp, nkp_groups, kp_dist, cell_to_index, index_to_cell, sab_nl, sab_nl_nosym)
Retrieve information from a kpoint environment.
2- and 3-center electron repulsion integral routines based on libint2 Currently available operators: ...
real(kind=dp), parameter, public cutoff_screen_factor
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition machine.F:130
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition machine.F:147
Collection of simple mathematical functions and subroutines.
Definition mathlib.F:15
subroutine, public diag(n, a, d, v)
Diagonalize matrix a. The eigenvalues are returned in vector d and the eigenvectors are returned in m...
Definition mathlib.F:1496
subroutine, public erfc_cutoff(eps, omg, r_cutoff)
compute a truncation radius for the shortrange operator
Definition mathlib.F:1686
Interface to the message passing library MPI.
Define methods related to particle_type.
subroutine, public get_particle_set(particle_set, qs_kind_set, first_sgf, last_sgf, nsgf, nmao, basis)
Get the components of a particle set.
Define the data structure for the particle information.
Definition of physical constants:
Definition physcon.F:68
real(kind=dp), parameter, public angstrom
Definition physcon.F:144
subroutine, public get_qs_env(qs_env, atomic_kind_set, qs_kind_set, cell, super_cell, cell_ref, use_ref_cell, kpoints, dft_control, mos, sab_orb, sab_all, qmmm, qmmm_periodic, sac_ae, sac_ppl, sac_lri, sap_ppnl, sab_vdw, sab_scp, sap_oce, sab_lrc, sab_se, sab_xtbe, sab_tbe, sab_core, sab_xb, sab_xtb_pp, sab_xtb_nonbond, sab_almo, sab_kp, sab_kp_nosym, particle_set, energy, force, matrix_h, matrix_h_im, matrix_ks, matrix_ks_im, matrix_vxc, run_rtp, rtp, matrix_h_kp, matrix_h_im_kp, matrix_ks_kp, matrix_ks_im_kp, matrix_vxc_kp, kinetic_kp, matrix_s_kp, matrix_w_kp, matrix_s_ri_aux_kp, matrix_s, matrix_s_ri_aux, matrix_w, matrix_p_mp2, matrix_p_mp2_admm, rho, rho_xc, pw_env, ewald_env, ewald_pw, active_space, mpools, input, para_env, blacs_env, scf_control, rel_control, kinetic, qs_charges, vppl, rho_core, rho_nlcc, rho_nlcc_g, ks_env, ks_qmmm_env, wf_history, scf_env, local_particles, local_molecules, distribution_2d, dbcsr_dist, molecule_kind_set, molecule_set, subsys, cp_subsys, oce, local_rho_set, rho_atom_set, task_list, task_list_soft, rho0_atom_set, rho0_mpole, rhoz_set, ecoul_1c, rho0_s_rs, rho0_s_gs, do_kpoints, has_unit_metric, requires_mo_derivs, mo_derivs, mo_loc_history, nkind, natom, nelectron_total, nelectron_spin, efield, neighbor_list_id, linres_control, xas_env, virial, cp_ddapc_env, cp_ddapc_ewald, outer_scf_history, outer_scf_ihistory, x_data, et_coupling, dftb_potential, results, se_taper, se_store_int_env, se_nddo_mpole, se_nonbond_env, admm_env, lri_env, lri_density, exstate_env, ec_env, harris_env, dispersion_env, gcp_env, vee, rho_external, external_vxc, mask, mp2_env, bs_env, kg_env, wanniercentres, atprop, ls_scf_env, do_transport, transport_env, v_hartree_rspace, s_mstruct_changed, rho_changed, potential_changed, forces_up_to_date, mscfg_env, almo_scf_env, gradient_history, variable_history, embed_pot, spin_embed_pot, polar_env, mos_last_converged, eeq, rhs)
Get the QUICKSTEP environment.
Some utility functions for the calculation of integrals.
subroutine, public basis_set_list_setup(basis_set_list, basis_type, qs_kind_set)
Set up an easy accessible list of the basis sets for all kinds.
Calculate the interaction radii for the operator matrix calculation.
subroutine, public init_interaction_radii_orb_basis(orb_basis_set, eps_pgf_orb, eps_pgf_short)
...
Define the quickstep kind type and their sub types.
Define the neighbor list data types and the corresponding functionality.
subroutine, public release_neighbor_list_sets(nlists)
releases an array of neighbor_list_sets
subroutine, public neighbor_list_iterator_create(iterator_set, nl, search, nthread)
Neighbor list iterator functions.
subroutine, public neighbor_list_iterator_release(iterator_set)
...
integer function, public neighbor_list_iterate(iterator_set, mepos)
...
subroutine, public get_iterator_info(iterator_set, mepos, ikind, jkind, nkind, ilist, nlist, inode, nnode, iatom, jatom, r, cell)
...
module that contains the definitions of the scf types
Utility methods to build 3-center integral tensors of various types.
subroutine, public distribution_3d_create(dist_3d, dist1, dist2, dist3, nkind, particle_set, mp_comm_3d, own_comm)
Create a 3d distribution.
subroutine, public create_2c_tensor(t2c, dist_1, dist_2, pgrid, sizes_1, sizes_2, order, name)
...
subroutine, public create_tensor_batches(sizes, nbatches, starts_array, ends_array, starts_array_block, ends_array_block)
...
subroutine, public create_3c_tensor(t3c, dist_1, dist_2, dist_3, pgrid, sizes_1, sizes_2, sizes_3, map1, map2, name)
...
Utility methods to build 3-center integral tensors of various types.
Definition qs_tensors.F:11
subroutine, public build_3c_derivatives(t3c_der_i, t3c_der_k, filter_eps, qs_env, nl_3c, basis_i, basis_j, basis_k, potential_parameter, der_eps, op_pos, do_kpoints, do_hfx_kpoints, bounds_i, bounds_j, bounds_k, ri_range, img_to_ri_cell)
Build 3-center derivative tensors.
Definition qs_tensors.F:926
subroutine, public build_2c_neighbor_lists(ij_list, basis_i, basis_j, potential_parameter, name, qs_env, sym_ij, molecular, dist_2d, pot_to_rad)
Build 2-center neighborlists adapted to different operators This mainly wraps build_neighbor_lists fo...
Definition qs_tensors.F:143
recursive integer function, public neighbor_list_3c_iterate(iterator)
Iterate 3c-nl iterator.
Definition qs_tensors.F:467
subroutine, public neighbor_list_3c_iterator_destroy(iterator)
Destroy 3c-nl iterator.
Definition qs_tensors.F:445
subroutine, public neighbor_list_3c_destroy(ijk_list)
Destroy 3c neighborlist.
Definition qs_tensors.F:383
subroutine, public build_2c_derivatives(t2c_der, filter_eps, qs_env, nl_2c, basis_i, basis_j, potential_parameter, do_kpoints)
Calculates the derivatives of 2-center integrals, wrt to the first center.
subroutine, public get_tensor_occupancy(tensor, nze, occ)
...
subroutine, public build_3c_neighbor_lists(ijk_list, basis_i, basis_j, basis_k, dist_3d, potential_parameter, name, qs_env, sym_ij, sym_jk, sym_ik, molecular, op_pos, own_dist)
Build a 3-center neighbor list.
Definition qs_tensors.F:282
subroutine, public neighbor_list_3c_iterator_create(iterator, ijk_nl)
Create a 3-center neighborlist iterator.
Definition qs_tensors.F:400
subroutine, public get_3c_iterator_info(iterator, ikind, jkind, kkind, nkind, iatom, jatom, katom, rij, rjk, rik, cell_j, cell_k)
Get info of current iteration.
Definition qs_tensors.F:564
All kind of helpful little routines.
Definition util.F:14
pure integer function, dimension(2), public get_limit(m, n, me)
divide m entries into n parts, return size of part me
Definition util.F:333
Provides all information about an atomic kind.
Type defining parameters related to the simulation cell.
Definition cell_types.F:55
represent a pointer to a 1d array
represent a pointer to a 2d array
represent a pointer to a 3d array
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
distributes pairs on a 2d grid of processors
Contains information about kpoints.
stores all the informations relevant to an mpi environment
Provides all information about a quickstep kind.