(git:15c1bfc)
Loading...
Searching...
No Matches
hfx_ri_kp.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief RI-methods for HFX and K-points.
10!> \auhtor Augustin Bussy (01.2023)
11! **************************************************************************************************
12
14 USE admm_types, ONLY: get_admm_env
19 USE bibliography, ONLY: bussy2024,&
20 cite_reference
21 USE cell_types, ONLY: cell_type,&
22 pbc,&
32 USE cp_dbcsr_api, ONLY: &
37 dbcsr_p_type, dbcsr_put_block, dbcsr_release, dbcsr_type, dbcsr_type_no_symmetry, &
38 dbcsr_type_symmetric
45 USE dbt_api, ONLY: &
46 dbt_batched_contract_finalize, dbt_batched_contract_init, dbt_clear, dbt_contract, &
47 dbt_copy, dbt_copy_matrix_to_tensor, dbt_copy_tensor_to_matrix, dbt_create, dbt_destroy, &
48 dbt_distribution_destroy, dbt_distribution_new, dbt_distribution_type, dbt_filter, &
49 dbt_finalize, dbt_get_block, dbt_get_info, dbt_get_stored_coordinates, &
50 dbt_iterator_blocks_left, dbt_iterator_next_block, dbt_iterator_start, dbt_iterator_stop, &
51 dbt_iterator_type, dbt_mp_environ_pgrid, dbt_pgrid_create, dbt_pgrid_destroy, &
52 dbt_pgrid_type, dbt_put_block, dbt_scale, dbt_type
55 USE hfx_ri, ONLY: get_idx_to_atom,&
57 USE hfx_types, ONLY: hfx_ri_type
62 USE input_cp2k_hfx, ONLY: ri_pmat
68 USE kinds, ONLY: default_string_length,&
69 dp,&
70 int_8
71 USE kpoint_types, ONLY: get_kpoint_info,&
74 USE machine, ONLY: m_flush,&
75 m_memory,&
77 USE mathlib, ONLY: erfc_cutoff
78 USE message_passing, ONLY: mp_cart_type,&
84 USE physcon, ONLY: angstrom
99 USE qs_tensors, ONLY: &
112 USE util, ONLY: get_limit
113 USE virial_types, ONLY: virial_type
114#include "./base/base_uses.f90"
115
116!$ USE OMP_LIB, ONLY: omp_get_num_threads
117
118 IMPLICIT NONE
119 PRIVATE
120
122
123 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_ri_kp'
124CONTAINS
125
126! **************************************************************************************************
127!> \brief I_1nitialize the ri_data for K-point. For now, we take the normal, usual existing ri_data
128!> and we adapt it to our needs
129!> \param dbcsr_template ...
130!> \param ri_data ...
131!> \param qs_env ...
132! **************************************************************************************************
133 SUBROUTINE adapt_ri_data_to_kp(dbcsr_template, ri_data, qs_env)
134 TYPE(dbcsr_type), INTENT(INOUT) :: dbcsr_template
135 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
136 TYPE(qs_environment_type), POINTER :: qs_env
137
138 INTEGER :: i_img, i_RI, i_spin, iatom, natom, &
139 nblks_RI, nimg, nkind, nspins
140 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_RI_ext, dist1, dist2, dist3
141 TYPE(dft_control_type), POINTER :: dft_control
142 TYPE(mp_para_env_type), POINTER :: para_env
143
144 NULLIFY (dft_control, para_env)
145
146 !The main thing that we need to do is to allocate more space for the integrals, such that there
147 !is room for each periodic image. Note that we only go in 1D, i.e. we store (mu^0 sigma^a|P^0),
148 !and (P^0|Q^a) => the RI basis is always in the main cell.
149
150 !Get kpoint info
151 CALL get_qs_env(qs_env, dft_control=dft_control, natom=natom, para_env=para_env, nkind=nkind)
152 nimg = ri_data%nimg
153
154 !Along the RI direction we have basis elements spread accross ncell_RI images.
155 nblks_ri = SIZE(ri_data%bsizes_RI_split)
156 ALLOCATE (bsizes_ri_ext(nblks_ri*ri_data%ncell_RI))
157 DO i_ri = 1, ri_data%ncell_RI
158 bsizes_ri_ext((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = ri_data%bsizes_RI_split(:)
159 END DO
160
161 ALLOCATE (ri_data%t_3c_int_ctr_1(1, nimg))
162 CALL create_3c_tensor(ri_data%t_3c_int_ctr_1(1, 1), dist1, dist2, dist3, &
163 ri_data%pgrid_1, ri_data%bsizes_AO_split, bsizes_ri_ext, &
164 ri_data%bsizes_AO_split, [1, 2], [3], name="(AO RI | AO)")
165
166 DO i_img = 2, nimg
167 CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), ri_data%t_3c_int_ctr_1(1, i_img))
168 END DO
169 DEALLOCATE (dist1, dist2, dist3)
170
171 ALLOCATE (ri_data%t_3c_int_ctr_2(1, 1))
172 CALL create_3c_tensor(ri_data%t_3c_int_ctr_2(1, 1), dist1, dist2, dist3, &
173 ri_data%pgrid_1, ri_data%bsizes_AO_split, bsizes_ri_ext, &
174 ri_data%bsizes_AO_split, [1], [2, 3], name="(AO RI | AO)")
175 DEALLOCATE (dist1, dist2, dist3)
176
177 !We use full block sizes for the 2c quantities
178 DEALLOCATE (bsizes_ri_ext)
179 nblks_ri = SIZE(ri_data%bsizes_RI)
180 ALLOCATE (bsizes_ri_ext(nblks_ri*ri_data%ncell_RI))
181 DO i_ri = 1, ri_data%ncell_RI
182 bsizes_ri_ext((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = ri_data%bsizes_RI(:)
183 END DO
184
185 ALLOCATE (ri_data%t_2c_inv(1, natom), ri_data%t_2c_int(1, natom), ri_data%t_2c_pot(1, natom))
186 CALL create_2c_tensor(ri_data%t_2c_inv(1, 1), dist1, dist2, ri_data%pgrid_2d, &
187 bsizes_ri_ext, bsizes_ri_ext, &
188 name="(RI | RI)")
189 DEALLOCATE (dist1, dist2)
190 CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_int(1, 1))
191 CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1))
192 DO iatom = 2, natom
193 CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_inv(1, iatom))
194 CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_int(1, iatom))
195 CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, iatom))
196 END DO
197
198 ALLOCATE (ri_data%kp_cost(natom, natom, nimg))
199 ri_data%kp_cost = 0.0_dp
200
201 !We store the density and KS matrix in tensor format
202 nspins = dft_control%nspins
203 ALLOCATE (ri_data%rho_ao_t(nspins, nimg), ri_data%ks_t(nspins, nimg))
204 CALL create_2c_tensor(ri_data%rho_ao_t(1, 1), dist1, dist2, ri_data%pgrid_2d, &
205 ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
206 name="(AO | AO)")
207 DEALLOCATE (dist1, dist2)
208
209 CALL dbt_create(dbcsr_template, ri_data%ks_t(1, 1))
210
211 IF (nspins == 2) THEN
212 CALL dbt_create(ri_data%rho_ao_t(1, 1), ri_data%rho_ao_t(2, 1))
213 CALL dbt_create(ri_data%ks_t(1, 1), ri_data%ks_t(2, 1))
214 END IF
215 DO i_img = 2, nimg
216 DO i_spin = 1, nspins
217 CALL dbt_create(ri_data%rho_ao_t(1, 1), ri_data%rho_ao_t(i_spin, i_img))
218 CALL dbt_create(ri_data%ks_t(1, 1), ri_data%ks_t(i_spin, i_img))
219 END DO
220 END DO
221
222 END SUBROUTINE adapt_ri_data_to_kp
223
224! **************************************************************************************************
225!> \brief The pre-scf steps for RI-HFX k-points calculation. Namely the calculation of the integrals
226!> \param dbcsr_template ...
227!> \param ri_data ...
228!> \param qs_env ...
229! **************************************************************************************************
230 SUBROUTINE hfx_ri_pre_scf_kp(dbcsr_template, ri_data, qs_env)
231 TYPE(dbcsr_type), INTENT(INOUT) :: dbcsr_template
232 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
233 TYPE(qs_environment_type), POINTER :: qs_env
234
235 CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_kp'
236
237 INTEGER :: handle, i_img, iatom, natom, nimg, nkind
238 TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: t_2c_op_pot, t_2c_op_RI
239 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int
240 TYPE(dft_control_type), POINTER :: dft_control
241
242 NULLIFY (dft_control)
243
244 CALL timeset(routinen, handle)
245
246 CALL get_qs_env(qs_env, dft_control=dft_control, natom=natom, nkind=nkind)
247
248 CALL cleanup_kp(ri_data)
249
250 !We do all the checks on what we allow in this initial implementation
251 IF (ri_data%flavor .NE. ri_pmat) cpabort("K-points RI-HFX only with RHO flavor")
252 IF (ri_data%same_op) ri_data%same_op = .false. !force the full calculation with RI metric
253 IF (abs(ri_data%eps_pgf_orb - dft_control%qs_control%eps_pgf_orb) > 1.0e-16_dp) &
254 cpabort("RI%EPS_PGF_ORB and QS%EPS_PGF_ORB must be identical for RI-HFX k-points")
255
256 CALL get_kp_and_ri_images(ri_data, qs_env)
257 nimg = ri_data%nimg
258
259 !Calculate the integrals
260 ALLOCATE (t_2c_op_pot(nimg), t_2c_op_ri(nimg))
261 ALLOCATE (t_3c_int(1, nimg))
262 CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_ri, t_2c_op_pot, t_3c_int, do_kpoints=.true.)
263
264 !Make sure the internals have the k-point format
265 CALL adapt_ri_data_to_kp(dbcsr_template, ri_data, qs_env)
266
267 !For each atom i, we calculate the inverse RI metric (P^0 | Q^0)^-1 without external bumping yet
268 !Also store the off-diagonal integrals of the RI metric in case of forces, bumped from the left
269 DO iatom = 1, natom
270 CALL get_ext_2c_int(ri_data%t_2c_inv(1, iatom), t_2c_op_ri, iatom, iatom, 1, ri_data, qs_env, &
271 do_inverse=.true.)
272 !for the forces:
273 !off-diagonl RI metric bumped from the left
274 CALL get_ext_2c_int(ri_data%t_2c_int(1, iatom), t_2c_op_ri, iatom, iatom, 1, ri_data, &
275 qs_env, off_diagonal=.true.)
276 CALL apply_bump(ri_data%t_2c_int(1, iatom), iatom, ri_data, qs_env, from_left=.true., from_right=.false.)
277
278 !RI metric with bumped off-diagonal blocks (but not inverted), depumed from left and right
279 CALL get_ext_2c_int(ri_data%t_2c_pot(1, iatom), t_2c_op_ri, iatom, iatom, 1, ri_data, qs_env, &
280 do_inverse=.true., skip_inverse=.true.)
281 CALL apply_bump(ri_data%t_2c_pot(1, iatom), iatom, ri_data, qs_env, from_left=.true., &
282 from_right=.true., debump=.true.)
283
284 END DO
285
286 DO i_img = 1, nimg
287 CALL dbcsr_release(t_2c_op_ri(i_img))
288 END DO
289
290 ALLOCATE (ri_data%kp_mat_2c_pot(1, nimg))
291 DO i_img = 1, nimg
292 CALL dbcsr_create(ri_data%kp_mat_2c_pot(1, i_img), template=t_2c_op_pot(i_img))
293 CALL dbcsr_copy(ri_data%kp_mat_2c_pot(1, i_img), t_2c_op_pot(i_img))
294 CALL dbcsr_release(t_2c_op_pot(i_img))
295 END DO
296
297 !reorder the 3c integrals such that empty images are bunched up together
298 CALL reorder_3c_ints(t_3c_int(1, :), ri_data)
299
300 !Pre-contract all 3c integrals with the bumped inverse RI metric (P^0|Q^0)^-1,
301 !and store in ri_data%t_3c_int_ctr_1
302 CALL precontract_3c_ints(t_3c_int, ri_data, qs_env)
303
304 CALL timestop(handle)
305
306 END SUBROUTINE hfx_ri_pre_scf_kp
307
308! **************************************************************************************************
309!> \brief clean-up the KP specific data from ri_data
310!> \param ri_data ...
311! **************************************************************************************************
312 SUBROUTINE cleanup_kp(ri_data)
313 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
314
315 INTEGER :: i, j
316
317 IF (ALLOCATED(ri_data%kp_cost)) DEALLOCATE (ri_data%kp_cost)
318 IF (ALLOCATED(ri_data%idx_to_img)) DEALLOCATE (ri_data%idx_to_img)
319 IF (ALLOCATED(ri_data%img_to_idx)) DEALLOCATE (ri_data%img_to_idx)
320 IF (ALLOCATED(ri_data%present_images)) DEALLOCATE (ri_data%present_images)
321 IF (ALLOCATED(ri_data%img_to_RI_cell)) DEALLOCATE (ri_data%img_to_RI_cell)
322 IF (ALLOCATED(ri_data%RI_cell_to_img)) DEALLOCATE (ri_data%RI_cell_to_img)
323
324 IF (ALLOCATED(ri_data%kp_mat_2c_pot)) THEN
325 DO j = 1, SIZE(ri_data%kp_mat_2c_pot, 2)
326 DO i = 1, SIZE(ri_data%kp_mat_2c_pot, 1)
327 CALL dbcsr_release(ri_data%kp_mat_2c_pot(i, j))
328 END DO
329 END DO
330 DEALLOCATE (ri_data%kp_mat_2c_pot)
331 END IF
332
333 IF (ALLOCATED(ri_data%kp_t_3c_int)) THEN
334 DO i = 1, SIZE(ri_data%kp_t_3c_int)
335 CALL dbt_destroy(ri_data%kp_t_3c_int(i))
336 END DO
337 DEALLOCATE (ri_data%kp_t_3c_int)
338 END IF
339
340 IF (ALLOCATED(ri_data%t_2c_inv)) THEN
341 DO j = 1, SIZE(ri_data%t_2c_inv, 2)
342 DO i = 1, SIZE(ri_data%t_2c_inv, 1)
343 CALL dbt_destroy(ri_data%t_2c_inv(i, j))
344 END DO
345 END DO
346 DEALLOCATE (ri_data%t_2c_inv)
347 END IF
348
349 IF (ALLOCATED(ri_data%t_2c_int)) THEN
350 DO j = 1, SIZE(ri_data%t_2c_int, 2)
351 DO i = 1, SIZE(ri_data%t_2c_int, 1)
352 CALL dbt_destroy(ri_data%t_2c_int(i, j))
353 END DO
354 END DO
355 DEALLOCATE (ri_data%t_2c_int)
356 END IF
357
358 IF (ALLOCATED(ri_data%t_2c_pot)) THEN
359 DO j = 1, SIZE(ri_data%t_2c_pot, 2)
360 DO i = 1, SIZE(ri_data%t_2c_pot, 1)
361 CALL dbt_destroy(ri_data%t_2c_pot(i, j))
362 END DO
363 END DO
364 DEALLOCATE (ri_data%t_2c_pot)
365 END IF
366
367 IF (ALLOCATED(ri_data%t_3c_int_ctr_1)) THEN
368 DO j = 1, SIZE(ri_data%t_3c_int_ctr_1, 2)
369 DO i = 1, SIZE(ri_data%t_3c_int_ctr_1, 1)
370 CALL dbt_destroy(ri_data%t_3c_int_ctr_1(i, j))
371 END DO
372 END DO
373 DEALLOCATE (ri_data%t_3c_int_ctr_1)
374 END IF
375
376 IF (ALLOCATED(ri_data%t_3c_int_ctr_2)) THEN
377 DO j = 1, SIZE(ri_data%t_3c_int_ctr_2, 2)
378 DO i = 1, SIZE(ri_data%t_3c_int_ctr_2, 1)
379 CALL dbt_destroy(ri_data%t_3c_int_ctr_2(i, j))
380 END DO
381 END DO
382 DEALLOCATE (ri_data%t_3c_int_ctr_2)
383 END IF
384
385 IF (ALLOCATED(ri_data%rho_ao_t)) THEN
386 DO j = 1, SIZE(ri_data%rho_ao_t, 2)
387 DO i = 1, SIZE(ri_data%rho_ao_t, 1)
388 CALL dbt_destroy(ri_data%rho_ao_t(i, j))
389 END DO
390 END DO
391 DEALLOCATE (ri_data%rho_ao_t)
392 END IF
393
394 IF (ALLOCATED(ri_data%ks_t)) THEN
395 DO j = 1, SIZE(ri_data%ks_t, 2)
396 DO i = 1, SIZE(ri_data%ks_t, 1)
397 CALL dbt_destroy(ri_data%ks_t(i, j))
398 END DO
399 END DO
400 DEALLOCATE (ri_data%ks_t)
401 END IF
402
403 END SUBROUTINE cleanup_kp
404
405! **************************************************************************************************
406!> \brief Prints a progress bar for the k-point RI-HFX triple loop
407!> \param b_img ...
408!> \param nimg ...
409!> \param iprint ...
410!> \param ri_data ...
411! **************************************************************************************************
412 SUBROUTINE print_progress_bar(b_img, nimg, iprint, ri_data)
413 INTEGER, INTENT(IN) :: b_img, nimg
414 INTEGER, INTENT(INOUT) :: iprint
415 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
416
417 CHARACTER(LEN=default_string_length) :: bar
418 INTEGER :: rep
419
420 IF (ri_data%unit_nr > 0) THEN
421 IF (b_img == 1) THEN
422 WRITE (ri_data%unit_nr, '(/T6,A)', advance="no") '[-'
423 CALL m_flush(ri_data%unit_nr)
424 END IF
425 IF (b_img > iprint*nimg/71) THEN
426 rep = max(1, 71/nimg)
427 bar = repeat("-", rep)
428 WRITE (ri_data%unit_nr, '(A)', advance="no") trim(bar)
429 CALL m_flush(ri_data%unit_nr)
430 iprint = iprint + 1
431 END IF
432 IF (b_img == nimg) THEN
433 rep = max(0, 1 + 71 - iprint*rep)
434 bar = repeat("-", rep)
435 WRITE (ri_data%unit_nr, '(A,A)') trim(bar), '-]'
436 CALL m_flush(ri_data%unit_nr)
437 END IF
438 END IF
439
440 END SUBROUTINE print_progress_bar
441
442! **************************************************************************************************
443!> \brief Update the KS matrices for each real-space image
444!> \param qs_env ...
445!> \param ri_data ...
446!> \param ks_matrix ...
447!> \param ehfx ...
448!> \param rho_ao ...
449!> \param geometry_did_change ...
450!> \param nspins ...
451!> \param hf_fraction ...
452! **************************************************************************************************
453 SUBROUTINE hfx_ri_update_ks_kp(qs_env, ri_data, ks_matrix, ehfx, rho_ao, &
454 geometry_did_change, nspins, hf_fraction)
455
456 TYPE(qs_environment_type), POINTER :: qs_env
457 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
458 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: ks_matrix
459 REAL(kind=dp), INTENT(OUT) :: ehfx
460 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
461 LOGICAL, INTENT(IN) :: geometry_did_change
462 INTEGER, INTENT(IN) :: nspins
463 REAL(kind=dp), INTENT(IN) :: hf_fraction
464
465 CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_update_ks_kp'
466
467 INTEGER :: b_img, batch_size, group_size, handle, handle2, i_batch, i_img, i_spin, iatom, &
468 iblk, igroup, iprint, jatom, mb_img, n_batch_nze, n_nze, natom, ngroups, nimg, nimg_nze
469 INTEGER(int_8) :: mem, nflop, nze
470 INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_at, batch_ranges_nze, &
471 idx_to_at_ao
472 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: iapc_pairs
473 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: sparsity_pattern
474 LOGICAL :: estimate_mem, print_progress, use_delta_p
475 REAL(dp) :: etmp, fac, occ, pfac, pref, t1, t2, t3, &
476 t4
477 TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
478 TYPE(dbcsr_type) :: ks_desymm, rho_desymm, tmp
479 TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: mat_2c_pot
480 TYPE(dbcsr_type), POINTER :: dbcsr_template
481 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: ks_t_split, t_2c_ao_tmp, t_2c_work, &
482 t_3c_int, t_3c_work_2, t_3c_work_3
483 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: ks_t, ks_t_sub, t_3c_apc, t_3c_apc_sub
484 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
485 TYPE(section_vals_type), POINTER :: hfx_section, print_section
486
487 NULLIFY (para_env, para_env_sub, blacs_env_sub, hfx_section, dbcsr_template, print_section)
488
489 CALL cite_reference(bussy2024)
490
491 CALL timeset(routinen, handle)
492
493 CALL get_qs_env(qs_env, para_env=para_env, natom=natom)
494
495 IF (nspins == 1) THEN
496 fac = 0.5_dp*hf_fraction
497 ELSE
498 fac = 1.0_dp*hf_fraction
499 END IF
500
501 hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
502 CALL section_vals_val_get(hfx_section, "KP_NGROUPS", i_val=ngroups)
503 CALL section_vals_val_get(hfx_section, "KP_STACK_SIZE", i_val=batch_size)
504 CALL section_vals_val_get(hfx_section, "KP_USE_DELTA_P", l_val=use_delta_p)
505 ri_data%kp_stack_size = batch_size
506 ri_data%kp_ngroups = ngroups
507
508 IF (geometry_did_change) THEN
509 CALL hfx_ri_pre_scf_kp(ks_matrix(1, 1)%matrix, ri_data, qs_env)
510 END IF
511 nimg = ri_data%nimg
512 nimg_nze = ri_data%nimg_nze
513
514 !We need to calculate the KS matrix for each periodic cell with index b: F_mu^0,nu^b
515 !F_mu^0,nu^b = -0.5 sum_a,c P_sigma^0,lambda^c (mu^0, sigma^a| P^0) V_P^0,Q^b (Q^b| nu^b lambda^a+c)
516 !with V_P^0,Q^b = (P^0|R^0)^-1 * (R^0|S^b) * (S^b|Q^b)^-1
517
518 !We use a local RI basis set for each atom in the system, which inlcudes RI basis elements for
519 !each neighboring atom standing within the KIND radius (decay of Gaussian with smallest exponent)
520
521 !We also limit the number of periodic images we consider accorrding to the HFX potentail in the
522 !RI basis, because if V_P^0,Q^b is zero everywhere, then image b can be ignored (RI basis less diffuse)
523
524 !We manage to calculate each KS matrix doing a double loop on iamges, and a double loop on atoms
525 !First, we pre-contract and store P_sigma^0,lambda^c (mu^0, sigma^a| P^0) (P^0|R^0)^-1 into T_mu^0,lambda^a+c,P^0
526 !Then, we loop over b_img, iatom, jatom to get (R^0|S^b)
527 !Finally, we do an additional loop over a+c images where we do (R^0|S^b) (S^b|Q^b)^-1 (Q^b| nu^b lambda^a+c)
528 !and the final contraction with T_mu^0,lambda^a+c,P^0
529
530 !Note that the 3-center integrals are pre-contracted with the RI metric, and that the same tensor can be used
531 !(mu^0, sigma^a| P^0) (P^0|R^0) <===> (S^b|Q^b)^-1 (Q^b| nu^b lambda^a+c) by relabelling the images
532
533 !By default, build the density tensor based on the difference of this SCF P and that of the prev. SCF
534 pfac = -1.0_dp
535 IF (.NOT. use_delta_p) pfac = 0.0_dp
536 CALL get_pmat_images(ri_data%rho_ao_t, rho_ao, pfac, ri_data, qs_env)
537
538 n_nze = 0
539 DO i_img = 1, nimg
540 DO i_spin = 1, nspins
541 CALL get_tensor_occupancy(ri_data%rho_ao_t(i_spin, i_img), nze, occ)
542 IF (nze > 0) THEN
543 n_nze = n_nze + 1
544 END IF
545 END DO
546 END DO
547 IF (n_nze == nspins) THEN
548 cpwarn("It is highly recommended to restart from a converged GGA K-point calculations.")
549 END IF
550
551 ALLOCATE (ks_t(nspins, nimg))
552 DO i_img = 1, nimg
553 DO i_spin = 1, nspins
554 CALL dbt_create(ri_data%ks_t(1, 1), ks_t(i_spin, i_img))
555 END DO
556 END DO
557
558 ALLOCATE (idx_to_at_ao(SIZE(ri_data%bsizes_AO_split)))
559 CALL get_idx_to_atom(idx_to_at_ao, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
560
561 !First we calculate and store T^1_mu^0,lambda^a+c,P = P_mu^0,lambda^c * (mu_0 sigma^a | P^0) (P^0|R^0)^-1
562 !To avoid doing nimg**2 tiny contractions that do not scale well with a large number of CPUs,
563 !we instead do a single loop over the a+c image index. For each a+c, we get a list of allowed
564 !combination of a,c indices. Then we build TAS tensors P_mu^0,lambda^c with all concerned c's
565 !and (mu^0 sigma^a | P^0)*(P^0|R^0)^-1 with all a's. Then we perform a single contraction with larger tensors,
566 !were the sum over a,c is automatically taken care of
567 ALLOCATE (t_3c_apc(nspins, nimg))
568 DO i_img = 1, nimg
569 DO i_spin = 1, nspins
570 CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_apc(i_spin, i_img))
571 END DO
572 END DO
573 CALL contract_pmat_3c(t_3c_apc, ri_data%rho_ao_t, ri_data, qs_env)
574
575 IF (mod(para_env%num_pe, ngroups) .NE. 0) THEN
576 cpwarn("KP_NGROUPS must be an integer divisor of the total number of MPI ranks. It was set to 1.")
577 ngroups = 1
578 CALL section_vals_val_set(hfx_section, "KP_NGROUPS", i_val=ngroups)
579 END IF
580 IF ((mod(ngroups, natom) .NE. 0) .AND. (mod(natom, ngroups) .NE. 0) .AND. geometry_did_change) THEN
581 IF (ngroups > 1) THEN
582 cpwarn("Better load balancing is reached if NGROUPS is a multiple/divisor of the number of atoms")
583 END IF
584 END IF
585 group_size = para_env%num_pe/ngroups
586 igroup = para_env%mepos/group_size
587
588 ALLOCATE (para_env_sub)
589 CALL para_env_sub%from_split(para_env, igroup)
590 CALL cp_blacs_env_create(blacs_env_sub, para_env_sub)
591
592 ! The sparsity pattern of each iatom, jatom pair, on each b_img, and on which subgroup
593 ALLOCATE (sparsity_pattern(natom, natom, nimg))
594 CALL get_sparsity_pattern(sparsity_pattern, ri_data, qs_env)
595 CALL get_sub_dist(sparsity_pattern, ngroups, ri_data)
596
597 !Get all the required tensors in the subgroups
598 ALLOCATE (mat_2c_pot(nimg), ks_t_sub(nspins, nimg), t_2c_ao_tmp(1), ks_t_split(2), t_2c_work(3))
599 CALL get_subgroup_2c_tensors(mat_2c_pot, t_2c_work, t_2c_ao_tmp, ks_t_split, ks_t_sub, &
600 group_size, ngroups, para_env, para_env_sub, ri_data)
601
602 ALLOCATE (t_3c_int(nimg), t_3c_apc_sub(nspins, nimg), t_3c_work_2(3), t_3c_work_3(3))
603 CALL get_subgroup_3c_tensors(t_3c_int, t_3c_work_2, t_3c_work_3, t_3c_apc, t_3c_apc_sub, &
604 group_size, ngroups, para_env, para_env_sub, ri_data)
605
606 !We go atom by atom, therefore there is an automatic batching along that direction
607 !Also, because we stack the 3c tensors nimg times, we naturally do some batching there too
608 ALLOCATE (batch_ranges_at(natom + 1))
609 batch_ranges_at(natom + 1) = SIZE(ri_data%bsizes_AO_split) + 1
610 iatom = 0
611 DO iblk = 1, SIZE(ri_data%bsizes_AO_split)
612 IF (idx_to_at_ao(iblk) == iatom + 1) THEN
613 iatom = iatom + 1
614 batch_ranges_at(iatom) = iblk
615 END IF
616 END DO
617
618 n_batch_nze = nimg_nze/batch_size
619 IF (modulo(nimg_nze, batch_size) .NE. 0) n_batch_nze = n_batch_nze + 1
620 ALLOCATE (batch_ranges_nze(n_batch_nze + 1))
621 DO i_batch = 1, n_batch_nze
622 batch_ranges_nze(i_batch) = (i_batch - 1)*batch_size + 1
623 END DO
624 batch_ranges_nze(n_batch_nze + 1) = nimg_nze + 1
625
626 print_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI%PRINT")
627 CALL section_vals_val_get(print_section, "KP_RI_PROGRESS_BAR", l_val=print_progress)
628 CALL section_vals_val_get(print_section, "KP_RI_MEMORY_ESTIMATE", l_val=estimate_mem)
629
630 ALLOCATE (iapc_pairs(nimg, 2))
631 IF (estimate_mem .AND. geometry_did_change) THEN
632 !Populate work tensors to simulate maximum usage
633 CALL get_iapc_pairs(iapc_pairs, 1, ri_data, qs_env)
634 CALL fill_3c_stack(t_3c_work_3(1), t_3c_int, iapc_pairs(:, 1), 3, ri_data, &
635 filter_at=1, filter_dim=2, idx_to_at=idx_to_at_ao, &
636 img_bounds=[batch_ranges_nze(1), batch_ranges_nze(2)])
637 CALL fill_3c_stack(t_3c_work_3(2), t_3c_int, iapc_pairs(:, 1), 3, ri_data, &
638 filter_at=1, filter_dim=2, idx_to_at=idx_to_at_ao, &
639 img_bounds=[batch_ranges_nze(1), batch_ranges_nze(2)])
640 CALL fill_3c_stack(t_3c_work_2(1), t_3c_apc_sub(1, :), iapc_pairs(:, 2), 3, &
641 ri_data, filter_at=1, filter_dim=1, idx_to_at=idx_to_at_ao, &
642 img_bounds=[batch_ranges_nze(1), batch_ranges_nze(2)])
643 CALL fill_3c_stack(t_3c_work_2(2), t_3c_apc_sub(1, :), iapc_pairs(:, 2), 3, &
644 ri_data, filter_at=1, filter_dim=1, idx_to_at=idx_to_at_ao, &
645 img_bounds=[batch_ranges_nze(1), batch_ranges_nze(2)])
646 CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, 1, 1, 1, ri_data, qs_env, &
647 blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
648 dbcsr_template=dbcsr_template)
649 CALL m_memory(mem)
650 CALL para_env%max(mem)
651 CALL dbt_clear(t_3c_work_2(1))
652 CALL dbt_clear(t_3c_work_2(2))
653 CALL dbt_clear(t_3c_work_3(1))
654 CALL dbt_clear(t_3c_work_3(2))
655 CALL dbt_clear(t_2c_work(1))
656
657 IF (ri_data%unit_nr > 0) THEN
658 WRITE (ri_data%unit_nr, fmt="(T3,A,I14)") &
659 "KP-HFX_RI_INFO| Estimated peak memory usage per MPI rank (MiB):", mem/(1024*1024)
660 CALL m_flush(ri_data%unit_nr)
661 END IF
662 END IF
663
664 CALL dbt_batched_contract_init(t_3c_work_3(1), batch_range_2=batch_ranges_at)
665 CALL dbt_batched_contract_init(t_3c_work_3(2), batch_range_2=batch_ranges_at)
666 CALL dbt_batched_contract_init(t_3c_work_2(1), batch_range_1=batch_ranges_at)
667 CALL dbt_batched_contract_init(t_3c_work_2(2), batch_range_1=batch_ranges_at)
668
669 iprint = 1
670 t1 = m_walltime()
671 ri_data%kp_cost(:, :, :) = 0.0_dp
672 DO b_img = 1, nimg
673 IF (print_progress) CALL print_progress_bar(b_img, nimg, iprint, ri_data)
674 CALL dbt_batched_contract_init(ks_t_split(1))
675 CALL dbt_batched_contract_init(ks_t_split(2))
676 DO jatom = 1, natom
677 DO iatom = 1, natom
678 IF (.NOT. sparsity_pattern(iatom, jatom, b_img) == igroup) cycle
679 pref = 1.0_dp
680 IF (iatom == jatom .AND. b_img == 1) pref = 0.5_dp
681
682 !measure the cost of the given i, j, b configuration
683 t3 = m_walltime()
684
685 !Get the proper HFX potential 2c integrals (R_i^0|S_j^b)
686 CALL timeset(routinen//"_2c", handle2)
687 CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, iatom, jatom, b_img, ri_data, qs_env, &
688 blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
689 dbcsr_template=dbcsr_template)
690 CALL dbt_copy(t_2c_work(1), t_2c_work(2), move_data=.true.) !move to split blocks
691 CALL dbt_filter(t_2c_work(2), ri_data%filter_eps)
692 CALL timestop(handle2)
693
694 CALL dbt_batched_contract_init(t_2c_work(2))
695 CALL get_iapc_pairs(iapc_pairs, b_img, ri_data, qs_env)
696 CALL timeset(routinen//"_3c", handle2)
697
698 !Stack the (S^b|Q^b)^-1 * (Q^b| nu^b lambda^a+c) integrals over a+c and multiply by (R_i^0|S_j^b)
699 DO i_batch = 1, n_batch_nze
700 CALL fill_3c_stack(t_3c_work_3(3), t_3c_int, iapc_pairs(:, 1), 3, ri_data, &
701 filter_at=jatom, filter_dim=2, idx_to_at=idx_to_at_ao, &
702 img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
703 CALL dbt_copy(t_3c_work_3(3), t_3c_work_3(1), move_data=.true.)
704
705 CALL dbt_contract(1.0_dp, t_2c_work(2), t_3c_work_3(1), &
706 0.0_dp, t_3c_work_3(2), map_1=[1], map_2=[2, 3], &
707 contract_1=[2], notcontract_1=[1], &
708 contract_2=[1], notcontract_2=[2, 3], &
709 filter_eps=ri_data%filter_eps, flop=nflop)
710 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
711 CALL dbt_copy(t_3c_work_3(2), t_3c_work_2(2), order=[2, 1, 3], move_data=.true.)
712 CALL dbt_copy(t_3c_work_3(3), t_3c_work_3(1))
713
714 !Stack the P_sigma^a,lambda^a+c * (mu^0 sigma^a | P^0)*(P^0|R^0)^-1 integrals over a+c and contract
715 !to get the final block of the KS matrix
716 DO i_spin = 1, nspins
717 CALL fill_3c_stack(t_3c_work_2(3), t_3c_apc_sub(i_spin, :), iapc_pairs(:, 2), 3, &
718 ri_data, filter_at=iatom, filter_dim=1, idx_to_at=idx_to_at_ao, &
719 img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
720 CALL get_tensor_occupancy(t_3c_work_2(3), nze, occ)
721
722 IF (nze == 0) cycle
723 CALL dbt_copy(t_3c_work_2(3), t_3c_work_2(1), move_data=.true.)
724 CALL dbt_contract(-pref*fac, t_3c_work_2(1), t_3c_work_2(2), &
725 1.0_dp, ks_t_split(i_spin), map_1=[1], map_2=[2], &
726 contract_1=[2, 3], notcontract_1=[1], &
727 contract_2=[2, 3], notcontract_2=[1], &
728 filter_eps=ri_data%filter_eps, &
729 move_data=i_spin == nspins, flop=nflop)
730 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
731 END DO
732 END DO !i_batch
733 CALL timestop(handle2)
734 CALL dbt_batched_contract_finalize(t_2c_work(2))
735
736 t4 = m_walltime()
737 ri_data%kp_cost(iatom, jatom, b_img) = t4 - t3
738 END DO !iatom
739 END DO !jatom
740 CALL dbt_batched_contract_finalize(ks_t_split(1))
741 CALL dbt_batched_contract_finalize(ks_t_split(2))
742
743 DO i_spin = 1, nspins
744 CALL dbt_copy(ks_t_split(i_spin), t_2c_ao_tmp(1), move_data=.true.)
745 CALL dbt_copy(t_2c_ao_tmp(1), ks_t_sub(i_spin, b_img), summation=.true.)
746 END DO
747 END DO !b_img
748 CALL dbt_batched_contract_finalize(t_3c_work_3(1))
749 CALL dbt_batched_contract_finalize(t_3c_work_3(2))
750 CALL dbt_batched_contract_finalize(t_3c_work_2(1))
751 CALL dbt_batched_contract_finalize(t_3c_work_2(2))
752 CALL para_env%sync()
753 CALL para_env%sum(ri_data%dbcsr_nflop)
754 CALL para_env%sum(ri_data%kp_cost)
755 t2 = m_walltime()
756 ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
757
758 !transfer KS tensor from subgroup to main group
759 CALL gather_ks_matrix(ks_t, ks_t_sub, group_size, sparsity_pattern, para_env, ri_data)
760
761 !Keep the 3c integrals on the subgroups to avoid communication at next SCF step
762 DO i_img = 1, nimg
763 CALL dbt_copy(t_3c_int(i_img), ri_data%kp_t_3c_int(i_img), move_data=.true.)
764 END DO
765
766 !clean-up subgroup tensors
767 CALL dbt_destroy(t_2c_ao_tmp(1))
768 CALL dbt_destroy(ks_t_split(1))
769 CALL dbt_destroy(ks_t_split(2))
770 CALL dbt_destroy(t_2c_work(1))
771 CALL dbt_destroy(t_2c_work(2))
772 CALL dbt_destroy(t_3c_work_2(1))
773 CALL dbt_destroy(t_3c_work_2(2))
774 CALL dbt_destroy(t_3c_work_2(3))
775 CALL dbt_destroy(t_3c_work_3(1))
776 CALL dbt_destroy(t_3c_work_3(2))
777 CALL dbt_destroy(t_3c_work_3(3))
778 DO i_img = 1, nimg
779 CALL dbt_destroy(t_3c_int(i_img))
780 CALL dbcsr_release(mat_2c_pot(i_img))
781 DO i_spin = 1, nspins
782 CALL dbt_destroy(t_3c_apc_sub(i_spin, i_img))
783 CALL dbt_destroy(ks_t_sub(i_spin, i_img))
784 END DO
785 END DO
786 IF (ASSOCIATED(dbcsr_template)) THEN
787 CALL dbcsr_release(dbcsr_template)
788 DEALLOCATE (dbcsr_template)
789 END IF
790
791 !End of subgroup parallelization
792 CALL cp_blacs_env_release(blacs_env_sub)
793 CALL para_env_sub%free()
794 DEALLOCATE (para_env_sub)
795
796 !Currently, rho_ao_t holds the density difference (wrt to pref SCF step).
797 !ks_t also hold that diff, while only having half the blocks => need to add to prev ks_t and symmetrize
798 !We need the full thing for the energy, on the next SCF step
799 CALL get_pmat_images(ri_data%rho_ao_t, rho_ao, 0.0_dp, ri_data, qs_env)
800 DO i_spin = 1, nspins
801 DO b_img = 1, nimg
802 CALL dbt_copy(ks_t(i_spin, b_img), ri_data%ks_t(i_spin, b_img), summation=.true.)
803
804 !desymmetrize
805 mb_img = get_opp_index(b_img, qs_env)
806 IF (mb_img > 0 .AND. mb_img .LE. nimg) THEN
807 CALL dbt_copy(ks_t(i_spin, mb_img), ri_data%ks_t(i_spin, b_img), order=[2, 1], summation=.true.)
808 END IF
809 END DO
810 END DO
811 DO b_img = 1, nimg
812 DO i_spin = 1, nspins
813 CALL dbt_destroy(ks_t(i_spin, b_img))
814 END DO
815 END DO
816
817 !calculate the energy
818 CALL dbt_create(ri_data%ks_t(1, 1), t_2c_ao_tmp(1))
819 CALL dbcsr_create(tmp, template=ks_matrix(1, 1)%matrix, matrix_type=dbcsr_type_symmetric)
820 CALL dbcsr_create(ks_desymm, template=ks_matrix(1, 1)%matrix, matrix_type=dbcsr_type_no_symmetry)
821 CALL dbcsr_create(rho_desymm, template=ks_matrix(1, 1)%matrix, matrix_type=dbcsr_type_no_symmetry)
822 ehfx = 0.0_dp
823 DO i_img = 1, nimg
824 DO i_spin = 1, nspins
825 CALL dbt_filter(ri_data%ks_t(i_spin, i_img), ri_data%filter_eps)
826 CALL dbt_copy(ri_data%ks_t(i_spin, i_img), t_2c_ao_tmp(1))
827 CALL dbt_copy_tensor_to_matrix(t_2c_ao_tmp(1), ks_desymm)
828 CALL dbt_copy_tensor_to_matrix(t_2c_ao_tmp(1), tmp)
829 CALL dbcsr_add(ks_matrix(i_spin, i_img)%matrix, tmp, 1.0_dp, 1.0_dp)
830
831 CALL dbt_copy(ri_data%rho_ao_t(i_spin, i_img), t_2c_ao_tmp(1))
832 CALL dbt_copy_tensor_to_matrix(t_2c_ao_tmp(1), rho_desymm)
833
834 CALL dbcsr_dot(ks_desymm, rho_desymm, etmp)
835 ehfx = ehfx + 0.5_dp*etmp
836
837 IF (.NOT. use_delta_p) CALL dbt_clear(ri_data%ks_t(i_spin, i_img))
838 END DO
839 END DO
840 CALL dbcsr_release(rho_desymm)
841 CALL dbcsr_release(ks_desymm)
842 CALL dbcsr_release(tmp)
843 CALL dbt_destroy(t_2c_ao_tmp(1))
844
845 CALL timestop(handle)
846
847 END SUBROUTINE hfx_ri_update_ks_kp
848
849! **************************************************************************************************
850!> \brief Update the K-points RI-HFX forces
851!> \param qs_env ...
852!> \param ri_data ...
853!> \param nspins ...
854!> \param hf_fraction ...
855!> \param rho_ao ...
856!> \param use_virial ...
857!> \note Because this routine uses stored quantities calculated in the energy calculation, they should
858!> always be called by pairs, and with the same input densities
859! **************************************************************************************************
860 SUBROUTINE hfx_ri_update_forces_kp(qs_env, ri_data, nspins, hf_fraction, rho_ao, use_virial)
861
862 TYPE(qs_environment_type), POINTER :: qs_env
863 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
864 INTEGER, INTENT(IN) :: nspins
865 REAL(kind=dp), INTENT(IN) :: hf_fraction
866 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
867 LOGICAL, INTENT(IN), OPTIONAL :: use_virial
868
869 CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_update_forces_kp'
870
871 INTEGER :: b_img, batch_size, group_size, handle, handle2, i_batch, i_img, i_loop, i_spin, &
872 i_xyz, iatom, iblk, igroup, j_xyz, jatom, k_xyz, n_batch, natom, ngroups, nimg, nimg_nze
873 INTEGER(int_8) :: nflop, nze
874 INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_ranges_at, &
875 batch_ranges_nze, dist1, dist2, &
876 i_images, idx_to_at_ao, idx_to_at_ri, &
877 kind_of
878 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: iapc_pairs
879 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: force_pattern, sparsity_pattern
880 INTEGER, DIMENSION(2, 1) :: bounds_iat, bounds_jat
881 LOGICAL :: use_virial_prv
882 REAL(dp) :: fac, occ, pref, t1, t2
883 REAL(dp), DIMENSION(3, 3) :: work_virial
884 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
885 TYPE(cell_type), POINTER :: cell
886 TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
887 TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: mat_2c_pot
888 TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:, :) :: mat_der_pot, mat_der_pot_sub
889 TYPE(dbcsr_type), POINTER :: dbcsr_template
890 TYPE(dbt_type) :: t_2c_r, t_2c_r_split
891 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_bint, t_2c_binv, t_2c_der_pot, &
892 t_2c_inv, t_2c_metric, t_2c_work, &
893 t_3c_der_stack, t_3c_work_2, &
894 t_3c_work_3
895 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: rho_ao_t, rho_ao_t_sub, t_2c_der_metric, &
896 t_2c_der_metric_sub, t_3c_apc, t_3c_apc_sub, t_3c_der_ao, t_3c_der_ao_sub, t_3c_der_ri, &
897 t_3c_der_ri_sub
898 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
899 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
900 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
901 TYPE(section_vals_type), POINTER :: hfx_section
902 TYPE(virial_type), POINTER :: virial
903
904 NULLIFY (para_env, para_env_sub, hfx_section, blacs_env_sub, dbcsr_template, force, atomic_kind_set, &
905 virial, particle_set, cell)
906
907 CALL timeset(routinen, handle)
908
909 use_virial_prv = .false.
910 IF (PRESENT(use_virial)) use_virial_prv = use_virial
911
912 IF (nspins == 1) THEN
913 fac = 0.5_dp*hf_fraction
914 ELSE
915 fac = 1.0_dp*hf_fraction
916 END IF
917
918 CALL get_qs_env(qs_env, natom=natom, para_env=para_env, force=force, cell=cell, virial=virial, &
919 atomic_kind_set=atomic_kind_set, particle_set=particle_set)
920 CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
921
922 ALLOCATE (idx_to_at_ao(SIZE(ri_data%bsizes_AO_split)))
923 CALL get_idx_to_atom(idx_to_at_ao, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
924
925 ALLOCATE (idx_to_at_ri(SIZE(ri_data%bsizes_RI_split)))
926 CALL get_idx_to_atom(idx_to_at_ri, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
927
928 nimg = ri_data%nimg
929 ALLOCATE (t_3c_der_ri(nimg, 3), t_3c_der_ao(nimg, 3), mat_der_pot(nimg, 3), t_2c_der_metric(natom, 3))
930
931 !We assume that the integrals are available from the SCF
932 !pre-calculate the derivs. 3c tensors as (P^0| sigma^a mu^0), with t_3c_der_AO holding deriv wrt mu^0
933 CALL precalc_derivatives(t_3c_der_ri, t_3c_der_ao, mat_der_pot, t_2c_der_metric, ri_data, qs_env)
934
935 !Calculate the density matrix at each image
936 ALLOCATE (rho_ao_t(nspins, nimg))
937 CALL create_2c_tensor(rho_ao_t(1, 1), dist1, dist2, ri_data%pgrid_2d, &
938 ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
939 name="(AO | AO)")
940 DEALLOCATE (dist1, dist2)
941 IF (nspins == 2) CALL dbt_create(rho_ao_t(1, 1), rho_ao_t(2, 1))
942 DO i_img = 2, nimg
943 DO i_spin = 1, nspins
944 CALL dbt_create(rho_ao_t(1, 1), rho_ao_t(i_spin, i_img))
945 END DO
946 END DO
947 CALL get_pmat_images(rho_ao_t, rho_ao, 0.0_dp, ri_data, qs_env)
948
949 !Contract integrals with the density matrix
950 ALLOCATE (t_3c_apc(nspins, nimg))
951 DO i_img = 1, nimg
952 DO i_spin = 1, nspins
953 CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_apc(i_spin, i_img))
954 END DO
955 END DO
956 CALL contract_pmat_3c(t_3c_apc, rho_ao_t, ri_data, qs_env)
957
958 !Setup the subgroups
959 hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
960 CALL section_vals_val_get(hfx_section, "KP_NGROUPS", i_val=ngroups)
961 group_size = para_env%num_pe/ngroups
962 igroup = para_env%mepos/group_size
963
964 ALLOCATE (para_env_sub)
965 CALL para_env_sub%from_split(para_env, igroup)
966 CALL cp_blacs_env_create(blacs_env_sub, para_env_sub)
967
968 !Get the ususal sparsity pattern
969 ALLOCATE (sparsity_pattern(natom, natom, nimg))
970 CALL get_sparsity_pattern(sparsity_pattern, ri_data, qs_env)
971 CALL get_sub_dist(sparsity_pattern, ngroups, ri_data)
972
973 !Get the 2-center quantities in the subgroups (note: main group derivs are deleted wihtin)
974 ALLOCATE (t_2c_inv(natom), mat_2c_pot(nimg), rho_ao_t_sub(nspins, nimg), t_2c_work(5), &
975 t_2c_der_metric_sub(natom, 3), mat_der_pot_sub(nimg, 3), t_2c_bint(natom), &
976 t_2c_metric(natom), t_2c_binv(natom))
977 CALL get_subgroup_2c_derivs(t_2c_inv, t_2c_bint, t_2c_metric, mat_2c_pot, t_2c_work, rho_ao_t, &
978 rho_ao_t_sub, t_2c_der_metric, t_2c_der_metric_sub, mat_der_pot, &
979 mat_der_pot_sub, group_size, ngroups, para_env, para_env_sub, ri_data)
980 CALL dbt_create(t_2c_work(1), t_2c_r) !nRI x nRI
981 CALL dbt_create(t_2c_work(5), t_2c_r_split) !nRI x nRI with split blocks
982
983 ALLOCATE (t_2c_der_pot(3))
984 DO i_xyz = 1, 3
985 CALL dbt_create(t_2c_r, t_2c_der_pot(i_xyz))
986 END DO
987
988 !Get the 3-center quantities in the subgroups. The integrals and t_3c_apc already there
989 ALLOCATE (t_3c_work_2(3), t_3c_work_3(4), t_3c_der_stack(6), t_3c_der_ao_sub(nimg, 3), &
990 t_3c_der_ri_sub(nimg, 3), t_3c_apc_sub(nspins, nimg))
991 CALL get_subgroup_3c_derivs(t_3c_work_2, t_3c_work_3, t_3c_der_ao, t_3c_der_ao_sub, &
992 t_3c_der_ri, t_3c_der_ri_sub, t_3c_apc, t_3c_apc_sub, t_3c_der_stack, &
993 group_size, ngroups, para_env, para_env_sub, ri_data)
994
995 !Set up batched contraction (go atom by atom)
996 ALLOCATE (batch_ranges_at(natom + 1))
997 batch_ranges_at(natom + 1) = SIZE(ri_data%bsizes_AO_split) + 1
998 iatom = 0
999 DO iblk = 1, SIZE(ri_data%bsizes_AO_split)
1000 IF (idx_to_at_ao(iblk) == iatom + 1) THEN
1001 iatom = iatom + 1
1002 batch_ranges_at(iatom) = iblk
1003 END IF
1004 END DO
1005
1006 CALL dbt_batched_contract_init(t_3c_work_3(1), batch_range_2=batch_ranges_at)
1007 CALL dbt_batched_contract_init(t_3c_work_3(2), batch_range_2=batch_ranges_at)
1008 CALL dbt_batched_contract_init(t_3c_work_3(3), batch_range_2=batch_ranges_at)
1009 CALL dbt_batched_contract_init(t_3c_work_2(1), batch_range_1=batch_ranges_at)
1010 CALL dbt_batched_contract_init(t_3c_work_2(2), batch_range_1=batch_ranges_at)
1011
1012 !Preparing for the stacking of 3c tensors
1013 nimg_nze = ri_data%nimg_nze
1014 batch_size = ri_data%kp_stack_size
1015 n_batch = nimg_nze/batch_size
1016 IF (modulo(nimg_nze, batch_size) .NE. 0) n_batch = n_batch + 1
1017 ALLOCATE (batch_ranges_nze(n_batch + 1))
1018 DO i_batch = 1, n_batch
1019 batch_ranges_nze(i_batch) = (i_batch - 1)*batch_size + 1
1020 END DO
1021 batch_ranges_nze(n_batch + 1) = nimg_nze + 1
1022
1023 !Applying the external bump to ((P|Q)_D + B*(P|Q)_OD*B)^-1 from left and right
1024 !And keep the bump on LHS only version as well, with B*M^-1 = (M^-1*B)^T
1025 DO iatom = 1, natom
1026 CALL dbt_create(t_2c_inv(iatom), t_2c_binv(iatom))
1027 CALL dbt_copy(t_2c_inv(iatom), t_2c_binv(iatom))
1028 CALL apply_bump(t_2c_binv(iatom), iatom, ri_data, qs_env, from_left=.true., from_right=.false.)
1029 CALL apply_bump(t_2c_inv(iatom), iatom, ri_data, qs_env, from_left=.true., from_right=.true.)
1030 END DO
1031
1032 t1 = m_walltime()
1033 work_virial = 0.0_dp
1034 ALLOCATE (iapc_pairs(nimg, 2), i_images(nimg))
1035 ALLOCATE (force_pattern(natom, natom, nimg))
1036 force_pattern(:, :, :) = -1
1037 !We proceed with 2 loops: one over the sparsity pattern from the SCF, one over the rest
1038 !We use the SCF cost model for the first loop, while we calculate the cost of the upcoming loop
1039 DO i_loop = 1, 2
1040 DO b_img = 1, nimg
1041 DO jatom = 1, natom
1042 DO iatom = 1, natom
1043
1044 pref = -0.5_dp*fac
1045 IF (i_loop == 1 .AND. (.NOT. sparsity_pattern(iatom, jatom, b_img) == igroup)) cycle
1046 IF (i_loop == 2 .AND. (.NOT. force_pattern(iatom, jatom, b_img) == igroup)) cycle
1047
1048 !Get the proper HFX potential 2c integrals (R_i^0|S_j^b), times (S_j^b|Q_j^b)^-1
1049 CALL timeset(routinen//"_2c_1", handle2)
1050 CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, iatom, jatom, b_img, ri_data, qs_env, &
1051 blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
1052 dbcsr_template=dbcsr_template)
1053 CALL dbt_contract(1.0_dp, t_2c_work(1), t_2c_inv(jatom), &
1054 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1055 contract_1=[2], notcontract_1=[1], &
1056 contract_2=[1], notcontract_2=[2], &
1057 filter_eps=ri_data%filter_eps, flop=nflop)
1058 CALL dbt_copy(t_2c_work(2), t_2c_work(5), move_data=.true.) !move to split blocks
1059 CALL dbt_filter(t_2c_work(5), ri_data%filter_eps)
1060 CALL timestop(handle2)
1061
1062 CALL timeset(routinen//"_3c", handle2)
1063 bounds_iat(:, 1) = [sum(ri_data%bsizes_AO(1:iatom - 1)) + 1, sum(ri_data%bsizes_AO(1:iatom))]
1064 bounds_jat(:, 1) = [sum(ri_data%bsizes_AO(1:jatom - 1)) + 1, sum(ri_data%bsizes_AO(1:jatom))]
1065 CALL dbt_clear(t_2c_r_split)
1066
1067 DO i_spin = 1, nspins
1068 CALL dbt_batched_contract_init(rho_ao_t_sub(i_spin, b_img))
1069 END DO
1070
1071 CALL get_iapc_pairs(iapc_pairs, b_img, ri_data, qs_env, i_images) !i = a+c-b
1072 DO i_batch = 1, n_batch
1073
1074 !Stack the 3c derivatives to take the trace later on
1075 DO i_xyz = 1, 3
1076 CALL dbt_clear(t_3c_der_stack(i_xyz))
1077 CALL fill_3c_stack(t_3c_der_stack(i_xyz), t_3c_der_ri_sub(:, i_xyz), &
1078 iapc_pairs(:, 1), 3, ri_data, filter_at=jatom, &
1079 filter_dim=2, idx_to_at=idx_to_at_ao, &
1080 img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
1081
1082 CALL dbt_clear(t_3c_der_stack(3 + i_xyz))
1083 CALL fill_3c_stack(t_3c_der_stack(3 + i_xyz), t_3c_der_ao_sub(:, i_xyz), &
1084 iapc_pairs(:, 1), 3, ri_data, filter_at=jatom, &
1085 filter_dim=2, idx_to_at=idx_to_at_ao, &
1086 img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
1087 END DO
1088
1089 DO i_spin = 1, nspins
1090 !stack the t_3c_apc tensors
1091 CALL dbt_clear(t_3c_work_2(3))
1092 CALL fill_3c_stack(t_3c_work_2(3), t_3c_apc_sub(i_spin, :), iapc_pairs(:, 2), 3, &
1093 ri_data, filter_at=iatom, filter_dim=1, idx_to_at=idx_to_at_ao, &
1094 img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
1095 CALL get_tensor_occupancy(t_3c_work_2(3), nze, occ)
1096 IF (nze == 0) cycle
1097 CALL dbt_copy(t_3c_work_2(3), t_3c_work_2(1), move_data=.true.)
1098
1099 !Contract with the second density matrix: P_mu^0,nu^b * t_3c_apc,
1100 !where t_3c_apc = P_sigma^a,lambda^a+c (mu^0 P^0 sigma^a) *(P^0|R^0)^-1 (stacked along a+c)
1101 CALL dbt_contract(1.0_dp, rho_ao_t_sub(i_spin, b_img), t_3c_work_2(1), &
1102 0.0_dp, t_3c_work_2(2), map_1=[1], map_2=[2, 3], &
1103 contract_1=[1], notcontract_1=[2], &
1104 contract_2=[1], notcontract_2=[2, 3], &
1105 bounds_1=bounds_iat, bounds_2=bounds_jat, &
1106 filter_eps=ri_data%filter_eps, flop=nflop)
1107 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1108
1109 CALL get_tensor_occupancy(t_3c_work_2(2), nze, occ)
1110 IF (nze == 0) cycle
1111
1112 !Contract with V_PQ so that we can take the trace with (Q^b|nu^b lmabda^a+c)^(x)
1113 CALL dbt_copy(t_3c_work_2(2), t_3c_work_3(1), order=[2, 1, 3], move_data=.true.)
1114 CALL dbt_batched_contract_init(t_2c_work(5))
1115 CALL dbt_contract(1.0_dp, t_2c_work(5), t_3c_work_3(1), &
1116 0.0_dp, t_3c_work_3(2), map_1=[1], map_2=[2, 3], &
1117 contract_1=[1], notcontract_1=[2], &
1118 contract_2=[1], notcontract_2=[2, 3], &
1119 filter_eps=ri_data%filter_eps, flop=nflop)
1120 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1121 CALL dbt_batched_contract_finalize(t_2c_work(5))
1122
1123 !Contract with the 3c derivatives to get the force/virial
1124 CALL dbt_copy(t_3c_work_3(2), t_3c_work_3(4), move_data=.true.)
1125 IF (use_virial_prv) THEN
1126 CALL get_force_from_3c_trace(force, t_3c_work_3(4), t_3c_der_stack(1:3), &
1127 t_3c_der_stack(4:6), atom_of_kind, kind_of, &
1128 idx_to_at_ri, idx_to_at_ao, i_images, &
1129 batch_ranges_nze(i_batch), 2.0_dp*pref, &
1130 ri_data, qs_env, work_virial, cell, particle_set)
1131 ELSE
1132 CALL get_force_from_3c_trace(force, t_3c_work_3(4), t_3c_der_stack(1:3), &
1133 t_3c_der_stack(4:6), atom_of_kind, kind_of, &
1134 idx_to_at_ri, idx_to_at_ao, i_images, &
1135 batch_ranges_nze(i_batch), 2.0_dp*pref, &
1136 ri_data, qs_env)
1137 END IF
1138 CALL dbt_clear(t_3c_work_3(4))
1139
1140 !Contract with the 3-center integrals in order to have a matrix R_PQ such that
1141 !we can take the trace sum_PQ R_PQ (P^0|Q^b)^(x)
1142 IF (i_loop == 2) cycle
1143
1144 !Stack the 3c integrals
1145 CALL fill_3c_stack(t_3c_work_3(4), ri_data%kp_t_3c_int, iapc_pairs(:, 1), 3, ri_data, &
1146 filter_at=jatom, filter_dim=2, idx_to_at=idx_to_at_ao, &
1147 img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
1148 CALL dbt_copy(t_3c_work_3(4), t_3c_work_3(3), move_data=.true.)
1149
1150 CALL dbt_batched_contract_init(t_2c_r_split)
1151 CALL dbt_contract(1.0_dp, t_3c_work_3(1), t_3c_work_3(3), &
1152 1.0_dp, t_2c_r_split, map_1=[1], map_2=[2], &
1153 contract_1=[2, 3], notcontract_1=[1], &
1154 contract_2=[2, 3], notcontract_2=[1], &
1155 filter_eps=ri_data%filter_eps, flop=nflop)
1156 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1157 CALL dbt_batched_contract_finalize(t_2c_r_split)
1158 CALL dbt_copy(t_3c_work_3(4), t_3c_work_3(1))
1159 END DO
1160 END DO
1161 DO i_spin = 1, nspins
1162 CALL dbt_batched_contract_finalize(rho_ao_t_sub(i_spin, b_img))
1163 END DO
1164 CALL timestop(handle2)
1165
1166 IF (i_loop == 2) cycle
1167 pref = 2.0_dp*pref
1168 IF (iatom == jatom .AND. b_img == 1) pref = 0.5_dp*pref
1169
1170 CALL timeset(routinen//"_2c_2", handle2)
1171 !Note that the derivatives are in atomic block format (not split)
1172 CALL dbt_copy(t_2c_r_split, t_2c_r, move_data=.true.)
1173
1174 CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, iatom, jatom, b_img, ri_data, qs_env, &
1175 blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
1176 dbcsr_template=dbcsr_template)
1177
1178 !We have to calculate: S^-1(iat) * R_PQ * S^-1(jat) to trace with HFX pot der
1179 ! + R_PQ * S^-1(jat) * pot^T to trace with S^(x) (iat)
1180 ! + pot^T * S^-1(iat) *R_PQ to trace with S^(x) (jat)
1181
1182 !Because 3c tensors are all precontracted with the inverse RI metric,
1183 !t_2c_R is currently implicitely multiplied by S^-1(iat) from the left
1184 !and S^-1(jat) from the right, directly in the proper format for the trace
1185 !with the HFX potential derivative
1186
1187 !Trace with HFX pot deriv, that we need to build first
1188 DO i_xyz = 1, 3
1189 CALL get_ext_2c_int(t_2c_der_pot(i_xyz), mat_der_pot_sub(:, i_xyz), iatom, jatom, &
1190 b_img, ri_data, qs_env, blacs_env_ext=blacs_env_sub, &
1191 para_env_ext=para_env_sub, dbcsr_template=dbcsr_template)
1192 END DO
1193
1194 IF (use_virial_prv) THEN
1195 CALL get_2c_der_force(force, t_2c_r, t_2c_der_pot, atom_of_kind, kind_of, &
1196 b_img, pref, ri_data, qs_env, work_virial, cell, particle_set)
1197 ELSE
1198 CALL get_2c_der_force(force, t_2c_r, t_2c_der_pot, atom_of_kind, kind_of, &
1199 b_img, pref, ri_data, qs_env)
1200 END IF
1201
1202 DO i_xyz = 1, 3
1203 CALL dbt_clear(t_2c_der_pot(i_xyz))
1204 END DO
1205
1206 !R_PQ * S^-1(jat) * pot^T (=A)
1207 CALL dbt_contract(1.0_dp, t_2c_metric(iatom), t_2c_r, & !get rid of implicit S^-1(iat)
1208 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1209 contract_1=[2], notcontract_1=[1], &
1210 contract_2=[1], notcontract_2=[2], &
1211 filter_eps=ri_data%filter_eps, flop=nflop)
1212 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1213 CALL dbt_contract(1.0_dp, t_2c_work(2), t_2c_work(1), &
1214 0.0_dp, t_2c_work(3), map_1=[1], map_2=[2], &
1215 contract_1=[2], notcontract_1=[1], &
1216 contract_2=[2], notcontract_2=[1], &
1217 filter_eps=ri_data%filter_eps, flop=nflop)
1218 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1219
1220 !With the RI bump function, things get more complex. M = (S|P)_D + B*(S|P)_OD*B
1221 !Calculate M^-1*B*A + A*B*M^-1 to contract with B^x. A is in t_2c_work(3)
1222 CALL dbt_contract(1.0_dp, t_2c_work(3), t_2c_binv(iatom), &
1223 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1224 contract_1=[2], notcontract_1=[1], &
1225 contract_2=[1], notcontract_2=[2], &
1226 filter_eps=ri_data%filter_eps, flop=nflop)
1227 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1228
1229 CALL dbt_contract(1.0_dp, t_2c_binv(iatom), t_2c_work(3), & !use transpose of B*M^-1 = M^-1*B
1230 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1231 contract_1=[1], notcontract_1=[2], &
1232 contract_2=[1], notcontract_2=[2], &
1233 filter_eps=ri_data%filter_eps, flop=nflop)
1234 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1235
1236 CALL dbt_copy(t_2c_work(2), t_2c_work(4), summation=.true.)
1237 CALL get_2c_bump_forces(force, t_2c_work(4), iatom, atom_of_kind, kind_of, pref, &
1238 ri_data, qs_env, work_virial)
1239
1240 !Calculate -M^-1*B*A*B*M^-1 to contracte with diagonal RI metric deriv. t_2c_work(2) holds A*B*M^-1
1241 CALL dbt_contract(1.0_dp, t_2c_binv(iatom), t_2c_work(2), &
1242 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1243 contract_1=[1], notcontract_1=[2], &
1244 contract_2=[1], notcontract_2=[2], &
1245 filter_eps=ri_data%filter_eps, flop=nflop)
1246 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1247
1248 IF (use_virial_prv) THEN
1249 CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1250 kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1251 diag=.true., offdiag=.false.)
1252 ELSE
1253 CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1254 kind_of, 1, -pref, ri_data, qs_env, diag=.true., offdiag=.false.)
1255 END IF
1256
1257 !Calculate -B*M^-1*B*A*B*M^-1*B to contract with off-diagonal RI metric derivs
1258 CALL dbt_copy(t_2c_work(4), t_2c_work(2))
1259 CALL apply_bump(t_2c_work(2), iatom, ri_data, qs_env, from_left=.true., from_right=.true.)
1260
1261 IF (use_virial_prv) THEN
1262 CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1263 kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1264 diag=.false., offdiag=.true.)
1265 ELSE
1266 CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1267 kind_of, 1, -pref, ri_data, qs_env, diag=.false., offdiag=.true.)
1268 END IF
1269
1270 !Calculate -O*B*M^-1*B*A*B*M^-1 - M^-1*B*A*B*M^-1*B*O, where O is off-diagonal integrals
1271 !t_2c_work(4) holds M^-1*B*A*B*M^-1, and exploit transpose of B*O (stored in t_2c_bint)
1272 CALL dbt_contract(1.0_dp, t_2c_work(4), t_2c_bint(iatom), &
1273 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1274 contract_1=[2], notcontract_1=[1], &
1275 contract_2=[1], notcontract_2=[2], &
1276 filter_eps=ri_data%filter_eps, flop=nflop)
1277 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1278
1279 CALL dbt_contract(1.0_dp, t_2c_bint(iatom), t_2c_work(4), &
1280 1.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1281 contract_1=[1], notcontract_1=[2], &
1282 contract_2=[1], notcontract_2=[2], &
1283 filter_eps=ri_data%filter_eps, flop=nflop)
1284 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1285
1286 CALL get_2c_bump_forces(force, t_2c_work(2), iatom, atom_of_kind, kind_of, -pref, &
1287 ri_data, qs_env, work_virial)
1288
1289 ! pot^T * S^-1(iat) * R_PQ (=A)
1290 CALL dbt_contract(1.0_dp, t_2c_work(1), t_2c_r, &
1291 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1292 contract_1=[1], notcontract_1=[2], &
1293 contract_2=[1], notcontract_2=[2], &
1294 filter_eps=ri_data%filter_eps, flop=nflop)
1295 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1296
1297 CALL dbt_contract(1.0_dp, t_2c_work(2), t_2c_metric(jatom), & !get rid of implicit S^-1(jat)
1298 0.0_dp, t_2c_work(3), map_1=[1], map_2=[2], &
1299 contract_1=[2], notcontract_1=[1], &
1300 contract_2=[1], notcontract_2=[2], &
1301 filter_eps=ri_data%filter_eps, flop=nflop)
1302 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1303
1304 !Do the same shenanigans with the S^(x) (jatom)
1305 !Calculate M^-1*B*A + A*B*M^-1 to contract with B^x. A is in t_2c_work(3)
1306 CALL dbt_contract(1.0_dp, t_2c_work(3), t_2c_binv(jatom), &
1307 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1308 contract_1=[2], notcontract_1=[1], &
1309 contract_2=[1], notcontract_2=[2], &
1310 filter_eps=ri_data%filter_eps, flop=nflop)
1311 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1312
1313 CALL dbt_contract(1.0_dp, t_2c_binv(jatom), t_2c_work(3), & !use transpose of B*M^-1 = M^-1*B
1314 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1315 contract_1=[1], notcontract_1=[2], &
1316 contract_2=[1], notcontract_2=[2], &
1317 filter_eps=ri_data%filter_eps, flop=nflop)
1318 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1319
1320 CALL dbt_copy(t_2c_work(2), t_2c_work(4), summation=.true.)
1321 CALL get_2c_bump_forces(force, t_2c_work(4), jatom, atom_of_kind, kind_of, pref, &
1322 ri_data, qs_env, work_virial)
1323
1324 !Calculate -M^-1*B*A*B*M^-1 to contracte with diagonal RI metric deriv. t_2c_work(2) holds A*B*M^-1
1325 CALL dbt_contract(1.0_dp, t_2c_binv(jatom), t_2c_work(2), &
1326 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1327 contract_1=[1], notcontract_1=[2], &
1328 contract_2=[1], notcontract_2=[2], &
1329 filter_eps=ri_data%filter_eps, flop=nflop)
1330 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1331
1332 IF (use_virial_prv) THEN
1333 CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1334 kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1335 diag=.true., offdiag=.false.)
1336 ELSE
1337 CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1338 kind_of, 1, -pref, ri_data, qs_env, diag=.true., offdiag=.false.)
1339 END IF
1340
1341 !Calculate -B*M^-1*B*A*B*M^-1*B to contract with off-diagonal RI metric derivs
1342 CALL dbt_copy(t_2c_work(4), t_2c_work(2))
1343 CALL apply_bump(t_2c_work(2), jatom, ri_data, qs_env, from_left=.true., from_right=.true.)
1344
1345 IF (use_virial_prv) THEN
1346 CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1347 kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1348 diag=.false., offdiag=.true.)
1349 ELSE
1350 CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1351 kind_of, 1, -pref, ri_data, qs_env, diag=.false., offdiag=.true.)
1352 END IF
1353
1354 !Calculate -O*B*M^-1*B*A*B*M^-1 - M^-1*B*A*B*M^-1*B*O, where O is off-diagonal integrals
1355 !t_2c_work(4) holds M^-1*B*A*B*M^-1, and exploit transpose of B*O (stored in t_2c_bint)
1356 CALL dbt_contract(1.0_dp, t_2c_work(4), t_2c_bint(jatom), &
1357 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1358 contract_1=[2], notcontract_1=[1], &
1359 contract_2=[1], notcontract_2=[2], &
1360 filter_eps=ri_data%filter_eps, flop=nflop)
1361 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1362
1363 CALL dbt_contract(1.0_dp, t_2c_bint(jatom), t_2c_work(4), &
1364 1.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1365 contract_1=[1], notcontract_1=[2], &
1366 contract_2=[1], notcontract_2=[2], &
1367 filter_eps=ri_data%filter_eps, flop=nflop)
1368 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1369
1370 CALL get_2c_bump_forces(force, t_2c_work(2), jatom, atom_of_kind, kind_of, -pref, &
1371 ri_data, qs_env, work_virial)
1372
1373 CALL timestop(handle2)
1374 END DO !iatom
1375 END DO !jatom
1376 END DO !b_img
1377
1378 IF (i_loop == 1) THEN
1379 CALL update_pattern_to_forces(force_pattern, sparsity_pattern, ngroups, ri_data, qs_env)
1380 END IF
1381 END DO !i_loop
1382
1383 CALL dbt_batched_contract_finalize(t_3c_work_3(1))
1384 CALL dbt_batched_contract_finalize(t_3c_work_3(2))
1385 CALL dbt_batched_contract_finalize(t_3c_work_3(3))
1386 CALL dbt_batched_contract_finalize(t_3c_work_2(1))
1387 CALL dbt_batched_contract_finalize(t_3c_work_2(2))
1388
1389 IF (use_virial_prv) THEN
1390 DO k_xyz = 1, 3
1391 DO j_xyz = 1, 3
1392 DO i_xyz = 1, 3
1393 virial%pv_fock_4c(i_xyz, j_xyz) = virial%pv_fock_4c(i_xyz, j_xyz) &
1394 + work_virial(i_xyz, k_xyz)*cell%hmat(j_xyz, k_xyz)
1395 END DO
1396 END DO
1397 END DO
1398 END IF
1399
1400 !End of subgroup parallelization
1401 CALL cp_blacs_env_release(blacs_env_sub)
1402 CALL para_env_sub%free()
1403 DEALLOCATE (para_env_sub)
1404
1405 CALL para_env%sync()
1406 t2 = m_walltime()
1407 ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
1408
1409 !clean-up
1410 IF (ASSOCIATED(dbcsr_template)) THEN
1411 CALL dbcsr_release(dbcsr_template)
1412 DEALLOCATE (dbcsr_template)
1413 END IF
1414 CALL dbt_destroy(t_2c_r)
1415 CALL dbt_destroy(t_2c_r_split)
1416 CALL dbt_destroy(t_2c_work(1))
1417 CALL dbt_destroy(t_2c_work(2))
1418 CALL dbt_destroy(t_2c_work(3))
1419 CALL dbt_destroy(t_2c_work(4))
1420 CALL dbt_destroy(t_2c_work(5))
1421 CALL dbt_destroy(t_3c_work_2(1))
1422 CALL dbt_destroy(t_3c_work_2(2))
1423 CALL dbt_destroy(t_3c_work_2(3))
1424 CALL dbt_destroy(t_3c_work_3(1))
1425 CALL dbt_destroy(t_3c_work_3(2))
1426 CALL dbt_destroy(t_3c_work_3(3))
1427 CALL dbt_destroy(t_3c_work_3(4))
1428 CALL dbt_destroy(t_3c_der_stack(1))
1429 CALL dbt_destroy(t_3c_der_stack(2))
1430 CALL dbt_destroy(t_3c_der_stack(3))
1431 CALL dbt_destroy(t_3c_der_stack(4))
1432 CALL dbt_destroy(t_3c_der_stack(5))
1433 CALL dbt_destroy(t_3c_der_stack(6))
1434 DO i_xyz = 1, 3
1435 CALL dbt_destroy(t_2c_der_pot(i_xyz))
1436 END DO
1437 DO iatom = 1, natom
1438 CALL dbt_destroy(t_2c_inv(iatom))
1439 CALL dbt_destroy(t_2c_binv(iatom))
1440 CALL dbt_destroy(t_2c_bint(iatom))
1441 CALL dbt_destroy(t_2c_metric(iatom))
1442 DO i_xyz = 1, 3
1443 CALL dbt_destroy(t_2c_der_metric_sub(iatom, i_xyz))
1444 END DO
1445 END DO
1446 DO i_img = 1, nimg
1447 CALL dbcsr_release(mat_2c_pot(i_img))
1448 DO i_spin = 1, nspins
1449 CALL dbt_destroy(rho_ao_t_sub(i_spin, i_img))
1450 CALL dbt_destroy(t_3c_apc_sub(i_spin, i_img))
1451 END DO
1452 END DO
1453 DO i_xyz = 1, 3
1454 DO i_img = 1, nimg
1455 CALL dbt_destroy(t_3c_der_ri_sub(i_img, i_xyz))
1456 CALL dbt_destroy(t_3c_der_ao_sub(i_img, i_xyz))
1457 CALL dbcsr_release(mat_der_pot_sub(i_img, i_xyz))
1458 END DO
1459 END DO
1460
1461 CALL timestop(handle)
1462
1463 END SUBROUTINE hfx_ri_update_forces_kp
1464
1465! **************************************************************************************************
1466!> \brief A routine the applies the RI bump matrix from the left and/or the right, given an input
1467!> matrix and the central RI atom. We assume atomic block sizes
1468!> \param t_2c_inout ...
1469!> \param atom_i ...
1470!> \param ri_data ...
1471!> \param qs_env ...
1472!> \param from_left ...
1473!> \param from_right ...
1474!> \param debump ...
1475! **************************************************************************************************
1476 SUBROUTINE apply_bump(t_2c_inout, atom_i, ri_data, qs_env, from_left, from_right, debump)
1477 TYPE(dbt_type), INTENT(INOUT) :: t_2c_inout
1478 INTEGER, INTENT(IN) :: atom_i
1479 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1480 TYPE(qs_environment_type), POINTER :: qs_env
1481 LOGICAL, INTENT(IN), OPTIONAL :: from_left, from_right, debump
1482
1483 INTEGER :: i_img, i_ri, iatom, ind(2), j_img, j_ri, &
1484 jatom, natom, nblks(2), nimg, nkind
1485 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1486 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1487 LOGICAL :: found, my_debump, my_left, my_right
1488 REAL(dp) :: bval, r0, r1, ri(3), rj(3), rref(3), &
1489 scoord(3)
1490 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
1491 TYPE(cell_type), POINTER :: cell
1492 TYPE(dbt_iterator_type) :: iter
1493 TYPE(kpoint_type), POINTER :: kpoints
1494 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1495 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1496
1497 NULLIFY (qs_kind_set, particle_set, kpoints, index_to_cell, cell_to_index, cell)
1498
1499 CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, cell=cell, &
1500 kpoints=kpoints, particle_set=particle_set)
1501 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1502
1503 my_debump = .false.
1504 IF (PRESENT(debump)) my_debump = debump
1505
1506 my_left = .false.
1507 IF (PRESENT(from_left)) my_left = from_left
1508
1509 my_right = .false.
1510 IF (PRESENT(from_right)) my_right = from_right
1511 cpassert(my_left .OR. my_right)
1512
1513 CALL dbt_get_info(t_2c_inout, nblks_total=nblks)
1514 cpassert(nblks(1) == ri_data%ncell_RI*natom)
1515 cpassert(nblks(2) == ri_data%ncell_RI*natom)
1516
1517 nimg = ri_data%nimg
1518
1519 !Loop over the RI cells and atoms, and apply bump accordingly
1520 r1 = ri_data%kp_RI_range
1521 r0 = ri_data%kp_bump_rad
1522 rref = pbc(particle_set(atom_i)%r, cell)
1523
1524!$OMP PARALLEL DEFAULT(NONE) SHARED(t_2c_inout,natom,ri_data,cell,particle_set,index_to_cell,my_left, &
1525!$OMP my_right,r0,r1,rref,my_debump) &
1526!$OMP PRIVATE(iter,ind,blk,found,i_RI,i_img,iatom,j_RI,j_img,jatom,scoord,ri,rj,bval)
1527 CALL dbt_iterator_start(iter, t_2c_inout)
1528 DO WHILE (dbt_iterator_blocks_left(iter))
1529 CALL dbt_iterator_next_block(iter, ind)
1530 CALL dbt_get_block(t_2c_inout, ind, blk, found)
1531 IF (.NOT. found) cycle
1532
1533 i_ri = (ind(1) - 1)/natom + 1
1534 i_img = ri_data%RI_cell_to_img(i_ri)
1535 iatom = ind(1) - (i_ri - 1)*natom
1536
1537 CALL real_to_scaled(scoord, pbc(particle_set(iatom)%r, cell), cell)
1538 CALL scaled_to_real(ri, scoord(:) + index_to_cell(:, i_img), cell)
1539
1540 j_ri = (ind(2) - 1)/natom + 1
1541 j_img = ri_data%RI_cell_to_img(j_ri)
1542 jatom = ind(2) - (j_ri - 1)*natom
1543
1544 CALL real_to_scaled(scoord, pbc(particle_set(jatom)%r, cell), cell)
1545 CALL scaled_to_real(rj, scoord(:) + index_to_cell(:, j_img), cell)
1546
1547 IF (.NOT. my_debump) THEN
1548 IF (my_left) blk(:, :) = blk(:, :)*bump(norm2(ri - rref), r0, r1)
1549 IF (my_right) blk(:, :) = blk(:, :)*bump(norm2(rj - rref), r0, r1)
1550 ELSE
1551 !Note: by construction, the bump function is never quite zero, as its range is the same
1552 ! as that of the extended RI basis (but we are safe)
1553 bval = bump(norm2(ri - rref), r0, r1)
1554 IF (my_left .AND. bval > epsilon(1.0_dp)) blk(:, :) = blk(:, :)/bval
1555 bval = bump(norm2(rj - rref), r0, r1)
1556 IF (my_right .AND. bval > epsilon(1.0_dp)) blk(:, :) = blk(:, :)/bval
1557 END IF
1558
1559 CALL dbt_put_block(t_2c_inout, ind, shape(blk), blk)
1560
1561 DEALLOCATE (blk)
1562 END DO
1563 CALL dbt_iterator_stop(iter)
1564!$OMP END PARALLEL
1565 CALL dbt_filter(t_2c_inout, ri_data%filter_eps)
1566
1567 END SUBROUTINE apply_bump
1568
1569! **************************************************************************************************
1570!> \brief A routine that calculates the forces due to the derivative of the bump function
1571!> \param force ...
1572!> \param t_2c_in ...
1573!> \param atom_i ...
1574!> \param atom_of_kind ...
1575!> \param kind_of ...
1576!> \param pref ...
1577!> \param ri_data ...
1578!> \param qs_env ...
1579!> \param work_virial ...
1580! **************************************************************************************************
1581 SUBROUTINE get_2c_bump_forces(force, t_2c_in, atom_i, atom_of_kind, kind_of, pref, ri_data, &
1582 qs_env, work_virial)
1583 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
1584 TYPE(dbt_type), INTENT(INOUT) :: t_2c_in
1585 INTEGER, INTENT(IN) :: atom_i
1586 INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of
1587 REAL(dp), INTENT(IN) :: pref
1588 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1589 TYPE(qs_environment_type), POINTER :: qs_env
1590 REAL(dp), DIMENSION(3, 3), INTENT(INOUT) :: work_virial
1591
1592 INTEGER :: i, i_img, i_ri, i_xyz, iat_of_kind, iatom, ikind, ind(2), j_img, j_ri, j_xyz, &
1593 jat_of_kind, jatom, jkind, natom, nblks(2), nimg, nkind
1594 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1595 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1596 LOGICAL :: found
1597 REAL(dp) :: new_force, r0, r1, ri(3), rj(3), &
1598 rref(3), scoord(3), x
1599 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
1600 TYPE(cell_type), POINTER :: cell
1601 TYPE(dbt_iterator_type) :: iter
1602 TYPE(kpoint_type), POINTER :: kpoints
1603 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1604 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1605
1606 NULLIFY (qs_kind_set, particle_set, kpoints, index_to_cell, cell_to_index, cell)
1607
1608 CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, cell=cell, &
1609 kpoints=kpoints, particle_set=particle_set)
1610 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1611
1612 CALL dbt_get_info(t_2c_in, nblks_total=nblks)
1613 cpassert(nblks(1) == ri_data%ncell_RI*natom)
1614 cpassert(nblks(2) == ri_data%ncell_RI*natom)
1615
1616 nimg = ri_data%nimg
1617
1618 !Loop over the RI cells and atoms, and apply bump accordingly
1619 r1 = ri_data%kp_RI_range
1620 r0 = ri_data%kp_bump_rad
1621 rref = pbc(particle_set(atom_i)%r, cell)
1622
1623 iat_of_kind = atom_of_kind(atom_i)
1624 ikind = kind_of(atom_i)
1625
1626!$OMP PARALLEL DEFAULT(NONE) SHARED(t_2c_in,natom,ri_data,cell,particle_set,index_to_cell,pref, &
1627!$OMP force,r0,r1,rref,atom_of_kind,kind_of,iat_of_kind,ikind,work_virial) &
1628!$OMP PRIVATE(iter,ind,blk,found,i_RI,i_img,iatom,j_RI,j_img,jatom,scoord,ri,rj,jkind,jat_of_kind, &
1629!$OMP new_force,i_xyz,i,x,j_xyz)
1630 CALL dbt_iterator_start(iter, t_2c_in)
1631 DO WHILE (dbt_iterator_blocks_left(iter))
1632 CALL dbt_iterator_next_block(iter, ind)
1633 IF (ind(1) .NE. ind(2)) cycle !bump matrix is diagonal
1634
1635 CALL dbt_get_block(t_2c_in, ind, blk, found)
1636 IF (.NOT. found) cycle
1637
1638 !bump is a function of x = SQRT((R - Rref)^2). We refer to R as jatom, and Rref as atom_i
1639 j_ri = (ind(2) - 1)/natom + 1
1640 j_img = ri_data%RI_cell_to_img(j_ri)
1641 jatom = ind(2) - (j_ri - 1)*natom
1642 jat_of_kind = atom_of_kind(jatom)
1643 jkind = kind_of(jatom)
1644
1645 CALL real_to_scaled(scoord, pbc(particle_set(jatom)%r, cell), cell)
1646 CALL scaled_to_real(rj, scoord(:) + index_to_cell(:, j_img), cell)
1647 x = norm2(rj - rref)
1648 IF (x < r0 .OR. x > r1) cycle
1649
1650 new_force = 0.0_dp
1651 DO i = 1, SIZE(blk, 1)
1652 new_force = new_force + blk(i, i)
1653 END DO
1654 new_force = pref*new_force*dbump(x, r0, r1)
1655
1656 !x = SQRT((R - Rref)^2), so we multiply by dx/dR and dx/dRref
1657 DO i_xyz = 1, 3
1658 !Force acting on second atom
1659!$OMP ATOMIC
1660 force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) + &
1661 new_force*(rj(i_xyz) - rref(i_xyz))/x
1662
1663 !virial acting on second atom
1664 CALL real_to_scaled(scoord, rj, cell)
1665 DO j_xyz = 1, 3
1666!$OMP ATOMIC
1667 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) &
1668 + new_force*scoord(j_xyz)*(rj(i_xyz) - rref(i_xyz))/x
1669 END DO
1670
1671 !Force acting on reference atom, defining the RI basis
1672!$OMP ATOMIC
1673 force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) - &
1674 new_force*(rj(i_xyz) - rref(i_xyz))/x
1675
1676 !virial of ref atom
1677 CALL real_to_scaled(scoord, rref, cell)
1678 DO j_xyz = 1, 3
1679!$OMP ATOMIC
1680 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) &
1681 - new_force*scoord(j_xyz)*(rj(i_xyz) - rref(i_xyz))/x
1682 END DO
1683 END DO !i_xyz
1684
1685 DEALLOCATE (blk)
1686 END DO
1687 CALL dbt_iterator_stop(iter)
1688!$OMP END PARALLEL
1689
1690 END SUBROUTINE get_2c_bump_forces
1691
1692! **************************************************************************************************
1693!> \brief The bumb function as defined by Juerg
1694!> \param x ...
1695!> \param r0 ...
1696!> \param r1 ...
1697!> \return ...
1698! **************************************************************************************************
1699 FUNCTION bump(x, r0, r1) RESULT(b)
1700 REAL(dp), INTENT(IN) :: x, r0, r1
1701 REAL(dp) :: b
1702
1703 REAL(dp) :: r
1704
1705 !Head-Gordon
1706 !b = 1.0_dp/(1.0_dp+EXP((r1-r0)/(r1-x)-(r1-r0)/(x-r0)))
1707 !Juerg
1708 r = (x - r0)/(r1 - r0)
1709 b = -6.0_dp*r**5 + 15.0_dp*r**4 - 10.0_dp*r**3 + 1.0_dp
1710 IF (x .GE. r1) b = 0.0_dp
1711 IF (x .LE. r0) b = 1.0_dp
1712
1713 END FUNCTION bump
1714
1715! **************************************************************************************************
1716!> \brief The derivative of the bump function
1717!> \param x ...
1718!> \param r0 ...
1719!> \param r1 ...
1720!> \return ...
1721! **************************************************************************************************
1722 FUNCTION dbump(x, r0, r1) RESULT(b)
1723 REAL(dp), INTENT(IN) :: x, r0, r1
1724 REAL(dp) :: b
1725
1726 REAL(dp) :: r
1727
1728 r = (x - r0)/(r1 - r0)
1729 b = (-30.0_dp*r**4 + 60.0_dp*r**3 - 30.0_dp*r**2)/(r1 - r0)
1730 IF (x .GE. r1) b = 0.0_dp
1731 IF (x .LE. r0) b = 0.0_dp
1732
1733 END FUNCTION dbump
1734
1735! **************************************************************************************************
1736!> \brief return the cell index a+c corresponding to given cell index i and b, with i = a+c-b
1737!> \param i_index ...
1738!> \param b_index ...
1739!> \param qs_env ...
1740!> \return ...
1741! **************************************************************************************************
1742 FUNCTION get_apc_index_from_ib(i_index, b_index, qs_env) RESULT(apc_index)
1743 INTEGER, INTENT(IN) :: i_index, b_index
1744 TYPE(qs_environment_type), POINTER :: qs_env
1745 INTEGER :: apc_index
1746
1747 INTEGER, DIMENSION(3) :: cell_apc
1748 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1749 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1750 TYPE(kpoint_type), POINTER :: kpoints
1751
1752 CALL get_qs_env(qs_env, kpoints=kpoints)
1753 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1754
1755 !i = a+c-b => a+c = i+b
1756 cell_apc(:) = index_to_cell(:, i_index) + index_to_cell(:, b_index)
1757
1758 IF (any([cell_apc(1), cell_apc(2), cell_apc(3)] < lbound(cell_to_index)) .OR. &
1759 any([cell_apc(1), cell_apc(2), cell_apc(3)] > ubound(cell_to_index))) THEN
1760
1761 apc_index = 0
1762 ELSE
1763 apc_index = cell_to_index(cell_apc(1), cell_apc(2), cell_apc(3))
1764 END IF
1765
1766 END FUNCTION get_apc_index_from_ib
1767
1768! **************************************************************************************************
1769!> \brief return the cell index i corresponding to the summ of cell_a and cell_c
1770!> \param a_index ...
1771!> \param c_index ...
1772!> \param qs_env ...
1773!> \return ...
1774! **************************************************************************************************
1775 FUNCTION get_apc_index(a_index, c_index, qs_env) RESULT(i_index)
1776 INTEGER, INTENT(IN) :: a_index, c_index
1777 TYPE(qs_environment_type), POINTER :: qs_env
1778 INTEGER :: i_index
1779
1780 INTEGER, DIMENSION(3) :: cell_i
1781 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1782 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1783 TYPE(kpoint_type), POINTER :: kpoints
1784
1785 CALL get_qs_env(qs_env, kpoints=kpoints)
1786 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1787
1788 cell_i(:) = index_to_cell(:, a_index) + index_to_cell(:, c_index)
1789
1790 IF (any([cell_i(1), cell_i(2), cell_i(3)] < lbound(cell_to_index)) .OR. &
1791 any([cell_i(1), cell_i(2), cell_i(3)] > ubound(cell_to_index))) THEN
1792
1793 i_index = 0
1794 ELSE
1795 i_index = cell_to_index(cell_i(1), cell_i(2), cell_i(3))
1796 END IF
1797
1798 END FUNCTION get_apc_index
1799
1800! **************************************************************************************************
1801!> \brief return the cell index i corresponding to the summ of cell_a + cell_c - cell_b
1802!> \param apc_index ...
1803!> \param b_index ...
1804!> \param qs_env ...
1805!> \return ...
1806! **************************************************************************************************
1807 FUNCTION get_i_index(apc_index, b_index, qs_env) RESULT(i_index)
1808 INTEGER, INTENT(IN) :: apc_index, b_index
1809 TYPE(qs_environment_type), POINTER :: qs_env
1810 INTEGER :: i_index
1811
1812 INTEGER, DIMENSION(3) :: cell_i
1813 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1814 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1815 TYPE(kpoint_type), POINTER :: kpoints
1816
1817 CALL get_qs_env(qs_env, kpoints=kpoints)
1818 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1819
1820 cell_i(:) = index_to_cell(:, apc_index) - index_to_cell(:, b_index)
1821
1822 IF (any([cell_i(1), cell_i(2), cell_i(3)] < lbound(cell_to_index)) .OR. &
1823 any([cell_i(1), cell_i(2), cell_i(3)] > ubound(cell_to_index))) THEN
1824
1825 i_index = 0
1826 ELSE
1827 i_index = cell_to_index(cell_i(1), cell_i(2), cell_i(3))
1828 END IF
1829
1830 END FUNCTION get_i_index
1831
1832! **************************************************************************************************
1833!> \brief A routine that returns all allowed a,c pairs such that a+c images corresponds to the value
1834!> of the apc_index input. Takes into account that image a corresponds to 3c integrals, which
1835!> are ordered in their own way
1836!> \param ac_pairs ...
1837!> \param apc_index ...
1838!> \param ri_data ...
1839!> \param qs_env ...
1840! **************************************************************************************************
1841 SUBROUTINE get_ac_pairs(ac_pairs, apc_index, ri_data, qs_env)
1842 INTEGER, DIMENSION(:, :), INTENT(INOUT) :: ac_pairs
1843 INTEGER, INTENT(IN) :: apc_index
1844 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1845 TYPE(qs_environment_type), POINTER :: qs_env
1846
1847 INTEGER :: a_index, actual_img, c_index, nimg
1848
1849 nimg = SIZE(ac_pairs, 1)
1850
1851 ac_pairs(:, :) = 0
1852!$OMP PARALLEL DO DEFAULT(NONE) SHARED(ac_pairs,nimg,ri_data,qs_env,apc_index) &
1853!$OMP PRIVATE(a_index,actual_img,c_index)
1854 DO a_index = 1, nimg
1855 actual_img = ri_data%idx_to_img(a_index)
1856 !c = a+c - a
1857 c_index = get_i_index(apc_index, actual_img, qs_env)
1858 ac_pairs(a_index, 1) = a_index
1859 ac_pairs(a_index, 2) = c_index
1860 END DO
1861!$OMP END PARALLEL DO
1862
1863 END SUBROUTINE get_ac_pairs
1864
1865! **************************************************************************************************
1866!> \brief A routine that returns all allowed i,a+c pairs such that, for the given value of b, we have
1867!> i = a+c-b. Takes into account that image i corrsponds to the 3c ints, which are ordered in
1868!> their own way
1869!> \param iapc_pairs ...
1870!> \param b_index ...
1871!> \param ri_data ...
1872!> \param qs_env ...
1873!> \param actual_i_img ...
1874! **************************************************************************************************
1875 SUBROUTINE get_iapc_pairs(iapc_pairs, b_index, ri_data, qs_env, actual_i_img)
1876 INTEGER, DIMENSION(:, :), INTENT(INOUT) :: iapc_pairs
1877 INTEGER, INTENT(IN) :: b_index
1878 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1879 TYPE(qs_environment_type), POINTER :: qs_env
1880 INTEGER, DIMENSION(:), INTENT(INOUT), OPTIONAL :: actual_i_img
1881
1882 INTEGER :: actual_img, apc_index, i_index, nimg
1883
1884 nimg = SIZE(iapc_pairs, 1)
1885 IF (PRESENT(actual_i_img)) actual_i_img(:) = 0
1886
1887 iapc_pairs(:, :) = 0
1888!$OMP PARALLEL DO DEFAULT(NONE) SHARED(iapc_pairs,nimg,ri_data,qs_env,b_index,actual_i_img) &
1889!$OMP PRIVATE(i_index,actual_img,apc_index)
1890 DO i_index = 1, nimg
1891 actual_img = ri_data%idx_to_img(i_index)
1892 apc_index = get_apc_index_from_ib(actual_img, b_index, qs_env)
1893 IF (apc_index == 0) cycle
1894 iapc_pairs(i_index, 1) = i_index
1895 iapc_pairs(i_index, 2) = apc_index
1896 IF (PRESENT(actual_i_img)) actual_i_img(i_index) = actual_img
1897 END DO
1898
1899 END SUBROUTINE get_iapc_pairs
1900
1901! **************************************************************************************************
1902!> \brief A function that, given a cell index a, returun the index corresponding to -a, and zero if
1903!> if out of bounds
1904!> \param a_index ...
1905!> \param qs_env ...
1906!> \return ...
1907! **************************************************************************************************
1908 FUNCTION get_opp_index(a_index, qs_env) RESULT(opp_index)
1909 INTEGER, INTENT(IN) :: a_index
1910 TYPE(qs_environment_type), POINTER :: qs_env
1911 INTEGER :: opp_index
1912
1913 INTEGER, DIMENSION(3) :: opp_cell
1914 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1915 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1916 TYPE(kpoint_type), POINTER :: kpoints
1917
1918 NULLIFY (kpoints, cell_to_index, index_to_cell)
1919
1920 CALL get_qs_env(qs_env, kpoints=kpoints)
1921 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1922
1923 opp_cell(:) = -index_to_cell(:, a_index)
1924
1925 IF (any([opp_cell(1), opp_cell(2), opp_cell(3)] < lbound(cell_to_index)) .OR. &
1926 any([opp_cell(1), opp_cell(2), opp_cell(3)] > ubound(cell_to_index))) THEN
1927
1928 opp_index = 0
1929 ELSE
1930 opp_index = cell_to_index(opp_cell(1), opp_cell(2), opp_cell(3))
1931 END IF
1932
1933 END FUNCTION get_opp_index
1934
1935! **************************************************************************************************
1936!> \brief A routine that returns the actual non-symemtric density matrix for each image, by Fourier
1937!> transforming the kpoint density matrix
1938!> \param rho_ao_t ...
1939!> \param rho_ao ...
1940!> \param scale_prev_p ...
1941!> \param ri_data ...
1942!> \param qs_env ...
1943! **************************************************************************************************
1944 SUBROUTINE get_pmat_images(rho_ao_t, rho_ao, scale_prev_p, ri_data, qs_env)
1945 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: rho_ao_t
1946 TYPE(dbcsr_p_type), DIMENSION(:, :), INTENT(INOUT) :: rho_ao
1947 REAL(dp), INTENT(IN) :: scale_prev_p
1948 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1949 TYPE(qs_environment_type), POINTER :: qs_env
1950
1951 INTEGER :: cell_j(3), i_img, i_spin, iatom, icol, &
1952 irow, j_img, jatom, mi_img, mj_img, &
1953 nimg, nspins
1954 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1955 LOGICAL :: found
1956 REAL(dp) :: fac
1957 REAL(dp), DIMENSION(:, :), POINTER :: pblock, pblock_desymm
1958 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: matrix_ks, rho_desymm
1959 TYPE(dbt_type) :: tmp
1960 TYPE(dft_control_type), POINTER :: dft_control
1961 TYPE(kpoint_type), POINTER :: kpoints
1963 DIMENSION(:), POINTER :: nl_iterator
1964 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
1965 POINTER :: sab_nl, sab_nl_nosym
1966 TYPE(qs_scf_env_type), POINTER :: scf_env
1967
1968 NULLIFY (rho_desymm, kpoints, sab_nl_nosym, scf_env, matrix_ks, dft_control, &
1969 sab_nl, nl_iterator, cell_to_index, pblock, pblock_desymm)
1970
1971 CALL get_qs_env(qs_env, kpoints=kpoints, scf_env=scf_env, matrix_ks_kp=matrix_ks, dft_control=dft_control)
1972 CALL get_kpoint_info(kpoints, sab_nl_nosym=sab_nl_nosym, cell_to_index=cell_to_index, sab_nl=sab_nl)
1973
1974 IF (dft_control%do_admm) THEN
1975 CALL get_admm_env(qs_env%admm_env, matrix_ks_aux_fit_kp=matrix_ks)
1976 END IF
1977
1978 nspins = SIZE(matrix_ks, 1)
1979 nimg = ri_data%nimg
1980
1981 ALLOCATE (rho_desymm(nspins, nimg))
1982 DO i_img = 1, nimg
1983 DO i_spin = 1, nspins
1984 ALLOCATE (rho_desymm(i_spin, i_img)%matrix)
1985 CALL dbcsr_create(rho_desymm(i_spin, i_img)%matrix, template=matrix_ks(i_spin, i_img)%matrix, &
1986 matrix_type=dbcsr_type_no_symmetry)
1987 CALL cp_dbcsr_alloc_block_from_nbl(rho_desymm(i_spin, i_img)%matrix, sab_nl_nosym)
1988 END DO
1989 END DO
1990 CALL dbt_create(rho_desymm(1, 1)%matrix, tmp)
1991
1992 !We transfor the symmtric typed (but not actually symmetric: P_ab^i = P_ba^-i) real-spaced density
1993 !matrix into proper non-symemtric ones (using the same nl for consistency)
1994 CALL neighbor_list_iterator_create(nl_iterator, sab_nl)
1995 DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
1996 CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, cell=cell_j)
1997 j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
1998 IF (j_img > nimg .OR. j_img < 1) cycle
1999
2000 fac = 1.0_dp
2001 IF (iatom == jatom) fac = 0.5_dp
2002 mj_img = get_opp_index(j_img, qs_env)
2003 !if no opposite image, then no sum of P^j + P^-j => need full diag
2004 IF (mj_img == 0) fac = 1.0_dp
2005
2006 irow = iatom
2007 icol = jatom
2008 IF (iatom > jatom) THEN
2009 !because symmetric nl. Value for atom pair i,j is actually stored in j,i if i > j
2010 irow = jatom
2011 icol = iatom
2012 END IF
2013
2014 DO i_spin = 1, nspins
2015 CALL dbcsr_get_block_p(rho_ao(i_spin, j_img)%matrix, irow, icol, pblock, found)
2016 IF (.NOT. found) cycle
2017
2018 !distribution of symm and non-symm matrix match in that way
2019 CALL dbcsr_get_block_p(rho_desymm(i_spin, j_img)%matrix, iatom, jatom, pblock_desymm, found)
2020 IF (.NOT. found) cycle
2021
2022 IF (iatom > jatom) THEN
2023 pblock_desymm(:, :) = fac*transpose(pblock(:, :))
2024 ELSE
2025 pblock_desymm(:, :) = fac*pblock(:, :)
2026 END IF
2027 END DO
2028 END DO
2029 CALL neighbor_list_iterator_release(nl_iterator)
2030
2031 DO i_img = 1, nimg
2032 DO i_spin = 1, nspins
2033 CALL dbt_scale(rho_ao_t(i_spin, i_img), scale_prev_p)
2034
2035 CALL dbt_copy_matrix_to_tensor(rho_desymm(i_spin, i_img)%matrix, tmp)
2036 CALL dbt_copy(tmp, rho_ao_t(i_spin, i_img), summation=.true., move_data=.true.)
2037
2038 !symmetrize by addin transpose of opp img
2039 mi_img = get_opp_index(i_img, qs_env)
2040 IF (mi_img > 0 .AND. mi_img .LE. nimg) THEN
2041 CALL dbt_copy_matrix_to_tensor(rho_desymm(i_spin, mi_img)%matrix, tmp)
2042 CALL dbt_copy(tmp, rho_ao_t(i_spin, i_img), order=[2, 1], summation=.true., move_data=.true.)
2043 END IF
2044 CALL dbt_filter(rho_ao_t(i_spin, i_img), ri_data%filter_eps)
2045 END DO
2046 END DO
2047
2048 DO i_img = 1, nimg
2049 DO i_spin = 1, nspins
2050 CALL dbcsr_release(rho_desymm(i_spin, i_img)%matrix)
2051 DEALLOCATE (rho_desymm(i_spin, i_img)%matrix)
2052 END DO
2053 END DO
2054
2055 CALL dbt_destroy(tmp)
2056 DEALLOCATE (rho_desymm)
2057
2058 END SUBROUTINE get_pmat_images
2059
2060! **************************************************************************************************
2061!> \brief A routine that, given a cell index b and atom indices ij, returns a 2c tensor with the HFX
2062!> potential (P_i^0|Q_j^b), within the extended RI basis
2063!> \param t_2c_pot ...
2064!> \param mat_orig ...
2065!> \param atom_i ...
2066!> \param atom_j ...
2067!> \param img_b ...
2068!> \param ri_data ...
2069!> \param qs_env ...
2070!> \param do_inverse ...
2071!> \param para_env_ext ...
2072!> \param blacs_env_ext ...
2073!> \param dbcsr_template ...
2074!> \param off_diagonal ...
2075!> \param skip_inverse ...
2076! **************************************************************************************************
2077 SUBROUTINE get_ext_2c_int(t_2c_pot, mat_orig, atom_i, atom_j, img_b, ri_data, qs_env, do_inverse, &
2078 para_env_ext, blacs_env_ext, dbcsr_template, off_diagonal, skip_inverse)
2079 TYPE(dbt_type), INTENT(INOUT) :: t_2c_pot
2080 TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: mat_orig
2081 INTEGER, INTENT(IN) :: atom_i, atom_j, img_b
2082 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2083 TYPE(qs_environment_type), POINTER :: qs_env
2084 LOGICAL, INTENT(IN), OPTIONAL :: do_inverse
2085 TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env_ext
2086 TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: blacs_env_ext
2087 TYPE(dbcsr_type), OPTIONAL, POINTER :: dbcsr_template
2088 LOGICAL, INTENT(IN), OPTIONAL :: off_diagonal, skip_inverse
2089
2090 CHARACTER(LEN=*), PARAMETER :: routinen = 'get_ext_2c_int'
2091
2092 INTEGER :: group, handle, handle2, i_img, i_ri, iatom, iblk, ikind, img_tot, j_img, j_ri, &
2093 jatom, jblk, jkind, n_dependent, natom, nblks_ri, nimg, nkind
2094 INTEGER, ALLOCATABLE, DIMENSION(:) :: dist1, dist2
2095 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: present_atoms_i, present_atoms_j
2096 INTEGER, DIMENSION(3) :: cell_b, cell_i, cell_j, cell_tot
2097 INTEGER, DIMENSION(:), POINTER :: col_dist, col_dist_ext, ri_blk_size_ext, &
2098 row_dist, row_dist_ext
2099 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell, pgrid
2100 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
2101 LOGICAL :: do_inverse_prv, found, my_offd, &
2102 skip_inverse_prv, use_template
2103 REAL(dp) :: bfac, dij, r0, r1, threshold
2104 REAL(dp), DIMENSION(3) :: ri, rij, rj, rref, scoord
2105 REAL(dp), DIMENSION(:, :), POINTER :: pblock
2106 TYPE(cell_type), POINTER :: cell
2107 TYPE(cp_blacs_env_type), POINTER :: blacs_env
2108 TYPE(dbcsr_distribution_type) :: dbcsr_dist, dbcsr_dist_ext
2109 TYPE(dbcsr_iterator_type) :: dbcsr_iter
2110 TYPE(dbcsr_type) :: work, work_tight, work_tight_inv
2111 TYPE(dbt_type) :: t_2c_tmp
2112 TYPE(distribution_2d_type), POINTER :: dist_2d
2113 TYPE(gto_basis_set_p_type), ALLOCATABLE, &
2114 DIMENSION(:), TARGET :: basis_set_ri
2115 TYPE(kpoint_type), POINTER :: kpoints
2116 TYPE(mp_para_env_type), POINTER :: para_env
2118 DIMENSION(:), POINTER :: nl_iterator
2119 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
2120 POINTER :: nl_2c
2121 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
2122 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
2123
2124 NULLIFY (qs_kind_set, nl_2c, nl_iterator, cell, kpoints, cell_to_index, index_to_cell, dist_2d, &
2125 para_env, pblock, blacs_env, particle_set, col_dist, row_dist, pgrid, &
2126 col_dist_ext, row_dist_ext)
2127
2128 CALL timeset(routinen, handle)
2129
2130 !Idea: run over the neighbor list once for i and once for j, and record in which cell the MIC
2131 ! atoms are. Then loop over the atoms and only take the pairs the we need
2132
2133 CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, cell=cell, &
2134 kpoints=kpoints, para_env=para_env, blacs_env=blacs_env, particle_set=particle_set)
2135 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
2136
2137 do_inverse_prv = .false.
2138 IF (PRESENT(do_inverse)) do_inverse_prv = do_inverse
2139 IF (do_inverse_prv) THEN
2140 cpassert(atom_i == atom_j)
2141 END IF
2142
2143 skip_inverse_prv = .false.
2144 IF (PRESENT(skip_inverse)) skip_inverse_prv = skip_inverse
2145
2146 my_offd = .false.
2147 IF (PRESENT(off_diagonal)) my_offd = off_diagonal
2148
2149 IF (PRESENT(para_env_ext)) para_env => para_env_ext
2150 IF (PRESENT(blacs_env_ext)) blacs_env => blacs_env_ext
2151
2152 nimg = SIZE(mat_orig)
2153
2154 CALL timeset(routinen//"_nl_iter", handle2)
2155
2156 !create our own dist_2d in the subgroup
2157 ALLOCATE (dist1(natom), dist2(natom))
2158 DO iatom = 1, natom
2159 dist1(iatom) = mod(iatom, blacs_env%num_pe(1))
2160 dist2(iatom) = mod(iatom, blacs_env%num_pe(2))
2161 END DO
2162 CALL distribution_2d_create(dist_2d, dist1, dist2, nkind, particle_set, blacs_env_ext=blacs_env)
2163
2164 ALLOCATE (basis_set_ri(nkind))
2165 CALL basis_set_list_setup(basis_set_ri, ri_data%ri_basis_type, qs_kind_set)
2166
2167 CALL build_2c_neighbor_lists(nl_2c, basis_set_ri, basis_set_ri, ri_data%ri_metric, &
2168 "HFX_2c_nl_RI", qs_env, sym_ij=.false., dist_2d=dist_2d)
2169
2170 ALLOCATE (present_atoms_i(natom, nimg), present_atoms_j(natom, nimg))
2171 present_atoms_i = 0
2172 present_atoms_j = 0
2173
2174 CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
2175 DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
2176 CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, r=rij, cell=cell_j, &
2177 ikind=ikind, jkind=jkind)
2178
2179 dij = norm2(rij)
2180
2181 j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
2182 IF (j_img > nimg .OR. j_img < 1) cycle
2183
2184 IF (iatom == atom_i .AND. dij .LE. ri_data%kp_RI_range) present_atoms_i(jatom, j_img) = 1
2185 IF (iatom == atom_j .AND. dij .LE. ri_data%kp_RI_range) present_atoms_j(jatom, j_img) = 1
2186 END DO
2187 CALL neighbor_list_iterator_release(nl_iterator)
2188 CALL release_neighbor_list_sets(nl_2c)
2189 CALL distribution_2d_release(dist_2d)
2190 CALL timestop(handle2)
2191
2192 CALL para_env%sum(present_atoms_i)
2193 CALL para_env%sum(present_atoms_j)
2194
2195 !Need to build a work matrix with matching distribution to mat_orig
2196 !If template is provided, use it. If not, we create it.
2197 use_template = .false.
2198 IF (PRESENT(dbcsr_template)) THEN
2199 IF (ASSOCIATED(dbcsr_template)) use_template = .true.
2200 END IF
2201
2202 IF (use_template) THEN
2203 CALL dbcsr_create(work, template=dbcsr_template)
2204 ELSE
2205 CALL dbcsr_get_info(mat_orig(1), distribution=dbcsr_dist)
2206 CALL dbcsr_distribution_get(dbcsr_dist, row_dist=row_dist, col_dist=col_dist, group=group, pgrid=pgrid)
2207 ALLOCATE (row_dist_ext(ri_data%ncell_RI*natom), col_dist_ext(ri_data%ncell_RI*natom))
2208 ALLOCATE (ri_blk_size_ext(ri_data%ncell_RI*natom))
2209 DO i_ri = 1, ri_data%ncell_RI
2210 row_dist_ext((i_ri - 1)*natom + 1:i_ri*natom) = row_dist(:)
2211 col_dist_ext((i_ri - 1)*natom + 1:i_ri*natom) = col_dist(:)
2212 ri_blk_size_ext((i_ri - 1)*natom + 1:i_ri*natom) = ri_data%bsizes_RI(:)
2213 END DO
2214
2215 CALL dbcsr_distribution_new(dbcsr_dist_ext, group=group, pgrid=pgrid, &
2216 row_dist=row_dist_ext, col_dist=col_dist_ext)
2217 CALL dbcsr_create(work, dist=dbcsr_dist_ext, name="RI_ext", matrix_type=dbcsr_type_no_symmetry, &
2218 row_blk_size=ri_blk_size_ext, col_blk_size=ri_blk_size_ext)
2219 CALL dbcsr_distribution_release(dbcsr_dist_ext)
2220 DEALLOCATE (col_dist_ext, row_dist_ext, ri_blk_size_ext)
2221
2222 IF (PRESENT(dbcsr_template)) THEN
2223 ALLOCATE (dbcsr_template)
2224 CALL dbcsr_create(dbcsr_template, template=work)
2225 END IF
2226 END IF !use_template
2227
2228 cell_b(:) = index_to_cell(:, img_b)
2229 DO i_img = 1, nimg
2230 i_ri = ri_data%img_to_RI_cell(i_img)
2231 IF (i_ri == 0) cycle
2232 cell_i(:) = index_to_cell(:, i_img)
2233 DO j_img = 1, nimg
2234 j_ri = ri_data%img_to_RI_cell(j_img)
2235 IF (j_ri == 0) cycle
2236 cell_j(:) = index_to_cell(:, j_img)
2237 cell_tot = cell_j - cell_i + cell_b
2238
2239 IF (any([cell_tot(1), cell_tot(2), cell_tot(3)] < lbound(cell_to_index)) .OR. &
2240 any([cell_tot(1), cell_tot(2), cell_tot(3)] > ubound(cell_to_index))) cycle
2241 img_tot = cell_to_index(cell_tot(1), cell_tot(2), cell_tot(3))
2242 IF (img_tot > nimg .OR. img_tot < 1) cycle
2243
2244 CALL dbcsr_iterator_start(dbcsr_iter, mat_orig(img_tot))
2245 DO WHILE (dbcsr_iterator_blocks_left(dbcsr_iter))
2246 CALL dbcsr_iterator_next_block(dbcsr_iter, row=iatom, column=jatom)
2247 IF (present_atoms_i(iatom, i_img) == 0) cycle
2248 IF (present_atoms_j(jatom, j_img) == 0) cycle
2249 IF (my_offd .AND. (i_ri - 1)*natom + iatom == (j_ri - 1)*natom + jatom) cycle
2250
2251 CALL dbcsr_get_block_p(mat_orig(img_tot), iatom, jatom, pblock, found)
2252 IF (.NOT. found) cycle
2253
2254 CALL dbcsr_put_block(work, (i_ri - 1)*natom + iatom, (j_ri - 1)*natom + jatom, pblock)
2255
2256 END DO
2257 CALL dbcsr_iterator_stop(dbcsr_iter)
2258
2259 END DO !j_img
2260 END DO !i_img
2261 CALL dbcsr_finalize(work)
2262
2263 IF (do_inverse_prv) THEN
2264
2265 r1 = ri_data%kp_RI_range
2266 r0 = ri_data%kp_bump_rad
2267
2268 !Because there are a lot of empty rows/cols in work, we need to get rid of them for inversion
2269 nblks_ri = sum(present_atoms_i)
2270 ALLOCATE (col_dist_ext(nblks_ri), row_dist_ext(nblks_ri), ri_blk_size_ext(nblks_ri))
2271 iblk = 0
2272 DO i_img = 1, nimg
2273 i_ri = ri_data%img_to_RI_cell(i_img)
2274 IF (i_ri == 0) cycle
2275 DO iatom = 1, natom
2276 IF (present_atoms_i(iatom, i_img) == 0) cycle
2277 iblk = iblk + 1
2278 col_dist_ext(iblk) = col_dist(iatom)
2279 row_dist_ext(iblk) = row_dist(iatom)
2280 ri_blk_size_ext(iblk) = ri_data%bsizes_RI(iatom)
2281 END DO
2282 END DO
2283
2284 CALL dbcsr_distribution_new(dbcsr_dist_ext, group=group, pgrid=pgrid, &
2285 row_dist=row_dist_ext, col_dist=col_dist_ext)
2286 CALL dbcsr_create(work_tight, dist=dbcsr_dist_ext, name="RI_ext", matrix_type=dbcsr_type_no_symmetry, &
2287 row_blk_size=ri_blk_size_ext, col_blk_size=ri_blk_size_ext)
2288 CALL dbcsr_create(work_tight_inv, dist=dbcsr_dist_ext, name="RI_ext", matrix_type=dbcsr_type_no_symmetry, &
2289 row_blk_size=ri_blk_size_ext, col_blk_size=ri_blk_size_ext)
2290 CALL dbcsr_distribution_release(dbcsr_dist_ext)
2291 DEALLOCATE (col_dist_ext, row_dist_ext, ri_blk_size_ext)
2292
2293 !We apply a bump function to the RI metric inverse for smooth RI basis extension:
2294 ! S^-1 = B * ((P|Q)_D + B*(P|Q)_OD*B)^-1 * B, with D block-diagonal blocks and OD off-diagonal
2295 rref = pbc(particle_set(atom_i)%r, cell)
2296
2297 iblk = 0
2298 DO i_img = 1, nimg
2299 i_ri = ri_data%img_to_RI_cell(i_img)
2300 IF (i_ri == 0) cycle
2301 DO iatom = 1, natom
2302 IF (present_atoms_i(iatom, i_img) == 0) cycle
2303 iblk = iblk + 1
2304
2305 CALL real_to_scaled(scoord, pbc(particle_set(iatom)%r, cell), cell)
2306 CALL scaled_to_real(ri, scoord(:) + index_to_cell(:, i_img), cell)
2307
2308 jblk = 0
2309 DO j_img = 1, nimg
2310 j_ri = ri_data%img_to_RI_cell(j_img)
2311 IF (j_ri == 0) cycle
2312 DO jatom = 1, natom
2313 IF (present_atoms_j(jatom, j_img) == 0) cycle
2314 jblk = jblk + 1
2315
2316 CALL real_to_scaled(scoord, pbc(particle_set(jatom)%r, cell), cell)
2317 CALL scaled_to_real(rj, scoord(:) + index_to_cell(:, j_img), cell)
2318
2319 CALL dbcsr_get_block_p(work, (i_ri - 1)*natom + iatom, (j_ri - 1)*natom + jatom, pblock, found)
2320 IF (.NOT. found) cycle
2321
2322 bfac = 1.0_dp
2323 IF (iblk .NE. jblk) bfac = bump(norm2(ri - rref), r0, r1)*bump(norm2(rj - rref), r0, r1)
2324 CALL dbcsr_put_block(work_tight, iblk, jblk, bfac*pblock(:, :))
2325 END DO
2326 END DO
2327 END DO
2328 END DO
2329 CALL dbcsr_finalize(work_tight)
2330 CALL dbcsr_clear(work)
2331
2332 IF (.NOT. skip_inverse_prv) THEN
2333 SELECT CASE (ri_data%t2c_method)
2334 CASE (hfx_ri_do_2c_iter)
2335 threshold = max(ri_data%filter_eps, 1.0e-12_dp)
2336 CALL invert_hotelling(work_tight_inv, work_tight, threshold=threshold, silent=.false.)
2338 CALL dbcsr_copy(work_tight_inv, work_tight)
2339 CALL cp_dbcsr_cholesky_decompose(work_tight_inv, para_env=para_env, blacs_env=blacs_env)
2340 CALL cp_dbcsr_cholesky_invert(work_tight_inv, para_env=para_env, blacs_env=blacs_env, &
2341 uplo_to_full=.true.)
2342 CASE (hfx_ri_do_2c_diag)
2343 CALL dbcsr_copy(work_tight_inv, work_tight)
2344 CALL cp_dbcsr_power(work_tight_inv, -1.0_dp, ri_data%eps_eigval, n_dependent, &
2345 para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
2346 END SELECT
2347 ELSE
2348 CALL dbcsr_copy(work_tight_inv, work_tight)
2349 END IF
2350
2351 !move back data to standard extended RI pattern
2352 !Note: we apply the external bump to ((P|Q)_D + B*(P|Q)_OD*B)^-1 later, because this matrix
2353 ! is required for forces
2354 iblk = 0
2355 DO i_img = 1, nimg
2356 i_ri = ri_data%img_to_RI_cell(i_img)
2357 IF (i_ri == 0) cycle
2358 DO iatom = 1, natom
2359 IF (present_atoms_i(iatom, i_img) == 0) cycle
2360 iblk = iblk + 1
2361
2362 jblk = 0
2363 DO j_img = 1, nimg
2364 j_ri = ri_data%img_to_RI_cell(j_img)
2365 IF (j_ri == 0) cycle
2366 DO jatom = 1, natom
2367 IF (present_atoms_j(jatom, j_img) == 0) cycle
2368 jblk = jblk + 1
2369
2370 CALL dbcsr_get_block_p(work_tight_inv, iblk, jblk, pblock, found)
2371 IF (.NOT. found) cycle
2372
2373 CALL dbcsr_put_block(work, (i_ri - 1)*natom + iatom, (j_ri - 1)*natom + jatom, pblock)
2374 END DO
2375 END DO
2376 END DO
2377 END DO
2378 CALL dbcsr_finalize(work)
2379
2380 CALL dbcsr_release(work_tight)
2381 CALL dbcsr_release(work_tight_inv)
2382 END IF
2383
2384 CALL dbt_create(work, t_2c_tmp)
2385 CALL dbt_copy_matrix_to_tensor(work, t_2c_tmp)
2386 CALL dbt_copy(t_2c_tmp, t_2c_pot, move_data=.true.)
2387 CALL dbt_filter(t_2c_pot, ri_data%filter_eps)
2388
2389 CALL dbt_destroy(t_2c_tmp)
2390 CALL dbcsr_release(work)
2391
2392 CALL timestop(handle)
2393
2394 END SUBROUTINE get_ext_2c_int
2395
2396! **************************************************************************************************
2397!> \brief Pre-contract the density matrices with the 3-center integrals:
2398!> P_sigma^a,lambda^a+c (mu^0 sigma^a| P^0)
2399!> \param t_3c_apc ...
2400!> \param rho_ao_t ...
2401!> \param ri_data ...
2402!> \param qs_env ...
2403! **************************************************************************************************
2404 SUBROUTINE contract_pmat_3c(t_3c_apc, rho_ao_t, ri_data, qs_env)
2405 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_apc, rho_ao_t
2406 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2407 TYPE(qs_environment_type), POINTER :: qs_env
2408
2409 CHARACTER(len=*), PARAMETER :: routinen = 'contract_pmat_3c'
2410
2411 INTEGER :: apc_img, b_img, batch_size, handle, &
2412 i_batch, i_img, i_spin, idx, j_batch, &
2413 n_batch_img, n_batch_nze, nimg, &
2414 nimg_nze, nspins
2415 INTEGER(int_8) :: nflop, nze
2416 INTEGER, ALLOCATABLE, DIMENSION(:) :: apc_filter, batch_ranges_img, &
2417 batch_ranges_nze, int_indices
2418 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ac_pairs, iapc_pairs
2419 REAL(dp) :: occ, t1, t2
2420 TYPE(dbt_type) :: t_3c_tmp
2421 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: ints_stack, res_stack, rho_stack
2422 TYPE(dft_control_type), POINTER :: dft_control
2423
2424 CALL timeset(routinen, handle)
2425
2426 CALL get_qs_env(qs_env, dft_control=dft_control)
2427
2428 nimg = ri_data%nimg
2429 nimg_nze = ri_data%nimg_nze
2430 nspins = dft_control%nspins
2431
2432 CALL dbt_create(t_3c_apc(1, 1), t_3c_tmp)
2433
2434 batch_size = ri_data%kp_stack_size
2435
2436 ALLOCATE (apc_filter(nimg), iapc_pairs(nimg, 2))
2437 apc_filter = 0
2438 DO b_img = 1, nimg
2439 CALL get_iapc_pairs(iapc_pairs, b_img, ri_data, qs_env)
2440 DO i_img = 1, nimg_nze
2441 idx = iapc_pairs(i_img, 2)
2442 IF (idx < 1 .OR. idx > nimg) cycle
2443 apc_filter(idx) = 1
2444 END DO
2445 END DO
2446
2447 !batching over all images
2448 n_batch_img = nimg/batch_size
2449 IF (modulo(nimg, batch_size) .NE. 0) n_batch_img = n_batch_img + 1
2450 ALLOCATE (batch_ranges_img(n_batch_img + 1))
2451 DO i_batch = 1, n_batch_img
2452 batch_ranges_img(i_batch) = (i_batch - 1)*batch_size + 1
2453 END DO
2454 batch_ranges_img(n_batch_img + 1) = nimg + 1
2455
2456 !batching over images with non-zero 3c integrals
2457 n_batch_nze = nimg_nze/batch_size
2458 IF (modulo(nimg_nze, batch_size) .NE. 0) n_batch_nze = n_batch_nze + 1
2459 ALLOCATE (batch_ranges_nze(n_batch_nze + 1))
2460 DO i_batch = 1, n_batch_nze
2461 batch_ranges_nze(i_batch) = (i_batch - 1)*batch_size + 1
2462 END DO
2463 batch_ranges_nze(n_batch_nze + 1) = nimg_nze + 1
2464
2465 !Create the stack tensors in the approriate distribution
2466 ALLOCATE (rho_stack(2), ints_stack(2), res_stack(2))
2467 CALL get_stack_tensors(res_stack, rho_stack, ints_stack, rho_ao_t(1, 1), &
2468 ri_data%t_3c_int_ctr_1(1, 1), batch_size, ri_data, qs_env)
2469
2470 ALLOCATE (ac_pairs(nimg, 2), int_indices(nimg_nze))
2471 DO i_img = 1, nimg_nze
2472 int_indices(i_img) = i_img
2473 END DO
2474
2475 t1 = m_walltime()
2476 DO j_batch = 1, n_batch_nze
2477 !First batch is over the integrals. They are always in the same order, consistent with get_ac_pairs
2478 CALL fill_3c_stack(ints_stack(1), ri_data%t_3c_int_ctr_1(1, :), int_indices, 3, ri_data, &
2479 img_bounds=[batch_ranges_nze(j_batch), batch_ranges_nze(j_batch + 1)])
2480 CALL dbt_copy(ints_stack(1), ints_stack(2), move_data=.true.)
2481
2482 DO i_spin = 1, nspins
2483 DO i_batch = 1, n_batch_img
2484 !Second batch is over the P matrix. Here we fill the stacked rho tensors col by col
2485 DO apc_img = batch_ranges_img(i_batch), batch_ranges_img(i_batch + 1) - 1
2486 IF (apc_filter(apc_img) == 0) cycle
2487 CALL get_ac_pairs(ac_pairs, apc_img, ri_data, qs_env)
2488 CALL fill_2c_stack(rho_stack(1), rho_ao_t(i_spin, :), ac_pairs(:, 2), 1, ri_data, &
2489 img_bounds=[batch_ranges_nze(j_batch), batch_ranges_nze(j_batch + 1)], &
2490 shift=apc_img - batch_ranges_img(i_batch) + 1)
2491
2492 END DO !apc_img
2493 CALL get_tensor_occupancy(rho_stack(1), nze, occ)
2494 IF (nze == 0) cycle
2495 CALL dbt_copy(rho_stack(1), rho_stack(2), move_data=.true.)
2496
2497 !The actual contraction
2498 CALL dbt_batched_contract_init(rho_stack(2))
2499 CALL dbt_contract(1.0_dp, ints_stack(2), rho_stack(2), &
2500 0.0_dp, res_stack(2), map_1=[1, 2], map_2=[3], &
2501 contract_1=[3], notcontract_1=[1, 2], &
2502 contract_2=[1], notcontract_2=[2], &
2503 filter_eps=ri_data%filter_eps, flop=nflop)
2504 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2505 CALL dbt_batched_contract_finalize(rho_stack(2))
2506 CALL dbt_copy(res_stack(2), res_stack(1), move_data=.true.)
2507
2508 DO apc_img = batch_ranges_img(i_batch), batch_ranges_img(i_batch + 1) - 1
2509 !Destack the resulting tensor and put it in t_3c_apc with correct apc_img
2510 IF (apc_filter(apc_img) == 0) cycle
2511 CALL unstack_t_3c_apc(t_3c_tmp, res_stack(1), apc_img - batch_ranges_img(i_batch) + 1)
2512 CALL dbt_copy(t_3c_tmp, t_3c_apc(i_spin, apc_img), summation=.true., move_data=.true.)
2513 END DO
2514
2515 END DO !i_batch
2516 END DO !i_spin
2517 END DO !j_batch
2518 DEALLOCATE (batch_ranges_img)
2519 DEALLOCATE (batch_ranges_nze)
2520 t2 = m_walltime()
2521 ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
2522
2523 CALL dbt_destroy(rho_stack(1))
2524 CALL dbt_destroy(rho_stack(2))
2525 CALL dbt_destroy(ints_stack(1))
2526 CALL dbt_destroy(ints_stack(2))
2527 CALL dbt_destroy(res_stack(1))
2528 CALL dbt_destroy(res_stack(2))
2529 CALL dbt_destroy(t_3c_tmp)
2530
2531 CALL timestop(handle)
2532
2533 END SUBROUTINE contract_pmat_3c
2534
2535! **************************************************************************************************
2536!> \brief Pre-contract 3-center integrals with the bumped invrse RI metric, for each atom
2537!> \param t_3c_int ...
2538!> \param ri_data ...
2539!> \param qs_env ...
2540! **************************************************************************************************
2541 SUBROUTINE precontract_3c_ints(t_3c_int, ri_data, qs_env)
2542 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_int
2543 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2544 TYPE(qs_environment_type), POINTER :: qs_env
2545
2546 CHARACTER(len=*), PARAMETER :: routinen = 'precontract_3c_ints'
2547
2548 INTEGER :: batch_size, handle, i_batch, i_img, &
2549 i_ri, iatom, is, n_batch, natom, &
2550 nblks, nblks_3c(3), nimg
2551 INTEGER(int_8) :: nflop
2552 INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges, bsizes_ri_ext, bsizes_ri_ext_split, &
2553 bsizes_stack, dist1, dist2, dist3, dist_stack3, idx_to_at_ao, int_indices
2554 TYPE(dbt_distribution_type) :: t_dist
2555 TYPE(dbt_type) :: t_2c_ri_tmp(2), t_3c_tmp(3)
2556
2557 CALL timeset(routinen, handle)
2558
2559 CALL get_qs_env(qs_env, natom=natom)
2560
2561 nimg = ri_data%nimg
2562 ALLOCATE (int_indices(nimg))
2563 DO i_img = 1, nimg
2564 int_indices(i_img) = i_img
2565 END DO
2566
2567 ALLOCATE (idx_to_at_ao(SIZE(ri_data%bsizes_AO_split)))
2568 CALL get_idx_to_atom(idx_to_at_ao, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
2569
2570 nblks = SIZE(ri_data%bsizes_RI_split)
2571 ALLOCATE (bsizes_ri_ext(ri_data%ncell_RI*natom))
2572 ALLOCATE (bsizes_ri_ext_split(ri_data%ncell_RI*nblks))
2573 DO i_ri = 1, ri_data%ncell_RI
2574 bsizes_ri_ext((i_ri - 1)*natom + 1:i_ri*natom) = ri_data%bsizes_RI(:)
2575 bsizes_ri_ext_split((i_ri - 1)*nblks + 1:i_ri*nblks) = ri_data%bsizes_RI_split(:)
2576 END DO
2577 CALL create_2c_tensor(t_2c_ri_tmp(1), dist1, dist2, ri_data%pgrid_2d, &
2578 bsizes_ri_ext, bsizes_ri_ext, &
2579 name="(RI | RI)")
2580 DEALLOCATE (dist1, dist2)
2581 CALL create_2c_tensor(t_2c_ri_tmp(2), dist1, dist2, ri_data%pgrid_2d, &
2582 bsizes_ri_ext_split, bsizes_ri_ext_split, &
2583 name="(RI | RI)")
2584 DEALLOCATE (dist1, dist2)
2585
2586 !For more efficiency, we stack multiple images of the 3-center integrals into a single tensor
2587 batch_size = ri_data%kp_stack_size
2588 n_batch = nimg/batch_size
2589 IF (modulo(nimg, batch_size) .NE. 0) n_batch = n_batch + 1
2590 ALLOCATE (batch_ranges(n_batch + 1))
2591 DO i_batch = 1, n_batch
2592 batch_ranges(i_batch) = (i_batch - 1)*batch_size + 1
2593 END DO
2594 batch_ranges(n_batch + 1) = nimg + 1
2595
2596 nblks = SIZE(ri_data%bsizes_AO_split)
2597 ALLOCATE (bsizes_stack(batch_size*nblks))
2598 DO is = 1, batch_size
2599 bsizes_stack((is - 1)*nblks + 1:is*nblks) = ri_data%bsizes_AO_split(:)
2600 END DO
2601
2602 CALL dbt_get_info(t_3c_int(1, 1), nblks_total=nblks_3c)
2603 ALLOCATE (dist1(nblks_3c(1)), dist2(nblks_3c(2)), dist3(nblks_3c(3)), dist_stack3(batch_size*nblks_3c(3)))
2604 CALL dbt_get_info(t_3c_int(1, 1), proc_dist_1=dist1, proc_dist_2=dist2, proc_dist_3=dist3)
2605 DO is = 1, batch_size
2606 dist_stack3((is - 1)*nblks_3c(3) + 1:is*nblks_3c(3)) = dist3(:)
2607 END DO
2608
2609 CALL dbt_distribution_new(t_dist, ri_data%pgrid, dist1, dist2, dist_stack3)
2610 CALL dbt_create(t_3c_tmp(1), "ints_stack", t_dist, [1], [2, 3], bsizes_ri_ext_split, &
2611 ri_data%bsizes_AO_split, bsizes_stack)
2612 CALL dbt_distribution_destroy(t_dist)
2613 DEALLOCATE (dist1, dist2, dist3, dist_stack3)
2614
2615 CALL dbt_create(t_3c_tmp(1), t_3c_tmp(2))
2616 CALL dbt_create(t_3c_int(1, 1), t_3c_tmp(3))
2617
2618 DO iatom = 1, natom
2619 CALL dbt_copy(ri_data%t_2c_inv(1, iatom), t_2c_ri_tmp(1))
2620 CALL apply_bump(t_2c_ri_tmp(1), iatom, ri_data, qs_env, from_left=.true., from_right=.true.)
2621 CALL dbt_copy(t_2c_ri_tmp(1), t_2c_ri_tmp(2), move_data=.true.)
2622
2623 CALL dbt_batched_contract_init(t_2c_ri_tmp(2))
2624 DO i_batch = 1, n_batch
2625
2626 CALL fill_3c_stack(t_3c_tmp(1), t_3c_int(1, :), int_indices, 3, ri_data, &
2627 img_bounds=[batch_ranges(i_batch), batch_ranges(i_batch + 1)], &
2628 filter_at=iatom, filter_dim=2, idx_to_at=idx_to_at_ao)
2629
2630 CALL dbt_contract(1.0_dp, t_2c_ri_tmp(2), t_3c_tmp(1), &
2631 0.0_dp, t_3c_tmp(2), map_1=[1], map_2=[2, 3], &
2632 contract_1=[2], notcontract_1=[1], &
2633 contract_2=[1], notcontract_2=[2, 3], &
2634 filter_eps=ri_data%filter_eps, flop=nflop)
2635 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2636
2637 DO i_img = batch_ranges(i_batch), batch_ranges(i_batch + 1) - 1
2638 CALL unstack_t_3c_apc(t_3c_tmp(3), t_3c_tmp(2), i_img - batch_ranges(i_batch) + 1)
2639 CALL dbt_copy(t_3c_tmp(3), ri_data%t_3c_int_ctr_1(1, i_img), summation=.true., &
2640 order=[2, 1, 3], move_data=.true.)
2641 END DO
2642 CALL dbt_clear(t_3c_tmp(1))
2643 END DO
2644 CALL dbt_batched_contract_finalize(t_2c_ri_tmp(2))
2645
2646 END DO
2647 CALL dbt_destroy(t_2c_ri_tmp(1))
2648 CALL dbt_destroy(t_2c_ri_tmp(2))
2649 CALL dbt_destroy(t_3c_tmp(1))
2650 CALL dbt_destroy(t_3c_tmp(2))
2651 CALL dbt_destroy(t_3c_tmp(3))
2652
2653 DO i_img = 1, nimg
2654 CALL dbt_destroy(t_3c_int(1, i_img))
2655 END DO
2656
2657 CALL timestop(handle)
2658
2659 END SUBROUTINE precontract_3c_ints
2660
2661! **************************************************************************************************
2662!> \brief Copy the data of a 2D tensor living in the main MPI group to a sub-group, given the proc
2663!> mapping from one to the other (e.g. for a proc idx in the subgroup, we get the idx in the main)
2664!> \param t2c_sub ...
2665!> \param t2c_main ...
2666!> \param group_size ...
2667!> \param ngroups ...
2668!> \param para_env ...
2669! **************************************************************************************************
2670 SUBROUTINE copy_2c_to_subgroup(t2c_sub, t2c_main, group_size, ngroups, para_env)
2671 TYPE(dbt_type), INTENT(INOUT) :: t2c_sub, t2c_main
2672 INTEGER, INTENT(IN) :: group_size, ngroups
2673 TYPE(mp_para_env_type), POINTER :: para_env
2674
2675 INTEGER :: batch_size, i, i_batch, i_msg, iblk, &
2676 igroup, iproc, ir, is, jblk, n_batch, &
2677 nocc, tag
2678 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes1, bsizes2
2679 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: block_dest, block_source
2680 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: current_dest
2681 INTEGER, DIMENSION(2) :: ind, nblks
2682 LOGICAL :: found
2683 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
2684 TYPE(cp_2d_r_p_type), ALLOCATABLE, DIMENSION(:) :: recv_buff, send_buff
2685 TYPE(dbt_iterator_type) :: iter
2686 TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_req, send_req
2687
2688 !Stategy: we loop over the main tensor, and send all the data. Then we loop over the sub tensor
2689 ! and receive it. We do all of it with async MPI communication. The sub tensor needs
2690 ! to have blocks pre-reserved though
2691
2692 CALL dbt_get_info(t2c_main, nblks_total=nblks)
2693
2694 !Loop over the main tensor, count how many blocks are there, which ones, and on which proc
2695 ALLOCATE (block_source(nblks(1), nblks(2)))
2696 block_source = -1
2697 nocc = 0
2698!$OMP PARALLEL DEFAULT(NONE) SHARED(t2c_main,para_env,nocc,block_source) PRIVATE(iter,ind,blk,found)
2699 CALL dbt_iterator_start(iter, t2c_main)
2700 DO WHILE (dbt_iterator_blocks_left(iter))
2701 CALL dbt_iterator_next_block(iter, ind)
2702 CALL dbt_get_block(t2c_main, ind, blk, found)
2703 IF (.NOT. found) cycle
2704
2705 block_source(ind(1), ind(2)) = para_env%mepos
2706!$OMP ATOMIC
2707 nocc = nocc + 1
2708 DEALLOCATE (blk)
2709 END DO
2710 CALL dbt_iterator_stop(iter)
2711!$OMP END PARALLEL
2712
2713 CALL para_env%sum(nocc)
2714 CALL para_env%sum(block_source)
2715 block_source = block_source + para_env%num_pe - 1
2716 IF (nocc == 0) RETURN
2717
2718 !Loop over the sub tensor, get the block destination
2719 igroup = para_env%mepos/group_size
2720 ALLOCATE (block_dest(nblks(1), nblks(2)))
2721 block_dest = -1
2722 DO jblk = 1, nblks(2)
2723 DO iblk = 1, nblks(1)
2724 IF (block_source(iblk, jblk) == -1) cycle
2725
2726 CALL dbt_get_stored_coordinates(t2c_sub, [iblk, jblk], iproc)
2727 block_dest(iblk, jblk) = igroup*group_size + iproc !mapping of iproc in subgroup to main group idx
2728 END DO
2729 END DO
2730
2731 ALLOCATE (bsizes1(nblks(1)), bsizes2(nblks(2)))
2732 CALL dbt_get_info(t2c_main, blk_size_1=bsizes1, blk_size_2=bsizes2)
2733
2734 ALLOCATE (current_dest(nblks(1), nblks(2), 0:ngroups - 1))
2735 DO igroup = 0, ngroups - 1
2736 !for a given subgroup, need to make the destination available to everyone in the main group
2737 current_dest(:, :, igroup) = block_dest(:, :)
2738 CALL para_env%bcast(current_dest(:, :, igroup), source=igroup*group_size) !bcast from first proc in sub-group
2739 END DO
2740
2741 !We go by batches, which cannot be larger than the maximum MPI tag value
2742 batch_size = min(para_env%get_tag_ub(), 128000, nocc*ngroups)
2743 n_batch = (nocc*ngroups)/batch_size
2744 IF (modulo(nocc*ngroups, batch_size) .NE. 0) n_batch = n_batch + 1
2745
2746 DO i_batch = 1, n_batch
2747 !Loop over groups, blocks and send/receive
2748 ALLOCATE (send_buff(batch_size), recv_buff(batch_size))
2749 ALLOCATE (send_req(batch_size), recv_req(batch_size))
2750 ir = 0
2751 is = 0
2752 i_msg = 0
2753 DO jblk = 1, nblks(2)
2754 DO iblk = 1, nblks(1)
2755 DO igroup = 0, ngroups - 1
2756 IF (block_source(iblk, jblk) == -1) cycle
2757
2758 i_msg = i_msg + 1
2759 IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) cycle
2760
2761 !a unique tag per block, within this batch
2762 tag = i_msg - (i_batch - 1)*batch_size
2763
2764 found = .false.
2765 IF (para_env%mepos == block_source(iblk, jblk)) THEN
2766 CALL dbt_get_block(t2c_main, [iblk, jblk], blk, found)
2767 END IF
2768
2769 !If blocks live on same proc, simply copy. Else MPI send/recv
2770 IF (block_source(iblk, jblk) == current_dest(iblk, jblk, igroup)) THEN
2771 IF (found) CALL dbt_put_block(t2c_sub, [iblk, jblk], shape(blk), blk)
2772 ELSE
2773 IF (para_env%mepos == block_source(iblk, jblk) .AND. found) THEN
2774 ALLOCATE (send_buff(tag)%array(bsizes1(iblk), bsizes2(jblk)))
2775 send_buff(tag)%array(:, :) = blk(:, :)
2776 is = is + 1
2777 CALL para_env%isend(msgin=send_buff(tag)%array, dest=current_dest(iblk, jblk, igroup), &
2778 request=send_req(is), tag=tag)
2779 END IF
2780
2781 IF (para_env%mepos == current_dest(iblk, jblk, igroup)) THEN
2782 ALLOCATE (recv_buff(tag)%array(bsizes1(iblk), bsizes2(jblk)))
2783 ir = ir + 1
2784 CALL para_env%irecv(msgout=recv_buff(tag)%array, source=block_source(iblk, jblk), &
2785 request=recv_req(ir), tag=tag)
2786 END IF
2787 END IF
2788
2789 IF (found) DEALLOCATE (blk)
2790 END DO
2791 END DO
2792 END DO
2793
2794 CALL mp_waitall(send_req(1:is))
2795 CALL mp_waitall(recv_req(1:ir))
2796 !clean-up
2797 DO i = 1, batch_size
2798 IF (ASSOCIATED(send_buff(i)%array)) DEALLOCATE (send_buff(i)%array)
2799 END DO
2800
2801 !Finally copy the data from the buffer to the sub-tensor
2802 i_msg = 0
2803 DO jblk = 1, nblks(2)
2804 DO iblk = 1, nblks(1)
2805 DO igroup = 0, ngroups - 1
2806 IF (block_source(iblk, jblk) == -1) cycle
2807
2808 i_msg = i_msg + 1
2809 IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) cycle
2810
2811 !a unique tag per block, within this batch
2812 tag = i_msg - (i_batch - 1)*batch_size
2813
2814 IF (para_env%mepos == current_dest(iblk, jblk, igroup) .AND. &
2815 block_source(iblk, jblk) .NE. current_dest(iblk, jblk, igroup)) THEN
2816
2817 ALLOCATE (blk(bsizes1(iblk), bsizes2(jblk)))
2818 blk(:, :) = recv_buff(tag)%array(:, :)
2819 CALL dbt_put_block(t2c_sub, [iblk, jblk], shape(blk), blk)
2820 DEALLOCATE (blk)
2821 END IF
2822 END DO
2823 END DO
2824 END DO
2825
2826 !clean-up
2827 DO i = 1, batch_size
2828 IF (ASSOCIATED(recv_buff(i)%array)) DEALLOCATE (recv_buff(i)%array)
2829 END DO
2830 DEALLOCATE (send_buff, recv_buff, send_req, recv_req)
2831 END DO !i_batch
2832 CALL dbt_finalize(t2c_sub)
2833
2834 END SUBROUTINE copy_2c_to_subgroup
2835
2836! **************************************************************************************************
2837!> \brief Pre-compute the destination of the block of a 3D tensor in various subgroups
2838!> \param subgroup_dest ...
2839!> \param t3c_sub ...
2840!> \param t3c_main ...
2841!> \param group_size ...
2842!> \param ngroups ...
2843!> \param para_env ...
2844! **************************************************************************************************
2845 SUBROUTINE get_3c_subgroup_dest(subgroup_dest, t3c_sub, t3c_main, group_size, ngroups, para_env)
2846 INTEGER, ALLOCATABLE, DIMENSION(:, :, :, :), &
2847 INTENT(INOUT) :: subgroup_dest
2848 TYPE(dbt_type), INTENT(INOUT) :: t3c_sub, t3c_main
2849 INTEGER, INTENT(IN) :: group_size, ngroups
2850 TYPE(mp_para_env_type), POINTER :: para_env
2851
2852 INTEGER :: iblk, igroup, iproc, jblk, kblk
2853 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: block_dest
2854 INTEGER, DIMENSION(3) :: nblks
2855
2856 CALL dbt_get_info(t3c_main, nblks_total=nblks)
2857
2858 !Loop over the sub tensor, get the block destination
2859 igroup = para_env%mepos/group_size
2860 ALLOCATE (block_dest(nblks(1), nblks(2), nblks(3)))
2861 DO kblk = 1, nblks(3)
2862 DO jblk = 1, nblks(2)
2863 DO iblk = 1, nblks(1)
2864 CALL dbt_get_stored_coordinates(t3c_sub, [iblk, jblk, kblk], iproc)
2865 block_dest(iblk, jblk, kblk) = igroup*group_size + iproc !mapping of iproc in subgroup to main group idx
2866 END DO
2867 END DO
2868 END DO
2869
2870 ALLOCATE (subgroup_dest(nblks(1), nblks(2), nblks(3), ngroups))
2871 DO igroup = 0, ngroups - 1
2872 !for a given subgroup, need to make the destination available to everyone in the main group
2873 subgroup_dest(:, :, :, igroup + 1) = block_dest(:, :, :)
2874 CALL para_env%bcast(subgroup_dest(:, :, :, igroup + 1), source=igroup*group_size) !bcast from first proc in subgroup
2875 END DO
2876
2877 END SUBROUTINE get_3c_subgroup_dest
2878
2879! **************************************************************************************************
2880!> \brief Copy the data of a 3D tensor living in the main MPI group to a sub-group, given the proc
2881!> mapping from one to the other (e.g. for a proc idx in the subgroup, we get the idx in the main)
2882!> \param t3c_sub ...
2883!> \param t3c_main ...
2884!> \param ngroups ...
2885!> \param para_env ...
2886!> \param subgroup_dest ...
2887!> \param iatom_to_subgroup ...
2888!> \param dim_at ...
2889!> \param idx_to_at ...
2890! **************************************************************************************************
2891 SUBROUTINE copy_3c_to_subgroup(t3c_sub, t3c_main, ngroups, para_env, subgroup_dest, &
2892 iatom_to_subgroup, dim_at, idx_to_at)
2893 TYPE(dbt_type), INTENT(INOUT) :: t3c_sub, t3c_main
2894 INTEGER, INTENT(IN) :: ngroups
2895 TYPE(mp_para_env_type), POINTER :: para_env
2896 INTEGER, DIMENSION(:, :, :, :), INTENT(IN) :: subgroup_dest
2897 TYPE(cp_1d_logical_p_type), DIMENSION(:), &
2898 INTENT(INOUT), OPTIONAL :: iatom_to_subgroup
2899 INTEGER, INTENT(IN), OPTIONAL :: dim_at
2900 INTEGER, DIMENSION(:), OPTIONAL :: idx_to_at
2901
2902 INTEGER :: batch_size, i, i_batch, i_msg, iatom, &
2903 iblk, igroup, ir, is, isbuff, jblk, &
2904 kblk, n_batch, nocc, tag
2905 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes1, bsizes2, bsizes3
2906 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: block_source
2907 INTEGER, DIMENSION(3) :: ind, nblks
2908 LOGICAL :: filter_at, found
2909 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
2910 TYPE(cp_3d_r_p_type), ALLOCATABLE, DIMENSION(:) :: recv_buff, send_buff
2911 TYPE(dbt_iterator_type) :: iter
2912 TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_req, send_req
2913
2914 !Stategy: we loop over the main tensor, and send all the data. Then we loop over the sub tensor
2915 ! and receive it. We do all of it with async MPI communication. The sub tensor needs
2916 ! to have blocks pre-reserved though
2917
2918 CALL dbt_get_info(t3c_main, nblks_total=nblks)
2919
2920 !in some cases, only copy a fraction of the 3c tensor to a given subgroup (corresponding to some atoms)
2921 filter_at = .false.
2922 IF (PRESENT(iatom_to_subgroup) .AND. PRESENT(dim_at) .AND. PRESENT(idx_to_at)) THEN
2923 filter_at = .true.
2924 cpassert(nblks(dim_at) == SIZE(idx_to_at))
2925 END IF
2926
2927 !Loop over the main tensor, count how many blocks are there, which ones, and on which proc
2928 ALLOCATE (block_source(nblks(1), nblks(2), nblks(3)))
2929 block_source = -1
2930 nocc = 0
2931!$OMP PARALLEL DEFAULT(NONE) SHARED(t3c_main,para_env,nocc,block_source) PRIVATE(iter,ind,blk,found)
2932 CALL dbt_iterator_start(iter, t3c_main)
2933 DO WHILE (dbt_iterator_blocks_left(iter))
2934 CALL dbt_iterator_next_block(iter, ind)
2935 CALL dbt_get_block(t3c_main, ind, blk, found)
2936 IF (.NOT. found) cycle
2937
2938 block_source(ind(1), ind(2), ind(3)) = para_env%mepos
2939!$OMP ATOMIC
2940 nocc = nocc + 1
2941 DEALLOCATE (blk)
2942 END DO
2943 CALL dbt_iterator_stop(iter)
2944!$OMP END PARALLEL
2945
2946 CALL para_env%sum(nocc)
2947 CALL para_env%sum(block_source)
2948 block_source = block_source + para_env%num_pe - 1
2949 IF (nocc == 0) RETURN
2950
2951 ALLOCATE (bsizes1(nblks(1)), bsizes2(nblks(2)), bsizes3(nblks(3)))
2952 CALL dbt_get_info(t3c_main, blk_size_1=bsizes1, blk_size_2=bsizes2, blk_size_3=bsizes3)
2953
2954 !We go by batches, which cannot be larger than the maximum MPI tag value
2955 batch_size = min(para_env%get_tag_ub(), 128000, nocc*ngroups)
2956 n_batch = (nocc*ngroups)/batch_size
2957 IF (modulo(nocc*ngroups, batch_size) .NE. 0) n_batch = n_batch + 1
2958
2959 DO i_batch = 1, n_batch
2960 !Loop over groups, blocks and send/receive
2961 ALLOCATE (send_buff(batch_size), recv_buff(batch_size))
2962 ALLOCATE (send_req(batch_size), recv_req(batch_size))
2963 ir = 0
2964 is = 0
2965 i_msg = 0
2966 isbuff = 0
2967 DO kblk = 1, nblks(3)
2968 DO jblk = 1, nblks(2)
2969 DO iblk = 1, nblks(1)
2970 IF (block_source(iblk, jblk, kblk) == -1) cycle
2971
2972 found = .false.
2973 IF (para_env%mepos == block_source(iblk, jblk, kblk)) THEN
2974 CALL dbt_get_block(t3c_main, [iblk, jblk, kblk], blk, found)
2975 IF (found) THEN
2976 isbuff = isbuff + 1
2977 ALLOCATE (send_buff(isbuff)%array(bsizes1(iblk), bsizes2(jblk), bsizes3(kblk)))
2978 END IF
2979 END IF
2980
2981 DO igroup = 0, ngroups - 1
2982
2983 i_msg = i_msg + 1
2984 IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) cycle
2985
2986 !a unique tag per block, within this batch
2987 tag = i_msg - (i_batch - 1)*batch_size
2988
2989 IF (filter_at) THEN
2990 ind(:) = [iblk, jblk, kblk]
2991 iatom = idx_to_at(ind(dim_at))
2992 IF (.NOT. iatom_to_subgroup(iatom)%array(igroup + 1)) cycle
2993 END IF
2994
2995 !If blocks live on same proc, simply copy. Else MPI send/recv
2996 IF (block_source(iblk, jblk, kblk) == subgroup_dest(iblk, jblk, kblk, igroup + 1)) THEN
2997 IF (found) CALL dbt_put_block(t3c_sub, [iblk, jblk, kblk], shape(blk), blk)
2998 ELSE
2999 IF (para_env%mepos == block_source(iblk, jblk, kblk) .AND. found) THEN
3000 send_buff(isbuff)%array(:, :, :) = blk(:, :, :)
3001 is = is + 1
3002 CALL para_env%isend(msgin=send_buff(isbuff)%array, &
3003 dest=subgroup_dest(iblk, jblk, kblk, igroup + 1), &
3004 request=send_req(is), tag=tag)
3005 END IF
3006
3007 IF (para_env%mepos == subgroup_dest(iblk, jblk, kblk, igroup + 1)) THEN
3008 ALLOCATE (recv_buff(tag)%array(bsizes1(iblk), bsizes2(jblk), bsizes3(kblk)))
3009 ir = ir + 1
3010 CALL para_env%irecv(msgout=recv_buff(tag)%array, source=block_source(iblk, jblk, kblk), &
3011 request=recv_req(ir), tag=tag)
3012 END IF
3013 END IF
3014 END DO !igroup
3015
3016 IF (found) DEALLOCATE (blk)
3017 END DO
3018 END DO
3019 END DO
3020
3021 !Finally copy the data from the buffer to the sub-tensor
3022 i_msg = 0
3023 ir = 0
3024 DO kblk = 1, nblks(3)
3025 DO jblk = 1, nblks(2)
3026 DO iblk = 1, nblks(1)
3027 DO igroup = 0, ngroups - 1
3028 IF (block_source(iblk, jblk, kblk) == -1) cycle
3029
3030 i_msg = i_msg + 1
3031 IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) cycle
3032
3033 !a unique tag per block, within this batch
3034 tag = i_msg - (i_batch - 1)*batch_size
3035
3036 IF (filter_at) THEN
3037 ind(:) = [iblk, jblk, kblk]
3038 iatom = idx_to_at(ind(dim_at))
3039 IF (.NOT. iatom_to_subgroup(iatom)%array(igroup + 1)) cycle
3040 END IF
3041
3042 IF (para_env%mepos == subgroup_dest(iblk, jblk, kblk, igroup + 1) .AND. &
3043 block_source(iblk, jblk, kblk) .NE. subgroup_dest(iblk, jblk, kblk, igroup + 1)) THEN
3044
3045 ir = ir + 1
3046 CALL mp_waitall(recv_req(ir:ir))
3047 CALL dbt_put_block(t3c_sub, [iblk, jblk, kblk], shape(recv_buff(tag)%array), recv_buff(tag)%array)
3048 END IF
3049 END DO
3050 END DO
3051 END DO
3052 END DO
3053
3054 !clean-up
3055 CALL mp_waitall(send_req(1:is))
3056 DO i = 1, batch_size
3057 IF (ASSOCIATED(recv_buff(i)%array)) DEALLOCATE (recv_buff(i)%array)
3058 IF (ASSOCIATED(send_buff(i)%array)) DEALLOCATE (send_buff(i)%array)
3059 END DO
3060 DEALLOCATE (send_buff, recv_buff, send_req, recv_req)
3061 END DO !i_batch
3062 CALL dbt_finalize(t3c_sub)
3063
3064 END SUBROUTINE copy_3c_to_subgroup
3065
3066! **************************************************************************************************
3067!> \brief A routine that gather the pieces of the KS matrix accross the subgroup and puts it in the
3068!> main group. Each b_img, iatom, jatom tuple is one a single CPU
3069!> \param ks_t ...
3070!> \param ks_t_sub ...
3071!> \param group_size ...
3072!> \param sparsity_pattern ...
3073!> \param para_env ...
3074!> \param ri_data ...
3075! **************************************************************************************************
3076 SUBROUTINE gather_ks_matrix(ks_t, ks_t_sub, group_size, sparsity_pattern, para_env, ri_data)
3077 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: ks_t, ks_t_sub
3078 INTEGER, INTENT(IN) :: group_size
3079 INTEGER, DIMENSION(:, :, :), INTENT(IN) :: sparsity_pattern
3080 TYPE(mp_para_env_type), POINTER :: para_env
3081 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3082
3083 CHARACTER(len=*), PARAMETER :: routinen = 'gather_ks_matrix'
3084
3085 INTEGER :: b_img, dest, handle, i, i_spin, iatom, &
3086 igroup, ir, is, jatom, n_mess, natom, &
3087 nimg, nspins, source, tag
3088 LOGICAL :: found
3089 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
3090 TYPE(cp_2d_r_p_type), ALLOCATABLE, DIMENSION(:) :: recv_buff, send_buff
3091 TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_req, send_req
3092
3093 CALL timeset(routinen, handle)
3094
3095 nimg = SIZE(sparsity_pattern, 3)
3096 natom = SIZE(sparsity_pattern, 2)
3097 nspins = SIZE(ks_t, 1)
3098
3099 DO b_img = 1, nimg
3100 n_mess = 0
3101 DO i_spin = 1, nspins
3102 DO jatom = 1, natom
3103 DO iatom = 1, natom
3104 IF (sparsity_pattern(iatom, jatom, b_img) > -1) n_mess = n_mess + 1
3105 END DO
3106 END DO
3107 END DO
3108
3109 ALLOCATE (send_buff(n_mess), recv_buff(n_mess))
3110 ALLOCATE (send_req(n_mess), recv_req(n_mess))
3111 ir = 0
3112 is = 0
3113 n_mess = 0
3114 tag = 0
3115
3116 DO i_spin = 1, nspins
3117 DO jatom = 1, natom
3118 DO iatom = 1, natom
3119 IF (sparsity_pattern(iatom, jatom, b_img) < 0) cycle
3120 n_mess = n_mess + 1
3121 tag = tag + 1
3122
3123 !sending the message
3124 CALL dbt_get_stored_coordinates(ks_t(i_spin, b_img), [iatom, jatom], dest)
3125 CALL dbt_get_stored_coordinates(ks_t_sub(i_spin, b_img), [iatom, jatom], source) !source within sub
3126 igroup = sparsity_pattern(iatom, jatom, b_img)
3127 source = source + igroup*group_size
3128 IF (para_env%mepos == source) THEN
3129 CALL dbt_get_block(ks_t_sub(i_spin, b_img), [iatom, jatom], blk, found)
3130 IF (source == dest) THEN
3131 IF (found) CALL dbt_put_block(ks_t(i_spin, b_img), [iatom, jatom], shape(blk), blk)
3132 ELSE
3133 ALLOCATE (send_buff(n_mess)%array(ri_data%bsizes_AO(iatom), ri_data%bsizes_AO(jatom)))
3134 send_buff(n_mess)%array(:, :) = 0.0_dp
3135 IF (found) THEN
3136 send_buff(n_mess)%array(:, :) = blk(:, :)
3137 END IF
3138 is = is + 1
3139 CALL para_env%isend(msgin=send_buff(n_mess)%array, dest=dest, &
3140 request=send_req(is), tag=tag)
3141 END IF
3142 DEALLOCATE (blk)
3143 END IF
3144
3145 !receiving the message
3146 IF (para_env%mepos == dest .AND. source .NE. dest) THEN
3147 ALLOCATE (recv_buff(n_mess)%array(ri_data%bsizes_AO(iatom), ri_data%bsizes_AO(jatom)))
3148 ir = ir + 1
3149 CALL para_env%irecv(msgout=recv_buff(n_mess)%array, source=source, &
3150 request=recv_req(ir), tag=tag)
3151 END IF
3152 END DO !iatom
3153 END DO !jatom
3154 END DO !ispin
3155
3156 CALL mp_waitall(send_req(1:is))
3157 CALL mp_waitall(recv_req(1:ir))
3158
3159 !Copy the messages received into the KS matrix
3160 n_mess = 0
3161 DO i_spin = 1, nspins
3162 DO jatom = 1, natom
3163 DO iatom = 1, natom
3164 IF (sparsity_pattern(iatom, jatom, b_img) < 0) cycle
3165 n_mess = n_mess + 1
3166
3167 CALL dbt_get_stored_coordinates(ks_t(i_spin, b_img), [iatom, jatom], dest)
3168 IF (para_env%mepos == dest) THEN
3169 IF (.NOT. ASSOCIATED(recv_buff(n_mess)%array)) cycle
3170 ALLOCATE (blk(ri_data%bsizes_AO(iatom), ri_data%bsizes_AO(jatom)))
3171 blk(:, :) = recv_buff(n_mess)%array(:, :)
3172 CALL dbt_put_block(ks_t(i_spin, b_img), [iatom, jatom], shape(blk), blk)
3173 DEALLOCATE (blk)
3174 END IF
3175 END DO
3176 END DO
3177 END DO
3178
3179 !clean-up
3180 DO i = 1, n_mess
3181 IF (ASSOCIATED(send_buff(i)%array)) DEALLOCATE (send_buff(i)%array)
3182 IF (ASSOCIATED(recv_buff(i)%array)) DEALLOCATE (recv_buff(i)%array)
3183 END DO
3184 DEALLOCATE (send_buff, recv_buff, send_req, recv_req)
3185 END DO !b_img
3186
3187 CALL timestop(handle)
3188
3189 END SUBROUTINE gather_ks_matrix
3190
3191! **************************************************************************************************
3192!> \brief copy all required 2c tensors from the main MPI group to the subgroups
3193!> \param mat_2c_pot ...
3194!> \param t_2c_work ...
3195!> \param t_2c_ao_tmp ...
3196!> \param ks_t_split ...
3197!> \param ks_t_sub ...
3198!> \param group_size ...
3199!> \param ngroups ...
3200!> \param para_env ...
3201!> \param para_env_sub ...
3202!> \param ri_data ...
3203! **************************************************************************************************
3204 SUBROUTINE get_subgroup_2c_tensors(mat_2c_pot, t_2c_work, t_2c_ao_tmp, ks_t_split, ks_t_sub, &
3205 group_size, ngroups, para_env, para_env_sub, ri_data)
3206 TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: mat_2c_pot
3207 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_work, t_2c_ao_tmp, ks_t_split
3208 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: ks_t_sub
3209 INTEGER, INTENT(IN) :: group_size, ngroups
3210 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3211 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3212
3213 CHARACTER(len=*), PARAMETER :: routinen = 'get_subgroup_2c_tensors'
3214
3215 INTEGER :: handle, i, i_img, i_ri, i_spin, iproc, &
3216 j, natom, nblks, nimg, nspins
3217 INTEGER(int_8) :: nze
3218 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_ri_ext, bsizes_ri_ext_split, &
3219 dist1, dist2
3220 INTEGER, DIMENSION(2) :: pdims_2d
3221 INTEGER, DIMENSION(:), POINTER :: col_dist, ri_blk_size, row_dist
3222 INTEGER, DIMENSION(:, :), POINTER :: dbcsr_pgrid
3223 REAL(dp) :: occ
3224 TYPE(dbcsr_distribution_type) :: dbcsr_dist_sub
3225 TYPE(dbt_pgrid_type) :: pgrid_2d
3226 TYPE(dbt_type) :: work, work_sub
3227
3228 CALL timeset(routinen, handle)
3229
3230 !Create the 2d pgrid
3231 pdims_2d = 0
3232 CALL dbt_pgrid_create(para_env_sub, pdims_2d, pgrid_2d)
3233
3234 natom = SIZE(ri_data%bsizes_RI)
3235 nblks = SIZE(ri_data%bsizes_RI_split)
3236 ALLOCATE (bsizes_ri_ext(ri_data%ncell_RI*natom))
3237 ALLOCATE (bsizes_ri_ext_split(ri_data%ncell_RI*nblks))
3238 DO i_ri = 1, ri_data%ncell_RI
3239 bsizes_ri_ext((i_ri - 1)*natom + 1:i_ri*natom) = ri_data%bsizes_RI(:)
3240 bsizes_ri_ext_split((i_ri - 1)*nblks + 1:i_ri*nblks) = ri_data%bsizes_RI_split(:)
3241 END DO
3242
3243 !nRI x nRI 2c tensors
3244 CALL create_2c_tensor(t_2c_work(1), dist1, dist2, pgrid_2d, &
3245 bsizes_ri_ext, bsizes_ri_ext, &
3246 name="(RI | RI)")
3247 DEALLOCATE (dist1, dist2)
3248
3249 CALL create_2c_tensor(t_2c_work(2), dist1, dist2, pgrid_2d, &
3250 bsizes_ri_ext_split, bsizes_ri_ext_split, &
3251 name="(RI | RI)")
3252 DEALLOCATE (dist1, dist2)
3253
3254 !the AO based tensors
3255 CALL create_2c_tensor(ks_t_split(1), dist1, dist2, pgrid_2d, &
3256 ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
3257 name="(AO | AO)")
3258 DEALLOCATE (dist1, dist2)
3259 CALL dbt_create(ks_t_split(1), ks_t_split(2))
3260
3261 CALL create_2c_tensor(t_2c_ao_tmp(1), dist1, dist2, pgrid_2d, &
3262 ri_data%bsizes_AO, ri_data%bsizes_AO, &
3263 name="(AO | AO)")
3264 DEALLOCATE (dist1, dist2)
3265
3266 nspins = SIZE(ks_t_sub, 1)
3267 nimg = SIZE(ks_t_sub, 2)
3268 DO i_img = 1, nimg
3269 DO i_spin = 1, nspins
3270 CALL dbt_create(t_2c_ao_tmp(1), ks_t_sub(i_spin, i_img))
3271 END DO
3272 END DO
3273
3274 !Finally the HFX potential matrices
3275 !For now, we do a convoluted things where we go to tensors first, then back to matrices.
3276 CALL create_2c_tensor(work_sub, dist1, dist2, pgrid_2d, &
3277 ri_data%bsizes_RI, ri_data%bsizes_RI, &
3278 name="(RI | RI)")
3279 CALL dbt_create(ri_data%kp_mat_2c_pot(1, 1), work)
3280
3281 ALLOCATE (dbcsr_pgrid(0:pdims_2d(1) - 1, 0:pdims_2d(2) - 1))
3282 iproc = 0
3283 DO i = 0, pdims_2d(1) - 1
3284 DO j = 0, pdims_2d(2) - 1
3285 dbcsr_pgrid(i, j) = iproc
3286 iproc = iproc + 1
3287 END DO
3288 END DO
3289
3290 !We need to have the same exact 2d block dist as the tensors
3291 ALLOCATE (col_dist(natom), row_dist(natom))
3292 row_dist(:) = dist1(:)
3293 col_dist(:) = dist2(:)
3294
3295 ALLOCATE (ri_blk_size(natom))
3296 ri_blk_size(:) = ri_data%bsizes_RI(:)
3297
3298 CALL dbcsr_distribution_new(dbcsr_dist_sub, group=para_env_sub%get_handle(), pgrid=dbcsr_pgrid, &
3299 row_dist=row_dist, col_dist=col_dist)
3300 CALL dbcsr_create(mat_2c_pot(1), dist=dbcsr_dist_sub, name="sub", matrix_type=dbcsr_type_no_symmetry, &
3301 row_blk_size=ri_blk_size, col_blk_size=ri_blk_size)
3302
3303 DO i_img = 1, nimg
3304 IF (i_img > 1) CALL dbcsr_create(mat_2c_pot(i_img), template=mat_2c_pot(1))
3305 CALL dbt_copy_matrix_to_tensor(ri_data%kp_mat_2c_pot(1, i_img), work)
3306 CALL get_tensor_occupancy(work, nze, occ)
3307 IF (nze == 0) cycle
3308
3309 CALL copy_2c_to_subgroup(work_sub, work, group_size, ngroups, para_env)
3310 CALL dbt_copy_tensor_to_matrix(work_sub, mat_2c_pot(i_img))
3311 CALL dbcsr_filter(mat_2c_pot(i_img), ri_data%filter_eps)
3312 CALL dbt_clear(work_sub)
3313 END DO
3314
3315 CALL dbt_destroy(work)
3316 CALL dbt_destroy(work_sub)
3317 CALL dbt_pgrid_destroy(pgrid_2d)
3318 CALL dbcsr_distribution_release(dbcsr_dist_sub)
3319 DEALLOCATE (col_dist, row_dist, ri_blk_size, dbcsr_pgrid)
3320 CALL timestop(handle)
3321
3322 END SUBROUTINE get_subgroup_2c_tensors
3323
3324! **************************************************************************************************
3325!> \brief copy all required 3c tensors from the main MPI group to the subgroups
3326!> \param t_3c_int ...
3327!> \param t_3c_work_2 ...
3328!> \param t_3c_work_3 ...
3329!> \param t_3c_apc ...
3330!> \param t_3c_apc_sub ...
3331!> \param group_size ...
3332!> \param ngroups ...
3333!> \param para_env ...
3334!> \param para_env_sub ...
3335!> \param ri_data ...
3336! **************************************************************************************************
3337 SUBROUTINE get_subgroup_3c_tensors(t_3c_int, t_3c_work_2, t_3c_work_3, t_3c_apc, t_3c_apc_sub, &
3338 group_size, ngroups, para_env, para_env_sub, ri_data)
3339 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_int, t_3c_work_2, t_3c_work_3
3340 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_apc, t_3c_apc_sub
3341 INTEGER, INTENT(IN) :: group_size, ngroups
3342 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3343 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3344
3345 CHARACTER(len=*), PARAMETER :: routinen = 'get_subgroup_3c_tensors'
3346
3347 INTEGER :: batch_size, bo(2), handle, handle2, &
3348 i_blk, i_img, i_ri, i_spin, ib, natom, &
3349 nblks_ao, nblks_ri, nimg, nspins
3350 INTEGER(int_8) :: nze
3351 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_ri_ext, bsizes_ri_ext_split, &
3352 bsizes_stack, bsizes_tmp, dist1, &
3353 dist2, dist3, dist_stack, idx_to_at
3354 INTEGER, ALLOCATABLE, DIMENSION(:, :, :, :) :: subgroup_dest
3355 INTEGER, DIMENSION(3) :: pdims
3356 REAL(dp) :: occ
3357 TYPE(dbt_distribution_type) :: t_dist
3358 TYPE(dbt_pgrid_type) :: pgrid
3359 TYPE(dbt_type) :: tmp, work_atom_block, work_atom_block_sub
3360
3361 CALL timeset(routinen, handle)
3362
3363 nblks_ri = SIZE(ri_data%bsizes_RI_split)
3364 ALLOCATE (bsizes_ri_ext_split(ri_data%ncell_RI*nblks_ri))
3365 DO i_ri = 1, ri_data%ncell_RI
3366 bsizes_ri_ext_split((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = ri_data%bsizes_RI_split(:)
3367 END DO
3368
3369 !Preparing larger block sizes for efficient communication (less, bigger messages)
3370 natom = SIZE(ri_data%bsizes_RI)
3371 nblks_ri = natom
3372 ALLOCATE (bsizes_tmp(nblks_ri))
3373 DO i_blk = 1, nblks_ri
3374 bo = get_limit(natom, nblks_ri, i_blk - 1)
3375 bsizes_tmp(i_blk) = sum(ri_data%bsizes_RI(bo(1):bo(2)))
3376 END DO
3377 ALLOCATE (bsizes_ri_ext(ri_data%ncell_RI*nblks_ri))
3378 DO i_ri = 1, ri_data%ncell_RI
3379 bsizes_ri_ext((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = bsizes_tmp(:)
3380 END DO
3381
3382 batch_size = ri_data%kp_stack_size
3383 nblks_ao = SIZE(ri_data%bsizes_AO_split)
3384 ALLOCATE (bsizes_stack(batch_size*nblks_ao))
3385 DO ib = 1, batch_size
3386 bsizes_stack((ib - 1)*nblks_ao + 1:ib*nblks_ao) = ri_data%bsizes_AO_split(:)
3387 END DO
3388
3389 !Create the pgrid for the configuration correspoinding to ri_data%t_3c_int_ctr_3
3390 natom = SIZE(ri_data%bsizes_RI)
3391 pdims = 0
3392 CALL dbt_pgrid_create(para_env_sub, pdims, pgrid, &
3393 tensor_dims=[SIZE(bsizes_ri_ext_split), 1, batch_size*SIZE(ri_data%bsizes_AO_split)])
3394
3395 !Create all required 3c tensors in that configuration
3396 CALL create_3c_tensor(t_3c_int(1), dist1, dist2, dist3, &
3397 pgrid, bsizes_ri_ext_split, ri_data%bsizes_AO_split, &
3398 ri_data%bsizes_AO_split, [1], [2, 3], name="(RI | AO AO)")
3399 nimg = SIZE(t_3c_int)
3400 DO i_img = 2, nimg
3401 CALL dbt_create(t_3c_int(1), t_3c_int(i_img))
3402 END DO
3403
3404 !The stacked work tensors, in a distribution that matches that of t_3c_int
3405 ALLOCATE (dist_stack(batch_size*nblks_ao))
3406 DO ib = 1, batch_size
3407 dist_stack((ib - 1)*nblks_ao + 1:ib*nblks_ao) = dist3(:)
3408 END DO
3409
3410 CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3411 CALL dbt_create(t_3c_work_3(1), "work_3_stack", t_dist, [1], [2, 3], &
3412 bsizes_ri_ext_split, ri_data%bsizes_AO_split, bsizes_stack)
3413 CALL dbt_create(t_3c_work_3(1), t_3c_work_3(2))
3414 CALL dbt_create(t_3c_work_3(1), t_3c_work_3(3))
3415 CALL dbt_distribution_destroy(t_dist)
3416 DEALLOCATE (dist1, dist2, dist3, dist_stack)
3417
3418 !For more efficient communication, we use intermediate tensors with larger block size
3419 CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3420 pgrid, bsizes_ri_ext, ri_data%bsizes_AO, &
3421 ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3422 DEALLOCATE (dist1, dist2, dist3)
3423
3424 CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3425 ri_data%pgrid, bsizes_ri_ext, ri_data%bsizes_AO, &
3426 ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3427 DEALLOCATE (dist1, dist2, dist3)
3428
3429 CALL get_3c_subgroup_dest(subgroup_dest, work_atom_block_sub, work_atom_block, &
3430 group_size, ngroups, para_env)
3431
3432 !Finally copy the integrals into the subgroups (if not there already)
3433 CALL timeset(routinen//"_ints", handle2)
3434 IF (ALLOCATED(ri_data%kp_t_3c_int)) THEN
3435 DO i_img = 1, nimg
3436 CALL dbt_copy(ri_data%kp_t_3c_int(i_img), t_3c_int(i_img), move_data=.true.)
3437 END DO
3438 ELSE
3439 ALLOCATE (ri_data%kp_t_3c_int(nimg))
3440 DO i_img = 1, nimg
3441 CALL dbt_create(t_3c_int(i_img), ri_data%kp_t_3c_int(i_img))
3442 CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, i_img), nze, occ)
3443 IF (nze == 0) cycle
3444 CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, i_img), work_atom_block, order=[2, 1, 3])
3445 CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, &
3446 ngroups, para_env, subgroup_dest)
3447 CALL dbt_copy(work_atom_block_sub, t_3c_int(i_img), move_data=.true.)
3448 CALL dbt_filter(t_3c_int(i_img), ri_data%filter_eps)
3449 END DO
3450 END IF
3451 CALL timestop(handle2)
3452 CALL dbt_pgrid_destroy(pgrid)
3453 CALL dbt_destroy(work_atom_block)
3454 CALL dbt_destroy(work_atom_block_sub)
3455 DEALLOCATE (subgroup_dest)
3456
3457 !Do the same for the t_3c_ctr_2 configuration
3458 pdims = 0
3459 CALL dbt_pgrid_create(para_env_sub, pdims, pgrid, &
3460 tensor_dims=[1, SIZE(bsizes_ri_ext_split), batch_size*SIZE(ri_data%bsizes_AO_split)])
3461
3462 !For more efficient communication, we use intermediate tensors with larger block size
3463 CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3464 pgrid, ri_data%bsizes_AO, bsizes_ri_ext, &
3465 ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3466 DEALLOCATE (dist1, dist2, dist3)
3467
3468 CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3469 ri_data%pgrid_1, ri_data%bsizes_AO, bsizes_ri_ext, &
3470 ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3471 DEALLOCATE (dist1, dist2, dist3)
3472
3473 CALL get_3c_subgroup_dest(subgroup_dest, work_atom_block_sub, work_atom_block, &
3474 group_size, ngroups, para_env)
3475
3476 !template for t_3c_apc_sub
3477 CALL create_3c_tensor(tmp, dist1, dist2, dist3, &
3478 pgrid, ri_data%bsizes_AO_split, bsizes_ri_ext_split, &
3479 ri_data%bsizes_AO_split, [1], [2, 3], name="(AO RI | AO)")
3480
3481 !create t_3c_work_2 tensors in a distribution that matches the above
3482 ALLOCATE (dist_stack(batch_size*nblks_ao))
3483 DO ib = 1, batch_size
3484 dist_stack((ib - 1)*nblks_ao + 1:ib*nblks_ao) = dist3(:)
3485 END DO
3486
3487 CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3488 CALL dbt_create(t_3c_work_2(1), "work_2_stack", t_dist, [1], [2, 3], &
3489 ri_data%bsizes_AO_split, bsizes_ri_ext_split, bsizes_stack)
3490 CALL dbt_create(t_3c_work_2(1), t_3c_work_2(2))
3491 CALL dbt_create(t_3c_work_2(1), t_3c_work_2(3))
3492 CALL dbt_distribution_destroy(t_dist)
3493 DEALLOCATE (dist1, dist2, dist3, dist_stack)
3494
3495 !Finally copy data from t_3c_apc to the subgroups
3496 ALLOCATE (idx_to_at(SIZE(ri_data%bsizes_AO)))
3497 CALL get_idx_to_atom(idx_to_at, ri_data%bsizes_AO, ri_data%bsizes_AO)
3498 nspins = SIZE(t_3c_apc, 1)
3499 CALL timeset(routinen//"_apc", handle2)
3500 DO i_img = 1, nimg
3501 DO i_spin = 1, nspins
3502 CALL dbt_create(tmp, t_3c_apc_sub(i_spin, i_img))
3503 CALL get_tensor_occupancy(t_3c_apc(i_spin, i_img), nze, occ)
3504 IF (nze == 0) cycle
3505 CALL dbt_copy(t_3c_apc(i_spin, i_img), work_atom_block, move_data=.true.)
3506 CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, ngroups, para_env, &
3507 subgroup_dest, ri_data%iatom_to_subgroup, 1, idx_to_at)
3508 CALL dbt_copy(work_atom_block_sub, t_3c_apc_sub(i_spin, i_img), move_data=.true.)
3509 CALL dbt_filter(t_3c_apc_sub(i_spin, i_img), ri_data%filter_eps)
3510 END DO
3511 DO i_spin = 1, nspins
3512 CALL dbt_destroy(t_3c_apc(i_spin, i_img))
3513 END DO
3514 END DO
3515 CALL timestop(handle2)
3516 CALL dbt_pgrid_destroy(pgrid)
3517 CALL dbt_destroy(tmp)
3518 CALL dbt_destroy(work_atom_block)
3519 CALL dbt_destroy(work_atom_block_sub)
3520
3521 CALL timestop(handle)
3522
3523 END SUBROUTINE get_subgroup_3c_tensors
3524
3525! **************************************************************************************************
3526!> \brief copy all required 2c force tensors from the main MPI group to the subgroups
3527!> \param t_2c_inv ...
3528!> \param t_2c_bint ...
3529!> \param t_2c_metric ...
3530!> \param mat_2c_pot ...
3531!> \param t_2c_work ...
3532!> \param rho_ao_t ...
3533!> \param rho_ao_t_sub ...
3534!> \param t_2c_der_metric ...
3535!> \param t_2c_der_metric_sub ...
3536!> \param mat_der_pot ...
3537!> \param mat_der_pot_sub ...
3538!> \param group_size ...
3539!> \param ngroups ...
3540!> \param para_env ...
3541!> \param para_env_sub ...
3542!> \param ri_data ...
3543!> \note Main MPI group tensors are deleted within this routine, for memory optimization
3544! **************************************************************************************************
3545 SUBROUTINE get_subgroup_2c_derivs(t_2c_inv, t_2c_bint, t_2c_metric, mat_2c_pot, t_2c_work, rho_ao_t, &
3546 rho_ao_t_sub, t_2c_der_metric, t_2c_der_metric_sub, mat_der_pot, &
3547 mat_der_pot_sub, group_size, ngroups, para_env, para_env_sub, ri_data)
3548 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_inv, t_2c_bint, t_2c_metric
3549 TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: mat_2c_pot
3550 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_work
3551 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: rho_ao_t, rho_ao_t_sub, t_2c_der_metric, &
3552 t_2c_der_metric_sub
3553 TYPE(dbcsr_type), DIMENSION(:, :), INTENT(INOUT) :: mat_der_pot, mat_der_pot_sub
3554 INTEGER, INTENT(IN) :: group_size, ngroups
3555 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3556 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3557
3558 CHARACTER(len=*), PARAMETER :: routinen = 'get_subgroup_2c_derivs'
3559
3560 INTEGER :: handle, i, i_img, i_ri, i_spin, i_xyz, &
3561 iatom, iproc, j, natom, nblks, nimg, &
3562 nspins
3563 INTEGER(int_8) :: nze
3564 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_ri_ext, bsizes_ri_ext_split, &
3565 dist1, dist2
3566 INTEGER, DIMENSION(2) :: pdims_2d
3567 INTEGER, DIMENSION(:), POINTER :: col_dist, ri_blk_size, row_dist
3568 INTEGER, DIMENSION(:, :), POINTER :: dbcsr_pgrid
3569 REAL(dp) :: occ
3570 TYPE(dbcsr_distribution_type) :: dbcsr_dist_sub
3571 TYPE(dbt_pgrid_type) :: pgrid_2d
3572 TYPE(dbt_type) :: work, work_sub
3573
3574 CALL timeset(routinen, handle)
3575
3576 !Note: a fair portion of this routine is copied from the energy version of it
3577 !Create the 2d pgrid
3578 pdims_2d = 0
3579 CALL dbt_pgrid_create(para_env_sub, pdims_2d, pgrid_2d)
3580
3581 natom = SIZE(ri_data%bsizes_RI)
3582 nblks = SIZE(ri_data%bsizes_RI_split)
3583 ALLOCATE (bsizes_ri_ext(ri_data%ncell_RI*natom))
3584 ALLOCATE (bsizes_ri_ext_split(ri_data%ncell_RI*nblks))
3585 DO i_ri = 1, ri_data%ncell_RI
3586 bsizes_ri_ext((i_ri - 1)*natom + 1:i_ri*natom) = ri_data%bsizes_RI(:)
3587 bsizes_ri_ext_split((i_ri - 1)*nblks + 1:i_ri*nblks) = ri_data%bsizes_RI_split(:)
3588 END DO
3589
3590 !nRI x nRI 2c tensors
3591 CALL create_2c_tensor(t_2c_inv(1), dist1, dist2, pgrid_2d, &
3592 bsizes_ri_ext, bsizes_ri_ext, &
3593 name="(RI | RI)")
3594 DEALLOCATE (dist1, dist2)
3595
3596 CALL dbt_create(t_2c_inv(1), t_2c_bint(1))
3597 CALL dbt_create(t_2c_inv(1), t_2c_metric(1))
3598 DO iatom = 2, natom
3599 CALL dbt_create(t_2c_inv(1), t_2c_inv(iatom))
3600 CALL dbt_create(t_2c_inv(1), t_2c_bint(iatom))
3601 CALL dbt_create(t_2c_inv(1), t_2c_metric(iatom))
3602 END DO
3603 CALL dbt_create(t_2c_inv(1), t_2c_work(1))
3604 CALL dbt_create(t_2c_inv(1), t_2c_work(2))
3605 CALL dbt_create(t_2c_inv(1), t_2c_work(3))
3606 CALL dbt_create(t_2c_inv(1), t_2c_work(4))
3607
3608 CALL create_2c_tensor(t_2c_work(5), dist1, dist2, pgrid_2d, &
3609 bsizes_ri_ext_split, bsizes_ri_ext_split, &
3610 name="(RI | RI)")
3611 DEALLOCATE (dist1, dist2)
3612
3613 !copy the data from the main group.
3614 DO iatom = 1, natom
3615 CALL copy_2c_to_subgroup(t_2c_inv(iatom), ri_data%t_2c_inv(1, iatom), group_size, ngroups, para_env)
3616 CALL copy_2c_to_subgroup(t_2c_bint(iatom), ri_data%t_2c_int(1, iatom), group_size, ngroups, para_env)
3617 CALL copy_2c_to_subgroup(t_2c_metric(iatom), ri_data%t_2c_pot(1, iatom), group_size, ngroups, para_env)
3618 END DO
3619
3620 !This includes the derivatives of the RI metric, for which there is one per atom
3621 DO i_xyz = 1, 3
3622 DO iatom = 1, natom
3623 CALL dbt_create(t_2c_inv(1), t_2c_der_metric_sub(iatom, i_xyz))
3624 CALL copy_2c_to_subgroup(t_2c_der_metric_sub(iatom, i_xyz), t_2c_der_metric(iatom, i_xyz), &
3625 group_size, ngroups, para_env)
3626 CALL dbt_destroy(t_2c_der_metric(iatom, i_xyz))
3627 END DO
3628 END DO
3629
3630 !AO x AO 2c tensors
3631 CALL create_2c_tensor(rho_ao_t_sub(1, 1), dist1, dist2, pgrid_2d, &
3632 ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
3633 name="(AO | AO)")
3634 DEALLOCATE (dist1, dist2)
3635 nspins = SIZE(rho_ao_t, 1)
3636 nimg = SIZE(rho_ao_t, 2)
3637
3638 DO i_img = 1, nimg
3639 DO i_spin = 1, nspins
3640 IF (.NOT. (i_img == 1 .AND. i_spin == 1)) &
3641 CALL dbt_create(rho_ao_t_sub(1, 1), rho_ao_t_sub(i_spin, i_img))
3642 CALL copy_2c_to_subgroup(rho_ao_t_sub(i_spin, i_img), rho_ao_t(i_spin, i_img), &
3643 group_size, ngroups, para_env)
3644 CALL dbt_destroy(rho_ao_t(i_spin, i_img))
3645 END DO
3646 END DO
3647
3648 !The RIxRI matrices, going through tensors
3649 CALL create_2c_tensor(work_sub, dist1, dist2, pgrid_2d, &
3650 ri_data%bsizes_RI, ri_data%bsizes_RI, &
3651 name="(RI | RI)")
3652 CALL dbt_create(ri_data%kp_mat_2c_pot(1, 1), work)
3653
3654 ALLOCATE (dbcsr_pgrid(0:pdims_2d(1) - 1, 0:pdims_2d(2) - 1))
3655 iproc = 0
3656 DO i = 0, pdims_2d(1) - 1
3657 DO j = 0, pdims_2d(2) - 1
3658 dbcsr_pgrid(i, j) = iproc
3659 iproc = iproc + 1
3660 END DO
3661 END DO
3662
3663 !We need to have the same exact 2d block dist as the tensors
3664 ALLOCATE (col_dist(natom), row_dist(natom))
3665 row_dist(:) = dist1(:)
3666 col_dist(:) = dist2(:)
3667
3668 ALLOCATE (ri_blk_size(natom))
3669 ri_blk_size(:) = ri_data%bsizes_RI(:)
3670
3671 CALL dbcsr_distribution_new(dbcsr_dist_sub, group=para_env_sub%get_handle(), pgrid=dbcsr_pgrid, &
3672 row_dist=row_dist, col_dist=col_dist)
3673 CALL dbcsr_create(mat_2c_pot(1), dist=dbcsr_dist_sub, name="sub", matrix_type=dbcsr_type_no_symmetry, &
3674 row_blk_size=ri_blk_size, col_blk_size=ri_blk_size)
3675
3676 !The HFX potential
3677 DO i_img = 1, nimg
3678 IF (i_img > 1) CALL dbcsr_create(mat_2c_pot(i_img), template=mat_2c_pot(1))
3679 CALL dbt_copy_matrix_to_tensor(ri_data%kp_mat_2c_pot(1, i_img), work)
3680 CALL get_tensor_occupancy(work, nze, occ)
3681 IF (nze == 0) cycle
3682
3683 CALL copy_2c_to_subgroup(work_sub, work, group_size, ngroups, para_env)
3684 CALL dbt_copy_tensor_to_matrix(work_sub, mat_2c_pot(i_img))
3685 CALL dbcsr_filter(mat_2c_pot(i_img), ri_data%filter_eps)
3686 CALL dbt_clear(work_sub)
3687 END DO
3688
3689 !The derivatives of the HFX potential
3690 DO i_xyz = 1, 3
3691 DO i_img = 1, nimg
3692 CALL dbcsr_create(mat_der_pot_sub(i_img, i_xyz), template=mat_2c_pot(1))
3693 CALL dbt_copy_matrix_to_tensor(mat_der_pot(i_img, i_xyz), work)
3694 CALL dbcsr_release(mat_der_pot(i_img, i_xyz))
3695 CALL get_tensor_occupancy(work, nze, occ)
3696 IF (nze == 0) cycle
3697
3698 CALL copy_2c_to_subgroup(work_sub, work, group_size, ngroups, para_env)
3699 CALL dbt_copy_tensor_to_matrix(work_sub, mat_der_pot_sub(i_img, i_xyz))
3700 CALL dbcsr_filter(mat_der_pot_sub(i_img, i_xyz), ri_data%filter_eps)
3701 CALL dbt_clear(work_sub)
3702 END DO
3703 END DO
3704
3705 CALL dbt_destroy(work)
3706 CALL dbt_destroy(work_sub)
3707 CALL dbt_pgrid_destroy(pgrid_2d)
3708 CALL dbcsr_distribution_release(dbcsr_dist_sub)
3709 DEALLOCATE (col_dist, row_dist, ri_blk_size, dbcsr_pgrid)
3710
3711 CALL timestop(handle)
3712
3713 END SUBROUTINE get_subgroup_2c_derivs
3714
3715! **************************************************************************************************
3716!> \brief copy all required 3c derivative tensors from the main MPI group to the subgroups
3717!> \param t_3c_work_2 ...
3718!> \param t_3c_work_3 ...
3719!> \param t_3c_der_AO ...
3720!> \param t_3c_der_AO_sub ...
3721!> \param t_3c_der_RI ...
3722!> \param t_3c_der_RI_sub ...
3723!> \param t_3c_apc ...
3724!> \param t_3c_apc_sub ...
3725!> \param t_3c_der_stack ...
3726!> \param group_size ...
3727!> \param ngroups ...
3728!> \param para_env ...
3729!> \param para_env_sub ...
3730!> \param ri_data ...
3731!> \note the tensor containing the derivatives in the main MPI group are deleted for memory
3732! **************************************************************************************************
3733 SUBROUTINE get_subgroup_3c_derivs(t_3c_work_2, t_3c_work_3, t_3c_der_AO, t_3c_der_AO_sub, &
3734 t_3c_der_RI, t_3c_der_RI_sub, t_3c_apc, t_3c_apc_sub, &
3735 t_3c_der_stack, group_size, ngroups, para_env, para_env_sub, &
3736 ri_data)
3737 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_work_2, t_3c_work_3
3738 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_der_ao, t_3c_der_ao_sub, &
3739 t_3c_der_ri, t_3c_der_ri_sub, &
3740 t_3c_apc, t_3c_apc_sub
3741 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_der_stack
3742 INTEGER, INTENT(IN) :: group_size, ngroups
3743 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3744 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3745
3746 CHARACTER(len=*), PARAMETER :: routinen = 'get_subgroup_3c_derivs'
3747
3748 INTEGER :: batch_size, handle, i_img, i_ri, i_spin, &
3749 i_xyz, ib, nblks_ao, nblks_ri, nimg, &
3750 nspins, pdims(3)
3751 INTEGER(int_8) :: nze
3752 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_ri_ext, bsizes_ri_ext_split, &
3753 bsizes_stack, dist1, dist2, dist3, &
3754 dist_stack, idx_to_at
3755 INTEGER, ALLOCATABLE, DIMENSION(:, :, :, :) :: subgroup_dest
3756 REAL(dp) :: occ
3757 TYPE(dbt_distribution_type) :: t_dist
3758 TYPE(dbt_pgrid_type) :: pgrid
3759 TYPE(dbt_type) :: tmp, work_atom_block, work_atom_block_sub
3760
3761 CALL timeset(routinen, handle)
3762
3763 !We use intermediate tensors with larger block size for more optimized communication
3764 nblks_ri = SIZE(ri_data%bsizes_RI)
3765 ALLOCATE (bsizes_ri_ext(ri_data%ncell_RI*nblks_ri))
3766 DO i_ri = 1, ri_data%ncell_RI
3767 bsizes_ri_ext((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = ri_data%bsizes_RI(:)
3768 END DO
3769
3770 CALL dbt_get_info(ri_data%kp_t_3c_int(1), pdims=pdims)
3771 CALL dbt_pgrid_create(para_env_sub, pdims, pgrid)
3772
3773 CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3774 pgrid, bsizes_ri_ext, ri_data%bsizes_AO, &
3775 ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3776 DEALLOCATE (dist1, dist2, dist3)
3777
3778 CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3779 ri_data%pgrid_2, bsizes_ri_ext, ri_data%bsizes_AO, &
3780 ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3781 DEALLOCATE (dist1, dist2, dist3)
3782 CALL dbt_pgrid_destroy(pgrid)
3783
3784 CALL get_3c_subgroup_dest(subgroup_dest, work_atom_block_sub, work_atom_block, &
3785 group_size, ngroups, para_env)
3786
3787 !We use the 3c integrals on the subgroup as template for the derivatives
3788 nimg = ri_data%nimg
3789 DO i_xyz = 1, 3
3790 DO i_img = 1, nimg
3791 CALL dbt_create(ri_data%kp_t_3c_int(1), t_3c_der_ao_sub(i_img, i_xyz))
3792 CALL get_tensor_occupancy(t_3c_der_ao(i_img, i_xyz), nze, occ)
3793 IF (nze == 0) cycle
3794
3795 CALL dbt_copy(t_3c_der_ao(i_img, i_xyz), work_atom_block, move_data=.true.)
3796 CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, &
3797 ngroups, para_env, subgroup_dest)
3798 CALL dbt_copy(work_atom_block_sub, t_3c_der_ao_sub(i_img, i_xyz), move_data=.true.)
3799 CALL dbt_filter(t_3c_der_ao_sub(i_img, i_xyz), ri_data%filter_eps)
3800 END DO
3801
3802 DO i_img = 1, nimg
3803 CALL dbt_create(ri_data%kp_t_3c_int(1), t_3c_der_ri_sub(i_img, i_xyz))
3804 CALL get_tensor_occupancy(t_3c_der_ri(i_img, i_xyz), nze, occ)
3805 IF (nze == 0) cycle
3806
3807 CALL dbt_copy(t_3c_der_ri(i_img, i_xyz), work_atom_block, move_data=.true.)
3808 CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, &
3809 ngroups, para_env, subgroup_dest)
3810 CALL dbt_copy(work_atom_block_sub, t_3c_der_ri_sub(i_img, i_xyz), move_data=.true.)
3811 CALL dbt_filter(t_3c_der_ri_sub(i_img, i_xyz), ri_data%filter_eps)
3812 END DO
3813
3814 DO i_img = 1, nimg
3815 CALL dbt_destroy(t_3c_der_ri(i_img, i_xyz))
3816 CALL dbt_destroy(t_3c_der_ao(i_img, i_xyz))
3817 END DO
3818 END DO
3819 CALL dbt_destroy(work_atom_block_sub)
3820 CALL dbt_destroy(work_atom_block)
3821 DEALLOCATE (subgroup_dest)
3822
3823 !Deal with t_3c_apc
3824 nblks_ri = SIZE(ri_data%bsizes_RI_split)
3825 ALLOCATE (bsizes_ri_ext_split(ri_data%ncell_RI*nblks_ri))
3826 DO i_ri = 1, ri_data%ncell_RI
3827 bsizes_ri_ext_split((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = ri_data%bsizes_RI_split(:)
3828 END DO
3829
3830 pdims = 0
3831 CALL dbt_pgrid_create(para_env_sub, pdims, pgrid, &
3832 tensor_dims=[1, SIZE(bsizes_ri_ext_split), batch_size*SIZE(ri_data%bsizes_AO_split)])
3833
3834 CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3835 pgrid, ri_data%bsizes_AO, bsizes_ri_ext, &
3836 ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3837 DEALLOCATE (dist1, dist2, dist3)
3838
3839 CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3840 ri_data%pgrid_1, ri_data%bsizes_AO, bsizes_ri_ext, &
3841 ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3842 DEALLOCATE (dist1, dist2, dist3)
3843
3844 CALL create_3c_tensor(tmp, dist1, dist2, dist3, &
3845 pgrid, ri_data%bsizes_AO_split, bsizes_ri_ext_split, &
3846 ri_data%bsizes_AO_split, [1], [2, 3], name="(AO RI | AO)")
3847 DEALLOCATE (dist1, dist2, dist3)
3848
3849 CALL get_3c_subgroup_dest(subgroup_dest, work_atom_block_sub, work_atom_block, &
3850 group_size, ngroups, para_env)
3851
3852 ALLOCATE (idx_to_at(SIZE(ri_data%bsizes_AO)))
3853 CALL get_idx_to_atom(idx_to_at, ri_data%bsizes_AO, ri_data%bsizes_AO)
3854 nspins = SIZE(t_3c_apc, 1)
3855 DO i_img = 1, nimg
3856 DO i_spin = 1, nspins
3857 CALL dbt_create(tmp, t_3c_apc_sub(i_spin, i_img))
3858 CALL get_tensor_occupancy(t_3c_apc(i_spin, i_img), nze, occ)
3859 IF (nze == 0) cycle
3860 CALL dbt_copy(t_3c_apc(i_spin, i_img), work_atom_block, move_data=.true.)
3861 CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, ngroups, para_env, &
3862 subgroup_dest, ri_data%iatom_to_subgroup, 1, idx_to_at)
3863 CALL dbt_copy(work_atom_block_sub, t_3c_apc_sub(i_spin, i_img), move_data=.true.)
3864 CALL dbt_filter(t_3c_apc_sub(i_spin, i_img), ri_data%filter_eps)
3865 END DO
3866 DO i_spin = 1, nspins
3867 CALL dbt_destroy(t_3c_apc(i_spin, i_img))
3868 END DO
3869 END DO
3870 CALL dbt_destroy(tmp)
3871 CALL dbt_destroy(work_atom_block)
3872 CALL dbt_destroy(work_atom_block_sub)
3873 CALL dbt_pgrid_destroy(pgrid)
3874
3875 !t_3c_work_3 based on structure of 3c integrals/derivs
3876 batch_size = ri_data%kp_stack_size
3877 nblks_ao = SIZE(ri_data%bsizes_AO_split)
3878 ALLOCATE (bsizes_stack(batch_size*nblks_ao))
3879 DO ib = 1, batch_size
3880 bsizes_stack((ib - 1)*nblks_ao + 1:ib*nblks_ao) = ri_data%bsizes_AO_split(:)
3881 END DO
3882
3883 ALLOCATE (dist1(ri_data%ncell_RI*nblks_ri), dist2(nblks_ao), dist3(nblks_ao))
3884 CALL dbt_get_info(ri_data%kp_t_3c_int(1), proc_dist_1=dist1, proc_dist_2=dist2, &
3885 proc_dist_3=dist3, pdims=pdims)
3886
3887 ALLOCATE (dist_stack(batch_size*nblks_ao))
3888 DO ib = 1, batch_size
3889 dist_stack((ib - 1)*nblks_ao + 1:ib*nblks_ao) = dist3(:)
3890 END DO
3891
3892 CALL dbt_pgrid_create(para_env_sub, pdims, pgrid)
3893 CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3894 CALL dbt_create(t_3c_work_3(1), "work_3_stack", t_dist, [1], [2, 3], &
3895 bsizes_ri_ext_split, ri_data%bsizes_AO_split, bsizes_stack)
3896 CALL dbt_create(t_3c_work_3(1), t_3c_work_3(2))
3897 CALL dbt_create(t_3c_work_3(1), t_3c_work_3(3))
3898 CALL dbt_create(t_3c_work_3(1), t_3c_work_3(4))
3899 CALL dbt_distribution_destroy(t_dist)
3900 CALL dbt_pgrid_destroy(pgrid)
3901 DEALLOCATE (dist1, dist2, dist3, dist_stack)
3902
3903 !the derivatives are stacked in the same way
3904 CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(1))
3905 CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(2))
3906 CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(3))
3907 CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(4))
3908 CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(5))
3909 CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(6))
3910
3911 !t_3c_work_2 based on structure of t_3c_apc
3912 ALLOCATE (dist1(nblks_ao), dist2(ri_data%ncell_RI*nblks_ri), dist3(nblks_ao))
3913 CALL dbt_get_info(t_3c_apc_sub(1, 1), proc_dist_1=dist1, proc_dist_2=dist2, &
3914 proc_dist_3=dist3, pdims=pdims)
3915
3916 ALLOCATE (dist_stack(batch_size*nblks_ao))
3917 DO ib = 1, batch_size
3918 dist_stack((ib - 1)*nblks_ao + 1:ib*nblks_ao) = dist3(:)
3919 END DO
3920
3921 CALL dbt_pgrid_create(para_env_sub, pdims, pgrid)
3922 CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3923 CALL dbt_create(t_3c_work_2(1), "work_3_stack", t_dist, [1], [2, 3], &
3924 ri_data%bsizes_AO_split, bsizes_ri_ext_split, bsizes_stack)
3925 CALL dbt_create(t_3c_work_2(1), t_3c_work_2(2))
3926 CALL dbt_create(t_3c_work_2(1), t_3c_work_2(3))
3927 CALL dbt_distribution_destroy(t_dist)
3928 CALL dbt_pgrid_destroy(pgrid)
3929 DEALLOCATE (dist1, dist2, dist3, dist_stack)
3930
3931 CALL timestop(handle)
3932
3933 END SUBROUTINE get_subgroup_3c_derivs
3934
3935! **************************************************************************************************
3936!> \brief A routine that reorders the t_3c_int tensors such that all items which are fully empty
3937!> are bunched together. This way, we can get much more efficient screening based on NZE
3938!> \param t_3c_ints ...
3939!> \param ri_data ...
3940! **************************************************************************************************
3941 SUBROUTINE reorder_3c_ints(t_3c_ints, ri_data)
3942 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_ints
3943 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3944
3945 CHARACTER(LEN=*), PARAMETER :: routinen = 'reorder_3c_ints'
3946
3947 INTEGER :: handle, i_img, idx, idx_empty, idx_full, &
3948 nimg
3949 INTEGER(int_8) :: nze
3950 REAL(dp) :: occ
3951 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_3c_tmp
3952
3953 CALL timeset(routinen, handle)
3954
3955 nimg = ri_data%nimg
3956 ALLOCATE (t_3c_tmp(nimg))
3957 DO i_img = 1, nimg
3958 CALL dbt_create(t_3c_ints(i_img), t_3c_tmp(i_img))
3959 CALL dbt_copy(t_3c_ints(i_img), t_3c_tmp(i_img), move_data=.true.)
3960 END DO
3961
3962 !Loop over the images, check if ints have NZE == 0, and put them at the start or end of the
3963 !initial tensor array. Keep the mapping in an array
3964 ALLOCATE (ri_data%idx_to_img(nimg))
3965 idx_full = 0
3966 idx_empty = nimg + 1
3967
3968 DO i_img = 1, nimg
3969 CALL get_tensor_occupancy(t_3c_tmp(i_img), nze, occ)
3970 IF (nze == 0) THEN
3971 idx_empty = idx_empty - 1
3972 CALL dbt_copy(t_3c_tmp(i_img), t_3c_ints(idx_empty), move_data=.true.)
3973 ri_data%idx_to_img(idx_empty) = i_img
3974 ELSE
3975 idx_full = idx_full + 1
3976 CALL dbt_copy(t_3c_tmp(i_img), t_3c_ints(idx_full), move_data=.true.)
3977 ri_data%idx_to_img(idx_full) = i_img
3978 END IF
3979 CALL dbt_destroy(t_3c_tmp(i_img))
3980 END DO
3981
3982 !store the highest image index with non-zero integrals
3983 ri_data%nimg_nze = idx_full
3984
3985 ALLOCATE (ri_data%img_to_idx(nimg))
3986 DO idx = 1, nimg
3987 ri_data%img_to_idx(ri_data%idx_to_img(idx)) = idx
3988 END DO
3989
3990 CALL timestop(handle)
3991
3992 END SUBROUTINE reorder_3c_ints
3993
3994! **************************************************************************************************
3995!> \brief A routine that reorders the 3c derivatives, the same way that the integrals are, also to
3996!> increase efficiency of screening
3997!> \param t_3c_derivs ...
3998!> \param ri_data ...
3999! **************************************************************************************************
4000 SUBROUTINE reorder_3c_derivs(t_3c_derivs, ri_data)
4001 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_derivs
4002 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4003
4004 CHARACTER(LEN=*), PARAMETER :: routinen = 'reorder_3c_derivs'
4005
4006 INTEGER :: handle, i_img, i_xyz, idx, nimg
4007 INTEGER(int_8) :: nze
4008 REAL(dp) :: occ
4009 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_3c_tmp
4010
4011 CALL timeset(routinen, handle)
4012
4013 nimg = ri_data%nimg
4014 ALLOCATE (t_3c_tmp(nimg))
4015 DO i_img = 1, nimg
4016 CALL dbt_create(t_3c_derivs(1, 1), t_3c_tmp(i_img))
4017 END DO
4018
4019 DO i_xyz = 1, 3
4020 DO i_img = 1, nimg
4021 CALL dbt_copy(t_3c_derivs(i_img, i_xyz), t_3c_tmp(i_img), move_data=.true.)
4022 END DO
4023 DO i_img = 1, nimg
4024 idx = ri_data%img_to_idx(i_img)
4025 CALL dbt_copy(t_3c_tmp(i_img), t_3c_derivs(idx, i_xyz), move_data=.true.)
4026 CALL get_tensor_occupancy(t_3c_derivs(idx, i_xyz), nze, occ)
4027 IF (nze > 0) ri_data%nimg_nze = max(idx, ri_data%nimg_nze)
4028 END DO
4029 END DO
4030
4031 DO i_img = 1, nimg
4032 CALL dbt_destroy(t_3c_tmp(i_img))
4033 END DO
4034
4035 CALL timestop(handle)
4036
4037 END SUBROUTINE reorder_3c_derivs
4038
4039! **************************************************************************************************
4040!> \brief Get the sparsity pattern related to the non-symmetric AO basis overlap neighbor list
4041!> \param pattern ...
4042!> \param ri_data ...
4043!> \param qs_env ...
4044! **************************************************************************************************
4045 SUBROUTINE get_sparsity_pattern(pattern, ri_data, qs_env)
4046 INTEGER, DIMENSION(:, :, :), INTENT(INOUT) :: pattern
4047 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4048 TYPE(qs_environment_type), POINTER :: qs_env
4049
4050 INTEGER :: iatom, j_img, jatom, mj_img, natom, nimg
4051 INTEGER, ALLOCATABLE, DIMENSION(:) :: bins
4052 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: tmp_pattern
4053 INTEGER, DIMENSION(3) :: cell_j
4054 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
4055 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
4056 TYPE(dft_control_type), POINTER :: dft_control
4057 TYPE(kpoint_type), POINTER :: kpoints
4058 TYPE(mp_para_env_type), POINTER :: para_env
4060 DIMENSION(:), POINTER :: nl_iterator
4061 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
4062 POINTER :: nl_2c
4063
4064 NULLIFY (nl_2c, nl_iterator, kpoints, cell_to_index, dft_control, index_to_cell, para_env)
4065
4066 CALL get_qs_env(qs_env, kpoints=kpoints, dft_control=dft_control, para_env=para_env, natom=natom)
4067 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell, sab_nl=nl_2c)
4068
4069 nimg = ri_data%nimg
4070 pattern(:, :, :) = 0
4071
4072 !We use the symmetric nl for all images that have an opposite cell
4073 CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
4074 DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
4075 CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, cell=cell_j)
4076
4077 j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
4078 IF (j_img > nimg .OR. j_img < 1) cycle
4079
4080 mj_img = get_opp_index(j_img, qs_env)
4081 IF (mj_img > nimg .OR. mj_img < 1) cycle
4082
4083 IF (ri_data%present_images(j_img) == 0) cycle
4084
4085 pattern(iatom, jatom, j_img) = 1
4086 END DO
4087 CALL neighbor_list_iterator_release(nl_iterator)
4088
4089 !If there is no opposite cell present, then we take into account the non-symmetric nl
4090 CALL get_kpoint_info(kpoints, sab_nl_nosym=nl_2c)
4091
4092 CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
4093 DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
4094 CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, cell=cell_j)
4095
4096 j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
4097 IF (j_img > nimg .OR. j_img < 1) cycle
4098
4099 mj_img = get_opp_index(j_img, qs_env)
4100 IF (mj_img .LE. nimg .AND. mj_img > 0) cycle
4101
4102 IF (ri_data%present_images(j_img) == 0) cycle
4103
4104 pattern(iatom, jatom, j_img) = 1
4105 END DO
4106 CALL neighbor_list_iterator_release(nl_iterator)
4107
4108 CALL para_env%sum(pattern)
4109
4110 !If the opposite image is considered, then there is no need to compute diagonal twice
4111 DO j_img = 2, nimg
4112 DO iatom = 1, natom
4113 IF (pattern(iatom, iatom, j_img) .NE. 0) THEN
4114 mj_img = get_opp_index(j_img, qs_env)
4115 IF (mj_img > nimg .OR. mj_img < 1) cycle
4116 pattern(iatom, iatom, mj_img) = 0
4117 END IF
4118 END DO
4119 END DO
4120
4121 ! We want to equilibrate the sparsity pattern such that there are same amount of blocks
4122 ! for each atom i of i,j pairs
4123 ALLOCATE (bins(natom))
4124 bins(:) = 0
4125
4126 ALLOCATE (tmp_pattern(natom, natom, nimg))
4127 tmp_pattern(:, :, :) = 0
4128 DO j_img = 1, nimg
4129 DO jatom = 1, natom
4130 DO iatom = 1, natom
4131 IF (pattern(iatom, jatom, j_img) == 0) cycle
4132 mj_img = get_opp_index(j_img, qs_env)
4133
4134 !Should we take the i,j,b or th j,i,-b atomic block?
4135 IF (mj_img > nimg .OR. mj_img < 1) THEN
4136 !No opposite image, no choice
4137 bins(iatom) = bins(iatom) + 1
4138 tmp_pattern(iatom, jatom, j_img) = 1
4139 ELSE
4140
4141 IF (bins(iatom) > bins(jatom)) THEN
4142 bins(jatom) = bins(jatom) + 1
4143 tmp_pattern(jatom, iatom, mj_img) = 1
4144 ELSE
4145 bins(iatom) = bins(iatom) + 1
4146 tmp_pattern(iatom, jatom, j_img) = 1
4147 END IF
4148 END IF
4149 END DO
4150 END DO
4151 END DO
4152
4153 ! -1 => unoccupied, 0 => occupied
4154 pattern(:, :, :) = tmp_pattern(:, :, :) - 1
4155
4156 END SUBROUTINE get_sparsity_pattern
4157
4158! **************************************************************************************************
4159!> \brief Distribute the iatom, jatom, b_img triplet over the subgroupd to spread the load
4160!> the group id for each triplet is passed as the value of sparsity_pattern(i, j, b),
4161!> with -1 being an unoccupied block
4162!> \param sparsity_pattern ...
4163!> \param ngroups ...
4164!> \param ri_data ...
4165! **************************************************************************************************
4166 SUBROUTINE get_sub_dist(sparsity_pattern, ngroups, ri_data)
4167 INTEGER, DIMENSION(:, :, :), INTENT(INOUT) :: sparsity_pattern
4168 INTEGER, INTENT(IN) :: ngroups
4169 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4170
4171 INTEGER :: b_img, ctr, iat, iatom, igroup, jatom, &
4172 natom, nimg, ub
4173 INTEGER, ALLOCATABLE, DIMENSION(:) :: max_at_per_group
4174 REAL(dp) :: cost
4175 REAL(dp), ALLOCATABLE, DIMENSION(:) :: bins
4176
4177 natom = SIZE(sparsity_pattern, 2)
4178 nimg = SIZE(sparsity_pattern, 3)
4179
4180 !To avoid unnecessary data replication accross the subgroups, we want to have a limited number
4181 !of subgroup with the data of a given iatom. At the minimum, all groups have 1 atom
4182 !We assume that the cost associated to each iatom is roughly the same
4183 IF (.NOT. ALLOCATED(ri_data%iatom_to_subgroup)) THEN
4184 ALLOCATE (ri_data%iatom_to_subgroup(natom), max_at_per_group(ngroups))
4185 DO iatom = 1, natom
4186 NULLIFY (ri_data%iatom_to_subgroup(iatom)%array)
4187 ALLOCATE (ri_data%iatom_to_subgroup(iatom)%array(ngroups))
4188 ri_data%iatom_to_subgroup(iatom)%array(:) = .false.
4189 END DO
4190
4191 ub = natom/ngroups
4192 IF (ub*ngroups < natom) ub = ub + 1
4193 max_at_per_group(:) = max(1, ub)
4194
4195 !We want each atom to be present the same amount of times. Some groups might have more atoms
4196 !than other to achieve this.
4197 ctr = 0
4198 DO WHILE (modulo(sum(max_at_per_group), natom) .NE. 0)
4199 igroup = modulo(ctr, ngroups) + 1
4200 max_at_per_group(igroup) = max_at_per_group(igroup) + 1
4201 ctr = ctr + 1
4202 END DO
4203
4204 ctr = 0
4205 DO igroup = 1, ngroups
4206 DO iat = 1, max_at_per_group(igroup)
4207 iatom = modulo(ctr, natom) + 1
4208 ri_data%iatom_to_subgroup(iatom)%array(igroup) = .true.
4209 ctr = ctr + 1
4210 END DO
4211 END DO
4212 END IF
4213
4214 ALLOCATE (bins(ngroups))
4215 bins = 0.0_dp
4216 DO b_img = 1, nimg
4217 DO jatom = 1, natom
4218 DO iatom = 1, natom
4219 IF (sparsity_pattern(iatom, jatom, b_img) == -1) cycle
4220 igroup = minloc(bins, 1, mask=ri_data%iatom_to_subgroup(iatom)%array) - 1
4221
4222 !Use cost information from previous SCF if available
4223 IF (any(ri_data%kp_cost > epsilon(0.0_dp))) THEN
4224 cost = ri_data%kp_cost(iatom, jatom, b_img)
4225 ELSE
4226 cost = real(ri_data%bsizes_AO(iatom)*ri_data%bsizes_AO(jatom), dp)
4227 END IF
4228 bins(igroup + 1) = bins(igroup + 1) + cost
4229 sparsity_pattern(iatom, jatom, b_img) = igroup
4230 END DO
4231 END DO
4232 END DO
4233
4234 END SUBROUTINE get_sub_dist
4235
4236! **************************************************************************************************
4237!> \brief A rouine that updates the sparsity pattern for force calculation, where all i,j,b combinations
4238!> are visited.
4239!> \param force_pattern ...
4240!> \param scf_pattern ...
4241!> \param ngroups ...
4242!> \param ri_data ...
4243!> \param qs_env ...
4244! **************************************************************************************************
4245 SUBROUTINE update_pattern_to_forces(force_pattern, scf_pattern, ngroups, ri_data, qs_env)
4246 INTEGER, DIMENSION(:, :, :), INTENT(INOUT) :: force_pattern, scf_pattern
4247 INTEGER, INTENT(IN) :: ngroups
4248 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4249 TYPE(qs_environment_type), POINTER :: qs_env
4250
4251 INTEGER :: b_img, iatom, igroup, jatom, mb_img, &
4252 natom, nimg
4253 REAL(dp), ALLOCATABLE, DIMENSION(:) :: bins
4254
4255 natom = SIZE(scf_pattern, 2)
4256 nimg = SIZE(scf_pattern, 3)
4257
4258 ALLOCATE (bins(ngroups))
4259 bins = 0.0_dp
4260
4261 DO b_img = 1, nimg
4262 mb_img = get_opp_index(b_img, qs_env)
4263 DO jatom = 1, natom
4264 DO iatom = 1, natom
4265 !Important: same distribution as KS matrix, because reuse t_3c_apc
4266 igroup = minloc(bins, 1, mask=ri_data%iatom_to_subgroup(iatom)%array) - 1
4267
4268 !check that block not already treated
4269 IF (scf_pattern(iatom, jatom, b_img) > -1) cycle
4270
4271 !If not, take the cost of block j, i, -b (same energy contribution)
4272 IF (mb_img > 0 .AND. mb_img .LE. nimg) THEN
4273 IF (scf_pattern(jatom, iatom, mb_img) == -1) cycle
4274 bins(igroup + 1) = bins(igroup + 1) + ri_data%kp_cost(jatom, iatom, mb_img)
4275 force_pattern(iatom, jatom, b_img) = igroup
4276 END IF
4277 END DO
4278 END DO
4279 END DO
4280
4281 END SUBROUTINE update_pattern_to_forces
4282
4283! **************************************************************************************************
4284!> \brief A routine that determines the extend of the KP RI-HFX periodic images, including for the
4285!> extension of the RI basis
4286!> \param ri_data ...
4287!> \param qs_env ...
4288! **************************************************************************************************
4289 SUBROUTINE get_kp_and_ri_images(ri_data, qs_env)
4290 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4291 TYPE(qs_environment_type), POINTER :: qs_env
4292
4293 CHARACTER(LEN=*), PARAMETER :: routinen = 'get_kp_and_ri_images'
4294
4295 CHARACTER(LEN=512) :: warning_msg
4296 INTEGER :: cell_j(3), cell_k(3), handle, i_img, iatom, ikind, j_img, jatom, jcell, katom, &
4297 kcell, kp_index_lbounds(3), kp_index_ubounds(3), natom, ngroups, nimg, nkind, pcoord(3), &
4298 pdims(3)
4299 INTEGER, ALLOCATABLE, DIMENSION(:) :: dist_ao_1, dist_ao_2, dist_ri, &
4300 nri_per_atom, present_img, ri_cells
4301 INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
4302 REAL(dp) :: bump_fact, dij, dik, image_range, &
4303 ri_range, rij(3), rik(3)
4304 TYPE(dbt_type) :: t_dummy
4305 TYPE(dft_control_type), POINTER :: dft_control
4306 TYPE(distribution_2d_type), POINTER :: dist_2d
4307 TYPE(distribution_3d_type) :: dist_3d
4308 TYPE(gto_basis_set_p_type), ALLOCATABLE, &
4309 DIMENSION(:), TARGET :: basis_set_ao, basis_set_ri
4310 TYPE(kpoint_type), POINTER :: kpoints
4311 TYPE(mp_cart_type) :: mp_comm_t3c
4312 TYPE(mp_para_env_type), POINTER :: para_env
4313 TYPE(neighbor_list_3c_iterator_type) :: nl_3c_iter
4314 TYPE(neighbor_list_3c_type) :: nl_3c
4316 DIMENSION(:), POINTER :: nl_iterator
4317 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
4318 POINTER :: nl_2c
4319 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
4320 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
4321 TYPE(section_vals_type), POINTER :: hfx_section
4322
4323 NULLIFY (qs_kind_set, dist_2d, nl_2c, nl_iterator, dft_control, &
4324 particle_set, kpoints, para_env, cell_to_index, hfx_section)
4325
4326 CALL timeset(routinen, handle)
4327
4328 CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, distribution_2d=dist_2d, &
4329 dft_control=dft_control, particle_set=particle_set, kpoints=kpoints, &
4330 para_env=para_env, natom=natom)
4331 nimg = dft_control%nimages
4332 CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index)
4333 kp_index_lbounds = lbound(cell_to_index)
4334 kp_index_ubounds = ubound(cell_to_index)
4335
4336 hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
4337 CALL section_vals_val_get(hfx_section, "KP_NGROUPS", i_val=ngroups)
4338
4339 ALLOCATE (basis_set_ri(nkind), basis_set_ao(nkind))
4340 CALL basis_set_list_setup(basis_set_ri, ri_data%ri_basis_type, qs_kind_set)
4341 CALL basis_set_list_setup(basis_set_ao, ri_data%orb_basis_type, qs_kind_set)
4342
4343 !In case of shortrange HFX potential, it is imprtant to be consistent with the rest of the KP
4344 !code, and use EPS_SCHWARZ to determine the range (rather than eps_filter_2c in normal RI-HFX)
4345 IF (ri_data%hfx_pot%potential_type == do_potential_short) THEN
4346 CALL erfc_cutoff(ri_data%eps_schwarz, ri_data%hfx_pot%omega, ri_data%hfx_pot%cutoff_radius)
4347 WRITE (warning_msg, '(A)') &
4348 "The SHORTANGE HFX potential typically extends over many periodic images, "// &
4349 "possibly slowing down the calculation. Consider using the TRUNCATED "// &
4350 "potential for better computational performance."
4351 cpwarn(warning_msg)
4352 END IF
4353
4354 !Determine the range for contributing periodic images, and for the RI basis extension
4355 ri_data%kp_RI_range = 0.0_dp
4356 ri_data%kp_image_range = 0.0_dp
4357 DO ikind = 1, nkind
4358
4359 CALL init_interaction_radii_orb_basis(basis_set_ao(ikind)%gto_basis_set, ri_data%eps_pgf_orb)
4360 CALL get_gto_basis_set(basis_set_ao(ikind)%gto_basis_set, kind_radius=ri_range)
4361 ri_data%kp_RI_range = max(ri_range, ri_data%kp_RI_range)
4362
4363 CALL init_interaction_radii_orb_basis(basis_set_ao(ikind)%gto_basis_set, ri_data%eps_pgf_orb)
4364 CALL init_interaction_radii_orb_basis(basis_set_ri(ikind)%gto_basis_set, ri_data%eps_pgf_orb)
4365 CALL get_gto_basis_set(basis_set_ri(ikind)%gto_basis_set, kind_radius=image_range)
4366
4367 image_range = 2.0_dp*image_range + cutoff_screen_factor*ri_data%hfx_pot%cutoff_radius
4368 ri_data%kp_image_range = max(image_range, ri_data%kp_image_range)
4369 END DO
4370
4371 CALL section_vals_val_get(hfx_section, "KP_RI_BUMP_FACTOR", r_val=bump_fact)
4372 ri_data%kp_bump_rad = bump_fact*ri_data%kp_RI_range
4373
4374 !For the extent of the KP RI-HFX images, we are limited by the RI-HFX potential in
4375 !(mu^0 sigma^a|P^0) (P^0|Q^b) (Q^b|nu^b lambda^a+c), if there is no contact between
4376 !any P^0 and Q^b, then image b does not contribute
4377 CALL build_2c_neighbor_lists(nl_2c, basis_set_ri, basis_set_ri, ri_data%hfx_pot, &
4378 "HFX_2c_nl_RI", qs_env, sym_ij=.false., dist_2d=dist_2d)
4379
4380 ALLOCATE (present_img(nimg))
4381 present_img = 0
4382 ri_data%nimg = 0
4383 CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
4384 DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
4385 CALL get_iterator_info(nl_iterator, r=rij, cell=cell_j)
4386
4387 dij = norm2(rij)
4388
4389 j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
4390 IF (j_img > nimg .OR. j_img < 1) cycle
4391
4392 IF (dij > ri_data%kp_image_range) cycle
4393
4394 ri_data%nimg = max(j_img, ri_data%nimg)
4395 present_img(j_img) = 1
4396
4397 END DO
4398 CALL neighbor_list_iterator_release(nl_iterator)
4399 CALL release_neighbor_list_sets(nl_2c)
4400 CALL para_env%max(ri_data%nimg)
4401 IF (ri_data%nimg > nimg) &
4402 cpabort("Make sure the smallest exponent of the RI-HFX basis is larger than that of the ORB basis.")
4403
4404 !Keep track of which images will not contribute, so that can be ignored before calculation
4405 CALL para_env%sum(present_img)
4406 ALLOCATE (ri_data%present_images(ri_data%nimg))
4407 ri_data%present_images = 0
4408 DO i_img = 1, ri_data%nimg
4409 IF (present_img(i_img) > 0) ri_data%present_images(i_img) = 1
4410 END DO
4411
4412 CALL create_3c_tensor(t_dummy, dist_ao_1, dist_ao_2, dist_ri, &
4413 ri_data%pgrid, ri_data%bsizes_AO, ri_data%bsizes_AO, ri_data%bsizes_RI, &
4414 map1=[1, 2], map2=[3], name="(AO AO | RI)")
4415
4416 CALL dbt_mp_environ_pgrid(ri_data%pgrid, pdims, pcoord)
4417 CALL mp_comm_t3c%create(ri_data%pgrid%mp_comm_2d, 3, pdims)
4418 CALL distribution_3d_create(dist_3d, dist_ao_1, dist_ao_2, dist_ri, &
4419 nkind, particle_set, mp_comm_t3c, own_comm=.true.)
4420 DEALLOCATE (dist_ri, dist_ao_1, dist_ao_2)
4421 CALL dbt_destroy(t_dummy)
4422
4423 !For the extension of the RI basis P in (mu^0 sigma^a |P^i), we consider an atom if the distance,
4424 !between mu^0 and P^i if smaller or equal to the kind radius of mu^0
4425 CALL build_3c_neighbor_lists(nl_3c, basis_set_ao, basis_set_ao, basis_set_ri, dist_3d, &
4426 ri_data%ri_metric, "HFX_3c_nl", qs_env, op_pos=2, sym_ij=.false., &
4427 own_dist=.true.)
4428
4429 ALLOCATE (ri_cells(nimg))
4430 ri_cells = 0
4431
4432 ALLOCATE (nri_per_atom(natom))
4433 nri_per_atom = 0
4434
4435 CALL neighbor_list_3c_iterator_create(nl_3c_iter, nl_3c)
4436 DO WHILE (neighbor_list_3c_iterate(nl_3c_iter) == 0)
4437 CALL get_3c_iterator_info(nl_3c_iter, cell_k=cell_k, rik=rik, cell_j=cell_j, &
4438 iatom=iatom, jatom=jatom, katom=katom)
4439 dik = norm2(rik)
4440
4441 IF (any([cell_j(1), cell_j(2), cell_j(3)] < kp_index_lbounds) .OR. &
4442 any([cell_j(1), cell_j(2), cell_j(3)] > kp_index_ubounds)) cycle
4443
4444 jcell = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
4445 IF (jcell > nimg .OR. jcell < 1) cycle
4446
4447 IF (any([cell_k(1), cell_k(2), cell_k(3)] < kp_index_lbounds) .OR. &
4448 any([cell_k(1), cell_k(2), cell_k(3)] > kp_index_ubounds)) cycle
4449
4450 kcell = cell_to_index(cell_k(1), cell_k(2), cell_k(3))
4451 IF (kcell > nimg .OR. kcell < 1) cycle
4452
4453 IF (dik > ri_data%kp_RI_range) cycle
4454 ri_cells(kcell) = 1
4455
4456 IF (jcell == 1 .AND. iatom == jatom) nri_per_atom(iatom) = nri_per_atom(iatom) + ri_data%bsizes_RI(katom)
4457 END DO
4458 CALL neighbor_list_3c_iterator_destroy(nl_3c_iter)
4459 CALL neighbor_list_3c_destroy(nl_3c)
4460 CALL para_env%sum(ri_cells)
4461 CALL para_env%sum(nri_per_atom)
4462
4463 ALLOCATE (ri_data%img_to_RI_cell(nimg))
4464 ri_data%ncell_RI = 0
4465 ri_data%img_to_RI_cell = 0
4466 DO i_img = 1, nimg
4467 IF (ri_cells(i_img) > 0) THEN
4468 ri_data%ncell_RI = ri_data%ncell_RI + 1
4469 ri_data%img_to_RI_cell(i_img) = ri_data%ncell_RI
4470 END IF
4471 END DO
4472
4473 ALLOCATE (ri_data%RI_cell_to_img(ri_data%ncell_RI))
4474 DO i_img = 1, nimg
4475 IF (ri_data%img_to_RI_cell(i_img) > 0) ri_data%RI_cell_to_img(ri_data%img_to_RI_cell(i_img)) = i_img
4476 END DO
4477
4478 !Print some info
4479 IF (ri_data%unit_nr > 0) THEN
4480 WRITE (ri_data%unit_nr, fmt="(/T3,A,I29)") &
4481 "KP-HFX_RI_INFO| Number of RI-KP parallel groups:", ngroups
4482 WRITE (ri_data%unit_nr, fmt="(T3,A,I29)") &
4483 "KP-HFX_RI_INFO| Tensor stack size: ", ri_data%kp_stack_size
4484 WRITE (ri_data%unit_nr, fmt="(T3,A,F31.3,A)") &
4485 "KP-HFX_RI_INFO| RI basis extension radius:", ri_data%kp_RI_range*angstrom, " Ang"
4486 WRITE (ri_data%unit_nr, fmt="(T3,A,F12.3,A, F6.3, A)") &
4487 "KP-HFX_RI_INFO| RI basis bump factor and bump radius:", bump_fact, " /", &
4488 ri_data%kp_bump_rad*angstrom, " Ang"
4489 WRITE (ri_data%unit_nr, fmt="(T3,A,I16,A)") &
4490 "KP-HFX_RI_INFO| The extended RI bases cover up to ", ri_data%ncell_RI, " unit cells"
4491 WRITE (ri_data%unit_nr, fmt="(T3,A,I18)") &
4492 "KP-HFX_RI_INFO| Average number of sgf in extended RI bases:", sum(nri_per_atom)/natom
4493 WRITE (ri_data%unit_nr, fmt="(T3,A,F13.3,A)") &
4494 "KP-HFX_RI_INFO| Consider all image cells within a radius of ", ri_data%kp_image_range*angstrom, " Ang"
4495 WRITE (ri_data%unit_nr, fmt="(T3,A,I27/)") &
4496 "KP-HFX_RI_INFO| Number of image cells considered: ", ri_data%nimg
4497 CALL m_flush(ri_data%unit_nr)
4498 END IF
4499
4500 CALL timestop(handle)
4501
4502 END SUBROUTINE get_kp_and_ri_images
4503
4504! **************************************************************************************************
4505!> \brief A routine that creates tensors structure for rho_ao and 3c_ints in a stacked format for
4506!> the efficient contractions of rho_sigma^0,lambda^c * (mu^0 sigam^a | P) => TAS tensors
4507!> \param res_stack ...
4508!> \param rho_stack ...
4509!> \param ints_stack ...
4510!> \param rho_template ...
4511!> \param ints_template ...
4512!> \param stack_size ...
4513!> \param ri_data ...
4514!> \param qs_env ...
4515!> \note The result tensor has the exact same shape and distribution as the integral tensor
4516! **************************************************************************************************
4517 SUBROUTINE get_stack_tensors(res_stack, rho_stack, ints_stack, rho_template, ints_template, &
4518 stack_size, ri_data, qs_env)
4519 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: res_stack, rho_stack, ints_stack
4520 TYPE(dbt_type), INTENT(INOUT) :: rho_template, ints_template
4521 INTEGER, INTENT(IN) :: stack_size
4522 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4523 TYPE(qs_environment_type), POINTER :: qs_env
4524
4525 INTEGER :: is, nblks, nblks_3c(3), pdims_3d(3)
4526 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_ri_ext, bsizes_stack, dist1, &
4527 dist2, dist3, dist_stack1, &
4528 dist_stack2, dist_stack3
4529 TYPE(dbt_distribution_type) :: t_dist
4530 TYPE(dbt_pgrid_type) :: pgrid
4531 TYPE(mp_para_env_type), POINTER :: para_env
4532
4533 NULLIFY (para_env)
4534
4535 CALL get_qs_env(qs_env, para_env=para_env)
4536
4537 nblks = SIZE(ri_data%bsizes_AO_split)
4538 ALLOCATE (bsizes_stack(stack_size*nblks))
4539 DO is = 1, stack_size
4540 bsizes_stack((is - 1)*nblks + 1:is*nblks) = ri_data%bsizes_AO_split(:)
4541 END DO
4542
4543 ALLOCATE (dist1(nblks), dist2(nblks), dist_stack1(stack_size*nblks), dist_stack2(stack_size*nblks))
4544 CALL dbt_get_info(rho_template, proc_dist_1=dist1, proc_dist_2=dist2)
4545 DO is = 1, stack_size
4546 dist_stack1((is - 1)*nblks + 1:is*nblks) = dist1(:)
4547 dist_stack2((is - 1)*nblks + 1:is*nblks) = dist2(:)
4548 END DO
4549
4550 !First 2c tensor matches the distribution of template
4551 !It is stacked in both directions
4552 CALL dbt_distribution_new(t_dist, ri_data%pgrid_2d, dist_stack1, dist_stack2)
4553 CALL dbt_create(rho_stack(1), "RHO_stack", t_dist, [1], [2], bsizes_stack, bsizes_stack)
4554 CALL dbt_distribution_destroy(t_dist)
4555 DEALLOCATE (dist1, dist2, dist_stack1, dist_stack2)
4556
4557 !Second 2c tensor has optimal distribution on the 2d pgrid
4558 CALL create_2c_tensor(rho_stack(2), dist1, dist2, ri_data%pgrid_2d, bsizes_stack, bsizes_stack, name="RHO_stack")
4559 DEALLOCATE (dist1, dist2)
4560
4561 CALL dbt_get_info(ints_template, nblks_total=nblks_3c)
4562 ALLOCATE (dist1(nblks_3c(1)), dist2(nblks_3c(2)), dist3(nblks_3c(3)))
4563 ALLOCATE (dist_stack3(stack_size*nblks_3c(3)), bsizes_ri_ext(nblks_3c(2)))
4564 CALL dbt_get_info(ints_template, proc_dist_1=dist1, proc_dist_2=dist2, &
4565 proc_dist_3=dist3, blk_size_2=bsizes_ri_ext)
4566 DO is = 1, stack_size
4567 dist_stack3((is - 1)*nblks_3c(3) + 1:is*nblks_3c(3)) = dist3(:)
4568 END DO
4569
4570 !First 3c tensor matches the distribution of template
4571 CALL dbt_distribution_new(t_dist, ri_data%pgrid_1, dist1, dist2, dist_stack3)
4572 CALL dbt_create(ints_stack(1), "ints_stack", t_dist, [1, 2], [3], ri_data%bsizes_AO_split, &
4573 bsizes_ri_ext, bsizes_stack)
4574 CALL dbt_distribution_destroy(t_dist)
4575 DEALLOCATE (dist1, dist2, dist3, dist_stack3)
4576
4577 !Second 3c tensor has optimal pgrid
4578 pdims_3d = 0
4579 CALL dbt_pgrid_create(para_env, pdims_3d, pgrid, tensor_dims=[nblks_3c(1), nblks_3c(2), stack_size*nblks_3c(3)])
4580 CALL create_3c_tensor(ints_stack(2), dist1, dist2, dist3, pgrid, ri_data%bsizes_AO_split, &
4581 bsizes_ri_ext, bsizes_stack, [1, 2], [3], name="ints_stack")
4582 DEALLOCATE (dist1, dist2, dist3)
4583 CALL dbt_pgrid_destroy(pgrid)
4584
4585 !The result tensor has the same shape and dist as the integral tensor
4586 CALL dbt_create(ints_stack(1), res_stack(1))
4587 CALL dbt_create(ints_stack(2), res_stack(2))
4588
4589 END SUBROUTINE get_stack_tensors
4590
4591! **************************************************************************************************
4592!> \brief Fill the stack of 3c tensors accrding to the order in the images input
4593!> \param t_3c_stack ...
4594!> \param t_3c_in ...
4595!> \param images ...
4596!> \param stack_dim ...
4597!> \param ri_data ...
4598!> \param filter_at ...
4599!> \param filter_dim ...
4600!> \param idx_to_at ...
4601!> \param img_bounds ...
4602! **************************************************************************************************
4603 SUBROUTINE fill_3c_stack(t_3c_stack, t_3c_in, images, stack_dim, ri_data, filter_at, filter_dim, &
4604 idx_to_at, img_bounds)
4605 TYPE(dbt_type), INTENT(INOUT) :: t_3c_stack
4606 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_in
4607 INTEGER, DIMENSION(:), INTENT(INOUT) :: images
4608 INTEGER, INTENT(IN) :: stack_dim
4609 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4610 INTEGER, INTENT(IN), OPTIONAL :: filter_at, filter_dim
4611 INTEGER, DIMENSION(:), INTENT(INOUT), OPTIONAL :: idx_to_at
4612 INTEGER, INTENT(IN), OPTIONAL :: img_bounds(2)
4613
4614 INTEGER :: dest(3), i_img, idx, ind(3), lb, nblks, &
4615 nimg, offset, ub
4616 LOGICAL :: do_filter, found
4617 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
4618 TYPE(dbt_iterator_type) :: iter
4619
4620 !We loop over the a images from the ac_pairs, then copy the 3c ints to the correct spot in
4621 !in the stack tensor (corresponding to pair index). Distributions match by construction
4622 nimg = ri_data%nimg
4623 nblks = SIZE(ri_data%bsizes_AO_split)
4624
4625 do_filter = .false.
4626 IF (PRESENT(filter_at) .AND. PRESENT(filter_dim) .AND. PRESENT(idx_to_at)) do_filter = .true.
4627
4628 lb = 1
4629 ub = nimg
4630 offset = 0
4631 IF (PRESENT(img_bounds)) THEN
4632 lb = img_bounds(1)
4633 ub = img_bounds(2) - 1
4634 offset = lb - 1
4635 END IF
4636
4637 DO idx = lb, ub
4638 i_img = images(idx)
4639 IF (i_img == 0 .OR. i_img > nimg) cycle
4640
4641!$OMP PARALLEL DEFAULT(NONE) &
4642!$OMP SHARED(idx,i_img,t_3c_in,t_3c_stack,nblks,stack_dim,filter_at,filter_dim,idx_to_at,do_filter,offset) &
4643!$OMP PRIVATE(iter,ind,blk,found,dest)
4644 CALL dbt_iterator_start(iter, t_3c_in(i_img))
4645 DO WHILE (dbt_iterator_blocks_left(iter))
4646 CALL dbt_iterator_next_block(iter, ind)
4647 CALL dbt_get_block(t_3c_in(i_img), ind, blk, found)
4648 IF (.NOT. found) cycle
4649
4650 IF (do_filter) THEN
4651 IF (.NOT. idx_to_at(ind(filter_dim)) == filter_at) cycle
4652 END IF
4653
4654 IF (stack_dim == 1) THEN
4655 dest = [(idx - offset - 1)*nblks + ind(1), ind(2), ind(3)]
4656 ELSE IF (stack_dim == 2) THEN
4657 dest = [ind(1), (idx - offset - 1)*nblks + ind(2), ind(3)]
4658 ELSE
4659 dest = [ind(1), ind(2), (idx - offset - 1)*nblks + ind(3)]
4660 END IF
4661
4662 CALL dbt_put_block(t_3c_stack, dest, shape(blk), blk)
4663 DEALLOCATE (blk)
4664 END DO
4665 CALL dbt_iterator_stop(iter)
4666!$OMP END PARALLEL
4667 END DO !i_img
4668 CALL dbt_finalize(t_3c_stack)
4669
4670 END SUBROUTINE fill_3c_stack
4671
4672! **************************************************************************************************
4673!> \brief Fill the stack of 2c tensors based on the content of images input
4674!> \param t_2c_stack ...
4675!> \param t_2c_in ...
4676!> \param images ...
4677!> \param stack_dim ...
4678!> \param ri_data ...
4679!> \param img_bounds ...
4680!> \param shift ...
4681! **************************************************************************************************
4682 SUBROUTINE fill_2c_stack(t_2c_stack, t_2c_in, images, stack_dim, ri_data, img_bounds, shift)
4683 TYPE(dbt_type), INTENT(INOUT) :: t_2c_stack
4684 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_in
4685 INTEGER, DIMENSION(:), INTENT(INOUT) :: images
4686 INTEGER, INTENT(IN) :: stack_dim
4687 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4688 INTEGER, INTENT(IN), OPTIONAL :: img_bounds(2), shift
4689
4690 INTEGER :: dest(2), i_img, idx, ind(2), lb, &
4691 my_shift, nblks, nimg, offset, ub
4692 LOGICAL :: found
4693 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
4694 TYPE(dbt_iterator_type) :: iter
4695
4696 !We loop over the a images from the ac_pairs, then copy the 3c ints to the correct spot in
4697 !in the stack tensor (corresponding to pair index). Distributions match by construction
4698 nimg = ri_data%nimg
4699 nblks = SIZE(ri_data%bsizes_AO_split)
4700
4701 lb = 1
4702 ub = nimg
4703 offset = 0
4704 IF (PRESENT(img_bounds)) THEN
4705 lb = img_bounds(1)
4706 ub = img_bounds(2) - 1
4707 offset = lb - 1
4708 END IF
4709
4710 my_shift = 1
4711 IF (PRESENT(shift)) my_shift = shift
4712
4713 DO idx = lb, ub
4714 i_img = images(idx)
4715 IF (i_img == 0 .OR. i_img > nimg) cycle
4716
4717!$OMP PARALLEL DEFAULT(NONE) SHARED(idx,i_img,t_2c_in,t_2c_stack,nblks,stack_dim,offset,my_shift) &
4718!$OMP PRIVATE(iter,ind,blk,found,dest)
4719 CALL dbt_iterator_start(iter, t_2c_in(i_img))
4720 DO WHILE (dbt_iterator_blocks_left(iter))
4721 CALL dbt_iterator_next_block(iter, ind)
4722 CALL dbt_get_block(t_2c_in(i_img), ind, blk, found)
4723 IF (.NOT. found) cycle
4724
4725 IF (stack_dim == 1) THEN
4726 dest = [(idx - offset - 1)*nblks + ind(1), (my_shift - 1)*nblks + ind(2)]
4727 ELSE
4728 dest = [(my_shift - 1)*nblks + ind(1), (idx - offset - 1)*nblks + ind(2)]
4729 END IF
4730
4731 CALL dbt_put_block(t_2c_stack, dest, shape(blk), blk)
4732 DEALLOCATE (blk)
4733 END DO
4734 CALL dbt_iterator_stop(iter)
4735!$OMP END PARALLEL
4736 END DO !idx
4737 CALL dbt_finalize(t_2c_stack)
4738
4739 END SUBROUTINE fill_2c_stack
4740
4741! **************************************************************************************************
4742!> \brief Unstacks a stacked 3c tensor containing t_3c_apc
4743!> \param t_3c_apc ...
4744!> \param t_stacked ...
4745!> \param idx ...
4746! **************************************************************************************************
4747 SUBROUTINE unstack_t_3c_apc(t_3c_apc, t_stacked, idx)
4748 TYPE(dbt_type), INTENT(INOUT) :: t_3c_apc, t_stacked
4749 INTEGER, INTENT(IN) :: idx
4750
4751 INTEGER :: current_idx
4752 INTEGER, DIMENSION(3) :: ind, nblks_3c
4753 LOGICAL :: found
4754 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
4755 TYPE(dbt_iterator_type) :: iter
4756
4757 !Note: t_3c_apc and t_stacked must have the same ditribution
4758 CALL dbt_get_info(t_3c_apc, nblks_total=nblks_3c)
4759
4760!$OMP PARALLEL DEFAULT(NONE) SHARED(t_3c_apc,t_stacked,idx,nblks_3c) PRIVATE(iter,ind,blk,found,current_idx)
4761 CALL dbt_iterator_start(iter, t_stacked)
4762 DO WHILE (dbt_iterator_blocks_left(iter))
4763 CALL dbt_iterator_next_block(iter, ind)
4764
4765 !tensor is stacked along the 3rd dimension
4766 current_idx = (ind(3) - 1)/nblks_3c(3) + 1
4767 IF (.NOT. idx == current_idx) cycle
4768
4769 CALL dbt_get_block(t_stacked, ind, blk, found)
4770 IF (.NOT. found) cycle
4771
4772 CALL dbt_put_block(t_3c_apc, [ind(1), ind(2), ind(3) - (idx - 1)*nblks_3c(3)], shape(blk), blk)
4773 DEALLOCATE (blk)
4774 END DO
4775 CALL dbt_iterator_stop(iter)
4776!$OMP END PARALLEL
4777
4778 END SUBROUTINE unstack_t_3c_apc
4779
4780! **************************************************************************************************
4781!> \brief copies the 3c integrals correspoinding to a single atom mu from the general (P^0| mu^0 sigam^a)
4782!> \param t_3c_at ...
4783!> \param t_3c_ints ...
4784!> \param iatom ...
4785!> \param dim_at ...
4786!> \param idx_to_at ...
4787! **************************************************************************************************
4788 SUBROUTINE get_atom_3c_ints(t_3c_at, t_3c_ints, iatom, dim_at, idx_to_at)
4789 TYPE(dbt_type), INTENT(INOUT) :: t_3c_at, t_3c_ints
4790 INTEGER, INTENT(IN) :: iatom, dim_at
4791 INTEGER, DIMENSION(:), INTENT(IN) :: idx_to_at
4792
4793 INTEGER, DIMENSION(3) :: ind
4794 LOGICAL :: found
4795 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
4796 TYPE(dbt_iterator_type) :: iter
4797
4798!$OMP PARALLEL DEFAULT(NONE) SHARED(t_3c_ints,t_3c_at,iatom,idx_to_at,dim_at) PRIVATE(iter,ind,blk,found)
4799 CALL dbt_iterator_start(iter, t_3c_ints)
4800 DO WHILE (dbt_iterator_blocks_left(iter))
4801 CALL dbt_iterator_next_block(iter, ind)
4802 IF (.NOT. idx_to_at(ind(dim_at)) == iatom) cycle
4803
4804 CALL dbt_get_block(t_3c_ints, ind, blk, found)
4805 IF (.NOT. found) cycle
4806
4807 CALL dbt_put_block(t_3c_at, ind, shape(blk), blk)
4808 DEALLOCATE (blk)
4809 END DO
4810 CALL dbt_iterator_stop(iter)
4811!$OMP END PARALLEL
4812 CALL dbt_finalize(t_3c_at)
4813
4814 END SUBROUTINE get_atom_3c_ints
4815
4816! **************************************************************************************************
4817!> \brief Precalculate the 3c and 2c derivatives tensors
4818!> \param t_3c_der_RI ...
4819!> \param t_3c_der_AO ...
4820!> \param mat_der_pot ...
4821!> \param t_2c_der_metric ...
4822!> \param ri_data ...
4823!> \param qs_env ...
4824! **************************************************************************************************
4825 SUBROUTINE precalc_derivatives(t_3c_der_RI, t_3c_der_AO, mat_der_pot, t_2c_der_metric, ri_data, qs_env)
4826 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_der_ri, t_3c_der_ao
4827 TYPE(dbcsr_type), DIMENSION(:, :), INTENT(INOUT) :: mat_der_pot
4828 TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_2c_der_metric
4829 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4830 TYPE(qs_environment_type), POINTER :: qs_env
4831
4832 CHARACTER(LEN=*), PARAMETER :: routinen = 'precalc_derivatives'
4833
4834 INTEGER :: handle, handle2, i_img, i_mem, i_ri, &
4835 i_xyz, iatom, n_mem, natom, nblks_ri, &
4836 ncell_ri, nimg, nkind, nthreads
4837 INTEGER(int_8) :: nze
4838 INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_ri_ext, bsizes_ri_ext_split, dist_ao_1, &
4839 dist_ao_2, dist_ri, dist_ri_ext, dummy_end, dummy_start, end_blocks, start_blocks
4840 INTEGER, DIMENSION(3) :: pcoord, pdims
4841 INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
4842 REAL(dp) :: occ
4843 TYPE(dbcsr_distribution_type) :: dbcsr_dist
4844 TYPE(dbcsr_type) :: dbcsr_template
4845 TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:, :) :: mat_der_metric
4846 TYPE(dbt_distribution_type) :: t_dist
4847 TYPE(dbt_pgrid_type) :: pgrid
4848 TYPE(dbt_type) :: t_3c_template
4849 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :, :) :: t_3c_der_ao_prv, t_3c_der_ri_prv
4850 TYPE(dft_control_type), POINTER :: dft_control
4851 TYPE(distribution_2d_type), POINTER :: dist_2d
4852 TYPE(distribution_3d_type) :: dist_3d
4853 TYPE(gto_basis_set_p_type), ALLOCATABLE, &
4854 DIMENSION(:), TARGET :: basis_set_ao, basis_set_ri
4855 TYPE(mp_cart_type) :: mp_comm_t3c
4856 TYPE(mp_para_env_type), POINTER :: para_env
4857 TYPE(neighbor_list_3c_type) :: nl_3c
4858 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
4859 POINTER :: nl_2c
4860 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
4861 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
4862
4863 NULLIFY (qs_kind_set, dist_2d, nl_2c, particle_set, dft_control, para_env, row_bsize, col_bsize)
4864
4865 CALL timeset(routinen, handle)
4866
4867 CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, distribution_2d=dist_2d, natom=natom, &
4868 particle_set=particle_set, dft_control=dft_control, para_env=para_env)
4869
4870 nimg = ri_data%nimg
4871 ncell_ri = ri_data%ncell_RI
4872
4873 ALLOCATE (basis_set_ri(nkind), basis_set_ao(nkind))
4874 CALL basis_set_list_setup(basis_set_ri, ri_data%ri_basis_type, qs_kind_set)
4875 CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_ri)
4876 CALL basis_set_list_setup(basis_set_ao, ri_data%orb_basis_type, qs_kind_set)
4877 CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_ao)
4878
4879 !Dealing with the 3c derivatives
4880 nthreads = 1
4881!$ nthreads = omp_get_num_threads()
4882 pdims = 0
4883 CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[max(1, natom/(ri_data%n_mem*nthreads)), natom, natom])
4884
4885 CALL create_3c_tensor(t_3c_template, dist_ao_1, dist_ao_2, dist_ri, pgrid, &
4886 ri_data%bsizes_AO, ri_data%bsizes_AO, ri_data%bsizes_RI, &
4887 map1=[1, 2], map2=[3], name="tmp")
4888 CALL dbt_destroy(t_3c_template)
4889
4890 !We stack the RI basis images. Keep consistent distribution
4891 nblks_ri = SIZE(ri_data%bsizes_RI_split)
4892 ALLOCATE (dist_ri_ext(natom*ncell_ri))
4893 ALLOCATE (bsizes_ri_ext(natom*ncell_ri))
4894 ALLOCATE (bsizes_ri_ext_split(nblks_ri*ncell_ri))
4895 DO i_ri = 1, ncell_ri
4896 bsizes_ri_ext((i_ri - 1)*natom + 1:i_ri*natom) = ri_data%bsizes_RI(:)
4897 dist_ri_ext((i_ri - 1)*natom + 1:i_ri*natom) = dist_ri(:)
4898 bsizes_ri_ext_split((i_ri - 1)*nblks_ri + 1:i_ri*nblks_ri) = ri_data%bsizes_RI_split(:)
4899 END DO
4900
4901 CALL dbt_distribution_new(t_dist, pgrid, dist_ao_1, dist_ao_2, dist_ri_ext)
4902 CALL dbt_create(t_3c_template, "KP_3c_der", t_dist, [1, 2], [3], &
4903 ri_data%bsizes_AO, ri_data%bsizes_AO, bsizes_ri_ext)
4904 CALL dbt_distribution_destroy(t_dist)
4905
4906 ALLOCATE (t_3c_der_ri_prv(nimg, 1, 3), t_3c_der_ao_prv(nimg, 1, 3))
4907 DO i_xyz = 1, 3
4908 DO i_img = 1, nimg
4909 CALL dbt_create(t_3c_template, t_3c_der_ri_prv(i_img, 1, i_xyz))
4910 CALL dbt_create(t_3c_template, t_3c_der_ao_prv(i_img, 1, i_xyz))
4911 END DO
4912 END DO
4913 CALL dbt_destroy(t_3c_template)
4914
4915 CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
4916 CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
4917 CALL distribution_3d_create(dist_3d, dist_ao_1, dist_ao_2, dist_ri, &
4918 nkind, particle_set, mp_comm_t3c, own_comm=.true.)
4919 DEALLOCATE (dist_ri, dist_ao_1, dist_ao_2)
4920 CALL dbt_pgrid_destroy(pgrid)
4921
4922 CALL build_3c_neighbor_lists(nl_3c, basis_set_ao, basis_set_ao, basis_set_ri, dist_3d, ri_data%ri_metric, &
4923 "HFX_3c_nl", qs_env, op_pos=2, sym_jk=.false., own_dist=.true.)
4924
4925 n_mem = ri_data%n_mem
4926 CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy_start, dummy_end, &
4927 start_blocks, end_blocks)
4928 DEALLOCATE (dummy_start, dummy_end)
4929
4930 CALL create_3c_tensor(t_3c_template, dist_ri, dist_ao_1, dist_ao_2, ri_data%pgrid_2, &
4931 bsizes_ri_ext_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
4932 map1=[1], map2=[2, 3], name="der (RI | AO AO)")
4933 DO i_xyz = 1, 3
4934 DO i_img = 1, nimg
4935 CALL dbt_create(t_3c_template, t_3c_der_ri(i_img, i_xyz))
4936 CALL dbt_create(t_3c_template, t_3c_der_ao(i_img, i_xyz))
4937 END DO
4938 END DO
4939
4940 DO i_mem = 1, n_mem
4941 CALL build_3c_derivatives(t_3c_der_ao_prv, t_3c_der_ri_prv, ri_data%filter_eps, qs_env, &
4942 nl_3c, basis_set_ao, basis_set_ao, basis_set_ri, &
4943 ri_data%ri_metric, der_eps=ri_data%eps_schwarz_forces, op_pos=2, &
4944 do_kpoints=.true., do_hfx_kpoints=.true., &
4945 bounds_k=[start_blocks(i_mem), end_blocks(i_mem)], &
4946 ri_range=ri_data%kp_RI_range, img_to_ri_cell=ri_data%img_to_RI_cell)
4947
4948 CALL timeset(routinen//"_cpy", handle2)
4949 !We go from (mu^0 sigma^i | P^j) to (P^i| sigma^j mu^0) and finally to (P^i| mu^0 sigma^j)
4950 DO i_img = 1, nimg
4951 DO i_xyz = 1, 3
4952 !derivative wrt to mu^0
4953 CALL get_tensor_occupancy(t_3c_der_ao_prv(i_img, 1, i_xyz), nze, occ)
4954 IF (nze > 0) THEN
4955 CALL dbt_copy(t_3c_der_ao_prv(i_img, 1, i_xyz), t_3c_template, &
4956 order=[3, 2, 1], move_data=.true.)
4957 CALL dbt_filter(t_3c_template, ri_data%filter_eps)
4958 CALL dbt_copy(t_3c_template, t_3c_der_ao(i_img, i_xyz), &
4959 order=[1, 3, 2], move_data=.true., summation=.true.)
4960 END IF
4961
4962 !derivative wrt to P^i
4963 CALL get_tensor_occupancy(t_3c_der_ri_prv(i_img, 1, i_xyz), nze, occ)
4964 IF (nze > 0) THEN
4965 CALL dbt_copy(t_3c_der_ri_prv(i_img, 1, i_xyz), t_3c_template, &
4966 order=[3, 2, 1], move_data=.true.)
4967 CALL dbt_filter(t_3c_template, ri_data%filter_eps)
4968 CALL dbt_copy(t_3c_template, t_3c_der_ri(i_img, i_xyz), &
4969 order=[1, 3, 2], move_data=.true., summation=.true.)
4970 END IF
4971 END DO
4972 END DO
4973 CALL timestop(handle2)
4974 END DO
4975 CALL dbt_destroy(t_3c_template)
4976
4977 CALL neighbor_list_3c_destroy(nl_3c)
4978 DO i_xyz = 1, 3
4979 DO i_img = 1, nimg
4980 CALL dbt_destroy(t_3c_der_ri_prv(i_img, 1, i_xyz))
4981 CALL dbt_destroy(t_3c_der_ao_prv(i_img, 1, i_xyz))
4982 END DO
4983 END DO
4984 DEALLOCATE (t_3c_der_ri_prv, t_3c_der_ao_prv)
4985
4986 !Reorder 3c derivatives to be consistant with ints
4987 CALL reorder_3c_derivs(t_3c_der_ri, ri_data)
4988 CALL reorder_3c_derivs(t_3c_der_ao, ri_data)
4989
4990 CALL timeset(routinen//"_2c", handle2)
4991 !The 2-center derivatives
4992 CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
4993 ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
4994 ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
4995 row_bsize(:) = ri_data%bsizes_RI
4996 col_bsize(:) = ri_data%bsizes_RI
4997
4998 CALL dbcsr_create(dbcsr_template, "2c_der", dbcsr_dist, dbcsr_type_no_symmetry, &
4999 row_bsize, col_bsize)
5000 CALL dbcsr_distribution_release(dbcsr_dist)
5001 DEALLOCATE (col_bsize, row_bsize)
5002
5003 ALLOCATE (mat_der_metric(nimg, 3))
5004 DO i_xyz = 1, 3
5005 DO i_img = 1, nimg
5006 CALL dbcsr_create(mat_der_pot(i_img, i_xyz), template=dbcsr_template)
5007 CALL dbcsr_create(mat_der_metric(i_img, i_xyz), template=dbcsr_template)
5008 END DO
5009 END DO
5010 CALL dbcsr_release(dbcsr_template)
5011
5012 !HFX potential derivatives
5013 CALL build_2c_neighbor_lists(nl_2c, basis_set_ri, basis_set_ri, ri_data%hfx_pot, &
5014 "HFX_2c_nl_pot", qs_env, sym_ij=.false., dist_2d=dist_2d)
5015 CALL build_2c_derivatives(mat_der_pot, ri_data%filter_eps_2c, qs_env, nl_2c, &
5016 basis_set_ri, basis_set_ri, ri_data%hfx_pot, do_kpoints=.true.)
5017 CALL release_neighbor_list_sets(nl_2c)
5018
5019 !RI metric derivatives
5020 CALL build_2c_neighbor_lists(nl_2c, basis_set_ri, basis_set_ri, ri_data%ri_metric, &
5021 "HFX_2c_nl_pot", qs_env, sym_ij=.false., dist_2d=dist_2d)
5022 CALL build_2c_derivatives(mat_der_metric, ri_data%filter_eps_2c, qs_env, nl_2c, &
5023 basis_set_ri, basis_set_ri, ri_data%ri_metric, do_kpoints=.true.)
5024 CALL release_neighbor_list_sets(nl_2c)
5025
5026 !Get into extended RI basis and tensor format
5027 DO i_xyz = 1, 3
5028 DO iatom = 1, natom
5029 CALL dbt_create(ri_data%t_2c_inv(1, 1), t_2c_der_metric(iatom, i_xyz))
5030 CALL get_ext_2c_int(t_2c_der_metric(iatom, i_xyz), mat_der_metric(:, i_xyz), &
5031 iatom, iatom, 1, ri_data, qs_env)
5032 END DO
5033 DO i_img = 1, nimg
5034 CALL dbcsr_release(mat_der_metric(i_img, i_xyz))
5035 END DO
5036 END DO
5037 CALL timestop(handle2)
5038
5039 CALL timestop(handle)
5040
5041 END SUBROUTINE precalc_derivatives
5042
5043! **************************************************************************************************
5044!> \brief Update the forces due to the derivative of the a 2-center product d/dR (Q|R)
5045!> \param force ...
5046!> \param t_2c_contr A precontracted tensor containing sum_abcdPS (ab|P)(P|Q)^-1 (R|S)^-1 (S|cd) P_ac P_bd
5047!> \param t_2c_der the d/dR (Q|R) tensor, in all 3 cartesian directions
5048!> \param atom_of_kind ...
5049!> \param kind_of ...
5050!> \param img in which periodic image the second center of the tensor is
5051!> \param pref ...
5052!> \param ri_data ...
5053!> \param qs_env ...
5054!> \param work_virial ...
5055!> \param cell ...
5056!> \param particle_set ...
5057!> \param diag ...
5058!> \param offdiag ...
5059!> \note IMPORTANT: t_tc_contr and t_2c_der need to have the same distribution. Atomic block sizes are
5060!> assumed
5061! **************************************************************************************************
5062 SUBROUTINE get_2c_der_force(force, t_2c_contr, t_2c_der, atom_of_kind, kind_of, img, pref, &
5063 ri_data, qs_env, work_virial, cell, particle_set, diag, offdiag)
5064
5065 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
5066 TYPE(dbt_type), INTENT(INOUT) :: t_2c_contr
5067 TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_der
5068 INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of
5069 INTEGER, INTENT(IN) :: img
5070 REAL(dp), INTENT(IN) :: pref
5071 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
5072 TYPE(qs_environment_type), POINTER :: qs_env
5073 REAL(dp), DIMENSION(3, 3), INTENT(INOUT), OPTIONAL :: work_virial
5074 TYPE(cell_type), OPTIONAL, POINTER :: cell
5075 TYPE(particle_type), DIMENSION(:), OPTIONAL, &
5076 POINTER :: particle_set
5077 LOGICAL, INTENT(IN), OPTIONAL :: diag, offdiag
5078
5079 CHARACTER(LEN=*), PARAMETER :: routinen = 'get_2c_der_force'
5080
5081 INTEGER :: handle, i_img, i_ri, i_xyz, iat, &
5082 iat_of_kind, ikind, j_img, j_ri, &
5083 j_xyz, jat, jat_of_kind, jkind, natom
5084 INTEGER, DIMENSION(2) :: ind
5085 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
5086 LOGICAL :: found, my_diag, my_offdiag, use_virial
5087 REAL(dp) :: new_force
5088 REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET :: contr_blk, der_blk
5089 REAL(dp), DIMENSION(3) :: scoord
5090 TYPE(dbt_iterator_type) :: iter
5091 TYPE(kpoint_type), POINTER :: kpoints
5092
5093 NULLIFY (kpoints, index_to_cell)
5094
5095 !Loop over the blocks of d/dR (Q|R), contract with the corresponding block of t_2c_contr and
5096 !update the relevant force
5097
5098 CALL timeset(routinen, handle)
5099
5100 use_virial = .false.
5101 IF (PRESENT(work_virial) .AND. PRESENT(cell) .AND. PRESENT(particle_set)) use_virial = .true.
5102
5103 my_diag = .false.
5104 IF (PRESENT(diag)) my_diag = diag
5105
5106 my_offdiag = .false.
5107 IF (PRESENT(diag)) my_offdiag = offdiag
5108
5109 CALL get_qs_env(qs_env, kpoints=kpoints, natom=natom)
5110 CALL get_kpoint_info(kpoints, index_to_cell=index_to_cell)
5111
5112!$OMP PARALLEL DEFAULT(NONE) &
5113!$OMP SHARED(t_2c_der,t_2c_contr,work_virial,force,use_virial,natom,index_to_cell,ri_data,img) &
5114!$OMP SHARED(pref,atom_of_kind,kind_of,particle_set,cell,my_diag,my_offdiag) &
5115!$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force,i_RI,i_img,j_RI,j_img) &
5116!$OMP PRIVATE(iat,jat,iat_of_kind,jat_of_kind,ikind,jkind,scoord)
5117 DO i_xyz = 1, 3
5118 CALL dbt_iterator_start(iter, t_2c_der(i_xyz))
5119 DO WHILE (dbt_iterator_blocks_left(iter))
5120 CALL dbt_iterator_next_block(iter, ind)
5121
5122 !Only take forecs due to block diagonal or block off-diagonal, depending on arguments
5123 IF ((my_diag .AND. .NOT. my_offdiag) .OR. (.NOT. my_diag .AND. my_offdiag)) THEN
5124 IF (my_diag .AND. (ind(1) .NE. ind(2))) cycle
5125 IF (my_offdiag .AND. (ind(1) == ind(2))) cycle
5126 END IF
5127
5128 CALL dbt_get_block(t_2c_der(i_xyz), ind, der_blk, found)
5129 cpassert(found)
5130 CALL dbt_get_block(t_2c_contr, ind, contr_blk, found)
5131
5132 IF (found) THEN
5133
5134 !an element of d/dR (Q|R) corresponds to 2 things because of translational invariance
5135 !(Q'| R) = - (Q| R'), once wrt the center on Q, and once on R
5136 new_force = pref*sum(der_blk(:, :)*contr_blk(:, :))
5137
5138 i_ri = (ind(1) - 1)/natom + 1
5139 i_img = ri_data%RI_cell_to_img(i_ri)
5140 iat = ind(1) - (i_ri - 1)*natom
5141 iat_of_kind = atom_of_kind(iat)
5142 ikind = kind_of(iat)
5143
5144 j_ri = (ind(2) - 1)/natom + 1
5145 j_img = ri_data%RI_cell_to_img(j_ri)
5146 jat = ind(2) - (j_ri - 1)*natom
5147 jat_of_kind = atom_of_kind(jat)
5148 jkind = kind_of(jat)
5149
5150 !Force on iatom (first center)
5151!$OMP ATOMIC
5152 force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
5153 + new_force
5154
5155 IF (use_virial) THEN
5156
5157 CALL real_to_scaled(scoord, pbc(particle_set(iat)%r, cell), cell)
5158 scoord(:) = scoord(:) + real(index_to_cell(:, i_img), dp)
5159
5160 DO j_xyz = 1, 3
5161!$OMP ATOMIC
5162 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5163 END DO
5164 END IF
5165
5166 !Force on jatom (second center)
5167!$OMP ATOMIC
5168 force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) &
5169 - new_force
5170
5171 IF (use_virial) THEN
5172
5173 CALL real_to_scaled(scoord, pbc(particle_set(jat)%r, cell), cell)
5174 scoord(:) = scoord(:) + real(index_to_cell(:, j_img) + index_to_cell(:, img), dp)
5175
5176 DO j_xyz = 1, 3
5177!$OMP ATOMIC
5178 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) - new_force*scoord(j_xyz)
5179 END DO
5180 END IF
5181
5182 DEALLOCATE (contr_blk)
5183 END IF
5184
5185 DEALLOCATE (der_blk)
5186 END DO !iter
5187 CALL dbt_iterator_stop(iter)
5188
5189 END DO !i_xyz
5190!$OMP END PARALLEL
5191 CALL timestop(handle)
5192
5193 END SUBROUTINE get_2c_der_force
5194
5195! **************************************************************************************************
5196!> \brief This routines calculates the force contribution from a trace over 3D tensors, i.e.
5197!> force = sum_ijk A_ijk B_ijk., the B tensor is (P^0| sigma^0 lambda^img), with P in the
5198!> extended RI basis. Note that all tensors are stacked along the 3rd dimension
5199!> \param force ...
5200!> \param t_3c_contr ...
5201!> \param t_3c_der_1 ...
5202!> \param t_3c_der_2 ...
5203!> \param atom_of_kind ...
5204!> \param kind_of ...
5205!> \param idx_to_at_RI ...
5206!> \param idx_to_at_AO ...
5207!> \param i_images ...
5208!> \param lb_img ...
5209!> \param pref ...
5210!> \param ri_data ...
5211!> \param qs_env ...
5212!> \param work_virial ...
5213!> \param cell ...
5214!> \param particle_set ...
5215! **************************************************************************************************
5216 SUBROUTINE get_force_from_3c_trace(force, t_3c_contr, t_3c_der_1, t_3c_der_2, atom_of_kind, kind_of, &
5217 idx_to_at_RI, idx_to_at_AO, i_images, lb_img, pref, &
5218 ri_data, qs_env, work_virial, cell, particle_set)
5219
5220 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
5221 TYPE(dbt_type), INTENT(INOUT) :: t_3c_contr
5222 TYPE(dbt_type), DIMENSION(3), INTENT(INOUT) :: t_3c_der_1, t_3c_der_2
5223 INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at_ri, &
5224 idx_to_at_ao, i_images
5225 INTEGER, INTENT(IN) :: lb_img
5226 REAL(dp), INTENT(IN) :: pref
5227 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
5228 TYPE(qs_environment_type), POINTER :: qs_env
5229 REAL(dp), DIMENSION(3, 3), INTENT(INOUT), OPTIONAL :: work_virial
5230 TYPE(cell_type), OPTIONAL, POINTER :: cell
5231 TYPE(particle_type), DIMENSION(:), OPTIONAL, &
5232 POINTER :: particle_set
5233
5234 CHARACTER(LEN=*), PARAMETER :: routinen = 'get_force_from_3c_trace'
5235
5236 INTEGER :: handle, i_ri, i_xyz, iat, iat_of_kind, idx, ikind, j_xyz, jat, jat_of_kind, &
5237 jkind, kat, kat_of_kind, kkind, nblks_ao, nblks_ri, ri_img
5238 INTEGER, DIMENSION(3) :: ind
5239 INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
5240 LOGICAL :: found, found_1, found_2, use_virial
5241 REAL(dp) :: new_force
5242 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), TARGET :: contr_blk, der_blk_1, der_blk_2, &
5243 der_blk_3
5244 REAL(dp), DIMENSION(3) :: scoord
5245 TYPE(dbt_iterator_type) :: iter
5246 TYPE(kpoint_type), POINTER :: kpoints
5247
5248 NULLIFY (kpoints, index_to_cell)
5249
5250 CALL timeset(routinen, handle)
5251
5252 CALL get_qs_env(qs_env, kpoints=kpoints)
5253 CALL get_kpoint_info(kpoints, index_to_cell=index_to_cell)
5254
5255 nblks_ri = SIZE(ri_data%bsizes_RI_split)
5256 nblks_ao = SIZE(ri_data%bsizes_AO_split)
5257
5258 use_virial = .false.
5259 IF (PRESENT(work_virial) .AND. PRESENT(cell) .AND. PRESENT(particle_set)) use_virial = .true.
5260
5261!$OMP PARALLEL DEFAULT(NONE) &
5262!$OMP SHARED(t_3c_der_1, t_3c_der_2,t_3c_contr,work_virial,force,use_virial,index_to_cell,i_images,lb_img) &
5263!$OMP SHARED(pref,idx_to_at_AO,atom_of_kind,kind_of,particle_set,cell,idx_to_at_RI,ri_data,nblks_RI,nblks_AO) &
5264!$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk_1,contr_blk,found,new_force,iat,iat_of_kind,ikind,scoord) &
5265!$OMP PRIVATE(jat,kat,jat_of_kind,kat_of_kind,jkind,kkind,i_RI,RI_img,der_blk_2,der_blk_3,found_1,found_2,idx)
5266 CALL dbt_iterator_start(iter, t_3c_contr)
5267 DO WHILE (dbt_iterator_blocks_left(iter))
5268 CALL dbt_iterator_next_block(iter, ind)
5269
5270 CALL dbt_get_block(t_3c_contr, ind, contr_blk, found)
5271 IF (found) THEN
5272
5273 DO i_xyz = 1, 3
5274 CALL dbt_get_block(t_3c_der_1(i_xyz), ind, der_blk_1, found_1)
5275 IF (.NOT. found_1) THEN
5276 DEALLOCATE (der_blk_1)
5277 ALLOCATE (der_blk_1(SIZE(contr_blk, 1), SIZE(contr_blk, 2), SIZE(contr_blk, 3)))
5278 der_blk_1(:, :, :) = 0.0_dp
5279 END IF
5280 CALL dbt_get_block(t_3c_der_2(i_xyz), ind, der_blk_2, found_2)
5281 IF (.NOT. found_2) THEN
5282 DEALLOCATE (der_blk_2)
5283 ALLOCATE (der_blk_2(SIZE(contr_blk, 1), SIZE(contr_blk, 2), SIZE(contr_blk, 3)))
5284 der_blk_2(:, :, :) = 0.0_dp
5285 END IF
5286
5287 ALLOCATE (der_blk_3(SIZE(contr_blk, 1), SIZE(contr_blk, 2), SIZE(contr_blk, 3)))
5288 der_blk_3(:, :, :) = -(der_blk_1(:, :, :) + der_blk_2(:, :, :))
5289
5290 !We assume the tensors are in the format (P^0| sigma^0 mu^a+c-b), with P a member of the
5291 !extended RI basis set
5292
5293 !Force for the first center (RI extended basis, zero cell)
5294 new_force = pref*sum(der_blk_1(:, :, :)*contr_blk(:, :, :))
5295
5296 i_ri = (ind(1) - 1)/nblks_ri + 1
5297 ri_img = ri_data%RI_cell_to_img(i_ri)
5298 iat = idx_to_at_ri(ind(1) - (i_ri - 1)*nblks_ri)
5299 iat_of_kind = atom_of_kind(iat)
5300 ikind = kind_of(iat)
5301
5302!$OMP ATOMIC
5303 force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
5304 + new_force
5305
5306 IF (use_virial) THEN
5307
5308 CALL real_to_scaled(scoord, pbc(particle_set(iat)%r, cell), cell)
5309 scoord(:) = scoord(:) + real(index_to_cell(:, ri_img), dp)
5310
5311 DO j_xyz = 1, 3
5312!$OMP ATOMIC
5313 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5314 END DO
5315 END IF
5316
5317 !Force with respect to the second center (AO basis, zero cell)
5318 new_force = pref*sum(der_blk_2(:, :, :)*contr_blk(:, :, :))
5319 jat = idx_to_at_ao(ind(2))
5320 jat_of_kind = atom_of_kind(jat)
5321 jkind = kind_of(jat)
5322
5323!$OMP ATOMIC
5324 force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) &
5325 + new_force
5326
5327 IF (use_virial) THEN
5328
5329 CALL real_to_scaled(scoord, pbc(particle_set(jat)%r, cell), cell)
5330
5331 DO j_xyz = 1, 3
5332!$OMP ATOMIC
5333 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5334 END DO
5335 END IF
5336
5337 !Force with respect to the third center (AO basis, apc_img - b_img)
5338 !Note: tensors are stacked along the 3rd direction
5339 new_force = pref*sum(der_blk_3(:, :, :)*contr_blk(:, :, :))
5340 idx = (ind(3) - 1)/nblks_ao + 1
5341 kat = idx_to_at_ao(ind(3) - (idx - 1)*nblks_ao)
5342 kat_of_kind = atom_of_kind(kat)
5343 kkind = kind_of(kat)
5344
5345!$OMP ATOMIC
5346 force(kkind)%fock_4c(i_xyz, kat_of_kind) = force(kkind)%fock_4c(i_xyz, kat_of_kind) &
5347 + new_force
5348
5349 IF (use_virial) THEN
5350 CALL real_to_scaled(scoord, pbc(particle_set(kat)%r, cell), cell)
5351 scoord(:) = scoord(:) + real(index_to_cell(:, i_images(lb_img - 1 + idx)), dp)
5352
5353 DO j_xyz = 1, 3
5354!$OMP ATOMIC
5355 work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5356 END DO
5357 END IF
5358
5359 DEALLOCATE (der_blk_1, der_blk_2, der_blk_3)
5360 END DO !i_xyz
5361 DEALLOCATE (contr_blk)
5362 END IF !found
5363 END DO !iter
5364 CALL dbt_iterator_stop(iter)
5365!$OMP END PARALLEL
5366 CALL timestop(handle)
5367
5368 END SUBROUTINE get_force_from_3c_trace
5369
5370END MODULE
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static GRID_HOST_DEVICE double fac(const int i)
Factorial function, e.g. fac(5) = 5! = 120.
Definition grid_common.h:51
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
Types and set/get functions for auxiliary density matrix methods.
Definition admm_types.F:15
subroutine, public get_admm_env(admm_env, mo_derivs_aux_fit, mos_aux_fit, sab_aux_fit, sab_aux_fit_asymm, sab_aux_fit_vs_orb, matrix_s_aux_fit, matrix_s_aux_fit_kp, matrix_s_aux_fit_vs_orb, matrix_s_aux_fit_vs_orb_kp, task_list_aux_fit, matrix_ks_aux_fit, matrix_ks_aux_fit_kp, matrix_ks_aux_fit_im, matrix_ks_aux_fit_dft, matrix_ks_aux_fit_hfx, matrix_ks_aux_fit_dft_kp, matrix_ks_aux_fit_hfx_kp, rho_aux_fit, rho_aux_fit_buffer, admm_dm)
Get routine for the ADMM env.
Definition admm_types.F:593
Define the atomic kind types and their sub types.
subroutine, public get_atomic_kind_set(atomic_kind_set, atom_of_kind, kind_of, natom_of_kind, maxatom, natom, nshell, fist_potential_present, shell_present, shell_adiabatic, shell_check_distance, damping_present)
Get attributes of an atomic kind set.
subroutine, public get_gto_basis_set(gto_basis_set, name, aliases, norm_type, kind_radius, ncgf, nset, nsgf, cgf_symbol, sgf_symbol, norm_cgf, set_radius, lmax, lmin, lx, ly, lz, m, ncgf_set, npgf, nsgf_set, nshell, cphi, pgf_radius, sphi, scon, zet, first_cgf, first_sgf, l, last_cgf, last_sgf, n, gcc, maxco, maxl, maxpgf, maxsgf_set, maxshell, maxso, nco_sum, npgf_sum, nshell_sum, maxder, short_kind_radius, npgf_seg_sum)
...
collects all references to literature in CP2K as new algorithms / method are included from literature...
integer, save, public bussy2024
Handles all functions related to the CELL.
Definition cell_types.F:15
subroutine, public scaled_to_real(r, s, cell)
Transform scaled cell coordinates real coordinates. r=h*s.
Definition cell_types.F:516
subroutine, public real_to_scaled(s, r, cell)
Transform real to scaled cell coordinates. s=h_inv*r.
Definition cell_types.F:486
various utilities that regard array of different kinds: output, allocation,... maybe it is not a good...
methods related to the blacs parallel environment
subroutine, public cp_blacs_env_release(blacs_env)
releases the given blacs_env
subroutine, public cp_blacs_env_create(blacs_env, para_env, blacs_grid_layout, blacs_repeatable, row_major, grid_2d)
allocates and initializes a type that represent a blacs context
Defines control structures, which contain the parameters and the settings for the DFT-based calculati...
subroutine, public dbcsr_distribution_release(dist)
...
subroutine, public dbcsr_distribution_new(dist, template, group, pgrid, row_dist, col_dist, reuse_arrays)
...
subroutine, public dbcsr_iterator_next_block(iterator, row, column, block, block_number_argument_has_been_removed, row_size, col_size, row_offset, col_offset)
...
logical function, public dbcsr_iterator_blocks_left(iterator)
...
subroutine, public dbcsr_iterator_stop(iterator)
...
subroutine, public dbcsr_copy(matrix_b, matrix_a, name, keep_sparsity, keep_imaginary)
...
subroutine, public dbcsr_get_block_p(matrix, row, col, block, found, row_size, col_size)
...
subroutine, public dbcsr_get_info(matrix, nblkrows_total, nblkcols_total, nfullrows_total, nfullcols_total, nblkrows_local, nblkcols_local, nfullrows_local, nfullcols_local, my_prow, my_pcol, local_rows, local_cols, proc_row_dist, proc_col_dist, row_blk_size, col_blk_size, row_blk_offset, col_blk_offset, distribution, name, matrix_type, group)
...
subroutine, public dbcsr_filter(matrix, eps)
...
subroutine, public dbcsr_finalize(matrix)
...
subroutine, public dbcsr_iterator_start(iterator, matrix, shared, dynamic, dynamic_byrows)
...
subroutine, public dbcsr_release(matrix)
...
subroutine, public dbcsr_clear(matrix)
...
subroutine, public dbcsr_put_block(matrix, row, col, block, summation)
...
subroutine, public dbcsr_add(matrix_a, matrix_b, alpha_scalar, beta_scalar)
...
subroutine, public dbcsr_distribution_get(dist, row_dist, col_dist, nrows, ncols, has_threads, group, mynode, numnodes, nprows, npcols, myprow, mypcol, pgrid, subgroups_defined, prow_group, pcol_group)
...
Interface to (sca)lapack for the Cholesky based procedures.
subroutine, public cp_dbcsr_cholesky_decompose(matrix, n, para_env, blacs_env)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
subroutine, public cp_dbcsr_cholesky_invert(matrix, n, para_env, blacs_env, uplo_to_full)
used to replace the cholesky decomposition by the inverse
subroutine, public dbcsr_dot(matrix_a, matrix_b, trace)
Computes the dot product of two matrices, also known as the trace of their matrix product.
Interface to (sca)lapack for the Cholesky based procedures.
subroutine, public cp_dbcsr_power(matrix, exponent, threshold, n_dependent, para_env, blacs_env, verbose, eigenvectors, eigenvalues)
...
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_dist2d_to_dist(dist2d, dist)
Creates a DBCSR distribution from a distribution_2d.
This is the start of a dbt_api, all publically needed functions are exported here....
Definition dbt_api.F:17
stores a mapping of 2D info (e.g. matrix) on a 2D processor distribution (i.e. blacs grid) where cpus...
subroutine, public distribution_2d_create(distribution_2d, blacs_env, local_rows_ptr, n_local_rows, local_cols_ptr, row_distribution_ptr, col_distribution_ptr, n_local_cols, n_row_distribution, n_col_distribution)
initializes the distribution_2d
subroutine, public distribution_2d_release(distribution_2d)
...
RI-methods for HFX and K-points. \auhtor Augustin Bussy (01.2023)
Definition hfx_ri_kp.F:13
subroutine, public hfx_ri_update_forces_kp(qs_env, ri_data, nspins, hf_fraction, rho_ao, use_virial)
Update the K-points RI-HFX forces.
Definition hfx_ri_kp.F:861
subroutine, public hfx_ri_update_ks_kp(qs_env, ri_data, ks_matrix, ehfx, rho_ao, geometry_did_change, nspins, hf_fraction)
Update the KS matrices for each real-space image.
Definition hfx_ri_kp.F:455
RI-methods for HFX.
Definition hfx_ri.F:12
subroutine, public get_force_from_3c_trace(force, t_3c_contr, t_3c_der, atom_of_kind, kind_of, idx_to_at, pref, do_mp2, deriv_dim)
This routines calculates the force contribution from a trace over 3D tensors, i.e....
Definition hfx_ri.F:3368
subroutine, public get_idx_to_atom(idx_to_at, bsizes_split, bsizes_orig)
a small utility function that returns the atom corresponding to a block of a split tensor
Definition hfx_ri.F:4238
subroutine, public hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_int_ri, t_2c_int_pot, t_3c_int, do_kpoints)
Calculate 2-center and 3-center integrals.
Definition hfx_ri.F:449
subroutine, public get_2c_der_force(force, t_2c_contr, t_2c_der, atom_of_kind, kind_of, idx_to_at, pref, do_mp2, do_ovlp)
Update the forces due to the derivative of the a 2-center product d/dR (Q|R)
Definition hfx_ri.F:3454
Types and set/get functions for HFX.
Definition hfx_types.F:15
collects all constants needed in input so that they can be used without circular dependencies
integer, parameter, public hfx_ri_do_2c_cholesky
integer, parameter, public hfx_ri_do_2c_diag
integer, parameter, public hfx_ri_do_2c_iter
integer, parameter, public do_potential_short
function that builds the hartree fock exchange section of the input
integer, parameter, public ri_pmat
objects that represent the structure of input sections and the data contained in an input section
subroutine, public section_vals_val_set(section_vals, keyword_name, i_rep_section, i_rep_val, val, l_val, i_val, r_val, c_val, l_vals_ptr, i_vals_ptr, r_vals_ptr, c_vals_ptr)
sets the requested value
recursive type(section_vals_type) function, pointer, public section_vals_get_subs_vals(section_vals, subsection_name, i_rep_section, can_return_null)
returns the values of the requested subsection
subroutine, public section_vals_val_get(section_vals, keyword_name, i_rep_section, i_rep_val, n_rep_val, val, l_val, i_val, r_val, c_val, l_vals, i_vals, r_vals, c_vals, explicit)
returns the requested value
Routines useful for iterative matrix calculations.
subroutine, public invert_hotelling(matrix_inverse, matrix, threshold, use_inv_as_guess, norm_convergence, filter_eps, accelerator_order, max_iter_lanczos, eps_lanczos, silent)
invert a symmetric positive definite matrix by Hotelling's method explicit symmetrization makes this ...
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public int_8
Definition kinds.F:54
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public default_string_length
Definition kinds.F:57
Types and basic routines needed for a kpoint calculation.
subroutine, public get_kpoint_info(kpoint, kp_scheme, nkp_grid, kp_shift, symmetry, verbose, full_grid, use_real_wfn, eps_geo, parallel_group_size, kp_range, nkp, xkp, wkp, para_env, blacs_env_all, para_env_kp, para_env_inter_kp, blacs_env, kp_env, kp_aux_env, mpools, iogrp, nkp_groups, kp_dist, cell_to_index, index_to_cell, sab_nl, sab_nl_nosym)
Retrieve information from a kpoint environment.
2- and 3-center electron repulsion integral routines based on libint2 Currently available operators: ...
real(kind=dp), parameter, public cutoff_screen_factor
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
subroutine, public m_memory(mem)
Returns the total amount of memory [bytes] in use, if known, zero otherwise.
Definition machine.F:443
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition machine.F:131
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition machine.F:148
Collection of simple mathematical functions and subroutines.
Definition mathlib.F:15
subroutine, public diag(n, a, d, v)
Diagonalize matrix a. The eigenvalues are returned in vector d and the eigenvectors are returned in m...
Definition mathlib.F:1496
subroutine, public erfc_cutoff(eps, omg, r_cutoff)
compute a truncation radius for the shortrange operator
Definition mathlib.F:1686
Interface to the message passing library MPI.
Define methods related to particle_type.
subroutine, public get_particle_set(particle_set, qs_kind_set, first_sgf, last_sgf, nsgf, nmao, basis)
Get the components of a particle set.
Define the data structure for the particle information.
Definition of physical constants:
Definition physcon.F:68
real(kind=dp), parameter, public angstrom
Definition physcon.F:144
subroutine, public get_qs_env(qs_env, atomic_kind_set, qs_kind_set, cell, super_cell, cell_ref, use_ref_cell, kpoints, dft_control, mos, sab_orb, sab_all, qmmm, qmmm_periodic, sac_ae, sac_ppl, sac_lri, sap_ppnl, sab_vdw, sab_scp, sap_oce, sab_lrc, sab_se, sab_xtbe, sab_tbe, sab_core, sab_xb, sab_xtb_pp, sab_xtb_nonbond, sab_almo, sab_kp, sab_kp_nosym, sab_cneo, particle_set, energy, force, matrix_h, matrix_h_im, matrix_ks, matrix_ks_im, matrix_vxc, run_rtp, rtp, matrix_h_kp, matrix_h_im_kp, matrix_ks_kp, matrix_ks_im_kp, matrix_vxc_kp, kinetic_kp, matrix_s_kp, matrix_w_kp, matrix_s_ri_aux_kp, matrix_s, matrix_s_ri_aux, matrix_w, matrix_p_mp2, matrix_p_mp2_admm, rho, rho_xc, pw_env, ewald_env, ewald_pw, active_space, mpools, input, para_env, blacs_env, scf_control, rel_control, kinetic, qs_charges, vppl, rho_core, rho_nlcc, rho_nlcc_g, ks_env, ks_qmmm_env, wf_history, scf_env, local_particles, local_molecules, distribution_2d, dbcsr_dist, molecule_kind_set, molecule_set, subsys, cp_subsys, oce, local_rho_set, rho_atom_set, task_list, task_list_soft, rho0_atom_set, rho0_mpole, rhoz_set, rhoz_cneo_set, ecoul_1c, rho0_s_rs, rho0_s_gs, rhoz_cneo_s_rs, rhoz_cneo_s_gs, do_kpoints, has_unit_metric, requires_mo_derivs, mo_derivs, mo_loc_history, nkind, natom, nelectron_total, nelectron_spin, efield, neighbor_list_id, linres_control, xas_env, virial, cp_ddapc_env, cp_ddapc_ewald, outer_scf_history, outer_scf_ihistory, x_data, et_coupling, dftb_potential, results, se_taper, se_store_int_env, se_nddo_mpole, se_nonbond_env, admm_env, lri_env, lri_density, exstate_env, ec_env, harris_env, dispersion_env, gcp_env, vee, rho_external, external_vxc, mask, mp2_env, bs_env, kg_env, wanniercentres, atprop, ls_scf_env, do_transport, transport_env, v_hartree_rspace, s_mstruct_changed, rho_changed, potential_changed, forces_up_to_date, mscfg_env, almo_scf_env, gradient_history, variable_history, embed_pot, spin_embed_pot, polar_env, mos_last_converged, eeq, rhs, do_rixs, tb_tblite)
Get the QUICKSTEP environment.
Some utility functions for the calculation of integrals.
subroutine, public basis_set_list_setup(basis_set_list, basis_type, qs_kind_set)
Set up an easy accessible list of the basis sets for all kinds.
Calculate the interaction radii for the operator matrix calculation.
subroutine, public init_interaction_radii_orb_basis(orb_basis_set, eps_pgf_orb, eps_pgf_short)
...
Define the quickstep kind type and their sub types.
Define the neighbor list data types and the corresponding functionality.
subroutine, public release_neighbor_list_sets(nlists)
releases an array of neighbor_list_sets
subroutine, public neighbor_list_iterator_create(iterator_set, nl, search, nthread)
Neighbor list iterator functions.
subroutine, public neighbor_list_iterator_release(iterator_set)
...
integer function, public neighbor_list_iterate(iterator_set, mepos)
...
subroutine, public get_iterator_info(iterator_set, mepos, ikind, jkind, nkind, ilist, nlist, inode, nnode, iatom, jatom, r, cell)
...
module that contains the definitions of the scf types
Utility methods to build 3-center integral tensors of various types.
subroutine, public distribution_3d_create(dist_3d, dist1, dist2, dist3, nkind, particle_set, mp_comm_3d, own_comm)
Create a 3d distribution.
subroutine, public create_2c_tensor(t2c, dist_1, dist_2, pgrid, sizes_1, sizes_2, order, name)
...
subroutine, public create_tensor_batches(sizes, nbatches, starts_array, ends_array, starts_array_block, ends_array_block)
...
subroutine, public create_3c_tensor(t3c, dist_1, dist_2, dist_3, pgrid, sizes_1, sizes_2, sizes_3, map1, map2, name)
...
Utility methods to build 3-center integral tensors of various types.
Definition qs_tensors.F:11
subroutine, public build_3c_derivatives(t3c_der_i, t3c_der_k, filter_eps, qs_env, nl_3c, basis_i, basis_j, basis_k, potential_parameter, der_eps, op_pos, do_kpoints, do_hfx_kpoints, bounds_i, bounds_j, bounds_k, ri_range, img_to_ri_cell)
Build 3-center derivative tensors.
Definition qs_tensors.F:930
subroutine, public build_2c_neighbor_lists(ij_list, basis_i, basis_j, potential_parameter, name, qs_env, sym_ij, molecular, dist_2d, pot_to_rad)
Build 2-center neighborlists adapted to different operators This mainly wraps build_neighbor_lists fo...
Definition qs_tensors.F:144
recursive integer function, public neighbor_list_3c_iterate(iterator)
Iterate 3c-nl iterator.
Definition qs_tensors.F:469
subroutine, public neighbor_list_3c_iterator_destroy(iterator)
Destroy 3c-nl iterator.
Definition qs_tensors.F:447
subroutine, public neighbor_list_3c_destroy(ijk_list)
Destroy 3c neighborlist.
Definition qs_tensors.F:385
subroutine, public build_2c_derivatives(t2c_der, filter_eps, qs_env, nl_2c, basis_i, basis_j, potential_parameter, do_kpoints)
Calculates the derivatives of 2-center integrals, wrt to the first center.
subroutine, public get_tensor_occupancy(tensor, nze, occ)
...
subroutine, public build_3c_neighbor_lists(ijk_list, basis_i, basis_j, basis_k, dist_3d, potential_parameter, name, qs_env, sym_ij, sym_jk, sym_ik, molecular, op_pos, own_dist)
Build a 3-center neighbor list.
Definition qs_tensors.F:284
subroutine, public neighbor_list_3c_iterator_create(iterator, ijk_nl)
Create a 3-center neighborlist iterator.
Definition qs_tensors.F:402
subroutine, public get_3c_iterator_info(iterator, ikind, jkind, kkind, nkind, iatom, jatom, katom, rij, rjk, rik, cell_j, cell_k)
Get info of current iteration.
Definition qs_tensors.F:566
All kind of helpful little routines.
Definition util.F:14
pure integer function, dimension(2), public get_limit(m, n, me)
divide m entries into n parts, return size of part me
Definition util.F:333
Provides all information about an atomic kind.
Type defining parameters related to the simulation cell.
Definition cell_types.F:55
represent a pointer to a 1d array
represent a pointer to a 2d array
represent a pointer to a 3d array
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
distributes pairs on a 2d grid of processors
Contains information about kpoints.
stores all the informations relevant to an mpi environment
Provides all information about a quickstep kind.