(git:374b731)
Loading...
Searching...
No Matches
hfx_derivatives.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Routines to calculate derivatives with respect to basis function origin
10!> \par History
11!> 04.2008 created [Manuel Guidon]
12!> \author Manuel Guidon
13! **************************************************************************************************
15 USE admm_types, ONLY: admm_type
18 USE cell_types, ONLY: cell_type, &
19 pbc
21 USE cp_files, ONLY: close_file, &
25 USE cp_output_handling, ONLY: cp_p_file, &
30 USE dbcsr_api, ONLY: dbcsr_get_matrix_type, &
31 dbcsr_p_type, &
32 dbcsr_type_antisymmetric
33 USE gamma, ONLY: init_md_ftable
52 USE hfx_types, ONLY: &
62 USE kinds, ONLY: dp, &
63 int_8
65 USE machine, ONLY: m_walltime
66 USE mathconstants, ONLY: fac
67 USE orbital_pointers, ONLY: nco, &
68 ncoset, &
69 nso
74 USE t_c_g0, ONLY: init
75 USE util, ONLY: sort
76 USE virial_types, ONLY: virial_type
77
78!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
79
80#include "./base/base_uses.f90"
81
82 IMPLICIT NONE
83 PRIVATE
84
86
87 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_derivatives'
88
89!***
90
91CONTAINS
92
93! **************************************************************************************************
94!> \brief computes four center derivatives for a full basis set and updates the
95!> forces%fock_4c arrays. Uses all 8 eri symmetries
96!> \param qs_env ...
97!> \param rho_ao density matrix
98!> \param rho_ao_resp relaxed density matrix from response
99!> \param hfx_section HFX input section
100!> \param para_env para_env
101!> \param irep ID of HFX replica
102!> \param use_virial ...
103!> \param adiabatic_rescale_factor parameter used for MCY3 hybrid
104!> \param resp_only Calculate only forces from response density
105!> \par History
106!> 06.2007 created [Manuel Guidon]
107!> 08.2007 optimized load balance [Manuel Guidon]
108!> 02.2009 completely rewritten screening part [Manuel Guidon]
109!> 12.2017 major bug fix. removed wrong cycle that was causing segfault.
110!> see https://groups.google.com/forum/#!topic/cp2k/pc6B14XOALY
111!> [Tobias Binninger + Valery Weber]
112!> \author Manuel Guidon
113! **************************************************************************************************
114 SUBROUTINE derivatives_four_center(qs_env, rho_ao, rho_ao_resp, hfx_section, para_env, &
115 irep, use_virial, adiabatic_rescale_factor, resp_only, &
116 external_x_data)
117
118 TYPE(qs_environment_type), POINTER :: qs_env
119 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
120 TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: rho_ao_resp
121 TYPE(section_vals_type), POINTER :: hfx_section
122 TYPE(mp_para_env_type), POINTER :: para_env
123 INTEGER, INTENT(IN) :: irep
124 LOGICAL, INTENT(IN) :: use_virial
125 REAL(dp), INTENT(IN), OPTIONAL :: adiabatic_rescale_factor
126 LOGICAL, INTENT(IN), OPTIONAL :: resp_only
127 TYPE(hfx_type), DIMENSION(:, :), POINTER, OPTIONAL :: external_x_data
128
129 CHARACTER(LEN=*), PARAMETER :: routinen = 'derivatives_four_center'
130
131 INTEGER :: atomic_offset_ac, atomic_offset_ad, atomic_offset_bc, atomic_offset_bd, bin, &
132 bits_max_val, buffer_left, buffer_size, buffer_start, cache_size, coord, current_counter, &
133 forces_map(4, 2), handle, handle_bin, handle_getp, handle_load, handle_main, i, i_atom, &
134 i_list_ij, i_list_kl, i_set_list_ij, i_set_list_ij_start, i_set_list_ij_stop, &
135 i_set_list_kl, i_set_list_kl_start, i_set_list_kl_stop, i_thread, iatom, iatom_block, &
136 iatom_end, iatom_start, ikind, iset, iw, j, j_atom, jatom, jatom_block, jatom_end, &
137 jatom_start, jkind, jset, k, k_atom, katom, katom_block, katom_end, katom_start
138 INTEGER :: kind_kind_idx, kkind, kset, l_atom, l_max, latom, latom_block, latom_end, &
139 latom_start, lkind, lset, max_am, max_pgf, max_set, my_bin_id, my_bin_size, my_thread_id, &
140 n_threads, natom, nbits, ncob, ncos_max, nints, nkimages, nkind, nneighbors, nseta, &
141 nsetb, nsgf_max, nspins, sgfb, shm_task_counter, shm_total_bins, sphi_a_u1, sphi_a_u2, &
142 sphi_a_u3, sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, &
143 sphi_d_u2, sphi_d_u3, swap_id, tmp_i4, unit_id
144 INTEGER(int_8) :: atom_block, counter, estimate_to_store_int, max_val_memory, &
145 mem_compression_counter, mem_eris, mem_max_val, my_current_counter, my_istart, &
146 n_processes, nblocks, ncpu, neris_incore, neris_onthefly, neris_tmp, neris_total, &
147 nprim_ints, shm_neris_incore, shm_neris_onthefly, shm_neris_total, shm_nprim_ints, &
148 shm_stor_count_max_val, shm_storage_counter_integrals, stor_count_max_val, &
149 storage_counter_integrals, tmp_block, tmp_i8(6)
150 INTEGER(int_8), ALLOCATABLE, DIMENSION(:) :: tmp_task_list_cost
151 INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, kind_of, last_sgf_global, &
152 nimages, tmp_index
153 INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, lb_min, lc_max, lc_min, ld_max, &
154 ld_min, npgfa, npgfb, npgfc, npgfd, nsgfa, nsgfb, nsgfc, nsgfd, shm_block_offset
155 INTEGER, DIMENSION(:, :), POINTER :: first_sgfb, nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d, &
156 offset_ac_set, offset_ad_set, offset_bc_set, offset_bd_set, shm_atomic_block_offset
157 INTEGER, DIMENSION(:, :), POINTER, SAVE :: shm_is_assoc_atomic_block
158 INTEGER, DIMENSION(:, :, :, :), POINTER :: shm_set_offset
159 INTEGER, SAVE :: shm_number_of_p_entries
160 LOGICAL :: bins_left, buffer_overflow, do_dynamic_load_balancing, do_it, do_periodic, &
161 do_print_load_balance_info, is_anti_symmetric, my_resp_only, screen_pmat_forces, &
162 treat_forces_in_core, use_disk_storage, with_resp_density
163 LOGICAL, DIMENSION(:, :), POINTER :: shm_atomic_pair_list
164 REAL(dp) :: bintime_start, bintime_stop, cartesian_estimate, cartesian_estimate_virial, &
165 compression_factor, eps_schwarz, eps_storage, fac, hf_fraction, ln_10, log10_eps_schwarz, &
166 log10_pmax, log_2, max_contraction_val, max_val1, max_val2, max_val2_set, &
167 my_adiabatic_rescale_factor, pmax_atom, pmax_blocks, pmax_entry, pmax_tmp, ra(3), rab2, &
168 rb(3), rc(3), rcd2, rd(3), spherical_estimate, spherical_estimate_virial, symm_fac, &
169 tmp_virial(3, 3)
170 REAL(dp), ALLOCATABLE, DIMENSION(:) :: ede_buffer1, ede_buffer2, ede_primitives_tmp, &
171 ede_primitives_tmp_virial, ede_work, ede_work2, ede_work2_virial, ede_work_forces, &
172 ede_work_virial, pac_buf, pac_buf_beta, pac_buf_resp, pac_buf_resp_beta, pad_buf, &
173 pad_buf_beta, pad_buf_resp, pad_buf_resp_beta, pbc_buf, pbc_buf_beta, pbc_buf_resp, &
174 pbc_buf_resp_beta, pbd_buf, pbd_buf_beta, pbd_buf_resp, pbd_buf_resp_beta
175 REAL(dp), ALLOCATABLE, DIMENSION(:), TARGET :: primitive_forces, primitive_forces_virial
176 REAL(dp), DIMENSION(:), POINTER :: full_density_resp, &
177 full_density_resp_beta, t2
178 REAL(dp), DIMENSION(:, :), POINTER :: full_density_alpha, full_density_beta, &
179 max_contraction, ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, shm_pmax_atom, shm_pmax_block, &
180 sphi_b, zeta, zetb, zetc, zetd
181 REAL(dp), DIMENSION(:, :, :), POINTER :: sphi_a_ext_set, sphi_b_ext_set, &
182 sphi_c_ext_set, sphi_d_ext_set
183 REAL(dp), DIMENSION(:, :, :, :), POINTER :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
184 sphi_d_ext
185 REAL(kind=dp) :: coeffs_kind_max0
186 TYPE(admm_type), POINTER :: admm_env
187 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
188 TYPE(cell_type), POINTER :: cell
189 TYPE(cp_libint_t) :: private_deriv
190 TYPE(cp_logger_type), POINTER :: logger
191 TYPE(dft_control_type), POINTER :: dft_control
192 TYPE(hfx_basis_info_type), POINTER :: basis_info
193 TYPE(hfx_basis_type), DIMENSION(:), POINTER :: basis_parameter
194 TYPE(hfx_cache_type), DIMENSION(:), POINTER :: integral_caches
195 TYPE(hfx_cache_type), POINTER :: maxval_cache
196 TYPE(hfx_container_type), DIMENSION(:), POINTER :: integral_containers
197 TYPE(hfx_container_type), POINTER :: maxval_container
198 TYPE(hfx_distribution), POINTER :: distribution_forces
199 TYPE(hfx_general_type) :: general_parameter
200 TYPE(hfx_load_balance_type), POINTER :: load_balance_parameter
201 TYPE(hfx_memory_type), POINTER :: memory_parameter
202 TYPE(hfx_p_kind), DIMENSION(:), POINTER :: shm_initial_p
203 TYPE(hfx_pgf_list), ALLOCATABLE, DIMENSION(:) :: pgf_list_ij, pgf_list_kl
204 TYPE(hfx_pgf_product_list), ALLOCATABLE, &
205 DIMENSION(:) :: pgf_product_list
206 TYPE(hfx_potential_type) :: potential_parameter
207 TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
208 POINTER :: screen_coeffs_kind, tmp_r_1, tmp_r_2, &
209 tmp_screen_pgf1, tmp_screen_pgf2
210 TYPE(hfx_screen_coeff_type), &
211 DIMENSION(:, :, :, :), POINTER :: screen_coeffs_set
212 TYPE(hfx_screen_coeff_type), &
213 DIMENSION(:, :, :, :, :, :), POINTER :: radii_pgf, screen_coeffs_pgf
214 TYPE(hfx_screening_type) :: screening_parameter
215 TYPE(hfx_task_list_type), DIMENSION(:), POINTER :: shm_task_list, tmp_task_list
216 TYPE(hfx_type), POINTER :: actual_x_data
217 TYPE(hfx_type), DIMENSION(:, :), POINTER :: my_x_data
218 TYPE(pair_list_type) :: list_ij, list_kl
219 TYPE(pair_set_list_type), ALLOCATABLE, &
220 DIMENSION(:) :: set_list_ij, set_list_kl
221 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
222 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
223 TYPE(virial_type), POINTER :: virial
224
225 NULLIFY (dft_control, admm_env)
226
227 with_resp_density = .false.
228 IF (ASSOCIATED(rho_ao_resp)) with_resp_density = .true.
229
230 my_resp_only = .false.
231 IF (PRESENT(resp_only)) my_resp_only = resp_only
232
233 is_anti_symmetric = dbcsr_get_matrix_type(rho_ao(1, 1)%matrix) .EQ. dbcsr_type_antisymmetric
234
235 CALL get_qs_env(qs_env, &
236 admm_env=admm_env, &
237 particle_set=particle_set, &
238 atomic_kind_set=atomic_kind_set, &
239 cell=cell, &
240 dft_control=dft_control)
241
242 nspins = dft_control%nspins
243 nkimages = dft_control%nimages
244 cpassert(nkimages == 1)
245
246 my_x_data => qs_env%x_data
247 IF (PRESENT(external_x_data)) my_x_data => external_x_data
248
249 !! One atom systems have no contribution to forces
250 IF (SIZE(particle_set, 1) == 1) THEN
251 IF (.NOT. use_virial) RETURN
252 END IF
253
254 CALL timeset(routinen, handle)
255 !! Calculate l_max used in fgamma , because init_md_ftable is definitely not thread safe
256 nkind = SIZE(atomic_kind_set, 1)
257 l_max = 0
258 DO ikind = 1, nkind
259 l_max = max(l_max, maxval(my_x_data(1, 1)%basis_parameter(ikind)%lmax))
260 END DO
261 l_max = 4*l_max + 1
262 CALL init_md_ftable(l_max)
263
264 IF (my_x_data(1, 1)%potential_parameter%potential_type == do_potential_truncated .OR. &
265 my_x_data(1, 1)%potential_parameter%potential_type == do_potential_mix_cl_trunc) THEN
266 IF (l_max > init_t_c_g0_lmax) THEN
267 CALL open_file(unit_number=unit_id, file_name=my_x_data(1, 1)%potential_parameter%filename)
268 CALL init(l_max, unit_id, para_env%mepos, para_env)
269 CALL close_file(unit_id)
270 init_t_c_g0_lmax = l_max
271 END IF
272 END IF
273
274 n_threads = 1
275!$ n_threads = omp_get_max_threads()
276
277 shm_neris_total = 0
278 shm_nprim_ints = 0
279 shm_neris_onthefly = 0
280 shm_storage_counter_integrals = 0
281 shm_neris_incore = 0
282 shm_stor_count_max_val = 0
283
284 !! get force array
285 CALL get_qs_env(qs_env=qs_env, force=force, virial=virial)
286
287 my_adiabatic_rescale_factor = 1.0_dp
288 IF (PRESENT(adiabatic_rescale_factor)) THEN
289 my_adiabatic_rescale_factor = adiabatic_rescale_factor
290 END IF
291
292 !$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
293!$OMP SHARED(qs_env,&
294!$OMP rho_ao,&
295!$OMP rho_ao_resp,&
296!$OMP with_resp_density,&
297!$OMP hfx_section,&
298!$OMP para_env,&
299!$OMP irep,&
300!$OMP my_resp_only,&
301!$OMP ncoset,&
302!$OMP nco,&
303!$OMP nso,&
304!$OMP n_threads,&
305!$OMP full_density_alpha,&
306!$OMP full_density_resp,&
307!$OMP full_density_resp_beta,&
308!$OMP full_density_beta,&
309!$OMP shm_initial_p,&
310!$OMP shm_is_assoc_atomic_block,&
311!$OMP shm_number_of_p_entries,&
312!$OMP force,&
313!$OMP virial,&
314!$OMP my_adiabatic_rescale_factor,&
315!$OMP shm_neris_total,&
316!$OMP shm_neris_onthefly,&
317!$OMP shm_storage_counter_integrals,&
318!$OMP shm_neris_incore,&
319!$OMP shm_nprim_ints,&
320!$OMP shm_stor_count_max_val,&
321!$OMP cell,&
322!$OMP screen_coeffs_set,&
323!$OMP screen_coeffs_kind,&
324!$OMP screen_coeffs_pgf,&
325!$OMP pgf_product_list_size,&
326!$OMP nkind,&
327!$OMP is_anti_symmetric,&
328!$OMP radii_pgf,&
329!$OMP shm_atomic_block_offset,&
330!$OMP shm_set_offset,&
331!$OMP shm_block_offset,&
332!$OMP shm_task_counter,&
333!$OMP shm_task_list,&
334!$OMP shm_total_bins,&
335!$OMP shm_pmax_block,&
336!$OMP shm_atomic_pair_list,&
337!$OMP shm_pmax_atom,&
338!$OMP use_virial,&
339!$OMP do_print_load_balance_info,&
340!$OMP nkimages,nspins,my_x_data)&
341!$OMP PRIVATE(actual_x_data,atomic_kind_set,atomic_offset_ac,atomic_offset_ad,atomic_offset_bc,&
342!$OMP atomic_offset_bd,atom_of_kind,basis_info,&
343!$OMP basis_parameter,bin,bins_left,bintime_start,bintime_stop,bits_max_val,buffer_left,buffer_overflow,buffer_size,&
344!$OMP buffer_start,cache_size,cartesian_estimate,cartesian_estimate_virial,&
345!$OMP coeffs_kind_max0,compression_factor,counter,current_counter,&
346!$OMP distribution_forces,do_dynamic_load_balancing,do_it,do_periodic,ede_buffer1,ede_buffer2,&
347!$OMP ede_primitives_tmp,ede_primitives_tmp_virial,&
348!$OMP ede_work,ede_work2,ede_work2_virial,ede_work_forces,ede_work_virial,eps_schwarz,eps_storage,estimate_to_store_int,&
349!$OMP fac,first_sgfb,forces_map,general_parameter,handle_bin,handle_getp,handle_load,&
350!$OMP handle_main,hf_fraction,i_atom,iatom_end,iatom_start,ikind,integral_caches,integral_containers,&
351!$OMP i_set_list_ij_start,i_set_list_ij_stop,i_set_list_kl_start,i_set_list_kl_stop,i_thread,iw,j_atom,jatom_end,&
352!$OMP jatom_start,jkind,katom,k_atom,katom_block,katom_end,katom_start,kind_kind_idx,&
353!$OMP kind_of,kkind,kset,la_max,la_min,last_sgf_global,latom,l_atom,&
354!$OMP latom_block,latom_end,latom_start,lb_max,lb_min,lc_max,lc_min,ld_max,&
355!$OMP ld_min,list_ij,list_kl,lkind,ln_10,load_balance_parameter,log10_eps_schwarz,log10_pmax,&
356!$OMP log_2,logger,lset,max_am,max_contraction,max_contraction_val,max_pgf,max_set,&
357!$OMP max_val1,max_val2,max_val2_set,maxval_cache,maxval_container,max_val_memory,mem_compression_counter,mem_eris,&
358!$OMP mem_max_val,memory_parameter,my_bin_id,my_bin_size,my_current_counter,my_istart,my_thread_id,natom,&
359!$OMP nbits,nblocks,ncob,ncos_max,ncpu,neris_incore,neris_onthefly,neris_tmp,&
360!$OMP neris_total,nimages,nints,nneighbors,npgfa,npgfb,npgfc,npgfd,&
361!$OMP nprim_ints,n_processes,nseta,nsetb,nsgfa,nsgfb,nsgfc,nsgfd,&
362!$OMP nsgfl_a,nsgfl_b,nsgfl_c,nsgfl_d,nsgf_max,offset_ac_set,offset_ad_set,&
363!$OMP offset_bc_set,offset_bd_set,pac_buf,pac_buf_beta,pac_buf_resp,pac_buf_resp_beta,pad_buf,pad_buf_beta,pad_buf_resp,&
364!$OMP pad_buf_resp_beta,particle_set,pbc_buf,pbc_buf_beta,pbc_buf_resp,pbc_buf_resp_beta,pbd_buf,pbd_buf_beta, &
365!$OMP pbd_buf_resp, pbd_buf_resp_beta, pgf_list_ij,&
366!$OMP pgf_list_kl,pgf_product_list,pmax_atom,pmax_blocks,pmax_entry,pmax_tmp,potential_parameter,primitive_forces,&
367!$OMP primitive_forces_virial,private_deriv,ptr_p_1,ptr_p_2,ptr_p_3,ptr_p_4,ra,rab2,&
368!$OMP rb,rc,rcd2,rd,screening_parameter,screen_pmat_forces,set_list_ij,set_list_kl,&
369!$OMP sgfb,spherical_estimate,spherical_estimate_virial,sphi_a_ext,sphi_a_ext_set,sphi_a_u1,sphi_a_u2,sphi_a_u3,&
370!$OMP sphi_b,sphi_b_ext,sphi_b_ext_set,sphi_b_u1,sphi_b_u2,sphi_b_u3,sphi_c_ext,sphi_c_ext_set,&
371!$OMP sphi_c_u1,sphi_c_u2,sphi_c_u3,sphi_d_ext,sphi_d_ext_set,sphi_d_u1,sphi_d_u2,sphi_d_u3,&
372!$OMP storage_counter_integrals,stor_count_max_val,swap_id,symm_fac,t2,tmp_block,tmp_i8, tmp_i4,&
373!$OMP tmp_index,tmp_r_1,tmp_r_2,tmp_screen_pgf1,tmp_screen_pgf2,tmp_task_list,tmp_task_list_cost,tmp_virial,&
374!$OMP treat_forces_in_core,use_disk_storage,zeta,zetb,zetc,zetd)
375
376 i_thread = 0
377!$ i_thread = omp_get_thread_num()
378
379 ln_10 = log(10.0_dp)
380 log_2 = log10(2.0_dp)
381 actual_x_data => my_x_data(irep, i_thread + 1)
382 do_periodic = actual_x_data%periodic_parameter%do_periodic
383
384 screening_parameter = actual_x_data%screening_parameter
385 general_parameter = actual_x_data%general_parameter
386 potential_parameter = actual_x_data%potential_parameter
387 basis_info => actual_x_data%basis_info
388
389 load_balance_parameter => actual_x_data%load_balance_parameter
390 basis_parameter => actual_x_data%basis_parameter
391
392 ! IF(with_resp_density) THEN
393 ! ! here we also do a copy of the original load_balance_parameter
394 ! ! since if MP2 forces are required after the calculation of the HFX
395 ! ! derivatives we need to calculate again the SCF energy.
396 ! ALLOCATE(load_balance_parameter_energy)
397 ! load_balance_parameter_energy = actual_x_data%load_balance_parameter
398 ! END IF
399
400 memory_parameter => actual_x_data%memory_parameter
401
402 cache_size = memory_parameter%cache_size
403 bits_max_val = memory_parameter%bits_max_val
404
405 use_disk_storage = .false.
406
407 CALL get_qs_env(qs_env=qs_env, &
408 atomic_kind_set=atomic_kind_set, &
409 particle_set=particle_set)
410
411 max_set = basis_info%max_set
412 max_am = basis_info%max_am
413 natom = SIZE(particle_set, 1)
414
415 treat_forces_in_core = memory_parameter%treat_forces_in_core
416
417 hf_fraction = general_parameter%fraction
418 hf_fraction = hf_fraction*my_adiabatic_rescale_factor
419 eps_schwarz = screening_parameter%eps_schwarz_forces
420 IF (eps_schwarz < 0.0_dp) THEN
421 log10_eps_schwarz = log_zero
422 ELSE
423 !! ** make eps_schwarz tighter in case of a stress calculation
424 IF (use_virial) eps_schwarz = eps_schwarz/actual_x_data%periodic_parameter%R_max_stress
425 log10_eps_schwarz = log10(eps_schwarz)
426 END IF
427 screen_pmat_forces = screening_parameter%do_p_screening_forces
428 ! don't do density screening in case of response forces
429 IF (with_resp_density) screen_pmat_forces = .false.
430
431 eps_storage = eps_schwarz*memory_parameter%eps_storage_scaling
432
433 counter = 0_int_8
434
435 !! Copy the libint_guy
436 private_deriv = actual_x_data%lib_deriv
437
438 !! Get screening parameter
439
440 CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, &
441 atom_of_kind=atom_of_kind, &
442 kind_of=kind_of)
443
444 !! Create helper arrray for mapping local basis functions to global ones
445 ALLOCATE (last_sgf_global(0:natom))
446 last_sgf_global(0) = 0
447 DO iatom = 1, natom
448 ikind = kind_of(iatom)
449 last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
450 END DO
451
452 ALLOCATE (max_contraction(max_set, natom))
453
454 max_contraction = 0.0_dp
455 max_pgf = 0
456 DO jatom = 1, natom
457 jkind = kind_of(jatom)
458 lb_max => basis_parameter(jkind)%lmax
459 npgfb => basis_parameter(jkind)%npgf
460 nsetb = basis_parameter(jkind)%nset
461 first_sgfb => basis_parameter(jkind)%first_sgf
462 sphi_b => basis_parameter(jkind)%sphi
463 nsgfb => basis_parameter(jkind)%nsgf
464 DO jset = 1, nsetb
465 ! takes the primitive to contracted transformation into account
466 ncob = npgfb(jset)*ncoset(lb_max(jset))
467 sgfb = first_sgfb(1, jset)
468 ! if the primitives are assumed to be all of max_val2, max_val2*p2s_b becomes
469 ! the maximum value after multiplication with sphi_b
470 max_contraction(jset, jatom) = maxval((/(sum(abs(sphi_b(1:ncob, i))), i=sgfb, sgfb + nsgfb(jset) - 1)/))
471 max_pgf = max(max_pgf, npgfb(jset))
472 END DO
473 END DO
474
475 ncpu = para_env%num_pe
476 n_processes = ncpu*n_threads
477
478 !! initialize some counters
479 neris_total = 0_int_8
480 neris_incore = 0_int_8
481 neris_onthefly = 0_int_8
482 mem_eris = 0_int_8
483 mem_max_val = 0_int_8
484 compression_factor = 0.0_dp
485 nprim_ints = 0_int_8
486 neris_tmp = 0_int_8
487 max_val_memory = 1_int_8
488
489!$OMP MASTER
490 !! Set pointer for is_assoc helper
491 shm_is_assoc_atomic_block => actual_x_data%is_assoc_atomic_block
492 shm_number_of_p_entries = actual_x_data%number_of_p_entries
493 shm_atomic_block_offset => actual_x_data%atomic_block_offset
494 shm_set_offset => actual_x_data%set_offset
495 shm_block_offset => actual_x_data%block_offset
496
497 NULLIFY (full_density_alpha)
498 NULLIFY (full_density_beta)
499 ALLOCATE (full_density_alpha(shm_block_offset(ncpu + 1), nkimages))
500
501 !! Get the full density from all the processors
502 CALL timeset(routinen//"_getP", handle_getp)
503 CALL get_full_density(para_env, full_density_alpha(:, 1), rho_ao(1, 1)%matrix, shm_number_of_p_entries, &
504 shm_block_offset, &
505 kind_of, basis_parameter, get_max_vals_spin=.false., &
506 antisymmetric=is_anti_symmetric)
507 ! for now only closed shell case
508 IF (with_resp_density) THEN
509 NULLIFY (full_density_resp)
510 ALLOCATE (full_density_resp(shm_block_offset(ncpu + 1)))
511 CALL get_full_density(para_env, full_density_resp, rho_ao_resp(1)%matrix, shm_number_of_p_entries, &
512 shm_block_offset, &
513 kind_of, basis_parameter, get_max_vals_spin=.false., &
514 antisymmetric=is_anti_symmetric)
515 END IF
516
517 IF (nspins == 2) THEN
518 ALLOCATE (full_density_beta(shm_block_offset(ncpu + 1), 1))
519 CALL get_full_density(para_env, full_density_beta(:, 1), rho_ao(2, 1)%matrix, shm_number_of_p_entries, &
520 shm_block_offset, &
521 kind_of, basis_parameter, get_max_vals_spin=.false., &
522 antisymmetric=is_anti_symmetric)
523 ! With resp density
524 IF (with_resp_density) THEN
525 NULLIFY (full_density_resp_beta)
526 ALLOCATE (full_density_resp_beta(shm_block_offset(ncpu + 1)))
527 CALL get_full_density(para_env, full_density_resp_beta, rho_ao_resp(2)%matrix, shm_number_of_p_entries, &
528 shm_block_offset, &
529 kind_of, basis_parameter, get_max_vals_spin=.false., &
530 antisymmetric=is_anti_symmetric)
531 END IF
532
533 END IF
534 CALL timestop(handle_getp)
535
536 !! Calculate max entries for screening on actual density. If screen_p_mat_forces = FALSE, the
537 !! matrix is initialized to 1.0
538 IF (screen_pmat_forces) THEN
539 NULLIFY (shm_initial_p)
540 shm_initial_p => actual_x_data%initial_p_forces
541 shm_pmax_atom => actual_x_data%pmax_atom_forces
542 IF (memory_parameter%recalc_forces) THEN
543 CALL update_pmax_mat(shm_initial_p, &
544 actual_x_data%map_atom_to_kind_atom, &
545 actual_x_data%set_offset, &
546 actual_x_data%atomic_block_offset, &
547 shm_pmax_atom, &
548 full_density_alpha, full_density_beta, &
549 natom, kind_of, basis_parameter, &
550 nkind, actual_x_data%is_assoc_atomic_block)
551 END IF
552 END IF
553
554 ! restore as full density the HF density
555 ! maybe in the future
556 IF (with_resp_density .AND. .NOT. my_resp_only) THEN
557 full_density_alpha(:, 1) = full_density_alpha(:, 1) - full_density_resp
558 IF (nspins == 2) THEN
559 full_density_beta(:, 1) = &
560 full_density_beta(:, 1) - full_density_resp_beta
561 END IF
562 ! full_density_resp=full_density+full_density_resp
563 END IF
564
565 screen_coeffs_set => actual_x_data%screen_funct_coeffs_set
566 screen_coeffs_kind => actual_x_data%screen_funct_coeffs_kind
567 screen_coeffs_pgf => actual_x_data%screen_funct_coeffs_pgf
568 radii_pgf => actual_x_data%pair_dist_radii_pgf
569 shm_pmax_block => actual_x_data%pmax_block
570
571!$OMP END MASTER
572!$OMP BARRIER
573
574!$OMP MASTER
575 CALL timeset(routinen//"_load", handle_load)
576!$OMP END MASTER
577 !! Load balance the work
578 IF (actual_x_data%memory_parameter%recalc_forces) THEN
579 IF (actual_x_data%b_first_load_balance_forces) THEN
580 CALL hfx_load_balance(actual_x_data, eps_schwarz, particle_set, max_set, para_env, &
581 screen_coeffs_set, screen_coeffs_kind, &
582 shm_is_assoc_atomic_block, do_periodic, &
583 load_balance_parameter, kind_of, basis_parameter, shm_initial_p, &
584 shm_pmax_atom, i_thread, n_threads, &
585 cell, screen_pmat_forces, actual_x_data%map_atom_to_kind_atom, &
586 nkind, hfx_do_eval_forces, shm_pmax_block, use_virial)
587 actual_x_data%b_first_load_balance_forces = .false.
588 ELSE
589 CALL hfx_update_load_balance(actual_x_data, para_env, &
590 load_balance_parameter, &
591 i_thread, n_threads, hfx_do_eval_forces)
592 END IF
593 END IF
594!$OMP MASTER
595 CALL timestop(handle_load)
596!$OMP END MASTER
597
598 !! precompute maximum nco and allocate scratch
599 ncos_max = 0
600 nsgf_max = 0
601 DO iatom = 1, natom
602 ikind = kind_of(iatom)
603 nseta = basis_parameter(ikind)%nset
604 npgfa => basis_parameter(ikind)%npgf
605 la_max => basis_parameter(ikind)%lmax
606 nsgfa => basis_parameter(ikind)%nsgf
607 DO iset = 1, nseta
608 ncos_max = max(ncos_max, ncoset(la_max(iset)))
609 nsgf_max = max(nsgf_max, nsgfa(iset))
610 END DO
611 END DO
612
613 !! Allocate work arrays
614 ALLOCATE (primitive_forces(12*nsgf_max**4))
615 primitive_forces = 0.0_dp
616
617 ! ** Allocate buffers for pgf_lists
618 nneighbors = SIZE(actual_x_data%neighbor_cells)
619 ALLOCATE (pgf_list_ij(max_pgf**2))
620 ALLOCATE (pgf_list_kl(max_pgf**2))
621!$OMP ATOMIC READ
622 tmp_i4 = pgf_product_list_size
623 ALLOCATE (pgf_product_list(tmp_i4))
624 ALLOCATE (nimages(max_pgf**2))
625
626 DO i = 1, max_pgf**2
627 ALLOCATE (pgf_list_ij(i)%image_list(nneighbors))
628 ALLOCATE (pgf_list_kl(i)%image_list(nneighbors))
629 END DO
630
631 ALLOCATE (pbd_buf(nsgf_max**2))
632 ALLOCATE (pbc_buf(nsgf_max**2))
633 ALLOCATE (pad_buf(nsgf_max**2))
634 ALLOCATE (pac_buf(nsgf_max**2))
635 IF (with_resp_density) THEN
636 ALLOCATE (pbd_buf_resp(nsgf_max**2))
637 ALLOCATE (pbc_buf_resp(nsgf_max**2))
638 ALLOCATE (pad_buf_resp(nsgf_max**2))
639 ALLOCATE (pac_buf_resp(nsgf_max**2))
640 END IF
641 IF (nspins == 2) THEN
642 ALLOCATE (pbd_buf_beta(nsgf_max**2))
643 ALLOCATE (pbc_buf_beta(nsgf_max**2))
644 ALLOCATE (pad_buf_beta(nsgf_max**2))
645 ALLOCATE (pac_buf_beta(nsgf_max**2))
646 IF (with_resp_density) THEN
647 ALLOCATE (pbd_buf_resp_beta(nsgf_max**2))
648 ALLOCATE (pbc_buf_resp_beta(nsgf_max**2))
649 ALLOCATE (pad_buf_resp_beta(nsgf_max**2))
650 ALLOCATE (pac_buf_resp_beta(nsgf_max**2))
651 END IF
652 END IF
653
654 ALLOCATE (ede_work(ncos_max**4*12))
655 ALLOCATE (ede_work2(ncos_max**4*12))
656 ALLOCATE (ede_work_forces(ncos_max**4*12))
657 ALLOCATE (ede_buffer1(ncos_max**4))
658 ALLOCATE (ede_buffer2(ncos_max**4))
659 ALLOCATE (ede_primitives_tmp(nsgf_max**4))
660
661 IF (use_virial) THEN
662 ALLOCATE (primitive_forces_virial(12*nsgf_max**4*3))
663 primitive_forces_virial = 0.0_dp
664 ALLOCATE (ede_work_virial(ncos_max**4*12*3))
665 ALLOCATE (ede_work2_virial(ncos_max**4*12*3))
666 ALLOCATE (ede_primitives_tmp_virial(nsgf_max**4))
667 tmp_virial = 0.0_dp
668 ELSE
669 ! dummy allocation
670 ALLOCATE (primitive_forces_virial(1))
671 primitive_forces_virial = 0.0_dp
672 ALLOCATE (ede_work_virial(1))
673 ALLOCATE (ede_work2_virial(1))
674 ALLOCATE (ede_primitives_tmp_virial(1))
675 END IF
676
677 !! Start caluclating integrals of the form (ab|cd) or (ij|kl)
678 !! In order to do so, there is a main four-loop structure that takes into account the two symmetries
679 !!
680 !! (ab|cd) = (ba|cd) = (ab|dc) = (ba|dc)
681 !!
682 !! by iterating in the following way
683 !!
684 !! DO iatom=1,natom and DO katom=1,natom
685 !! DO jatom=iatom,natom DO latom=katom,natom
686 !!
687 !! The third symmetry
688 !!
689 !! (ab|cd) = (cd|ab)
690 !!
691 !! is taken into account by the following criterion:
692 !!
693 !! IF(katom+latom<=iatom+jatom) THEN
694 !! IF( ((iatom+jatom).EQ.(katom+latom) ) .AND.(katom<iatom)) CYCLE
695 !!
696 !! Furthermore, if iatom==jatom==katom==latom we cycle, because the derivatives are zero anyway.
697 !!
698 !! Depending on the degeneracy of an integral the exchange contribution is multiplied by a corresponding
699 !! factor ( symm_fac ).
700 !!
701 !! If one quartet does not pass the screening we CYCLE on the outer most possible loop. Thats why we use
702 !! different hierarchies of short range screening matrices.
703 !!
704 !! If we do a parallel run, each process owns a unique array of workloads. Here, a workload is
705 !! defined as :
706 !!
707 !! istart, jstart, kstart, lstart, number_of_atom_quartets, initial_cpu_id
708 !!
709 !! This tells the process where to start the main loops and how many bunches of integrals it has to
710 !! calculate. The original parallelization is a simple modulo distribution that is binned and
711 !! optimized in the load_balance routines. Since the Monte Carlo routines can swap processors,
712 !! we need to know which was the initial cpu_id.
713 !! Furthermore, the indices iatom, jatom, katom, latom have to be set to istart, jstart, kstart and
714 !! lstart only the first time the loop is executed. All subsequent loops have to start with one or
715 !! iatom and katom respectively. Therefore, we use flags like first_j_loop etc.
716
717!$OMP BARRIER
718!$OMP MASTER
719 CALL timeset(routinen//"_main", handle_main)
720!$OMP END MASTER
721!$OMP BARRIER
722
723 coeffs_kind_max0 = maxval(screen_coeffs_kind(:, :)%x(2))
724 ALLOCATE (set_list_ij((max_set*load_balance_parameter%block_size)**2))
725 ALLOCATE (set_list_kl((max_set*load_balance_parameter%block_size)**2))
726
727!$OMP BARRIER
728
729 IF (actual_x_data%memory_parameter%recalc_forces) THEN
730 actual_x_data%distribution_forces%ram_counter = huge(distribution_forces%ram_counter)
731 memory_parameter%ram_counter_forces = huge(memory_parameter%ram_counter_forces)
732 END IF
733
734 IF (treat_forces_in_core) THEN
735 buffer_overflow = .false.
736 ELSE
737 buffer_overflow = .true.
738 END IF
739
740 do_dynamic_load_balancing = .true.
741 IF (n_threads == 1) do_dynamic_load_balancing = .false.
742
743 IF (do_dynamic_load_balancing) THEN
744 my_bin_size = SIZE(actual_x_data%distribution_forces)
745 ELSE
746 my_bin_size = 1
747 END IF
748 !! We do not need the containers if MAX_MEM = 0
749 IF (treat_forces_in_core) THEN
750 !! IF new md step -> reinitialize containers
751 IF (actual_x_data%memory_parameter%recalc_forces) THEN
752 CALL dealloc_containers(actual_x_data%store_forces, memory_parameter%actual_memory_usage)
753 CALL alloc_containers(actual_x_data%store_forces, my_bin_size)
754
755 DO bin = 1, my_bin_size
756 maxval_container => actual_x_data%store_forces%maxval_container(bin)
757 integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
758 CALL hfx_init_container(maxval_container, memory_parameter%actual_memory_usage, .false.)
759 DO i = 1, 64
760 CALL hfx_init_container(integral_containers(i), memory_parameter%actual_memory_usage, .false.)
761 END DO
762 END DO
763 END IF
764
765 !! Decompress the first cache for maxvals and integrals
766 IF (.NOT. actual_x_data%memory_parameter%recalc_forces) THEN
767 DO bin = 1, my_bin_size
768 maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
769 maxval_container => actual_x_data%store_forces%maxval_container(bin)
770 integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
771 integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
772 CALL hfx_decompress_first_cache(bits_max_val, maxval_cache, maxval_container, &
773 memory_parameter%actual_memory_usage, .false.)
774 DO i = 1, 64
775 CALL hfx_decompress_first_cache(i, integral_caches(i), integral_containers(i), &
776 memory_parameter%actual_memory_usage, .false.)
777 END DO
778 END DO
779 END IF
780 END IF
781
782!$OMP BARRIER
783!$OMP MASTER
784 IF (do_dynamic_load_balancing) THEN
785 ! ** Lets construct the task list
786 shm_total_bins = 0
787 DO i = 1, n_threads
788 shm_total_bins = shm_total_bins + SIZE(my_x_data(irep, i)%distribution_forces)
789 END DO
790 ALLOCATE (tmp_task_list(shm_total_bins))
791 shm_task_counter = 0
792 DO i = 1, n_threads
793 DO bin = 1, SIZE(my_x_data(irep, i)%distribution_forces)
794 shm_task_counter = shm_task_counter + 1
795 tmp_task_list(shm_task_counter)%thread_id = i
796 tmp_task_list(shm_task_counter)%bin_id = bin
797 tmp_task_list(shm_task_counter)%cost = my_x_data(irep, i)%distribution_forces(bin)%cost
798 END DO
799 END DO
800
801 ! ** Now sort the task list
802 ALLOCATE (tmp_task_list_cost(shm_total_bins))
803 ALLOCATE (tmp_index(shm_total_bins))
804
805 DO i = 1, shm_total_bins
806 tmp_task_list_cost(i) = tmp_task_list(i)%cost
807 END DO
808
809 CALL sort(tmp_task_list_cost, shm_total_bins, tmp_index)
810
811 ALLOCATE (actual_x_data%task_list(shm_total_bins))
812
813 DO i = 1, shm_total_bins
814 actual_x_data%task_list(i) = tmp_task_list(tmp_index(shm_total_bins - i + 1))
815 END DO
816
817 shm_task_list => actual_x_data%task_list
818 shm_task_counter = 0
819
820 DEALLOCATE (tmp_task_list_cost, tmp_index, tmp_task_list)
821 END IF
822
823 !! precalculate maximum density matrix elements in blocks
824 shm_pmax_block => actual_x_data%pmax_block
825 actual_x_data%pmax_block = 0.0_dp
826 IF (screen_pmat_forces) THEN
827 DO iatom_block = 1, SIZE(actual_x_data%blocks)
828 iatom_start = actual_x_data%blocks(iatom_block)%istart
829 iatom_end = actual_x_data%blocks(iatom_block)%iend
830 DO jatom_block = 1, SIZE(actual_x_data%blocks)
831 jatom_start = actual_x_data%blocks(jatom_block)%istart
832 jatom_end = actual_x_data%blocks(jatom_block)%iend
833 shm_pmax_block(iatom_block, jatom_block) = maxval(shm_pmax_atom(iatom_start:iatom_end, jatom_start:jatom_end))
834 END DO
835 END DO
836 END IF
837 shm_atomic_pair_list => actual_x_data%atomic_pair_list_forces
838 IF (actual_x_data%memory_parameter%recalc_forces) THEN
839 CALL build_atomic_pair_list(natom, shm_atomic_pair_list, kind_of, basis_parameter, particle_set, &
840 do_periodic, screen_coeffs_kind, coeffs_kind_max0, log10_eps_schwarz, cell, &
841 actual_x_data%blocks)
842 END IF
843
844 my_bin_size = SIZE(actual_x_data%distribution_forces)
845 DO bin = 1, my_bin_size
846 actual_x_data%distribution_forces(bin)%time_forces = 0.0_dp
847 END DO
848!$OMP END MASTER
849!$OMP BARRIER
850
851 IF (treat_forces_in_core) THEN
852 IF (my_bin_size > 0) THEN
853 maxval_container => actual_x_data%store_forces%maxval_container(1)
854 maxval_cache => actual_x_data%store_forces%maxval_cache(1)
855
856 integral_containers => actual_x_data%store_forces%integral_containers(:, 1)
857 integral_caches => actual_x_data%store_forces%integral_caches(:, 1)
858 END IF
859 END IF
860
861 nblocks = load_balance_parameter%nblocks
862
863 bins_left = .true.
864 do_it = .true.
865 bin = 0
866 DO WHILE (bins_left)
867 IF (.NOT. do_dynamic_load_balancing) THEN
868 bin = bin + 1
869 IF (bin > my_bin_size) THEN
870 do_it = .false.
871 bins_left = .false.
872 ELSE
873 do_it = .true.
874 bins_left = .true.
875 distribution_forces => actual_x_data%distribution_forces(bin)
876 END IF
877 ELSE
878!$OMP CRITICAL(hfxderivatives_critical)
879 shm_task_counter = shm_task_counter + 1
880 IF (shm_task_counter <= shm_total_bins) THEN
881 my_thread_id = shm_task_list(shm_task_counter)%thread_id
882 my_bin_id = shm_task_list(shm_task_counter)%bin_id
883 IF (treat_forces_in_core) THEN
884 maxval_cache => my_x_data(irep, my_thread_id)%store_forces%maxval_cache(my_bin_id)
885 maxval_container => my_x_data(irep, my_thread_id)%store_forces%maxval_container(my_bin_id)
886 integral_caches => my_x_data(irep, my_thread_id)%store_forces%integral_caches(:, my_bin_id)
887 integral_containers => my_x_data(irep, my_thread_id)%store_forces%integral_containers(:, my_bin_id)
888 END IF
889 distribution_forces => my_x_data(irep, my_thread_id)%distribution_forces(my_bin_id)
890 do_it = .true.
891 bins_left = .true.
892 IF (actual_x_data%memory_parameter%recalc_forces) THEN
893 distribution_forces%ram_counter = huge(distribution_forces%ram_counter)
894 END IF
895 counter = 0_int_8
896 ELSE
897 do_it = .false.
898 bins_left = .false.
899 END IF
900!$OMP END CRITICAL(hfxderivatives_critical)
901 END IF
902
903 IF (.NOT. do_it) cycle
904!$OMP MASTER
905 CALL timeset(routinen//"_bin", handle_bin)
906!$OMP END MASTER
907 bintime_start = m_walltime()
908 my_istart = distribution_forces%istart
909 my_current_counter = 0
910 IF (distribution_forces%number_of_atom_quartets == 0 .OR. &
911 my_istart == -1_int_8) my_istart = nblocks**4
912 atomic_blocks: DO atom_block = my_istart, nblocks**4 - 1, n_processes
913 latom_block = int(modulo(atom_block, nblocks)) + 1
914 tmp_block = atom_block/nblocks
915 katom_block = int(modulo(tmp_block, nblocks)) + 1
916 IF (latom_block < katom_block) cycle
917 tmp_block = tmp_block/nblocks
918 jatom_block = int(modulo(tmp_block, nblocks)) + 1
919 tmp_block = tmp_block/nblocks
920 iatom_block = int(modulo(tmp_block, nblocks)) + 1
921 IF (jatom_block < iatom_block) cycle
922 my_current_counter = my_current_counter + 1
923 IF (my_current_counter > distribution_forces%number_of_atom_quartets) EXIT atomic_blocks
924
925 iatom_start = actual_x_data%blocks(iatom_block)%istart
926 iatom_end = actual_x_data%blocks(iatom_block)%iend
927 jatom_start = actual_x_data%blocks(jatom_block)%istart
928 jatom_end = actual_x_data%blocks(jatom_block)%iend
929 katom_start = actual_x_data%blocks(katom_block)%istart
930 katom_end = actual_x_data%blocks(katom_block)%iend
931 latom_start = actual_x_data%blocks(latom_block)%istart
932 latom_end = actual_x_data%blocks(latom_block)%iend
933
934 pmax_blocks = max(shm_pmax_block(katom_block, iatom_block) + &
935 shm_pmax_block(latom_block, jatom_block), &
936 shm_pmax_block(latom_block, iatom_block) + &
937 shm_pmax_block(katom_block, jatom_block))
938
939 IF (2.0_dp*coeffs_kind_max0 + pmax_blocks < log10_eps_schwarz) cycle
940
941 CALL build_pair_list(natom, list_ij, set_list_ij, iatom_start, iatom_end, &
942 jatom_start, jatom_end, &
943 kind_of, basis_parameter, particle_set, &
944 do_periodic, screen_coeffs_set, screen_coeffs_kind, coeffs_kind_max0, &
945 log10_eps_schwarz, cell, pmax_blocks, shm_atomic_pair_list)
946
947 CALL build_pair_list(natom, list_kl, set_list_kl, katom_start, katom_end, &
948 latom_start, latom_end, &
949 kind_of, basis_parameter, particle_set, &
950 do_periodic, screen_coeffs_set, screen_coeffs_kind, coeffs_kind_max0, &
951 log10_eps_schwarz, cell, pmax_blocks, shm_atomic_pair_list)
952
953 DO i_list_ij = 1, list_ij%n_element
954 iatom = list_ij%elements(i_list_ij)%pair(1)
955 jatom = list_ij%elements(i_list_ij)%pair(2)
956 i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
957 i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
958 ikind = list_ij%elements(i_list_ij)%kind_pair(1)
959 jkind = list_ij%elements(i_list_ij)%kind_pair(2)
960 ra = list_ij%elements(i_list_ij)%r1
961 rb = list_ij%elements(i_list_ij)%r2
962 rab2 = list_ij%elements(i_list_ij)%dist2
963
964 la_max => basis_parameter(ikind)%lmax
965 la_min => basis_parameter(ikind)%lmin
966 npgfa => basis_parameter(ikind)%npgf
967 nseta = basis_parameter(ikind)%nset
968 zeta => basis_parameter(ikind)%zet
969 nsgfa => basis_parameter(ikind)%nsgf
970 sphi_a_ext => basis_parameter(ikind)%sphi_ext(:, :, :, :)
971 nsgfl_a => basis_parameter(ikind)%nsgfl
972 sphi_a_u1 = ubound(sphi_a_ext, 1)
973 sphi_a_u2 = ubound(sphi_a_ext, 2)
974 sphi_a_u3 = ubound(sphi_a_ext, 3)
975
976 lb_max => basis_parameter(jkind)%lmax
977 lb_min => basis_parameter(jkind)%lmin
978 npgfb => basis_parameter(jkind)%npgf
979 nsetb = basis_parameter(jkind)%nset
980 zetb => basis_parameter(jkind)%zet
981 nsgfb => basis_parameter(jkind)%nsgf
982 sphi_b_ext => basis_parameter(jkind)%sphi_ext(:, :, :, :)
983 nsgfl_b => basis_parameter(jkind)%nsgfl
984 sphi_b_u1 = ubound(sphi_b_ext, 1)
985 sphi_b_u2 = ubound(sphi_b_ext, 2)
986 sphi_b_u3 = ubound(sphi_b_ext, 3)
987
988 i_atom = atom_of_kind(iatom)
989 forces_map(1, 1) = ikind
990 forces_map(1, 2) = i_atom
991 j_atom = atom_of_kind(jatom)
992 forces_map(2, 1) = jkind
993 forces_map(2, 2) = j_atom
994
995 DO i_list_kl = 1, list_kl%n_element
996 katom = list_kl%elements(i_list_kl)%pair(1)
997 latom = list_kl%elements(i_list_kl)%pair(2)
998
999 !All four centers equivalent => zero-contribution
1000!VIIRAL IF((iatom==jatom .AND. iatom==katom .AND. iatom==latom)) CYCLE
1001 IF (.NOT. (katom + latom <= iatom + jatom)) cycle
1002 IF (((iatom + jatom) .EQ. (katom + latom)) .AND. (katom < iatom)) cycle
1003
1004 i_set_list_kl_start = list_kl%elements(i_list_kl)%set_bounds(1)
1005 i_set_list_kl_stop = list_kl%elements(i_list_kl)%set_bounds(2)
1006 kkind = list_kl%elements(i_list_kl)%kind_pair(1)
1007 lkind = list_kl%elements(i_list_kl)%kind_pair(2)
1008 rc = list_kl%elements(i_list_kl)%r1
1009 rd = list_kl%elements(i_list_kl)%r2
1010 rcd2 = list_kl%elements(i_list_kl)%dist2
1011
1012 IF (screen_pmat_forces) THEN
1013 pmax_atom = max(shm_pmax_atom(katom, iatom) + &
1014 shm_pmax_atom(latom, jatom), &
1015 shm_pmax_atom(latom, iatom) + &
1016 shm_pmax_atom(katom, jatom))
1017 ELSE
1018 pmax_atom = 0.0_dp
1019 END IF
1020
1021 IF ((screen_coeffs_kind(jkind, ikind)%x(1)*rab2 + &
1022 screen_coeffs_kind(jkind, ikind)%x(2)) + &
1023 (screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
1024 screen_coeffs_kind(lkind, kkind)%x(2)) + pmax_atom < log10_eps_schwarz) cycle
1025
1026 IF (.NOT. (shm_is_assoc_atomic_block(latom, iatom) >= 1 .AND. &
1027 shm_is_assoc_atomic_block(katom, iatom) >= 1 .AND. &
1028 shm_is_assoc_atomic_block(katom, jatom) >= 1 .AND. &
1029 shm_is_assoc_atomic_block(latom, jatom) >= 1)) cycle
1030 k_atom = atom_of_kind(katom)
1031 forces_map(3, 1) = kkind
1032 forces_map(3, 2) = k_atom
1033
1034 l_atom = atom_of_kind(latom)
1035 forces_map(4, 1) = lkind
1036 forces_map(4, 2) = l_atom
1037
1038 IF (nspins == 1) THEN
1039 fac = 0.25_dp*hf_fraction
1040 ELSE
1041 fac = 0.5_dp*hf_fraction
1042 END IF
1043 !calculate symmetry_factor
1044 symm_fac = 0.25_dp
1045 IF (iatom == jatom) symm_fac = symm_fac*2.0_dp
1046 IF (katom == latom) symm_fac = symm_fac*2.0_dp
1047 IF (iatom == katom .AND. jatom == latom .AND. &
1048 iatom /= jatom .AND. katom /= latom) symm_fac = symm_fac*2.0_dp
1049 IF (iatom == katom .AND. iatom == jatom .AND. &
1050 katom == latom) symm_fac = symm_fac*2.0_dp
1051
1052 symm_fac = 1.0_dp/symm_fac
1053 fac = fac*symm_fac
1054
1055 lc_max => basis_parameter(kkind)%lmax
1056 lc_min => basis_parameter(kkind)%lmin
1057 npgfc => basis_parameter(kkind)%npgf
1058 zetc => basis_parameter(kkind)%zet
1059 nsgfc => basis_parameter(kkind)%nsgf
1060 sphi_c_ext => basis_parameter(kkind)%sphi_ext(:, :, :, :)
1061 nsgfl_c => basis_parameter(kkind)%nsgfl
1062 sphi_c_u1 = ubound(sphi_c_ext, 1)
1063 sphi_c_u2 = ubound(sphi_c_ext, 2)
1064 sphi_c_u3 = ubound(sphi_c_ext, 3)
1065
1066 ld_max => basis_parameter(lkind)%lmax
1067 ld_min => basis_parameter(lkind)%lmin
1068 npgfd => basis_parameter(lkind)%npgf
1069 zetd => basis_parameter(lkind)%zet
1070 nsgfd => basis_parameter(lkind)%nsgf
1071 sphi_d_ext => basis_parameter(lkind)%sphi_ext(:, :, :, :)
1072 nsgfl_d => basis_parameter(lkind)%nsgfl
1073 sphi_d_u1 = ubound(sphi_d_ext, 1)
1074 sphi_d_u2 = ubound(sphi_d_ext, 2)
1075 sphi_d_u3 = ubound(sphi_d_ext, 3)
1076
1077 atomic_offset_bd = shm_atomic_block_offset(jatom, latom)
1078 atomic_offset_bc = shm_atomic_block_offset(jatom, katom)
1079 atomic_offset_ad = shm_atomic_block_offset(iatom, latom)
1080 atomic_offset_ac = shm_atomic_block_offset(iatom, katom)
1081
1082 IF (jatom < latom) THEN
1083 offset_bd_set => shm_set_offset(:, :, lkind, jkind)
1084 ELSE
1085 offset_bd_set => shm_set_offset(:, :, jkind, lkind)
1086 END IF
1087 IF (jatom < katom) THEN
1088 offset_bc_set => shm_set_offset(:, :, kkind, jkind)
1089 ELSE
1090 offset_bc_set => shm_set_offset(:, :, jkind, kkind)
1091 END IF
1092 IF (iatom < latom) THEN
1093 offset_ad_set => shm_set_offset(:, :, lkind, ikind)
1094 ELSE
1095 offset_ad_set => shm_set_offset(:, :, ikind, lkind)
1096 END IF
1097 IF (iatom < katom) THEN
1098 offset_ac_set => shm_set_offset(:, :, kkind, ikind)
1099 ELSE
1100 offset_ac_set => shm_set_offset(:, :, ikind, kkind)
1101 END IF
1102
1103 IF (screen_pmat_forces) THEN
1104 swap_id = 16
1105 kind_kind_idx = int(get_1d_idx(kkind, ikind, int(nkind, int_8)))
1106 IF (ikind >= kkind) THEN
1107 ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1108 actual_x_data%map_atom_to_kind_atom(katom), &
1109 actual_x_data%map_atom_to_kind_atom(iatom))
1110 ELSE
1111 ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1112 actual_x_data%map_atom_to_kind_atom(iatom), &
1113 actual_x_data%map_atom_to_kind_atom(katom))
1114 swap_id = swap_id + 1
1115 END IF
1116 kind_kind_idx = int(get_1d_idx(lkind, jkind, int(nkind, int_8)))
1117 IF (jkind >= lkind) THEN
1118 ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1119 actual_x_data%map_atom_to_kind_atom(latom), &
1120 actual_x_data%map_atom_to_kind_atom(jatom))
1121 ELSE
1122 ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1123 actual_x_data%map_atom_to_kind_atom(jatom), &
1124 actual_x_data%map_atom_to_kind_atom(latom))
1125 swap_id = swap_id + 2
1126 END IF
1127 kind_kind_idx = int(get_1d_idx(lkind, ikind, int(nkind, int_8)))
1128 IF (ikind >= lkind) THEN
1129 ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1130 actual_x_data%map_atom_to_kind_atom(latom), &
1131 actual_x_data%map_atom_to_kind_atom(iatom))
1132 ELSE
1133 ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1134 actual_x_data%map_atom_to_kind_atom(iatom), &
1135 actual_x_data%map_atom_to_kind_atom(latom))
1136 swap_id = swap_id + 4
1137 END IF
1138 kind_kind_idx = int(get_1d_idx(kkind, jkind, int(nkind, int_8)))
1139 IF (jkind >= kkind) THEN
1140 ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1141 actual_x_data%map_atom_to_kind_atom(katom), &
1142 actual_x_data%map_atom_to_kind_atom(jatom))
1143 ELSE
1144 ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1145 actual_x_data%map_atom_to_kind_atom(jatom), &
1146 actual_x_data%map_atom_to_kind_atom(katom))
1147 swap_id = swap_id + 8
1148 END IF
1149 END IF
1150
1151 !! At this stage, check for memory used in compression
1152
1153 IF (actual_x_data%memory_parameter%recalc_forces) THEN
1154 IF (treat_forces_in_core) THEN
1155 ! ** We know the maximum amount of integrals that we can store per MPI-process
1156 ! ** Now we need to sum the current memory usage among all openMP threads
1157 ! ** We can just read what is currently stored on the corresponding x_data type
1158 ! ** If this is thread i and it tries to read the data from thread j, that is
1159 ! ** currently writing that data, we just dont care, because the possible error
1160 ! ** is of the order of CACHE_SIZE
1161 mem_compression_counter = 0
1162 DO i = 1, n_threads
1163!$OMP ATOMIC READ
1164 tmp_i4 = my_x_data(irep, i)%memory_parameter%actual_memory_usage
1165 mem_compression_counter = mem_compression_counter + &
1166 tmp_i4*memory_parameter%cache_size
1167 END DO
1168 IF (mem_compression_counter + memory_parameter%final_comp_counter_energy &
1169 > memory_parameter%max_compression_counter) THEN
1170 buffer_overflow = .true.
1171 IF (do_dynamic_load_balancing) THEN
1172 distribution_forces%ram_counter = counter
1173 ELSE
1174 memory_parameter%ram_counter_forces = counter
1175 END IF
1176 ELSE
1177 counter = counter + 1
1178 buffer_overflow = .false.
1179 END IF
1180 END IF
1181 ELSE
1182 IF (treat_forces_in_core) THEN
1183 IF (do_dynamic_load_balancing) THEN
1184 IF (distribution_forces%ram_counter == counter) THEN
1185 buffer_overflow = .true.
1186 ELSE
1187 counter = counter + 1
1188 buffer_overflow = .false.
1189 END IF
1190 ELSE
1191 IF (memory_parameter%ram_counter_forces == counter) THEN
1192 buffer_overflow = .true.
1193 ELSE
1194 counter = counter + 1
1195 buffer_overflow = .false.
1196 END IF
1197 END IF
1198 END IF
1199 END IF
1200
1201 DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
1202 iset = set_list_ij(i_set_list_ij)%pair(1)
1203 jset = set_list_ij(i_set_list_ij)%pair(2)
1204
1205 ncob = npgfb(jset)*ncoset(lb_max(jset))
1206 max_val1 = screen_coeffs_set(jset, iset, jkind, ikind)%x(1)*rab2 + &
1207 screen_coeffs_set(jset, iset, jkind, ikind)%x(2)
1208 !! Near field screening
1209 IF (max_val1 + (screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
1210 screen_coeffs_kind(lkind, kkind)%x(2)) + pmax_atom < log10_eps_schwarz) cycle
1211 sphi_a_ext_set => sphi_a_ext(:, :, :, iset)
1212 sphi_b_ext_set => sphi_b_ext(:, :, :, jset)
1213
1214 DO i_set_list_kl = i_set_list_kl_start, i_set_list_kl_stop
1215 kset = set_list_kl(i_set_list_kl)%pair(1)
1216 lset = set_list_kl(i_set_list_kl)%pair(2)
1217
1218 max_val2_set = (screen_coeffs_set(lset, kset, lkind, kkind)%x(1)*rcd2 + &
1219 screen_coeffs_set(lset, kset, lkind, kkind)%x(2))
1220 max_val2 = max_val1 + max_val2_set
1221
1222 !! Near field screening
1223 IF (max_val2 + pmax_atom < log10_eps_schwarz) cycle
1224 sphi_c_ext_set => sphi_c_ext(:, :, :, kset)
1225 sphi_d_ext_set => sphi_d_ext(:, :, :, lset)
1226
1227 IF (screen_pmat_forces) THEN
1228 CALL get_pmax_val(ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, &
1229 iset, jset, kset, lset, &
1230 pmax_tmp, swap_id)
1231
1232 log10_pmax = log_2 + pmax_tmp
1233 ELSE
1234 log10_pmax = 0.0_dp
1235 END IF
1236
1237 max_val2 = max_val2 + log10_pmax
1238 IF (max_val2 < log10_eps_schwarz) cycle
1239
1240 pmax_entry = exp(log10_pmax*ln_10)
1241 !! store current number of integrals, update total number and number of integrals in buffer
1242 current_counter = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)*12
1243 IF (buffer_overflow) THEN
1244 neris_onthefly = neris_onthefly + current_counter
1245 END IF
1246
1247 !! Get integrals from buffer and update Kohn-Sham matrix
1248 IF (.NOT. buffer_overflow .AND. .NOT. actual_x_data%memory_parameter%recalc_forces) THEN
1249 nints = current_counter
1250 CALL hfx_get_single_cache_element(estimate_to_store_int, 6, &
1251 maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1252 use_disk_storage)
1253 spherical_estimate = set_exponent(1.0_dp, estimate_to_store_int + 1)
1254! IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
1255 IF (.NOT. use_virial) THEN
1256 IF (spherical_estimate*pmax_entry < eps_schwarz) cycle
1257 END IF
1258 nbits = exponent(anint(spherical_estimate*pmax_entry/eps_storage)) + 1
1259 buffer_left = nints
1260 buffer_start = 1
1261 neris_incore = neris_incore + int(nints, int_8)
1262 DO WHILE (buffer_left > 0)
1263 buffer_size = min(buffer_left, cache_size)
1264 CALL hfx_get_mult_cache_elements(primitive_forces(buffer_start), &
1265 buffer_size, nbits, &
1266 integral_caches(nbits), &
1267 integral_containers(nbits), &
1268 eps_storage, pmax_entry, &
1269 memory_parameter%actual_memory_usage, &
1270 use_disk_storage)
1271 buffer_left = buffer_left - buffer_size
1272 buffer_start = buffer_start + buffer_size
1273 END DO
1274 END IF
1275 !! Calculate integrals if we run out of buffer or the geometry did change
1276 IF (actual_x_data%memory_parameter%recalc_forces .OR. buffer_overflow) THEN
1277 max_contraction_val = max_contraction(iset, iatom)* &
1278 max_contraction(jset, jatom)* &
1279 max_contraction(kset, katom)* &
1280 max_contraction(lset, latom)* &
1281 pmax_entry
1282 tmp_r_1 => radii_pgf(:, :, jset, iset, jkind, ikind)
1283 tmp_r_2 => radii_pgf(:, :, lset, kset, lkind, kkind)
1284 tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
1285 tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
1286
1287 CALL forces4(private_deriv, ra, rb, rc, rd, npgfa(iset), npgfb(jset), &
1288 npgfc(kset), npgfd(lset), &
1289 la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
1290 lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
1291 nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1292 sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1293 sphi_b_u1, sphi_b_u2, sphi_b_u3, &
1294 sphi_c_u1, sphi_c_u2, sphi_c_u3, &
1295 sphi_d_u1, sphi_d_u2, sphi_d_u3, &
1296 zeta(1:npgfa(iset), iset), zetb(1:npgfb(jset), jset), &
1297 zetc(1:npgfc(kset), kset), zetd(1:npgfd(lset), lset), &
1298 primitive_forces, &
1299 potential_parameter, &
1300 actual_x_data%neighbor_cells, &
1301 screen_coeffs_set(jset, iset, jkind, ikind)%x, &
1302 screen_coeffs_set(lset, kset, lkind, kkind)%x, eps_schwarz, &
1303 max_contraction_val, cartesian_estimate, cell, neris_tmp, &
1304 log10_pmax, log10_eps_schwarz, &
1305 tmp_r_1, tmp_r_2, tmp_screen_pgf1, tmp_screen_pgf2, &
1306 pgf_list_ij, pgf_list_kl, pgf_product_list, &
1307 nsgfl_a(:, iset), nsgfl_b(:, jset), &
1308 nsgfl_c(:, kset), nsgfl_d(:, lset), &
1309 sphi_a_ext_set, sphi_b_ext_set, sphi_c_ext_set, sphi_d_ext_set, &
1310 ede_work, ede_work2, ede_work_forces, &
1311 ede_buffer1, ede_buffer2, ede_primitives_tmp, &
1312 nimages, do_periodic, use_virial, ede_work_virial, ede_work2_virial, &
1313 ede_primitives_tmp_virial, primitive_forces_virial, &
1314 cartesian_estimate_virial)
1315
1316 nints = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)*12
1317 neris_total = neris_total + nints
1318 nprim_ints = nprim_ints + neris_tmp
1319! IF (cartesian_estimate == 0.0_dp) cartesian_estimate = TINY(cartesian_estimate)
1320! estimate_to_store_int = EXPONENT(cartesian_estimate)
1321! estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
1322! cartesian_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int+1)
1323! IF (.NOT. buffer_overflow .AND. actual_x_data%memory_parameter%recalc_forces) THEN
1324! IF (cartesian_estimate < eps_schwarz) THEN
1325! CALL hfx_add_single_cache_element( &
1326! estimate_to_store_int, 6, &
1327! maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1328! use_disk_storage, max_val_memory)
1329! END IF
1330! END IF
1331!
1332! IF (.NOT. use_virial) THEN
1333! IF (cartesian_estimate < eps_schwarz) CYCLE
1334! END IF
1335
1336 !! Compress the array for storage
1337 spherical_estimate = 0.0_dp
1338 DO i = 1, nints
1339 spherical_estimate = max(spherical_estimate, abs(primitive_forces(i)))
1340 END DO
1341
1342 IF (use_virial) THEN
1343 spherical_estimate_virial = 0.0_dp
1344 DO i = 1, 3*nints
1345 spherical_estimate_virial = max(spherical_estimate_virial, abs(primitive_forces_virial(i)))
1346 END DO
1347 END IF
1348
1349 IF (spherical_estimate == 0.0_dp) spherical_estimate = tiny(spherical_estimate)
1350 estimate_to_store_int = exponent(spherical_estimate)
1351 estimate_to_store_int = max(estimate_to_store_int, -15_int_8)
1352 IF (.NOT. buffer_overflow .AND. actual_x_data%memory_parameter%recalc_forces) THEN
1353 CALL hfx_add_single_cache_element(estimate_to_store_int, 6, &
1354 maxval_cache, maxval_container, &
1355 memory_parameter%actual_memory_usage, &
1356 use_disk_storage, max_val_memory)
1357 END IF
1358 spherical_estimate = set_exponent(1.0_dp, estimate_to_store_int + 1)
1359 IF (.NOT. use_virial) THEN
1360 IF (spherical_estimate*pmax_entry < eps_schwarz) cycle
1361 END IF
1362 IF (.NOT. buffer_overflow) THEN
1363 nbits = exponent(anint(spherical_estimate*pmax_entry/eps_storage)) + 1
1364 buffer_left = nints
1365 buffer_start = 1
1366! neris_incore = neris_incore+nints
1367 neris_incore = neris_incore + int(nints, int_8)
1368
1369 DO WHILE (buffer_left > 0)
1370 buffer_size = min(buffer_left, cache_size)
1371 CALL hfx_add_mult_cache_elements(primitive_forces(buffer_start), &
1372 buffer_size, nbits, &
1373 integral_caches(nbits), &
1374 integral_containers(nbits), &
1375 eps_storage, pmax_entry, &
1376 memory_parameter%actual_memory_usage, &
1377 use_disk_storage)
1378 buffer_left = buffer_left - buffer_size
1379 buffer_start = buffer_start + buffer_size
1380 END DO
1381 ELSE
1382 !! In order to be consistent with in-core part, round all the eris wrt. eps_schwarz
1383 !! but only if we treat forces in-core
1384 IF (treat_forces_in_core) THEN
1385 DO i = 1, nints
1386 primitive_forces(i) = primitive_forces(i)*pmax_entry
1387 IF (abs(primitive_forces(i)) > eps_storage) THEN
1388 primitive_forces(i) = anint(primitive_forces(i)/eps_storage, dp)*eps_storage/pmax_entry
1389 ELSE
1390 primitive_forces(i) = 0.0_dp
1391 END IF
1392 END DO
1393 END IF
1394 END IF
1395 END IF
1396 IF (with_resp_density) THEN
1397 CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1398 full_density_alpha(:, 1), pbd_buf, pbc_buf, pad_buf, pac_buf, &
1399 iatom, jatom, katom, latom, &
1400 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1401 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1402 atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1403 CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1404 full_density_resp, pbd_buf_resp, pbc_buf_resp, pad_buf_resp, pac_buf_resp, &
1405 iatom, jatom, katom, latom, &
1406 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1407 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1408 atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1409 ELSE
1410 CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1411 full_density_alpha(:, 1), pbd_buf, pbc_buf, pad_buf, pac_buf, &
1412 iatom, jatom, katom, latom, &
1413 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1414 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1415 atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1416 END IF
1417 IF (nspins == 2) THEN
1418 IF (with_resp_density) THEN
1419 CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1420 full_density_beta(:, 1), pbd_buf_beta, pbc_buf_beta, &
1421 pad_buf_beta, pac_buf_beta, iatom, jatom, katom, latom, &
1422 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1423 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1424 atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1425 CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1426 full_density_resp_beta, pbd_buf_resp_beta, pbc_buf_resp_beta, &
1427 pad_buf_resp_beta, pac_buf_resp_beta, iatom, jatom, katom, latom, &
1428 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1429 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1430 atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1431 ELSE
1432 CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1433 full_density_beta(:, 1), pbd_buf_beta, pbc_buf_beta, pad_buf_beta, &
1434 pac_buf_beta, iatom, jatom, katom, latom, &
1435 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1436 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1437 atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1438 END IF
1439 END IF
1440 IF (spherical_estimate*pmax_entry >= eps_schwarz) THEN
1441 DO coord = 1, 12
1442 t2 => primitive_forces((coord - 1)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset) + 1: &
1443 coord*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset))
1444
1445 IF (with_resp_density) THEN
1446 CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1447 pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
1448 t2, force, forces_map, coord, &
1449 pbd_buf_resp, pbc_buf_resp, pad_buf_resp, pac_buf_resp, &
1450 my_resp_only)
1451 ELSE
1452 CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1453 pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
1454 t2, force, forces_map, coord)
1455 END IF
1456 IF (nspins == 2) THEN
1457 IF (with_resp_density) THEN
1458 CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1459 pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
1460 fac, t2, force, forces_map, coord, &
1461 pbd_buf_resp_beta, pbc_buf_resp_beta, &
1462 pad_buf_resp_beta, pac_buf_resp_beta, &
1463 my_resp_only)
1464 ELSE
1465 CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1466 pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, fac, &
1467 t2, force, forces_map, coord)
1468 END IF
1469 END IF
1470 END DO !coord
1471 END IF
1472 IF (use_virial .AND. spherical_estimate_virial*pmax_entry >= eps_schwarz) THEN
1473 DO coord = 1, 12
1474 DO i = 1, 3
1475 t2 => primitive_forces_virial( &
1476 ((i - 1)*12 + coord - 1)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset) + 1: &
1477 ((i - 1)*12 + coord)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset))
1478 IF (with_resp_density) THEN
1479 CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1480 pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
1481 t2, tmp_virial, coord, i, &
1482 pbd_buf_resp, pbc_buf_resp, pad_buf_resp, &
1483 pac_buf_resp, my_resp_only)
1484 ELSE
1485 CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1486 pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
1487 t2, tmp_virial, coord, i)
1488 END IF
1489 IF (nspins == 2) THEN
1490 IF (with_resp_density) THEN
1491 CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1492 pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
1493 fac, t2, tmp_virial, coord, i, &
1494 pbd_buf_resp_beta, pbc_buf_resp_beta, &
1495 pad_buf_resp_beta, pac_buf_resp_beta, &
1496 my_resp_only)
1497 ELSE
1498 CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1499 pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
1500 fac, t2, tmp_virial, coord, i)
1501 END IF
1502 END IF
1503 END DO
1504 END DO !coord
1505 END IF
1506
1507 END DO !i_set_list_kl
1508 END DO !i_set_list_ij
1509 END DO !i_list_kl
1510 END DO !i_list_ij
1511 END DO atomic_blocks
1512 bintime_stop = m_walltime()
1513 distribution_forces%time_forces = bintime_stop - bintime_start
1514!$OMP MASTER
1515 CALL timestop(handle_bin)
1516!$OMP END MASTER
1517 END DO !bin
1518
1519!$OMP MASTER
1520 logger => cp_get_default_logger()
1521 do_print_load_balance_info = .false.
1522 do_print_load_balance_info = btest(cp_print_key_should_output(logger%iter_info, hfx_section, &
1523 "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO"), cp_p_file)
1524!$OMP END MASTER
1525!$OMP BARRIER
1526 IF (do_print_load_balance_info) THEN
1527 iw = -1
1528!$OMP MASTER
1529 iw = cp_print_key_unit_nr(logger, hfx_section, "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO", &
1530 extension=".scfLog")
1531!$OMP END MASTER
1532
1533 CALL collect_load_balance_info(para_env, actual_x_data, iw, n_threads, i_thread, &
1535
1536!$OMP MASTER
1537 CALL cp_print_key_finished_output(iw, logger, hfx_section, &
1538 "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO")
1539!$OMP END MASTER
1540 END IF
1541
1542 IF (use_virial) THEN
1543 DO i = 1, 3
1544 DO j = 1, 3
1545 DO k = 1, 3
1546!$OMP ATOMIC
1547 virial%pv_fock_4c(i, j) = virial%pv_fock_4c(i, j) + tmp_virial(i, k)*cell%hmat(j, k)
1548 END DO
1549 END DO
1550 END DO
1551 END IF
1552
1553!$OMP BARRIER
1554 !! Get some number about ERIS
1555!$OMP ATOMIC
1556 shm_neris_total = shm_neris_total + neris_total
1557!$OMP ATOMIC
1558 shm_neris_onthefly = shm_neris_onthefly + neris_onthefly
1559!$OMP ATOMIC
1560 shm_nprim_ints = shm_nprim_ints + nprim_ints
1561
1562 storage_counter_integrals = memory_parameter%actual_memory_usage* &
1563 memory_parameter%cache_size
1564 stor_count_max_val = max_val_memory*memory_parameter%cache_size
1565!$OMP ATOMIC
1566 shm_storage_counter_integrals = shm_storage_counter_integrals + storage_counter_integrals
1567!$OMP ATOMIC
1568 shm_neris_incore = shm_neris_incore + neris_incore
1569!$OMP ATOMIC
1570 shm_stor_count_max_val = shm_stor_count_max_val + stor_count_max_val
1571!$OMP BARRIER
1572
1573 ! IF(with_resp_density) THEN
1574 ! ! restore original load_balance_parameter
1575 ! actual_x_data%load_balance_parameter = load_balance_parameter_energy
1576 ! DEALLOCATE(load_balance_parameter_energy)
1577 ! END IF
1578
1579!$OMP MASTER
1580 !! Print some memeory information if this is the first step
1581 IF (actual_x_data%memory_parameter%recalc_forces) THEN
1582 tmp_i8(1:6) = (/shm_storage_counter_integrals, shm_neris_onthefly, shm_neris_incore, &
1583 shm_neris_total, shm_nprim_ints, shm_stor_count_max_val/)
1584 CALL para_env%sum(tmp_i8)
1585 shm_storage_counter_integrals = tmp_i8(1)
1586 shm_neris_onthefly = tmp_i8(2)
1587 shm_neris_incore = tmp_i8(3)
1588 shm_neris_total = tmp_i8(4)
1589 shm_nprim_ints = tmp_i8(5)
1590 shm_stor_count_max_val = tmp_i8(6)
1591 mem_eris = (shm_storage_counter_integrals + 128*1024 - 1)/1024/128
1592 compression_factor = real(shm_neris_incore, dp)/real(shm_storage_counter_integrals, dp)
1593 mem_max_val = (shm_stor_count_max_val + 128*1024 - 1)/1024/128
1594
1595 IF (shm_neris_incore == 0) THEN
1596 mem_eris = 0
1597 compression_factor = 0.0_dp
1598 END IF
1599
1600 logger => cp_get_default_logger()
1601
1602 iw = cp_print_key_unit_nr(logger, hfx_section, "HF_INFO", &
1603 extension=".scfLog")
1604 IF (iw > 0) THEN
1605 WRITE (unit=iw, fmt="(/,(T3,A,T65,I16))") &
1606 "HFX_MEM_INFO| Number of cart. primitive DERIV's calculated: ", shm_nprim_ints
1607
1608 WRITE (unit=iw, fmt="((T3,A,T60,I21))") &
1609 "HFX_MEM_INFO| Number of sph. DERIV's calculated: ", shm_neris_total
1610
1611 WRITE (unit=iw, fmt="((T3,A,T60,I21))") &
1612 "HFX_MEM_INFO| Number of sph. DERIV's stored in-core: ", shm_neris_incore
1613
1614 WRITE (unit=iw, fmt="((T3,A,T62,I19))") &
1615 "HFX_MEM_INFO| Number of sph. DERIV's calculated on the fly: ", shm_neris_onthefly
1616
1617 WRITE (unit=iw, fmt="((T3,A,T60,I21))") &
1618 "HFX_MEM_INFO| Total memory consumption DERIV's RAM [MiB]: ", mem_eris
1619
1620 WRITE (unit=iw, fmt="((T3,A,T60,I21))") &
1621 "HFX_MEM_INFO| Whereof max-vals [MiB]: ", mem_max_val
1622
1623 WRITE (unit=iw, fmt="((T3,A,T60,F21.2),/)") &
1624 "HFX_MEM_INFO| Total compression factor DERIV's RAM: ", compression_factor
1625 END IF
1626
1627 CALL cp_print_key_finished_output(iw, logger, hfx_section, &
1628 "HF_INFO")
1629 END IF
1630!$OMP END MASTER
1631
1632 !! flush caches if the geometry changed
1633 IF (do_dynamic_load_balancing) THEN
1634 my_bin_size = SIZE(actual_x_data%distribution_forces)
1635 ELSE
1636 my_bin_size = 1
1637 END IF
1638
1639 IF (actual_x_data%memory_parameter%recalc_forces) THEN
1640 IF (treat_forces_in_core) THEN
1641 DO bin = 1, my_bin_size
1642 maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
1643 maxval_container => actual_x_data%store_forces%maxval_container(bin)
1644 integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
1645 integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
1646 CALL hfx_flush_last_cache(bits_max_val, maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1647 .false.)
1648 DO i = 1, 64
1649 CALL hfx_flush_last_cache(i, integral_caches(i), integral_containers(i), &
1650 memory_parameter%actual_memory_usage, .false.)
1651 END DO
1652 END DO
1653 END IF
1654 END IF
1655 !! reset all caches except we calculate all on the fly
1656 IF (treat_forces_in_core) THEN
1657 DO bin = 1, my_bin_size
1658 maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
1659 maxval_container => actual_x_data%store_forces%maxval_container(bin)
1660 integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
1661 integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
1662
1663 CALL hfx_reset_cache_and_container(maxval_cache, maxval_container, memory_parameter%actual_memory_usage, .false.)
1664 DO i = 1, 64
1665 CALL hfx_reset_cache_and_container(integral_caches(i), integral_containers(i), &
1666 memory_parameter%actual_memory_usage, &
1667 .false.)
1668 END DO
1669 END DO
1670 END IF
1671
1672 IF (actual_x_data%memory_parameter%recalc_forces) THEN
1673 actual_x_data%memory_parameter%recalc_forces = .false.
1674 END IF
1675
1676!$OMP BARRIER
1677!$OMP MASTER
1678 CALL timestop(handle_main)
1679!$OMP END MASTER
1680!$OMP BARRIER
1681 DEALLOCATE (last_sgf_global)
1682!$OMP MASTER
1683 DEALLOCATE (full_density_alpha)
1684 IF (with_resp_density) THEN
1685 DEALLOCATE (full_density_resp)
1686 END IF
1687 IF (nspins == 2) THEN
1688 DEALLOCATE (full_density_beta)
1689 IF (with_resp_density) THEN
1690 DEALLOCATE (full_density_resp_beta)
1691 END IF
1692 END IF
1693 IF (do_dynamic_load_balancing) THEN
1694 DEALLOCATE (actual_x_data%task_list)
1695 END IF
1696!$OMP END MASTER
1697 DEALLOCATE (primitive_forces)
1698 DEALLOCATE (atom_of_kind, kind_of)
1699 DEALLOCATE (max_contraction)
1700 DEALLOCATE (pbd_buf)
1701 DEALLOCATE (pbc_buf)
1702 DEALLOCATE (pad_buf)
1703 DEALLOCATE (pac_buf)
1704
1705 IF (with_resp_density) THEN
1706 DEALLOCATE (pbd_buf_resp)
1707 DEALLOCATE (pbc_buf_resp)
1708 DEALLOCATE (pad_buf_resp)
1709 DEALLOCATE (pac_buf_resp)
1710 END IF
1711
1712 DO i = 1, max_pgf**2
1713 DEALLOCATE (pgf_list_ij(i)%image_list)
1714 DEALLOCATE (pgf_list_kl(i)%image_list)
1715 END DO
1716
1717 DEALLOCATE (pgf_list_ij)
1718 DEALLOCATE (pgf_list_kl)
1719 DEALLOCATE (pgf_product_list)
1720
1721 DEALLOCATE (set_list_ij, set_list_kl)
1722
1723 DEALLOCATE (primitive_forces_virial)
1724
1725 DEALLOCATE (ede_work, ede_work2, ede_work_forces, ede_buffer1, ede_buffer2, ede_primitives_tmp)
1726
1727 DEALLOCATE (ede_work_virial, ede_work2_virial, ede_primitives_tmp_virial)
1728
1729 DEALLOCATE (nimages)
1730
1731 IF (nspins == 2) THEN
1732 DEALLOCATE (pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta)
1733 IF (with_resp_density) THEN
1734 DEALLOCATE (pbd_buf_resp_beta)
1735 DEALLOCATE (pbc_buf_resp_beta)
1736 DEALLOCATE (pad_buf_resp_beta)
1737 DEALLOCATE (pac_buf_resp_beta)
1738 END IF
1739 END IF
1740
1741 !
1742 ! this wraps to free_libint, but currently causes a segfault
1743 ! as a result, we don't call it, but some memory remains allocated
1744 !
1745 ! CALL terminate_libderiv(private_deriv)
1746
1747!$OMP END PARALLEL
1748
1749 CALL timestop(handle)
1750
1751 END SUBROUTINE derivatives_four_center
1752
1753! **************************************************************************************************
1754!> \brief ...
1755!> \param deriv ...
1756!> \param ra ...
1757!> \param rb ...
1758!> \param rc ...
1759!> \param rd ...
1760!> \param npgfa ...
1761!> \param npgfb ...
1762!> \param npgfc ...
1763!> \param npgfd ...
1764!> \param la_min ...
1765!> \param la_max ...
1766!> \param lb_min ...
1767!> \param lb_max ...
1768!> \param lc_min ...
1769!> \param lc_max ...
1770!> \param ld_min ...
1771!> \param ld_max ...
1772!> \param nsgfa ...
1773!> \param nsgfb ...
1774!> \param nsgfc ...
1775!> \param nsgfd ...
1776!> \param sphi_a_u1 ...
1777!> \param sphi_a_u2 ...
1778!> \param sphi_a_u3 ...
1779!> \param sphi_b_u1 ...
1780!> \param sphi_b_u2 ...
1781!> \param sphi_b_u3 ...
1782!> \param sphi_c_u1 ...
1783!> \param sphi_c_u2 ...
1784!> \param sphi_c_u3 ...
1785!> \param sphi_d_u1 ...
1786!> \param sphi_d_u2 ...
1787!> \param sphi_d_u3 ...
1788!> \param zeta ...
1789!> \param zetb ...
1790!> \param zetc ...
1791!> \param zetd ...
1792!> \param primitive_integrals ...
1793!> \param potential_parameter ...
1794!> \param neighbor_cells ...
1795!> \param screen1 ...
1796!> \param screen2 ...
1797!> \param eps_schwarz ...
1798!> \param max_contraction_val ...
1799!> \param cart_estimate ...
1800!> \param cell ...
1801!> \param neris_tmp ...
1802!> \param log10_pmax ...
1803!> \param log10_eps_schwarz ...
1804!> \param R1_pgf ...
1805!> \param R2_pgf ...
1806!> \param pgf1 ...
1807!> \param pgf2 ...
1808!> \param pgf_list_ij ...
1809!> \param pgf_list_kl ...
1810!> \param pgf_product_list ...
1811!> \param nsgfl_a ...
1812!> \param nsgfl_b ...
1813!> \param nsgfl_c ...
1814!> \param nsgfl_d ...
1815!> \param sphi_a_ext ...
1816!> \param sphi_b_ext ...
1817!> \param sphi_c_ext ...
1818!> \param sphi_d_ext ...
1819!> \param ede_work ...
1820!> \param ede_work2 ...
1821!> \param ede_work_forces ...
1822!> \param ede_buffer1 ...
1823!> \param ede_buffer2 ...
1824!> \param ede_primitives_tmp ...
1825!> \param nimages ...
1826!> \param do_periodic ...
1827!> \param use_virial ...
1828!> \param ede_work_virial ...
1829!> \param ede_work2_virial ...
1830!> \param ede_primitives_tmp_virial ...
1831!> \param primitive_integrals_virial ...
1832!> \param cart_estimate_virial ...
1833! **************************************************************************************************
1834 SUBROUTINE forces4(deriv, ra, rb, rc, rd, npgfa, npgfb, npgfc, npgfd, &
1835 la_min, la_max, lb_min, lb_max, &
1836 lc_min, lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, &
1837 sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1838 sphi_b_u1, sphi_b_u2, sphi_b_u3, &
1839 sphi_c_u1, sphi_c_u2, sphi_c_u3, &
1840 sphi_d_u1, sphi_d_u2, sphi_d_u3, &
1841 zeta, zetb, zetc, zetd, &
1842 primitive_integrals, &
1843 potential_parameter, neighbor_cells, &
1844 screen1, screen2, eps_schwarz, max_contraction_val, &
1845 cart_estimate, cell, neris_tmp, log10_pmax, &
1846 log10_eps_schwarz, R1_pgf, R2_pgf, pgf1, pgf2, &
1847 pgf_list_ij, pgf_list_kl, &
1848 pgf_product_list, &
1849 nsgfl_a, nsgfl_b, nsgfl_c, &
1850 nsgfl_d, &
1851 sphi_a_ext, sphi_b_ext, sphi_c_ext, sphi_d_ext, &
1852 ede_work, ede_work2, ede_work_forces, &
1853 ede_buffer1, ede_buffer2, ede_primitives_tmp, &
1854 nimages, do_periodic, use_virial, ede_work_virial, &
1855 ede_work2_virial, ede_primitives_tmp_virial, primitive_integrals_virial, &
1856 cart_estimate_virial)
1857
1858 TYPE(cp_libint_t) :: deriv
1859 REAL(dp), INTENT(IN) :: ra(3), rb(3), rc(3), rd(3)
1860 INTEGER, INTENT(IN) :: npgfa, npgfb, npgfc, npgfd, la_min, la_max, lb_min, lb_max, lc_min, &
1861 lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1862 sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, sphi_d_u2, &
1863 sphi_d_u3
1864 REAL(dp), DIMENSION(1:npgfa), INTENT(IN) :: zeta
1865 REAL(dp), DIMENSION(1:npgfb), INTENT(IN) :: zetb
1866 REAL(dp), DIMENSION(1:npgfc), INTENT(IN) :: zetc
1867 REAL(dp), DIMENSION(1:npgfd), INTENT(IN) :: zetd
1868 REAL(dp), &
1869 DIMENSION(nsgfa, nsgfb, nsgfc, nsgfd, 12) :: primitive_integrals
1870 TYPE(hfx_potential_type) :: potential_parameter
1871 TYPE(hfx_cell_type), DIMENSION(:), POINTER :: neighbor_cells
1872 REAL(dp), INTENT(IN) :: screen1(2), screen2(2), eps_schwarz, &
1873 max_contraction_val
1874 REAL(dp) :: cart_estimate
1875 TYPE(cell_type), POINTER :: cell
1876 INTEGER(int_8) :: neris_tmp
1877 REAL(dp), INTENT(IN) :: log10_pmax, log10_eps_schwarz
1878 TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
1879 POINTER :: r1_pgf, r2_pgf, pgf1, pgf2
1880 TYPE(hfx_pgf_list), DIMENSION(*) :: pgf_list_ij, pgf_list_kl
1881 TYPE(hfx_pgf_product_list), ALLOCATABLE, &
1882 DIMENSION(:), INTENT(INOUT) :: pgf_product_list
1883 INTEGER, DIMENSION(0:), INTENT(IN) :: nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d
1884 REAL(dp), INTENT(IN) :: sphi_a_ext(sphi_a_u1, sphi_a_u2, sphi_a_u3), &
1885 sphi_b_ext(sphi_b_u1, sphi_b_u2, sphi_b_u3), sphi_c_ext(sphi_c_u1, sphi_c_u2, sphi_c_u3), &
1886 sphi_d_ext(sphi_d_u1, sphi_d_u2, sphi_d_u3)
1887 REAL(dp), DIMENSION(*) :: ede_work, ede_work2, ede_work_forces, &
1888 ede_buffer1, ede_buffer2, &
1889 ede_primitives_tmp
1890 INTEGER, DIMENSION(*) :: nimages
1891 LOGICAL, INTENT(IN) :: do_periodic, use_virial
1892 REAL(dp), DIMENSION(*) :: ede_work_virial, ede_work2_virial, &
1893 ede_primitives_tmp_virial
1894 REAL(dp), &
1895 DIMENSION(nsgfa, nsgfb, nsgfc, nsgfd, 12, 3) :: primitive_integrals_virial
1896 REAL(dp) :: cart_estimate_virial
1897
1898 INTEGER :: ipgf, jpgf, kpgf, la, lb, lc, ld, list_ij, list_kl, lpgf, max_l, ncoa, ncob, &
1899 ncoc, ncod, nelements_ij, nelements_kl, nproducts, nsgfla, nsgflb, nsgflc, nsgfld, nsoa, &
1900 nsob, nsoc, nsod, s_offset_a, s_offset_b, s_offset_c, s_offset_d
1901 REAL(dp) :: etainv, tmp_max, tmp_max_virial, zeta_a, &
1902 zeta_b, zeta_c, zeta_d, zetainv
1903
1904 CALL build_pair_list_pgf(npgfa, npgfb, pgf_list_ij, zeta, zetb, screen1, screen2, &
1905 pgf1, r1_pgf, log10_pmax, log10_eps_schwarz, ra, rb, &
1906 nelements_ij, &
1907 neighbor_cells, nimages, do_periodic)
1908 CALL build_pair_list_pgf(npgfc, npgfd, pgf_list_kl, zetc, zetd, screen2, screen1, &
1909 pgf2, r2_pgf, log10_pmax, log10_eps_schwarz, rc, rd, &
1910 nelements_kl, &
1911 neighbor_cells, nimages, do_periodic)
1912
1913 cart_estimate = 0.0_dp
1914 primitive_integrals = 0.0_dp
1915 IF (use_virial) THEN
1916 primitive_integrals_virial = 0.0_dp
1917 cart_estimate_virial = 0.0_dp
1918 END IF
1919 neris_tmp = 0_int_8
1920 max_l = la_max + lb_max + lc_max + ld_max + 1
1921 DO list_ij = 1, nelements_ij
1922 zeta_a = pgf_list_ij(list_ij)%zeta
1923 zeta_b = pgf_list_ij(list_ij)%zetb
1924 zetainv = pgf_list_ij(list_ij)%ZetaInv
1925 ipgf = pgf_list_ij(list_ij)%ipgf
1926 jpgf = pgf_list_ij(list_ij)%jpgf
1927
1928 DO list_kl = 1, nelements_kl
1929 zeta_c = pgf_list_kl(list_kl)%zeta
1930 zeta_d = pgf_list_kl(list_kl)%zetb
1931 etainv = pgf_list_kl(list_kl)%ZetaInv
1932 kpgf = pgf_list_kl(list_kl)%ipgf
1933 lpgf = pgf_list_kl(list_kl)%jpgf
1934
1935 CALL build_pgf_product_list(pgf_list_ij(list_ij), pgf_list_kl(list_kl), pgf_product_list, &
1936 nproducts, log10_pmax, log10_eps_schwarz, neighbor_cells, cell, &
1937 potential_parameter, max_l, do_periodic)
1938 s_offset_a = 0
1939 DO la = la_min, la_max
1940 s_offset_b = 0
1941 ncoa = nco(la)
1942 nsgfla = nsgfl_a(la)
1943 nsoa = nso(la)
1944
1945 DO lb = lb_min, lb_max
1946 s_offset_c = 0
1947 ncob = nco(lb)
1948 nsgflb = nsgfl_b(lb)
1949 nsob = nso(lb)
1950
1951 DO lc = lc_min, lc_max
1952 s_offset_d = 0
1953 ncoc = nco(lc)
1954 nsgflc = nsgfl_c(lc)
1955 nsoc = nso(lc)
1956
1957 DO ld = ld_min, ld_max
1958 ncod = nco(ld)
1959 nsgfld = nsgfl_d(ld)
1960 nsod = nso(ld)
1961 tmp_max = 0.0_dp
1962 tmp_max_virial = 0.0_dp
1963 CALL evaluate_deriv_eri(deriv, nproducts, pgf_product_list, &
1964 la, lb, lc, ld, &
1965 ncoa, ncob, ncoc, ncod, &
1966 nsgfa, nsgfb, nsgfc, nsgfd, &
1967 primitive_integrals, &
1968 max_contraction_val, tmp_max, eps_schwarz, neris_tmp, &
1969 zeta_a, zeta_b, zeta_c, zeta_d, zetainv, etainv, &
1970 s_offset_a, s_offset_b, s_offset_c, s_offset_d, &
1971 nsgfla, nsgflb, nsgflc, nsgfld, nsoa, nsob, nsoc, nsod, &
1972 sphi_a_ext(1, la + 1, ipgf), &
1973 sphi_b_ext(1, lb + 1, jpgf), &
1974 sphi_c_ext(1, lc + 1, kpgf), &
1975 sphi_d_ext(1, ld + 1, lpgf), &
1976 ede_work, ede_work2, ede_work_forces, &
1977 ede_buffer1, ede_buffer2, ede_primitives_tmp, use_virial, &
1978 ede_work_virial, ede_work2_virial, ede_primitives_tmp_virial, &
1979 primitive_integrals_virial, cell, tmp_max_virial)
1980 cart_estimate = max(tmp_max, cart_estimate)
1981 IF (use_virial) cart_estimate_virial = max(tmp_max_virial, cart_estimate_virial)
1982 s_offset_d = s_offset_d + nsod*nsgfld
1983 END DO !ld
1984 s_offset_c = s_offset_c + nsoc*nsgflc
1985 END DO !lc
1986 s_offset_b = s_offset_b + nsob*nsgflb
1987 END DO !lb
1988 s_offset_a = s_offset_a + nsoa*nsgfla
1989 END DO !la
1990 END DO
1991 END DO
1992
1993 END SUBROUTINE forces4
1994
1995! **************************************************************************************************
1996!> \brief Given a 2d index pair, this function returns a 1d index pair for
1997!> a symmetric upper triangle NxN matrix
1998!> The compiler should inline this function, therefore it appears in
1999!> several modules
2000!> \param i 2d index
2001!> \param j 2d index
2002!> \param N matrix size
2003!> \return ...
2004!> \par History
2005!> 03.2009 created [Manuel Guidon]
2006!> \author Manuel Guidon
2007! **************************************************************************************************
2008 PURE FUNCTION get_1d_idx(i, j, N)
2009 INTEGER, INTENT(IN) :: i, j
2010 INTEGER(int_8), INTENT(IN) :: n
2011 INTEGER(int_8) :: get_1d_idx
2012
2013 INTEGER(int_8) :: min_ij
2014
2015 min_ij = min(i, j)
2016 get_1d_idx = min_ij*n + max(i, j) - (min_ij - 1)*min_ij/2 - n
2017
2018 END FUNCTION get_1d_idx
2019
2020! **************************************************************************************************
2021!> \brief This routine prefetches density matrix elements, i.e. reshuffles the
2022!> data in a way that they can be accessed later on in a cache friendly
2023!> way
2024!> \param ma_max Size of matrix blocks
2025!> \param mb_max Size of matrix blocks
2026!> \param mc_max Size of matrix blocks
2027!> \param md_max Size of matrix blocks
2028!> \param density ...
2029!> \param pbd buffer that will contain P(b,d)
2030!> \param pbc buffer that will contain P(b,c)
2031!> \param pad buffer that will contain P(a,d)
2032!> \param pac buffer that will contain P(a,c)
2033!> \param iatom ...
2034!> \param jatom ...
2035!> \param katom ...
2036!> \param latom ...
2037!> \param iset ...
2038!> \param jset ...
2039!> \param kset ...
2040!> \param lset ...
2041!> \param offset_bd_set ...
2042!> \param offset_bc_set ...
2043!> \param offset_ad_set ...
2044!> \param offset_ac_set ...
2045!> \param atomic_offset_bd ...
2046!> \param atomic_offset_bc ...
2047!> \param atomic_offset_ad ...
2048!> \param atomic_offset_ac ...
2049!> \param anti_symmetric ...
2050!> \par History
2051!> 03.2009 created [Manuel Guidon]
2052!> \author Manuel Guidon
2053! **************************************************************************************************
2054 SUBROUTINE prefetch_density_matrix(ma_max, mb_max, mc_max, md_max, &
2055 density, pbd, pbc, pad, pac, &
2056 iatom, jatom, katom, latom, &
2057 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
2058 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
2059 atomic_offset_ad, atomic_offset_ac, anti_symmetric)
2060
2061 INTEGER, INTENT(IN) :: ma_max, mb_max, mc_max, md_max
2062 REAL(dp), DIMENSION(:), INTENT(IN) :: density
2063 REAL(dp), DIMENSION(*), INTENT(INOUT) :: pbd, pbc, pad, pac
2064 INTEGER, INTENT(IN) :: iatom, jatom, katom, latom, iset, jset, &
2065 kset, lset
2066 INTEGER, DIMENSION(:, :), POINTER :: offset_bd_set, offset_bc_set, &
2067 offset_ad_set, offset_ac_set
2068 INTEGER, INTENT(IN) :: atomic_offset_bd, atomic_offset_bc, &
2069 atomic_offset_ad, atomic_offset_ac
2070 LOGICAL :: anti_symmetric
2071
2072 INTEGER :: i, j, ma, mb, mc, md, offset_ac, &
2073 offset_ad, offset_bc, offset_bd
2074 REAL(dp) :: fac
2075
2076 fac = 1.0_dp
2077 IF (anti_symmetric) fac = -1.0_dp
2078 IF (jatom >= latom) THEN
2079 i = 1
2080 offset_bd = offset_bd_set(jset, lset) + atomic_offset_bd - 1
2081 j = offset_bd
2082 DO md = 1, md_max
2083 DO mb = 1, mb_max
2084 pbd(i) = density(j)*fac
2085 i = i + 1
2086 j = j + 1
2087 END DO
2088 END DO
2089 ELSE
2090 i = 1
2091 offset_bd = offset_bd_set(lset, jset) + atomic_offset_bd - 1
2092 DO md = 1, md_max
2093 j = offset_bd + md - 1
2094 DO mb = 1, mb_max
2095 pbd(i) = density(j)
2096 i = i + 1
2097 j = j + md_max
2098 END DO
2099 END DO
2100 END IF
2101 IF (jatom >= katom) THEN
2102 i = 1
2103 offset_bc = offset_bc_set(jset, kset) + atomic_offset_bc - 1
2104 j = offset_bc
2105 DO mc = 1, mc_max
2106 DO mb = 1, mb_max
2107 pbc(i) = density(j)*fac
2108 i = i + 1
2109 j = j + 1
2110 END DO
2111 END DO
2112 ELSE
2113 i = 1
2114 offset_bc = offset_bc_set(kset, jset) + atomic_offset_bc - 1
2115 DO mc = 1, mc_max
2116 j = offset_bc + mc - 1
2117 DO mb = 1, mb_max
2118 pbc(i) = density(j)
2119 i = i + 1
2120 j = j + mc_max
2121 END DO
2122 END DO
2123 END IF
2124 IF (iatom >= latom) THEN
2125 i = 1
2126 offset_ad = offset_ad_set(iset, lset) + atomic_offset_ad - 1
2127 j = offset_ad
2128 DO md = 1, md_max
2129 DO ma = 1, ma_max
2130 pad(i) = density(j)*fac
2131 i = i + 1
2132 j = j + 1
2133 END DO
2134 END DO
2135 ELSE
2136 i = 1
2137 offset_ad = offset_ad_set(lset, iset) + atomic_offset_ad - 1
2138 DO md = 1, md_max
2139 j = offset_ad + md - 1
2140 DO ma = 1, ma_max
2141 pad(i) = density(j)
2142 i = i + 1
2143 j = j + md_max
2144 END DO
2145 END DO
2146 END IF
2147 IF (iatom >= katom) THEN
2148 i = 1
2149 offset_ac = offset_ac_set(iset, kset) + atomic_offset_ac - 1
2150 j = offset_ac
2151 DO mc = 1, mc_max
2152 DO ma = 1, ma_max
2153 pac(i) = density(j)*fac
2154 i = i + 1
2155 j = j + 1
2156 END DO
2157 END DO
2158 ELSE
2159 i = 1
2160 offset_ac = offset_ac_set(kset, iset) + atomic_offset_ac - 1
2161 DO mc = 1, mc_max
2162 j = offset_ac + mc - 1
2163 DO ma = 1, ma_max
2164 pac(i) = density(j)
2165 i = i + 1
2166 j = j + mc_max
2167 END DO
2168 END DO
2169 END IF
2170 END SUBROUTINE prefetch_density_matrix
2171
2172! **************************************************************************************************
2173!> \brief This routine updates the forces using buffered density matrices
2174!> \param ma_max Size of matrix blocks
2175!> \param mb_max Size of matrix blocks
2176!> \param mc_max Size of matrix blocks
2177!> \param md_max Size of matrix blocks
2178!> \param pbd buffer that will contain P(b,d)
2179!> \param pbc buffer that will contain P(b,c)
2180!> \param pad buffer that will contain P(a,d)
2181!> \param pac buffer that will contain P(a,c)
2182!> \param fac mulitplication factor (spin, symmetry)
2183!> \param prim primitive forces
2184!> \param force storage loacation for forces
2185!> \param forces_map index table
2186!> \param coord which of the 12 coords to be updated
2187!> \param pbd_resp ...
2188!> \param pbc_resp ...
2189!> \param pad_resp ...
2190!> \param pac_resp ...
2191!> \param resp_only ...
2192!> \par History
2193!> 03.2009 created [Manuel Guidon]
2194!> \author Manuel Guidon
2195! **************************************************************************************************
2196 SUBROUTINE update_forces(ma_max, mb_max, mc_max, md_max, &
2197 pbd, pbc, pad, pac, fac, &
2198 prim, force, forces_map, coord, &
2199 pbd_resp, pbc_resp, pad_resp, pac_resp, resp_only)
2200
2201 INTEGER, INTENT(IN) :: ma_max, mb_max, mc_max, md_max
2202 REAL(dp), DIMENSION(*), INTENT(IN) :: pbd, pbc, pad, pac
2203 REAL(dp), INTENT(IN) :: fac
2204 REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2205 INTENT(IN) :: prim
2206 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
2207 INTEGER, INTENT(IN) :: forces_map(4, 2), coord
2208 REAL(dp), DIMENSION(*), INTENT(IN), OPTIONAL :: pbd_resp, pbc_resp, pad_resp, pac_resp
2209 LOGICAL, INTENT(IN), OPTIONAL :: resp_only
2210
2211 INTEGER :: ma, mb, mc, md, p_index
2212 LOGICAL :: full_force, with_resp_density
2213 REAL(dp) :: temp1, temp1_resp, temp3, temp3_resp, &
2214 temp4, teresp
2215
2216 with_resp_density = .false.
2217 IF (PRESENT(pbd_resp) .AND. &
2218 PRESENT(pbc_resp) .AND. &
2219 PRESENT(pad_resp) .AND. &
2220 PRESENT(pac_resp)) with_resp_density = .true.
2221
2222 IF (with_resp_density) THEN
2223 full_force = .true.
2224 IF (PRESENT(resp_only)) full_force = .NOT. resp_only
2225 p_index = 0
2226 temp4 = 0.0_dp
2227 DO md = 1, md_max
2228 DO mc = 1, mc_max
2229 DO mb = 1, mb_max
2230 temp1 = pbc((mc - 1)*mb_max + mb)*fac
2231 temp3 = pbd((md - 1)*mb_max + mb)*fac
2232 temp1_resp = pbc_resp((mc - 1)*mb_max + mb)*fac
2233 temp3_resp = pbd_resp((md - 1)*mb_max + mb)*fac
2234 DO ma = 1, ma_max
2235 p_index = p_index + 1
2236 ! HF-SCF
2237 IF (full_force) THEN
2238 teresp = temp1*pad((md - 1)*ma_max + ma) + &
2239 temp3*pac((mc - 1)*ma_max + ma)
2240 ELSE
2241 teresp = 0.0_dp
2242 END IF
2243 ! RESP+HF
2244 teresp = teresp + &
2245 pac((mc - 1)*ma_max + ma)*temp3_resp + &
2246 pac_resp((mc - 1)*ma_max + ma)*temp3 + &
2247 pad((md - 1)*ma_max + ma)*temp1_resp + &
2248 pad_resp((md - 1)*ma_max + ma)*temp1
2249 temp4 = temp4 + teresp*prim(p_index)
2250 END DO !ma
2251 END DO !mb
2252 END DO !mc
2253 END DO !md
2254 ELSE
2255 p_index = 0
2256 temp4 = 0.0_dp
2257 DO md = 1, md_max
2258 DO mc = 1, mc_max
2259 DO mb = 1, mb_max
2260 temp1 = pbc((mc - 1)*mb_max + mb)*fac
2261 temp3 = pbd((md - 1)*mb_max + mb)*fac
2262 DO ma = 1, ma_max
2263 p_index = p_index + 1
2264 teresp = temp1*pad((md - 1)*ma_max + ma) + &
2265 temp3*pac((mc - 1)*ma_max + ma)
2266 temp4 = temp4 + teresp*prim(p_index)
2267 END DO !ma
2268 END DO !mb
2269 END DO !mc
2270 END DO !md
2271 END IF
2272
2273!$OMP ATOMIC
2274 force(forces_map((coord - 1)/3 + 1, 1))%fock_4c(mod(coord - 1, 3) + 1, &
2275 forces_map((coord - 1)/3 + 1, 2)) = &
2276 force(forces_map((coord - 1)/3 + 1, 1))%fock_4c(mod(coord - 1, 3) + 1, &
2277 forces_map((coord - 1)/3 + 1, 2)) - &
2278 temp4
2279
2280 END SUBROUTINE update_forces
2281
2282! **************************************************************************************************
2283!> \brief This routine updates the virial using buffered density matrices
2284!> \param ma_max Size of matrix blocks
2285!> \param mb_max Size of matrix blocks
2286!> \param mc_max Size of matrix blocks
2287!> \param md_max Size of matrix blocks
2288!> \param pbd buffer that will contain P(b,d)
2289!> \param pbc buffer that will contain P(b,c)
2290!> \param pad buffer that will contain P(a,d)
2291!> \param pac buffer that will contain P(a,c)
2292!> \param fac mulitplication factor (spin, symmetry)
2293!> \param prim primitive forces
2294!> \param tmp_virial ...
2295!> \param coord which of the 12 coords to be updated
2296!> \param l ...
2297!> \param pbd_resp ...
2298!> \param pbc_resp ...
2299!> \param pad_resp ...
2300!> \param pac_resp ...
2301!> \par History
2302!> 03.2009 created [Manuel Guidon]
2303!> \author Manuel Guidon
2304! **************************************************************************************************
2305 SUBROUTINE update_virial(ma_max, mb_max, mc_max, md_max, &
2306 pbd, pbc, pad, pac, fac, &
2307 prim, tmp_virial, coord, l, &
2308 pbd_resp, pbc_resp, pad_resp, pac_resp, resp_only)
2309
2310 INTEGER, INTENT(IN) :: ma_max, mb_max, mc_max, md_max
2311 REAL(dp), DIMENSION(*), INTENT(IN) :: pbd, pbc, pad, pac
2312 REAL(dp), INTENT(IN) :: fac
2313 REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2314 INTENT(IN) :: prim
2315 REAL(dp) :: tmp_virial(3, 3)
2316 INTEGER, INTENT(IN) :: coord, l
2317 REAL(dp), DIMENSION(*), INTENT(IN), OPTIONAL :: pbd_resp, pbc_resp, pad_resp, pac_resp
2318 LOGICAL, INTENT(IN), OPTIONAL :: resp_only
2319
2320 INTEGER :: i, j, ma, mb, mc, md, p_index
2321 LOGICAL :: with_resp_density, full_force
2322 REAL(dp) :: temp1, temp1_resp, temp3, temp3_resp, &
2323 temp4, teresp
2324
2325 with_resp_density = .false.
2326 IF (PRESENT(pbd_resp) .AND. &
2327 PRESENT(pbc_resp) .AND. &
2328 PRESENT(pad_resp) .AND. &
2329 PRESENT(pac_resp)) with_resp_density = .true.
2330
2331 IF (with_resp_density) THEN
2332 full_force = .true.
2333 IF (PRESENT(resp_only)) full_force = .NOT. resp_only
2334 p_index = 0
2335 temp4 = 0.0_dp
2336 DO md = 1, md_max
2337 DO mc = 1, mc_max
2338 DO mb = 1, mb_max
2339 temp1 = pbc((mc - 1)*mb_max + mb)*fac
2340 temp3 = pbd((md - 1)*mb_max + mb)*fac
2341 temp1_resp = pbc_resp((mc - 1)*mb_max + mb)*fac
2342 temp3_resp = pbd_resp((md - 1)*mb_max + mb)*fac
2343 DO ma = 1, ma_max
2344 p_index = p_index + 1
2345 ! HF-SCF
2346 IF (full_force) THEN
2347 teresp = temp1*pad((md - 1)*ma_max + ma) + &
2348 temp3*pac((mc - 1)*ma_max + ma)
2349 ELSE
2350 teresp = 0.0_dp
2351 END IF
2352 ! RESP+HF
2353 teresp = teresp + &
2354 pac((mc - 1)*ma_max + ma)*temp3_resp + &
2355 pac_resp((mc - 1)*ma_max + ma)*temp3 + &
2356 pad((md - 1)*ma_max + ma)*temp1_resp + &
2357 pad_resp((md - 1)*ma_max + ma)*temp1
2358 temp4 = temp4 + teresp*prim(p_index)
2359 END DO !ma
2360 END DO !mb
2361 END DO !mc
2362 END DO !md
2363 ELSE
2364 p_index = 0
2365 temp4 = 0.0_dp
2366 DO md = 1, md_max
2367 DO mc = 1, mc_max
2368 DO mb = 1, mb_max
2369 temp1 = pbc((mc - 1)*mb_max + mb)*fac
2370 temp3 = pbd((md - 1)*mb_max + mb)*fac
2371 DO ma = 1, ma_max
2372 p_index = p_index + 1
2373 teresp = temp1*pad((md - 1)*ma_max + ma) + &
2374 temp3*pac((mc - 1)*ma_max + ma)
2375 temp4 = temp4 + teresp*prim(p_index)
2376 END DO !ma
2377 END DO !mb
2378 END DO !mc
2379 END DO !md
2380 END IF
2381
2382 j = l
2383 i = mod(coord - 1, 3) + 1
2384 tmp_virial(i, j) = tmp_virial(i, j) - temp4
2385 END SUBROUTINE update_virial
2386
2387
2388! **************************************************************************************************
2389!> \brief This routine calculates the maximum density matrix element, when
2390!> screening on an initial density matrix is applied. Due to symmetry of
2391!> the ERI's, there are always 4 matrix elements to be considered.
2392!> CASE 0-15 belong to an energy calculation (linear screening)
2393!> CASE 16-31 belong to a force calculation (square screening)
2394!> \param ptr_p_1 Pointers to atomic density matrices
2395!> \param ptr_p_2 Pointers to atomic density matrices
2396!> \param ptr_p_3 Pointers to atomic density matrices
2397!> \param ptr_p_4 Pointers to atomic density matrices
2398!> \param iset Current set
2399!> \param jset Current set
2400!> \param kset Current set
2401!> \param lset Current set
2402!> \param pmax_val value to be calculated
2403!> \param swap_id Defines how the matrices are accessed
2404!> \par History
2405!> 06.2009 created [Manuel Guidon]
2406!> \author Manuel Guidon
2407! **************************************************************************************************
2408PURE SUBROUTINE get_pmax_val(ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, iset, jset, kset, lset, pmax_val, swap_id)
2409
2410 REAL(dp), DIMENSION(:, :), POINTER :: ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4
2411 INTEGER, INTENT(IN) :: iset, jset, kset, lset
2412
2413 REAL(dp), INTENT(OUT) :: pmax_val
2414 INTEGER, INTENT(IN) :: swap_id
2415
2416 REAL(dp) :: pmax_1, pmax_2, pmax_3, pmax_4
2417
2418 SELECT CASE (swap_id)
2419 CASE (0)
2420 pmax_1 = ptr_p_1(kset, iset)
2421 pmax_2 = ptr_p_2(lset, jset)
2422 pmax_3 = ptr_p_3(lset, iset)
2423 pmax_4 = ptr_p_4(kset, jset)
2424 pmax_val = max(pmax_1, pmax_2, pmax_3, pmax_4)
2425 CASE (1)
2426 pmax_1 = ptr_p_1(iset, kset)
2427 pmax_2 = ptr_p_2(lset, jset)
2428 pmax_3 = ptr_p_3(lset, iset)
2429 pmax_4 = ptr_p_4(kset, jset)
2430 pmax_val = max(pmax_1, pmax_2, pmax_3, pmax_4)
2431 CASE (2)
2432 pmax_1 = ptr_p_1(kset, iset)
2433 pmax_2 = ptr_p_2(jset, lset)
2434 pmax_3 = ptr_p_3(lset, iset)
2435 pmax_4 = ptr_p_4(kset, jset)
2436 pmax_val = max(pmax_1, pmax_2, pmax_3, pmax_4)
2437 CASE (3)
2438 pmax_1 = ptr_p_1(iset, kset)
2439 pmax_2 = ptr_p_2(jset, lset)
2440 pmax_3 = ptr_p_3(lset, iset)
2441 pmax_4 = ptr_p_4(kset, jset)
2442 pmax_val = max(pmax_1, pmax_2, pmax_3, pmax_4)
2443 CASE (4)
2444 pmax_1 = ptr_p_1(kset, iset)
2445 pmax_2 = ptr_p_2(lset, jset)
2446 pmax_3 = ptr_p_3(iset, lset)
2447 pmax_4 = ptr_p_4(kset, jset)
2448 pmax_val = max(pmax_1, pmax_2, pmax_3, pmax_4)
2449 CASE (5)
2450 pmax_1 = ptr_p_1(iset, kset)
2451 pmax_2 = ptr_p_2(lset, jset)
2452 pmax_3 = ptr_p_3(iset, lset)
2453 pmax_4 = ptr_p_4(kset, jset)
2454 pmax_val = max(pmax_1, pmax_2, pmax_3, pmax_4)
2455 CASE (6)
2456 pmax_1 = ptr_p_1(kset, iset)
2457 pmax_2 = ptr_p_2(jset, lset)
2458 pmax_3 = ptr_p_3(iset, lset)
2459 pmax_4 = ptr_p_4(kset, jset)
2460 pmax_val = max(pmax_1, pmax_2, pmax_3, pmax_4)
2461 CASE (7)
2462 pmax_1 = ptr_p_1(iset, kset)
2463 pmax_2 = ptr_p_2(jset, lset)
2464 pmax_3 = ptr_p_3(iset, lset)
2465 pmax_4 = ptr_p_4(kset, jset)
2466 pmax_val = max(pmax_1, pmax_2, pmax_3, pmax_4)
2467 CASE (8)
2468 pmax_1 = ptr_p_1(kset, iset)
2469 pmax_2 = ptr_p_2(lset, jset)
2470 pmax_3 = ptr_p_3(lset, iset)
2471 pmax_4 = ptr_p_4(jset, kset)
2472 pmax_val = max(pmax_1, pmax_2, pmax_3, pmax_4)
2473 CASE (9)
2474 pmax_1 = ptr_p_1(iset, kset)
2475 pmax_2 = ptr_p_2(lset, jset)
2476 pmax_3 = ptr_p_3(lset, iset)
2477 pmax_4 = ptr_p_4(jset, kset)
2478 pmax_val = max(pmax_1, pmax_2, pmax_3, pmax_4)
2479 CASE (10)
2480 pmax_1 = ptr_p_1(kset, iset)
2481 pmax_2 = ptr_p_2(jset, lset)
2482 pmax_3 = ptr_p_3(lset, iset)
2483 pmax_4 = ptr_p_4(jset, kset)
2484 pmax_val = max(pmax_1, pmax_2, pmax_3, pmax_4)
2485 CASE (11)
2486 pmax_1 = ptr_p_1(iset, kset)
2487 pmax_2 = ptr_p_2(jset, lset)
2488 pmax_3 = ptr_p_3(lset, iset)
2489 pmax_4 = ptr_p_4(jset, kset)
2490 pmax_val = max(pmax_1, pmax_2, pmax_3, pmax_4)
2491 CASE (12)
2492 pmax_1 = ptr_p_1(kset, iset)
2493 pmax_2 = ptr_p_2(lset, jset)
2494 pmax_3 = ptr_p_3(iset, lset)
2495 pmax_4 = ptr_p_4(jset, kset)
2496 pmax_val = max(pmax_1, pmax_2, pmax_3, pmax_4)
2497 CASE (13)
2498 pmax_1 = ptr_p_1(iset, kset)
2499 pmax_2 = ptr_p_2(lset, jset)
2500 pmax_3 = ptr_p_3(iset, lset)
2501 pmax_4 = ptr_p_4(jset, kset)
2502 pmax_val = max(pmax_1, pmax_2, pmax_3, pmax_4)
2503 CASE (14)
2504 pmax_1 = ptr_p_1(kset, iset)
2505 pmax_2 = ptr_p_2(jset, lset)
2506 pmax_3 = ptr_p_3(iset, lset)
2507 pmax_4 = ptr_p_4(jset, kset)
2508 pmax_val = max(pmax_1, pmax_2, pmax_3, pmax_4)
2509 CASE (15)
2510 pmax_1 = ptr_p_1(iset, kset)
2511 pmax_2 = ptr_p_2(jset, lset)
2512 pmax_3 = ptr_p_3(iset, lset)
2513 pmax_4 = ptr_p_4(jset, kset)
2514 pmax_val = max(pmax_1, pmax_2, pmax_3, pmax_4)
2515 CASE (16)
2516 pmax_1 = ptr_p_1(kset, iset)
2517 pmax_2 = ptr_p_2(lset, jset)
2518 pmax_3 = ptr_p_3(lset, iset)
2519 pmax_4 = ptr_p_4(kset, jset)
2520 pmax_val = max(pmax_1 + pmax_2, pmax_3 + pmax_4)
2521 CASE (17)
2522 pmax_1 = ptr_p_1(iset, kset)
2523 pmax_2 = ptr_p_2(lset, jset)
2524 pmax_3 = ptr_p_3(lset, iset)
2525 pmax_4 = ptr_p_4(kset, jset)
2526 pmax_val = max(pmax_1 + pmax_2, pmax_3 + pmax_4)
2527 CASE (18)
2528 pmax_1 = ptr_p_1(kset, iset)
2529 pmax_2 = ptr_p_2(jset, lset)
2530 pmax_3 = ptr_p_3(lset, iset)
2531 pmax_4 = ptr_p_4(kset, jset)
2532 pmax_val = max(pmax_1 + pmax_2, pmax_3 + pmax_4)
2533 CASE (19)
2534 pmax_1 = ptr_p_1(iset, kset)
2535 pmax_2 = ptr_p_2(jset, lset)
2536 pmax_3 = ptr_p_3(lset, iset)
2537 pmax_4 = ptr_p_4(kset, jset)
2538 pmax_val = max(pmax_1 + pmax_2, pmax_3 + pmax_4)
2539 CASE (20)
2540 pmax_1 = ptr_p_1(kset, iset)
2541 pmax_2 = ptr_p_2(lset, jset)
2542 pmax_3 = ptr_p_3(iset, lset)
2543 pmax_4 = ptr_p_4(kset, jset)
2544 pmax_val = max(pmax_1 + pmax_2, pmax_3 + pmax_4)
2545 CASE (21)
2546 pmax_1 = ptr_p_1(iset, kset)
2547 pmax_2 = ptr_p_2(lset, jset)
2548 pmax_3 = ptr_p_3(iset, lset)
2549 pmax_4 = ptr_p_4(kset, jset)
2550 pmax_val = max(pmax_1 + pmax_2, pmax_3 + pmax_4)
2551 CASE (22)
2552 pmax_1 = ptr_p_1(kset, iset)
2553 pmax_2 = ptr_p_2(jset, lset)
2554 pmax_3 = ptr_p_3(iset, lset)
2555 pmax_4 = ptr_p_4(kset, jset)
2556 pmax_val = max(pmax_1 + pmax_2, pmax_3 + pmax_4)
2557 CASE (23)
2558 pmax_1 = ptr_p_1(iset, kset)
2559 pmax_2 = ptr_p_2(jset, lset)
2560 pmax_3 = ptr_p_3(iset, lset)
2561 pmax_4 = ptr_p_4(kset, jset)
2562 pmax_val = max(pmax_1 + pmax_2, pmax_3 + pmax_4)
2563 CASE (24)
2564 pmax_1 = ptr_p_1(kset, iset)
2565 pmax_2 = ptr_p_2(lset, jset)
2566 pmax_3 = ptr_p_3(lset, iset)
2567 pmax_4 = ptr_p_4(jset, kset)
2568 pmax_val = max(pmax_1 + pmax_2, pmax_3 + pmax_4)
2569 CASE (25)
2570 pmax_1 = ptr_p_1(iset, kset)
2571 pmax_2 = ptr_p_2(lset, jset)
2572 pmax_3 = ptr_p_3(lset, iset)
2573 pmax_4 = ptr_p_4(jset, kset)
2574 pmax_val = max(pmax_1 + pmax_2, pmax_3 + pmax_4)
2575 CASE (26)
2576 pmax_1 = ptr_p_1(kset, iset)
2577 pmax_2 = ptr_p_2(jset, lset)
2578 pmax_3 = ptr_p_3(lset, iset)
2579 pmax_4 = ptr_p_4(jset, kset)
2580 pmax_val = max(pmax_1 + pmax_2, pmax_3 + pmax_4)
2581 CASE (27)
2582 pmax_1 = ptr_p_1(iset, kset)
2583 pmax_2 = ptr_p_2(jset, lset)
2584 pmax_3 = ptr_p_3(lset, iset)
2585 pmax_4 = ptr_p_4(jset, kset)
2586 pmax_val = max(pmax_1 + pmax_2, pmax_3 + pmax_4)
2587 CASE (28)
2588 pmax_1 = ptr_p_1(kset, iset)
2589 pmax_2 = ptr_p_2(lset, jset)
2590 pmax_3 = ptr_p_3(iset, lset)
2591 pmax_4 = ptr_p_4(jset, kset)
2592 pmax_val = max(pmax_1 + pmax_2, pmax_3 + pmax_4)
2593 CASE (29)
2594 pmax_1 = ptr_p_1(iset, kset)
2595 pmax_2 = ptr_p_2(lset, jset)
2596 pmax_3 = ptr_p_3(iset, lset)
2597 pmax_4 = ptr_p_4(jset, kset)
2598 pmax_val = max(pmax_1 + pmax_2, pmax_3 + pmax_4)
2599 CASE (30)
2600 pmax_1 = ptr_p_1(kset, iset)
2601 pmax_2 = ptr_p_2(jset, lset)
2602 pmax_3 = ptr_p_3(iset, lset)
2603 pmax_4 = ptr_p_4(jset, kset)
2604 pmax_val = max(pmax_1 + pmax_2, pmax_3 + pmax_4)
2605 CASE (31)
2606 pmax_1 = ptr_p_1(iset, kset)
2607 pmax_2 = ptr_p_2(jset, lset)
2608 pmax_3 = ptr_p_3(iset, lset)
2609 pmax_4 = ptr_p_4(jset, kset)
2610 pmax_val = max(pmax_1 + pmax_2, pmax_3 + pmax_4)
2611 END SELECT
2612
2613END SUBROUTINE get_pmax_val
2614
2615
2616END MODULE hfx_derivatives
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
Types and set/get functions for auxiliary density matrix methods.
Definition admm_types.F:15
Define the atomic kind types and their sub types.
subroutine, public get_atomic_kind_set(atomic_kind_set, atom_of_kind, kind_of, natom_of_kind, maxatom, natom, nshell, fist_potential_present, shell_present, shell_adiabatic, shell_check_distance, damping_present)
Get attributes of an atomic kind set.
Handles all functions related to the CELL.
Definition cell_types.F:15
Defines control structures, which contain the parameters and the settings for the DFT-based calculati...
Utility routines to open and close files. Tracking of preconnections.
Definition cp_files.F:16
subroutine, public open_file(file_name, file_status, file_form, file_action, file_position, file_pad, unit_number, debug, skip_get_unit_number, file_access)
Opens the requested file using a free unit number.
Definition cp_files.F:308
subroutine, public close_file(unit_number, file_status, keep_preconnection)
Close an open file given by its logical unit number. Optionally, keep the file and unit preconnected.
Definition cp_files.F:119
various routines to log and control the output. The idea is that decisions about where to log should ...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
routines to handle the output, The idea is to remove the decision of wheter to output and what to out...
integer function, public cp_print_key_unit_nr(logger, basis_section, print_key_path, extension, middle_name, local, log_filename, ignore_should_output, file_form, file_position, file_action, file_status, do_backup, on_file, is_new_file, mpi_io, fout)
...
subroutine, public cp_print_key_finished_output(unit_nr, logger, basis_section, print_key_path, local, ignore_should_output, on_file, mpi_io)
should be called after you finish working with a unit obtained with cp_print_key_unit_nr,...
integer, parameter, public cp_p_file
integer function, public cp_print_key_should_output(iteration_info, basis_section, print_key_path, used_print_key, first_time)
returns what should be done with the given property if btest(res,cp_p_store) then the property should...
Calculation of the incomplete Gamma function F_n(t) for multi-center integrals over Cartesian Gaussia...
Definition gamma.F:15
subroutine, public init_md_ftable(nmax)
Initialize a table of F_n(t) values in the range 0 <= t <= 12 with a stepsize of 0....
Definition gamma.F:540
Routines for data exchange between MPI processes.
subroutine, public get_full_density(para_env, full_density, rho, number_of_p_entries, block_offset, kind_of, basis_parameter, get_max_vals_spin, rho_beta, antisymmetric)
Collects full density matrix from all CPUs
routines and types for Hartree-Fock-Exchange
subroutine, public hfx_add_single_cache_element(value, nbits, cache, container, memory_usage, use_disk_storage, max_val_memory)
This routine adds an int_8 value to a cache. If the cache is full a compression routine is invoked an...
subroutine, public hfx_get_mult_cache_elements(values, nints, nbits, cache, container, eps_schwarz, pmax_entry, memory_usage, use_disk_storage)
This routine returns a bunch real values from a cache. If the cache is empty a decompression routine ...
subroutine, public hfx_flush_last_cache(nbits, cache, container, memory_usage, use_disk_storage)
This routine compresses the last probably not yet compressed cache into a container
subroutine, public hfx_get_single_cache_element(value, nbits, cache, container, memory_usage, use_disk_storage)
This routine returns an int_8 value from a cache. If the cache is empty a decompression routine is in...
subroutine, public hfx_decompress_first_cache(nbits, cache, container, memory_usage, use_disk_storage)
This routine decompresses the first bunch of data in a container and copies them into a cache
subroutine, public hfx_add_mult_cache_elements(values, nints, nbits, cache, container, eps_schwarz, pmax_entry, memory_usage, use_disk_storage)
This routine adds an a few real values to a cache. If the cache is full a compression routine is invo...
subroutine, public hfx_reset_cache_and_container(cache, container, memory_usage, do_disk_storage)
This routine resets the containers list pointer to the first element and moves the element counters o...
Routines to calculate derivatives with respect to basis function origin.
subroutine, public derivatives_four_center(qs_env, rho_ao, rho_ao_resp, hfx_section, para_env, irep, use_virial, adiabatic_rescale_factor, resp_only, external_x_data)
computes four center derivatives for a full basis set and updates the forcesfock_4c arrays....
Interface to the Libint-Library.
subroutine, public evaluate_deriv_eri(deriv, nproducts, pgf_product_list, n_a, n_b, n_c, n_d, ncoa, ncob, ncoc, ncod, nsgfa, nsgfb, nsgfc, nsgfd, primitives, max_contraction, tmp_max_all, eps_schwarz, neris, zeta_a, zeta_b, zeta_c, zeta_d, zetainv, etainv, s_offset_a, s_offset_b, s_offset_c, s_offset_d, nl_a, nl_b, nl_c, nl_d, nsoa, nsob, nsoc, nsod, sphi_a, sphi_b, sphi_c, sphi_d, work, work2, work_forces, buffer1, buffer2, primitives_tmp, use_virial, work_virial, work2_virial, primitives_tmp_virial, primitives_virial, cell, tmp_max_all_virial)
Evaluates derivatives of electron repulsion integrals for a primitive quartet.
Routines for optimizing load balance between processes in HFX calculations.
subroutine, public collect_load_balance_info(para_env, x_data, iw, n_threads, i_thread, eval_type)
...
subroutine, public hfx_load_balance(x_data, eps_schwarz, particle_set, max_set, para_env, coeffs_set, coeffs_kind, is_assoc_atomic_block_global, do_periodic, load_balance_parameter, kind_of, basis_parameter, pmax_set, pmax_atom, i_thread, n_threads, cell, do_p_screening, map_atom_to_kind_atom, nkind, eval_type, pmax_block, use_virial)
Distributes the computation of eri's to all available processes.
subroutine, public hfx_update_load_balance(x_data, para_env, load_balance_parameter, i_thread, n_threads, eval_type)
Cheap way of redistributing the eri's.
Routines for optimizing load balance between processes in HFX calculations.
subroutine, public build_pgf_product_list(list1, list2, product_list, nproducts, log10_pmax, log10_eps_schwarz, neighbor_cells, cell, potential_parameter, m_max, do_periodic)
...
subroutine, public build_atomic_pair_list(natom, atomic_pair_list, kind_of, basis_parameter, particle_set, do_periodic, coeffs_kind, coeffs_kind_max0, log10_eps_schwarz, cell, blocks)
...
subroutine, public build_pair_list(natom, list, set_list, i_start, i_end, j_start, j_end, kind_of, basis_parameter, particle_set, do_periodic, coeffs_set, coeffs_kind, coeffs_kind_max0, log10_eps_schwarz, cell, pmax_blocks, atomic_pair_list)
...
subroutine, public build_pair_list_pgf(npgfa, npgfb, list, zeta, zetb, screen1, screen2, pgf, r_pgf, log10_pmax, log10_eps_schwarz, ra, rb, nelements, neighbor_cells, nimages, do_periodic)
...
integer, save, public pgf_product_list_size
Several screening methods used in HFX calcualtions.
subroutine, public update_pmax_mat(pmax_set, map_atom_to_kind_atom, set_offset, atomic_block_offset, pmax_atom, full_density_alpha, full_density_beta, natom, kind_of, basis_parameter, nkind, is_assoc_atomic_block)
updates the maximum of the density matrix in compressed form for screening purposes
Types and set/get functions for HFX.
Definition hfx_types.F:15
subroutine, public hfx_init_container(container, memory_usage, do_disk_storage)
This routine deletes all list entries in a container in order to deallocate the memory.
Definition hfx_types.F:2523
integer, save, public init_t_c_g0_lmax
Definition hfx_types.F:133
real(dp), parameter, public log_zero
Definition hfx_types.F:118
subroutine, public alloc_containers(data, bin_size)
...
Definition hfx_types.F:2906
subroutine, public dealloc_containers(data, memory_usage)
...
Definition hfx_types.F:2874
collects all constants needed in input so that they can be used without circular dependencies
integer, parameter, public hfx_do_eval_forces
integer, parameter, public do_potential_truncated
integer, parameter, public do_potential_mix_cl_trunc
objects that represent the structure of input sections and the data contained in an input section
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public int_8
Definition kinds.F:54
integer, parameter, public dp
Definition kinds.F:34
Interface to the Libint-Library or a c++ wrapper.
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition machine.F:123
Definition of mathematical constants and functions.
real(kind=dp), dimension(0:maxfac), parameter, public fac
Interface to the message passing library MPI.
Provides Cartesian and spherical orbital pointers and indices.
integer, dimension(:), allocatable, public nco
integer, dimension(:), allocatable, public ncoset
integer, dimension(:), allocatable, public nso
Define the data structure for the particle information.
subroutine, public get_qs_env(qs_env, atomic_kind_set, qs_kind_set, cell, super_cell, cell_ref, use_ref_cell, kpoints, dft_control, mos, sab_orb, sab_all, qmmm, qmmm_periodic, sac_ae, sac_ppl, sac_lri, sap_ppnl, sab_vdw, sab_scp, sap_oce, sab_lrc, sab_se, sab_xtbe, sab_tbe, sab_core, sab_xb, sab_xtb_nonbond, sab_almo, sab_kp, sab_kp_nosym, particle_set, energy, force, matrix_h, matrix_h_im, matrix_ks, matrix_ks_im, matrix_vxc, run_rtp, rtp, matrix_h_kp, matrix_h_im_kp, matrix_ks_kp, matrix_ks_im_kp, matrix_vxc_kp, kinetic_kp, matrix_s_kp, matrix_w_kp, matrix_s_ri_aux_kp, matrix_s, matrix_s_ri_aux, matrix_w, matrix_p_mp2, matrix_p_mp2_admm, rho, rho_xc, pw_env, ewald_env, ewald_pw, active_space, mpools, input, para_env, blacs_env, scf_control, rel_control, kinetic, qs_charges, vppl, rho_core, rho_nlcc, rho_nlcc_g, ks_env, ks_qmmm_env, wf_history, scf_env, local_particles, local_molecules, distribution_2d, dbcsr_dist, molecule_kind_set, molecule_set, subsys, cp_subsys, oce, local_rho_set, rho_atom_set, task_list, task_list_soft, rho0_atom_set, rho0_mpole, rhoz_set, ecoul_1c, rho0_s_rs, rho0_s_gs, do_kpoints, has_unit_metric, requires_mo_derivs, mo_derivs, mo_loc_history, nkind, natom, nelectron_total, nelectron_spin, efield, neighbor_list_id, linres_control, xas_env, virial, cp_ddapc_env, cp_ddapc_ewald, outer_scf_history, outer_scf_ihistory, x_data, et_coupling, dftb_potential, results, se_taper, se_store_int_env, se_nddo_mpole, se_nonbond_env, admm_env, lri_env, lri_density, exstate_env, ec_env, dispersion_env, gcp_env, vee, rho_external, external_vxc, mask, mp2_env, bs_env, kg_env, wanniercentres, atprop, ls_scf_env, do_transport, transport_env, v_hartree_rspace, s_mstruct_changed, rho_changed, potential_changed, forces_up_to_date, mscfg_env, almo_scf_env, gradient_history, variable_history, embed_pot, spin_embed_pot, polar_env, mos_last_converged, rhs)
Get the QUICKSTEP environment.
This module computes the basic integrals for the truncated coulomb operator.
Definition t_c_g0.F:57
subroutine, public init(nder, iunit, mepos, group)
...
Definition t_c_g0.F:1357
All kind of helpful little routines.
Definition util.F:14
stores some data used in wavefunction fitting
Definition admm_types.F:120
Provides all information about an atomic kind.
Type defining parameters related to the simulation cell.
Definition cell_types.F:55
type of a logger, at the moment it contains just a print level starting at which level it should be l...
stores some data used in construction of Kohn-Sham matrix
Definition hfx_types.F:509
stores all the informations relevant to an mpi environment