115 irep, use_virial, adiabatic_rescale_factor, resp_only, &
116 external_x_data, nspins)
120 TYPE(
dbcsr_p_type),
DIMENSION(:),
POINTER :: rho_ao_resp
123 INTEGER,
INTENT(IN) :: irep
124 LOGICAL,
INTENT(IN) :: use_virial
125 REAL(
dp),
INTENT(IN),
OPTIONAL :: adiabatic_rescale_factor
126 LOGICAL,
INTENT(IN),
OPTIONAL :: resp_only
127 TYPE(
hfx_type),
DIMENSION(:, :),
POINTER,
OPTIONAL :: external_x_data
128 INTEGER,
INTENT(IN),
OPTIONAL :: nspins
130 CHARACTER(LEN=*),
PARAMETER :: routinen =
'derivatives_four_center'
132 INTEGER :: atomic_offset_ac, atomic_offset_ad, atomic_offset_bc, atomic_offset_bd, bin, &
133 bits_max_val, buffer_left, buffer_size, buffer_start, cache_size, coord, current_counter, &
134 forces_map(4, 2), handle, handle_bin, handle_getp, handle_load, handle_main, i, i_atom, &
135 i_list_ij, i_list_kl, i_set_list_ij, i_set_list_ij_start, i_set_list_ij_stop, &
136 i_set_list_kl, i_set_list_kl_start, i_set_list_kl_stop, i_thread, iatom, iatom_block, &
137 iatom_end, iatom_start, ikind, iset, iw, j, j_atom, jatom, jatom_block, jatom_end, &
138 jatom_start, jkind, jset, k, k_atom, katom, katom_block, katom_end, katom_start
139 INTEGER :: kind_kind_idx, kkind, kset, l_atom, l_max, latom, latom_block, latom_end, &
140 latom_start, lkind, lset, max_am, max_pgf, max_set, my_bin_id, my_bin_size, my_thread_id, &
141 n_threads, natom, nbits, ncob, ncos_max, nints, nkimages, nkind, nneighbors, nseta, &
142 nsetb, nsgf_max, my_nspins, sgfb, shm_task_counter, shm_total_bins, sphi_a_u1, sphi_a_u2, &
143 sphi_a_u3, sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, &
144 sphi_d_u2, sphi_d_u3, swap_id, tmp_i4, unit_id
145 INTEGER(int_8) :: atom_block, counter, estimate_to_store_int, max_val_memory, &
146 mem_compression_counter, mem_eris, mem_max_val, my_current_counter, my_istart, &
147 n_processes, nblocks, ncpu, neris_incore, neris_onthefly, neris_tmp, neris_total, &
148 nprim_ints, shm_neris_incore, shm_neris_onthefly, shm_neris_total, shm_nprim_ints, &
149 shm_stor_count_max_val, shm_storage_counter_integrals, stor_count_max_val, &
150 storage_counter_integrals, tmp_block, tmp_i8(6)
151 INTEGER(int_8),
ALLOCATABLE,
DIMENSION(:) :: tmp_task_list_cost
152 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: atom_of_kind, kind_of, last_sgf_global, &
154 INTEGER,
DIMENSION(:),
POINTER :: la_max, la_min, lb_max, lb_min, lc_max, lc_min, ld_max, &
155 ld_min, npgfa, npgfb, npgfc, npgfd, nsgfa, nsgfb, nsgfc, nsgfd, shm_block_offset
156 INTEGER,
DIMENSION(:, :),
POINTER :: first_sgfb, nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d, &
157 offset_ac_set, offset_ad_set, offset_bc_set, offset_bd_set, shm_atomic_block_offset
158 INTEGER,
DIMENSION(:, :),
POINTER,
SAVE :: shm_is_assoc_atomic_block
159 INTEGER,
DIMENSION(:, :, :, :),
POINTER :: shm_set_offset
160 INTEGER,
SAVE :: shm_number_of_p_entries
161 LOGICAL :: bins_left, buffer_overflow, do_dynamic_load_balancing, do_it, do_periodic, &
162 do_print_load_balance_info, is_anti_symmetric, my_resp_only, screen_pmat_forces, &
163 treat_forces_in_core, use_disk_storage, with_resp_density
164 LOGICAL,
DIMENSION(:, :),
POINTER :: shm_atomic_pair_list
165 REAL(
dp) :: bintime_start, bintime_stop, cartesian_estimate, cartesian_estimate_virial, &
166 compression_factor, eps_schwarz, eps_storage,
fac, hf_fraction, ln_10, log10_eps_schwarz, &
167 log10_pmax, log_2, max_contraction_val, max_val1, max_val2, max_val2_set, &
168 my_adiabatic_rescale_factor, pmax_atom, pmax_blocks, pmax_entry, pmax_tmp, ra(3), rab2, &
169 rb(3), rc(3), rcd2, rd(3), spherical_estimate, spherical_estimate_virial, symm_fac, &
171 REAL(
dp),
ALLOCATABLE,
DIMENSION(:) :: ede_buffer1, ede_buffer2, ede_primitives_tmp, &
172 ede_primitives_tmp_virial, ede_work, ede_work2, ede_work2_virial, ede_work_forces, &
173 ede_work_virial, pac_buf, pac_buf_beta, pac_buf_resp, pac_buf_resp_beta, pad_buf, &
174 pad_buf_beta, pad_buf_resp, pad_buf_resp_beta, pbc_buf, pbc_buf_beta, pbc_buf_resp, &
175 pbc_buf_resp_beta, pbd_buf, pbd_buf_beta, pbd_buf_resp, pbd_buf_resp_beta
176 REAL(
dp),
ALLOCATABLE,
DIMENSION(:),
TARGET :: primitive_forces, primitive_forces_virial
177 REAL(
dp),
DIMENSION(:),
POINTER :: full_density_resp, &
178 full_density_resp_beta, t2
179 REAL(
dp),
DIMENSION(:, :),
POINTER :: full_density_alpha, full_density_beta, &
180 max_contraction, ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, shm_pmax_atom, shm_pmax_block, &
181 sphi_b, zeta, zetb, zetc, zetd
182 REAL(
dp),
DIMENSION(:, :, :),
POINTER :: sphi_a_ext_set, sphi_b_ext_set, &
183 sphi_c_ext_set, sphi_d_ext_set
184 REAL(
dp),
DIMENSION(:, :, :, :),
POINTER :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
186 REAL(kind=
dp) :: coeffs_kind_max0
203 TYPE(
hfx_p_kind),
DIMENSION(:),
POINTER :: shm_initial_p
204 TYPE(
hfx_pgf_list),
ALLOCATABLE,
DIMENSION(:) :: pgf_list_ij, pgf_list_kl
206 DIMENSION(:) :: pgf_product_list
209 POINTER :: screen_coeffs_kind, tmp_r_1, tmp_r_2, &
210 tmp_screen_pgf1, tmp_screen_pgf2
212 DIMENSION(:, :, :, :),
POINTER :: screen_coeffs_set
214 DIMENSION(:, :, :, :, :, :),
POINTER :: radii_pgf, screen_coeffs_pgf
217 TYPE(
hfx_type),
POINTER :: actual_x_data
218 TYPE(
hfx_type),
DIMENSION(:, :),
POINTER :: my_x_data
221 DIMENSION(:) :: set_list_ij, set_list_kl
226 NULLIFY (dft_control, admm_env)
228 with_resp_density = .false.
229 IF (
ASSOCIATED(rho_ao_resp)) with_resp_density = .true.
231 my_resp_only = .false.
232 IF (
PRESENT(resp_only)) my_resp_only = resp_only
238 particle_set=particle_set, &
239 atomic_kind_set=atomic_kind_set, &
241 dft_control=dft_control)
243 IF (
PRESENT(nspins))
THEN
246 my_nspins = dft_control%nspins
248 nkimages = dft_control%nimages
249 cpassert(nkimages == 1)
251 my_x_data => qs_env%x_data
252 IF (
PRESENT(external_x_data)) my_x_data => external_x_data
255 IF (
SIZE(particle_set, 1) == 1)
THEN
256 IF (.NOT. use_virial)
RETURN
259 CALL timeset(routinen, handle)
261 nkind =
SIZE(atomic_kind_set, 1)
264 l_max = max(l_max, maxval(my_x_data(1, 1)%basis_parameter(ikind)%lmax))
272 CALL open_file(unit_number=unit_id, file_name=my_x_data(1, 1)%potential_parameter%filename)
273 CALL init(l_max, unit_id, para_env%mepos, para_env)
284 shm_neris_onthefly = 0
285 shm_storage_counter_integrals = 0
287 shm_stor_count_max_val = 0
290 CALL get_qs_env(qs_env=qs_env, force=force, virial=virial)
292 my_adiabatic_rescale_factor = 1.0_dp
293 IF (
PRESENT(adiabatic_rescale_factor))
THEN
294 my_adiabatic_rescale_factor = adiabatic_rescale_factor
385 log_2 = log10(2.0_dp)
386 actual_x_data => my_x_data(irep, i_thread + 1)
387 do_periodic = actual_x_data%periodic_parameter%do_periodic
389 screening_parameter = actual_x_data%screening_parameter
390 general_parameter = actual_x_data%general_parameter
391 potential_parameter = actual_x_data%potential_parameter
392 basis_info => actual_x_data%basis_info
394 load_balance_parameter => actual_x_data%load_balance_parameter
395 basis_parameter => actual_x_data%basis_parameter
405 memory_parameter => actual_x_data%memory_parameter
407 cache_size = memory_parameter%cache_size
408 bits_max_val = memory_parameter%bits_max_val
410 use_disk_storage = .false.
413 atomic_kind_set=atomic_kind_set, &
414 particle_set=particle_set)
416 max_set = basis_info%max_set
417 max_am = basis_info%max_am
418 natom =
SIZE(particle_set, 1)
420 treat_forces_in_core = memory_parameter%treat_forces_in_core
422 hf_fraction = general_parameter%fraction
423 hf_fraction = hf_fraction*my_adiabatic_rescale_factor
424 eps_schwarz = screening_parameter%eps_schwarz_forces
425 IF (eps_schwarz < 0.0_dp)
THEN
429 IF (use_virial) eps_schwarz = eps_schwarz/actual_x_data%periodic_parameter%R_max_stress
430 log10_eps_schwarz = log10(eps_schwarz)
432 screen_pmat_forces = screening_parameter%do_p_screening_forces
434 IF (with_resp_density) screen_pmat_forces = .false.
436 eps_storage = eps_schwarz*memory_parameter%eps_storage_scaling
441 private_deriv = actual_x_data%lib_deriv
446 atom_of_kind=atom_of_kind, &
450 ALLOCATE (last_sgf_global(0:natom))
451 last_sgf_global(0) = 0
453 ikind = kind_of(iatom)
454 last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
457 ALLOCATE (max_contraction(max_set, natom))
459 max_contraction = 0.0_dp
462 jkind = kind_of(jatom)
463 lb_max => basis_parameter(jkind)%lmax
464 npgfb => basis_parameter(jkind)%npgf
465 nsetb = basis_parameter(jkind)%nset
466 first_sgfb => basis_parameter(jkind)%first_sgf
467 sphi_b => basis_parameter(jkind)%sphi
468 nsgfb => basis_parameter(jkind)%nsgf
471 ncob = npgfb(jset)*
ncoset(lb_max(jset))
472 sgfb = first_sgfb(1, jset)
475 max_contraction(jset, jatom) = maxval((/(sum(abs(sphi_b(1:ncob, i))), i=sgfb, sgfb + nsgfb(jset) - 1)/))
476 max_pgf = max(max_pgf, npgfb(jset))
480 ncpu = para_env%num_pe
481 n_processes = ncpu*n_threads
484 neris_total = 0_int_8
485 neris_incore = 0_int_8
486 neris_onthefly = 0_int_8
488 mem_max_val = 0_int_8
489 compression_factor = 0.0_dp
492 max_val_memory = 1_int_8
496 shm_is_assoc_atomic_block => actual_x_data%is_assoc_atomic_block
497 shm_number_of_p_entries = actual_x_data%number_of_p_entries
498 shm_atomic_block_offset => actual_x_data%atomic_block_offset
499 shm_set_offset => actual_x_data%set_offset
500 shm_block_offset => actual_x_data%block_offset
502 NULLIFY (full_density_alpha)
503 NULLIFY (full_density_beta)
504 ALLOCATE (full_density_alpha(shm_block_offset(ncpu + 1), nkimages))
507 CALL timeset(routinen//
"_getP", handle_getp)
508 CALL get_full_density(para_env, full_density_alpha(:, 1), rho_ao(1, 1)%matrix, shm_number_of_p_entries, &
510 kind_of, basis_parameter, get_max_vals_spin=.false., &
511 antisymmetric=is_anti_symmetric)
513 IF (with_resp_density)
THEN
514 NULLIFY (full_density_resp)
515 ALLOCATE (full_density_resp(shm_block_offset(ncpu + 1)))
516 CALL get_full_density(para_env, full_density_resp, rho_ao_resp(1)%matrix, shm_number_of_p_entries, &
518 kind_of, basis_parameter, get_max_vals_spin=.false., &
519 antisymmetric=is_anti_symmetric)
522 IF (my_nspins == 2)
THEN
523 ALLOCATE (full_density_beta(shm_block_offset(ncpu + 1), 1))
524 CALL get_full_density(para_env, full_density_beta(:, 1), rho_ao(2, 1)%matrix, shm_number_of_p_entries, &
526 kind_of, basis_parameter, get_max_vals_spin=.false., &
527 antisymmetric=is_anti_symmetric)
529 IF (with_resp_density)
THEN
530 NULLIFY (full_density_resp_beta)
531 ALLOCATE (full_density_resp_beta(shm_block_offset(ncpu + 1)))
532 CALL get_full_density(para_env, full_density_resp_beta, rho_ao_resp(2)%matrix, shm_number_of_p_entries, &
534 kind_of, basis_parameter, get_max_vals_spin=.false., &
535 antisymmetric=is_anti_symmetric)
539 CALL timestop(handle_getp)
543 IF (screen_pmat_forces)
THEN
544 NULLIFY (shm_initial_p)
545 shm_initial_p => actual_x_data%initial_p_forces
546 shm_pmax_atom => actual_x_data%pmax_atom_forces
547 IF (memory_parameter%recalc_forces)
THEN
549 actual_x_data%map_atom_to_kind_atom, &
550 actual_x_data%set_offset, &
551 actual_x_data%atomic_block_offset, &
553 full_density_alpha, full_density_beta, &
554 natom, kind_of, basis_parameter, &
555 nkind, actual_x_data%is_assoc_atomic_block)
561 IF (with_resp_density .AND. .NOT. my_resp_only)
THEN
562 full_density_alpha(:, 1) = full_density_alpha(:, 1) - full_density_resp
563 IF (my_nspins == 2)
THEN
564 full_density_beta(:, 1) = &
565 full_density_beta(:, 1) - full_density_resp_beta
570 screen_coeffs_set => actual_x_data%screen_funct_coeffs_set
571 screen_coeffs_kind => actual_x_data%screen_funct_coeffs_kind
572 screen_coeffs_pgf => actual_x_data%screen_funct_coeffs_pgf
573 radii_pgf => actual_x_data%pair_dist_radii_pgf
574 shm_pmax_block => actual_x_data%pmax_block
580 CALL timeset(routinen//
"_load", handle_load)
583 IF (actual_x_data%memory_parameter%recalc_forces)
THEN
584 IF (actual_x_data%b_first_load_balance_forces)
THEN
585 CALL hfx_load_balance(actual_x_data, eps_schwarz, particle_set, max_set, para_env, &
586 screen_coeffs_set, screen_coeffs_kind, &
587 shm_is_assoc_atomic_block, do_periodic, &
588 load_balance_parameter, kind_of, basis_parameter, shm_initial_p, &
589 shm_pmax_atom, i_thread, n_threads, &
590 cell, screen_pmat_forces, actual_x_data%map_atom_to_kind_atom, &
592 actual_x_data%b_first_load_balance_forces = .false.
595 load_balance_parameter, &
600 CALL timestop(handle_load)
607 ikind = kind_of(iatom)
608 nseta = basis_parameter(ikind)%nset
609 npgfa => basis_parameter(ikind)%npgf
610 la_max => basis_parameter(ikind)%lmax
611 nsgfa => basis_parameter(ikind)%nsgf
613 ncos_max = max(ncos_max,
ncoset(la_max(iset)))
614 nsgf_max = max(nsgf_max, nsgfa(iset))
619 ALLOCATE (primitive_forces(12*nsgf_max**4))
620 primitive_forces = 0.0_dp
623 nneighbors =
SIZE(actual_x_data%neighbor_cells)
624 ALLOCATE (pgf_list_ij(max_pgf**2))
625 ALLOCATE (pgf_list_kl(max_pgf**2))
628 ALLOCATE (pgf_product_list(tmp_i4))
629 ALLOCATE (nimages(max_pgf**2))
632 ALLOCATE (pgf_list_ij(i)%image_list(nneighbors))
633 ALLOCATE (pgf_list_kl(i)%image_list(nneighbors))
636 ALLOCATE (pbd_buf(nsgf_max**2))
637 ALLOCATE (pbc_buf(nsgf_max**2))
638 ALLOCATE (pad_buf(nsgf_max**2))
639 ALLOCATE (pac_buf(nsgf_max**2))
640 IF (with_resp_density)
THEN
641 ALLOCATE (pbd_buf_resp(nsgf_max**2))
642 ALLOCATE (pbc_buf_resp(nsgf_max**2))
643 ALLOCATE (pad_buf_resp(nsgf_max**2))
644 ALLOCATE (pac_buf_resp(nsgf_max**2))
646 IF (my_nspins == 2)
THEN
647 ALLOCATE (pbd_buf_beta(nsgf_max**2))
648 ALLOCATE (pbc_buf_beta(nsgf_max**2))
649 ALLOCATE (pad_buf_beta(nsgf_max**2))
650 ALLOCATE (pac_buf_beta(nsgf_max**2))
651 IF (with_resp_density)
THEN
652 ALLOCATE (pbd_buf_resp_beta(nsgf_max**2))
653 ALLOCATE (pbc_buf_resp_beta(nsgf_max**2))
654 ALLOCATE (pad_buf_resp_beta(nsgf_max**2))
655 ALLOCATE (pac_buf_resp_beta(nsgf_max**2))
659 ALLOCATE (ede_work(ncos_max**4*12))
660 ALLOCATE (ede_work2(ncos_max**4*12))
661 ALLOCATE (ede_work_forces(ncos_max**4*12))
662 ALLOCATE (ede_buffer1(ncos_max**4))
663 ALLOCATE (ede_buffer2(ncos_max**4))
664 ALLOCATE (ede_primitives_tmp(nsgf_max**4))
667 ALLOCATE (primitive_forces_virial(12*nsgf_max**4*3))
668 primitive_forces_virial = 0.0_dp
669 ALLOCATE (ede_work_virial(ncos_max**4*12*3))
670 ALLOCATE (ede_work2_virial(ncos_max**4*12*3))
671 ALLOCATE (ede_primitives_tmp_virial(nsgf_max**4))
675 ALLOCATE (primitive_forces_virial(1))
676 primitive_forces_virial = 0.0_dp
677 ALLOCATE (ede_work_virial(1))
678 ALLOCATE (ede_work2_virial(1))
679 ALLOCATE (ede_primitives_tmp_virial(1))
724 CALL timeset(routinen//
"_main", handle_main)
728 coeffs_kind_max0 = maxval(screen_coeffs_kind(:, :)%x(2))
729 ALLOCATE (set_list_ij((max_set*load_balance_parameter%block_size)**2))
730 ALLOCATE (set_list_kl((max_set*load_balance_parameter%block_size)**2))
734 IF (actual_x_data%memory_parameter%recalc_forces)
THEN
735 actual_x_data%distribution_forces%ram_counter = huge(distribution_forces%ram_counter)
736 memory_parameter%ram_counter_forces = huge(memory_parameter%ram_counter_forces)
739 IF (treat_forces_in_core)
THEN
740 buffer_overflow = .false.
742 buffer_overflow = .true.
745 do_dynamic_load_balancing = .true.
746 IF (n_threads == 1) do_dynamic_load_balancing = .false.
748 IF (do_dynamic_load_balancing)
THEN
749 my_bin_size =
SIZE(actual_x_data%distribution_forces)
754 IF (treat_forces_in_core)
THEN
756 IF (actual_x_data%memory_parameter%recalc_forces)
THEN
757 CALL dealloc_containers(actual_x_data%store_forces, memory_parameter%actual_memory_usage)
760 DO bin = 1, my_bin_size
761 maxval_container => actual_x_data%store_forces%maxval_container(bin)
762 integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
763 CALL hfx_init_container(maxval_container, memory_parameter%actual_memory_usage, .false.)
765 CALL hfx_init_container(integral_containers(i), memory_parameter%actual_memory_usage, .false.)
771 IF (.NOT. actual_x_data%memory_parameter%recalc_forces)
THEN
772 DO bin = 1, my_bin_size
773 maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
774 maxval_container => actual_x_data%store_forces%maxval_container(bin)
775 integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
776 integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
778 memory_parameter%actual_memory_usage, .false.)
781 memory_parameter%actual_memory_usage, .false.)
789 IF (do_dynamic_load_balancing)
THEN
793 shm_total_bins = shm_total_bins +
SIZE(my_x_data(irep, i)%distribution_forces)
795 ALLOCATE (tmp_task_list(shm_total_bins))
798 DO bin = 1,
SIZE(my_x_data(irep, i)%distribution_forces)
799 shm_task_counter = shm_task_counter + 1
800 tmp_task_list(shm_task_counter)%thread_id = i
801 tmp_task_list(shm_task_counter)%bin_id = bin
802 tmp_task_list(shm_task_counter)%cost = my_x_data(irep, i)%distribution_forces(bin)%cost
807 ALLOCATE (tmp_task_list_cost(shm_total_bins))
808 ALLOCATE (tmp_index(shm_total_bins))
810 DO i = 1, shm_total_bins
811 tmp_task_list_cost(i) = tmp_task_list(i)%cost
814 CALL sort(tmp_task_list_cost, shm_total_bins, tmp_index)
816 ALLOCATE (actual_x_data%task_list(shm_total_bins))
818 DO i = 1, shm_total_bins
819 actual_x_data%task_list(i) = tmp_task_list(tmp_index(shm_total_bins - i + 1))
822 shm_task_list => actual_x_data%task_list
825 DEALLOCATE (tmp_task_list_cost, tmp_index, tmp_task_list)
829 shm_pmax_block => actual_x_data%pmax_block
830 actual_x_data%pmax_block = 0.0_dp
831 IF (screen_pmat_forces)
THEN
832 DO iatom_block = 1,
SIZE(actual_x_data%blocks)
833 iatom_start = actual_x_data%blocks(iatom_block)%istart
834 iatom_end = actual_x_data%blocks(iatom_block)%iend
835 DO jatom_block = 1,
SIZE(actual_x_data%blocks)
836 jatom_start = actual_x_data%blocks(jatom_block)%istart
837 jatom_end = actual_x_data%blocks(jatom_block)%iend
838 shm_pmax_block(iatom_block, jatom_block) = maxval(shm_pmax_atom(iatom_start:iatom_end, jatom_start:jatom_end))
842 shm_atomic_pair_list => actual_x_data%atomic_pair_list_forces
843 IF (actual_x_data%memory_parameter%recalc_forces)
THEN
845 do_periodic, screen_coeffs_kind, coeffs_kind_max0, log10_eps_schwarz, cell, &
846 actual_x_data%blocks)
849 my_bin_size =
SIZE(actual_x_data%distribution_forces)
850 DO bin = 1, my_bin_size
851 actual_x_data%distribution_forces(bin)%time_forces = 0.0_dp
856 IF (treat_forces_in_core)
THEN
857 IF (my_bin_size > 0)
THEN
858 maxval_container => actual_x_data%store_forces%maxval_container(1)
859 maxval_cache => actual_x_data%store_forces%maxval_cache(1)
861 integral_containers => actual_x_data%store_forces%integral_containers(:, 1)
862 integral_caches => actual_x_data%store_forces%integral_caches(:, 1)
866 nblocks = load_balance_parameter%nblocks
872 IF (.NOT. do_dynamic_load_balancing)
THEN
874 IF (bin > my_bin_size)
THEN
880 distribution_forces => actual_x_data%distribution_forces(bin)
884 shm_task_counter = shm_task_counter + 1
885 IF (shm_task_counter <= shm_total_bins)
THEN
886 my_thread_id = shm_task_list(shm_task_counter)%thread_id
887 my_bin_id = shm_task_list(shm_task_counter)%bin_id
888 IF (treat_forces_in_core)
THEN
889 maxval_cache => my_x_data(irep, my_thread_id)%store_forces%maxval_cache(my_bin_id)
890 maxval_container => my_x_data(irep, my_thread_id)%store_forces%maxval_container(my_bin_id)
891 integral_caches => my_x_data(irep, my_thread_id)%store_forces%integral_caches(:, my_bin_id)
892 integral_containers => my_x_data(irep, my_thread_id)%store_forces%integral_containers(:, my_bin_id)
894 distribution_forces => my_x_data(irep, my_thread_id)%distribution_forces(my_bin_id)
897 IF (actual_x_data%memory_parameter%recalc_forces)
THEN
898 distribution_forces%ram_counter = huge(distribution_forces%ram_counter)
908 IF (.NOT. do_it) cycle
910 CALL timeset(routinen//
"_bin", handle_bin)
913 my_istart = distribution_forces%istart
914 my_current_counter = 0
915 IF (distribution_forces%number_of_atom_quartets == 0 .OR. &
916 my_istart == -1_int_8) my_istart = nblocks**4
917 atomic_blocks:
DO atom_block = my_istart, nblocks**4 - 1, n_processes
918 latom_block = int(
modulo(atom_block, nblocks)) + 1
919 tmp_block = atom_block/nblocks
920 katom_block = int(
modulo(tmp_block, nblocks)) + 1
921 IF (latom_block < katom_block) cycle
922 tmp_block = tmp_block/nblocks
923 jatom_block = int(
modulo(tmp_block, nblocks)) + 1
924 tmp_block = tmp_block/nblocks
925 iatom_block = int(
modulo(tmp_block, nblocks)) + 1
926 IF (jatom_block < iatom_block) cycle
927 my_current_counter = my_current_counter + 1
928 IF (my_current_counter > distribution_forces%number_of_atom_quartets)
EXIT atomic_blocks
930 iatom_start = actual_x_data%blocks(iatom_block)%istart
931 iatom_end = actual_x_data%blocks(iatom_block)%iend
932 jatom_start = actual_x_data%blocks(jatom_block)%istart
933 jatom_end = actual_x_data%blocks(jatom_block)%iend
934 katom_start = actual_x_data%blocks(katom_block)%istart
935 katom_end = actual_x_data%blocks(katom_block)%iend
936 latom_start = actual_x_data%blocks(latom_block)%istart
937 latom_end = actual_x_data%blocks(latom_block)%iend
939 pmax_blocks = max(shm_pmax_block(katom_block, iatom_block) + &
940 shm_pmax_block(latom_block, jatom_block), &
941 shm_pmax_block(latom_block, iatom_block) + &
942 shm_pmax_block(katom_block, jatom_block))
944 IF (2.0_dp*coeffs_kind_max0 + pmax_blocks < log10_eps_schwarz) cycle
946 CALL build_pair_list(natom, list_ij, set_list_ij, iatom_start, iatom_end, &
947 jatom_start, jatom_end, &
948 kind_of, basis_parameter, particle_set, &
949 do_periodic, screen_coeffs_set, screen_coeffs_kind, coeffs_kind_max0, &
950 log10_eps_schwarz, cell, pmax_blocks, shm_atomic_pair_list)
952 CALL build_pair_list(natom, list_kl, set_list_kl, katom_start, katom_end, &
953 latom_start, latom_end, &
954 kind_of, basis_parameter, particle_set, &
955 do_periodic, screen_coeffs_set, screen_coeffs_kind, coeffs_kind_max0, &
956 log10_eps_schwarz, cell, pmax_blocks, shm_atomic_pair_list)
958 DO i_list_ij = 1, list_ij%n_element
959 iatom = list_ij%elements(i_list_ij)%pair(1)
960 jatom = list_ij%elements(i_list_ij)%pair(2)
961 i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
962 i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
963 ikind = list_ij%elements(i_list_ij)%kind_pair(1)
964 jkind = list_ij%elements(i_list_ij)%kind_pair(2)
965 ra = list_ij%elements(i_list_ij)%r1
966 rb = list_ij%elements(i_list_ij)%r2
967 rab2 = list_ij%elements(i_list_ij)%dist2
969 la_max => basis_parameter(ikind)%lmax
970 la_min => basis_parameter(ikind)%lmin
971 npgfa => basis_parameter(ikind)%npgf
972 nseta = basis_parameter(ikind)%nset
973 zeta => basis_parameter(ikind)%zet
974 nsgfa => basis_parameter(ikind)%nsgf
975 sphi_a_ext => basis_parameter(ikind)%sphi_ext(:, :, :, :)
976 nsgfl_a => basis_parameter(ikind)%nsgfl
977 sphi_a_u1 = ubound(sphi_a_ext, 1)
978 sphi_a_u2 = ubound(sphi_a_ext, 2)
979 sphi_a_u3 = ubound(sphi_a_ext, 3)
981 lb_max => basis_parameter(jkind)%lmax
982 lb_min => basis_parameter(jkind)%lmin
983 npgfb => basis_parameter(jkind)%npgf
984 nsetb = basis_parameter(jkind)%nset
985 zetb => basis_parameter(jkind)%zet
986 nsgfb => basis_parameter(jkind)%nsgf
987 sphi_b_ext => basis_parameter(jkind)%sphi_ext(:, :, :, :)
988 nsgfl_b => basis_parameter(jkind)%nsgfl
989 sphi_b_u1 = ubound(sphi_b_ext, 1)
990 sphi_b_u2 = ubound(sphi_b_ext, 2)
991 sphi_b_u3 = ubound(sphi_b_ext, 3)
993 i_atom = atom_of_kind(iatom)
994 forces_map(1, 1) = ikind
995 forces_map(1, 2) = i_atom
996 j_atom = atom_of_kind(jatom)
997 forces_map(2, 1) = jkind
998 forces_map(2, 2) = j_atom
1000 DO i_list_kl = 1, list_kl%n_element
1001 katom = list_kl%elements(i_list_kl)%pair(1)
1002 latom = list_kl%elements(i_list_kl)%pair(2)
1006 IF (.NOT. (katom + latom <= iatom + jatom)) cycle
1007 IF (((iatom + jatom) .EQ. (katom + latom)) .AND. (katom < iatom)) cycle
1009 i_set_list_kl_start = list_kl%elements(i_list_kl)%set_bounds(1)
1010 i_set_list_kl_stop = list_kl%elements(i_list_kl)%set_bounds(2)
1011 kkind = list_kl%elements(i_list_kl)%kind_pair(1)
1012 lkind = list_kl%elements(i_list_kl)%kind_pair(2)
1013 rc = list_kl%elements(i_list_kl)%r1
1014 rd = list_kl%elements(i_list_kl)%r2
1015 rcd2 = list_kl%elements(i_list_kl)%dist2
1017 IF (screen_pmat_forces)
THEN
1018 pmax_atom = max(shm_pmax_atom(katom, iatom) + &
1019 shm_pmax_atom(latom, jatom), &
1020 shm_pmax_atom(latom, iatom) + &
1021 shm_pmax_atom(katom, jatom))
1026 IF ((screen_coeffs_kind(jkind, ikind)%x(1)*rab2 + &
1027 screen_coeffs_kind(jkind, ikind)%x(2)) + &
1028 (screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
1029 screen_coeffs_kind(lkind, kkind)%x(2)) + pmax_atom < log10_eps_schwarz) cycle
1031 IF (.NOT. (shm_is_assoc_atomic_block(latom, iatom) >= 1 .AND. &
1032 shm_is_assoc_atomic_block(katom, iatom) >= 1 .AND. &
1033 shm_is_assoc_atomic_block(katom, jatom) >= 1 .AND. &
1034 shm_is_assoc_atomic_block(latom, jatom) >= 1)) cycle
1035 k_atom = atom_of_kind(katom)
1036 forces_map(3, 1) = kkind
1037 forces_map(3, 2) = k_atom
1039 l_atom = atom_of_kind(latom)
1040 forces_map(4, 1) = lkind
1041 forces_map(4, 2) = l_atom
1043 IF (my_nspins == 1)
THEN
1044 fac = 0.25_dp*hf_fraction
1046 fac = 0.5_dp*hf_fraction
1050 IF (iatom == jatom) symm_fac = symm_fac*2.0_dp
1051 IF (katom == latom) symm_fac = symm_fac*2.0_dp
1052 IF (iatom == katom .AND. jatom == latom .AND. &
1053 iatom /= jatom .AND. katom /= latom) symm_fac = symm_fac*2.0_dp
1054 IF (iatom == katom .AND. iatom == jatom .AND. &
1055 katom == latom) symm_fac = symm_fac*2.0_dp
1057 symm_fac = 1.0_dp/symm_fac
1060 lc_max => basis_parameter(kkind)%lmax
1061 lc_min => basis_parameter(kkind)%lmin
1062 npgfc => basis_parameter(kkind)%npgf
1063 zetc => basis_parameter(kkind)%zet
1064 nsgfc => basis_parameter(kkind)%nsgf
1065 sphi_c_ext => basis_parameter(kkind)%sphi_ext(:, :, :, :)
1066 nsgfl_c => basis_parameter(kkind)%nsgfl
1067 sphi_c_u1 = ubound(sphi_c_ext, 1)
1068 sphi_c_u2 = ubound(sphi_c_ext, 2)
1069 sphi_c_u3 = ubound(sphi_c_ext, 3)
1071 ld_max => basis_parameter(lkind)%lmax
1072 ld_min => basis_parameter(lkind)%lmin
1073 npgfd => basis_parameter(lkind)%npgf
1074 zetd => basis_parameter(lkind)%zet
1075 nsgfd => basis_parameter(lkind)%nsgf
1076 sphi_d_ext => basis_parameter(lkind)%sphi_ext(:, :, :, :)
1077 nsgfl_d => basis_parameter(lkind)%nsgfl
1078 sphi_d_u1 = ubound(sphi_d_ext, 1)
1079 sphi_d_u2 = ubound(sphi_d_ext, 2)
1080 sphi_d_u3 = ubound(sphi_d_ext, 3)
1082 atomic_offset_bd = shm_atomic_block_offset(jatom, latom)
1083 atomic_offset_bc = shm_atomic_block_offset(jatom, katom)
1084 atomic_offset_ad = shm_atomic_block_offset(iatom, latom)
1085 atomic_offset_ac = shm_atomic_block_offset(iatom, katom)
1087 IF (jatom < latom)
THEN
1088 offset_bd_set => shm_set_offset(:, :, lkind, jkind)
1090 offset_bd_set => shm_set_offset(:, :, jkind, lkind)
1092 IF (jatom < katom)
THEN
1093 offset_bc_set => shm_set_offset(:, :, kkind, jkind)
1095 offset_bc_set => shm_set_offset(:, :, jkind, kkind)
1097 IF (iatom < latom)
THEN
1098 offset_ad_set => shm_set_offset(:, :, lkind, ikind)
1100 offset_ad_set => shm_set_offset(:, :, ikind, lkind)
1102 IF (iatom < katom)
THEN
1103 offset_ac_set => shm_set_offset(:, :, kkind, ikind)
1105 offset_ac_set => shm_set_offset(:, :, ikind, kkind)
1108 IF (screen_pmat_forces)
THEN
1110 kind_kind_idx = int(get_1d_idx(kkind, ikind, int(nkind,
int_8)))
1111 IF (ikind >= kkind)
THEN
1112 ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1113 actual_x_data%map_atom_to_kind_atom(katom), &
1114 actual_x_data%map_atom_to_kind_atom(iatom))
1116 ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1117 actual_x_data%map_atom_to_kind_atom(iatom), &
1118 actual_x_data%map_atom_to_kind_atom(katom))
1119 swap_id = swap_id + 1
1121 kind_kind_idx = int(get_1d_idx(lkind, jkind, int(nkind,
int_8)))
1122 IF (jkind >= lkind)
THEN
1123 ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1124 actual_x_data%map_atom_to_kind_atom(latom), &
1125 actual_x_data%map_atom_to_kind_atom(jatom))
1127 ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1128 actual_x_data%map_atom_to_kind_atom(jatom), &
1129 actual_x_data%map_atom_to_kind_atom(latom))
1130 swap_id = swap_id + 2
1132 kind_kind_idx = int(get_1d_idx(lkind, ikind, int(nkind,
int_8)))
1133 IF (ikind >= lkind)
THEN
1134 ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1135 actual_x_data%map_atom_to_kind_atom(latom), &
1136 actual_x_data%map_atom_to_kind_atom(iatom))
1138 ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1139 actual_x_data%map_atom_to_kind_atom(iatom), &
1140 actual_x_data%map_atom_to_kind_atom(latom))
1141 swap_id = swap_id + 4
1143 kind_kind_idx = int(get_1d_idx(kkind, jkind, int(nkind,
int_8)))
1144 IF (jkind >= kkind)
THEN
1145 ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1146 actual_x_data%map_atom_to_kind_atom(katom), &
1147 actual_x_data%map_atom_to_kind_atom(jatom))
1149 ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1150 actual_x_data%map_atom_to_kind_atom(jatom), &
1151 actual_x_data%map_atom_to_kind_atom(katom))
1152 swap_id = swap_id + 8
1158 IF (actual_x_data%memory_parameter%recalc_forces)
THEN
1159 IF (treat_forces_in_core)
THEN
1166 mem_compression_counter = 0
1169 tmp_i4 = my_x_data(irep, i)%memory_parameter%actual_memory_usage
1170 mem_compression_counter = mem_compression_counter + &
1171 tmp_i4*memory_parameter%cache_size
1173 IF (mem_compression_counter + memory_parameter%final_comp_counter_energy &
1174 > memory_parameter%max_compression_counter)
THEN
1175 buffer_overflow = .true.
1176 IF (do_dynamic_load_balancing)
THEN
1177 distribution_forces%ram_counter = counter
1179 memory_parameter%ram_counter_forces = counter
1182 counter = counter + 1
1183 buffer_overflow = .false.
1187 IF (treat_forces_in_core)
THEN
1188 IF (do_dynamic_load_balancing)
THEN
1189 IF (distribution_forces%ram_counter == counter)
THEN
1190 buffer_overflow = .true.
1192 counter = counter + 1
1193 buffer_overflow = .false.
1196 IF (memory_parameter%ram_counter_forces == counter)
THEN
1197 buffer_overflow = .true.
1199 counter = counter + 1
1200 buffer_overflow = .false.
1206 DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
1207 iset = set_list_ij(i_set_list_ij)%pair(1)
1208 jset = set_list_ij(i_set_list_ij)%pair(2)
1210 ncob = npgfb(jset)*
ncoset(lb_max(jset))
1211 max_val1 = screen_coeffs_set(jset, iset, jkind, ikind)%x(1)*rab2 + &
1212 screen_coeffs_set(jset, iset, jkind, ikind)%x(2)
1214 IF (max_val1 + (screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
1215 screen_coeffs_kind(lkind, kkind)%x(2)) + pmax_atom < log10_eps_schwarz) cycle
1216 sphi_a_ext_set => sphi_a_ext(:, :, :, iset)
1217 sphi_b_ext_set => sphi_b_ext(:, :, :, jset)
1219 DO i_set_list_kl = i_set_list_kl_start, i_set_list_kl_stop
1220 kset = set_list_kl(i_set_list_kl)%pair(1)
1221 lset = set_list_kl(i_set_list_kl)%pair(2)
1223 max_val2_set = (screen_coeffs_set(lset, kset, lkind, kkind)%x(1)*rcd2 + &
1224 screen_coeffs_set(lset, kset, lkind, kkind)%x(2))
1225 max_val2 = max_val1 + max_val2_set
1228 IF (max_val2 + pmax_atom < log10_eps_schwarz) cycle
1229 sphi_c_ext_set => sphi_c_ext(:, :, :, kset)
1230 sphi_d_ext_set => sphi_d_ext(:, :, :, lset)
1232 IF (screen_pmat_forces)
THEN
1233 CALL get_pmax_val(ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, &
1234 iset, jset, kset, lset, &
1237 log10_pmax = log_2 + pmax_tmp
1242 max_val2 = max_val2 + log10_pmax
1243 IF (max_val2 < log10_eps_schwarz) cycle
1245 pmax_entry = exp(log10_pmax*ln_10)
1247 current_counter = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)*12
1248 IF (buffer_overflow)
THEN
1249 neris_onthefly = neris_onthefly + current_counter
1253 IF (.NOT. buffer_overflow .AND. .NOT. actual_x_data%memory_parameter%recalc_forces)
THEN
1254 nints = current_counter
1256 maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1258 spherical_estimate = set_exponent(1.0_dp, estimate_to_store_int + 1)
1260 IF (.NOT. use_virial)
THEN
1261 IF (spherical_estimate*pmax_entry < eps_schwarz) cycle
1263 nbits = exponent(anint(spherical_estimate*pmax_entry/eps_storage)) + 1
1266 neris_incore = neris_incore + int(nints,
int_8)
1267 DO WHILE (buffer_left > 0)
1268 buffer_size = min(buffer_left, cache_size)
1270 buffer_size, nbits, &
1271 integral_caches(nbits), &
1272 integral_containers(nbits), &
1273 eps_storage, pmax_entry, &
1274 memory_parameter%actual_memory_usage, &
1276 buffer_left = buffer_left - buffer_size
1277 buffer_start = buffer_start + buffer_size
1281 IF (actual_x_data%memory_parameter%recalc_forces .OR. buffer_overflow)
THEN
1282 max_contraction_val = max_contraction(iset, iatom)* &
1283 max_contraction(jset, jatom)* &
1284 max_contraction(kset, katom)* &
1285 max_contraction(lset, latom)* &
1287 tmp_r_1 => radii_pgf(:, :, jset, iset, jkind, ikind)
1288 tmp_r_2 => radii_pgf(:, :, lset, kset, lkind, kkind)
1289 tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
1290 tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
1292 CALL forces4(private_deriv, ra, rb, rc, rd, npgfa(iset), npgfb(jset), &
1293 npgfc(kset), npgfd(lset), &
1294 la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
1295 lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
1296 nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1297 sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1298 sphi_b_u1, sphi_b_u2, sphi_b_u3, &
1299 sphi_c_u1, sphi_c_u2, sphi_c_u3, &
1300 sphi_d_u1, sphi_d_u2, sphi_d_u3, &
1301 zeta(1:npgfa(iset), iset), zetb(1:npgfb(jset), jset), &
1302 zetc(1:npgfc(kset), kset), zetd(1:npgfd(lset), lset), &
1304 potential_parameter, &
1305 actual_x_data%neighbor_cells, &
1306 screen_coeffs_set(jset, iset, jkind, ikind)%x, &
1307 screen_coeffs_set(lset, kset, lkind, kkind)%x, eps_schwarz, &
1308 max_contraction_val, cartesian_estimate, cell, neris_tmp, &
1309 log10_pmax, log10_eps_schwarz, &
1310 tmp_r_1, tmp_r_2, tmp_screen_pgf1, tmp_screen_pgf2, &
1311 pgf_list_ij, pgf_list_kl, pgf_product_list, &
1312 nsgfl_a(:, iset), nsgfl_b(:, jset), &
1313 nsgfl_c(:, kset), nsgfl_d(:, lset), &
1314 sphi_a_ext_set, sphi_b_ext_set, sphi_c_ext_set, sphi_d_ext_set, &
1315 ede_work, ede_work2, ede_work_forces, &
1316 ede_buffer1, ede_buffer2, ede_primitives_tmp, &
1317 nimages, do_periodic, use_virial, ede_work_virial, ede_work2_virial, &
1318 ede_primitives_tmp_virial, primitive_forces_virial, &
1319 cartesian_estimate_virial)
1321 nints = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)*12
1322 neris_total = neris_total + nints
1323 nprim_ints = nprim_ints + neris_tmp
1342 spherical_estimate = 0.0_dp
1344 spherical_estimate = max(spherical_estimate, abs(primitive_forces(i)))
1347 spherical_estimate_virial = 0.0_dp
1348 IF (use_virial)
THEN
1350 spherical_estimate_virial = max(spherical_estimate_virial, abs(primitive_forces_virial(i)))
1354 IF (spherical_estimate == 0.0_dp) spherical_estimate = tiny(spherical_estimate)
1355 estimate_to_store_int = exponent(spherical_estimate)
1356 estimate_to_store_int = max(estimate_to_store_int, -15_int_8)
1357 IF (.NOT. buffer_overflow .AND. actual_x_data%memory_parameter%recalc_forces)
THEN
1359 maxval_cache, maxval_container, &
1360 memory_parameter%actual_memory_usage, &
1361 use_disk_storage, max_val_memory)
1363 spherical_estimate = set_exponent(1.0_dp, estimate_to_store_int + 1)
1364 IF (.NOT. use_virial)
THEN
1365 IF (spherical_estimate*pmax_entry < eps_schwarz) cycle
1367 IF (.NOT. buffer_overflow)
THEN
1368 nbits = exponent(anint(spherical_estimate*pmax_entry/eps_storage)) + 1
1372 neris_incore = neris_incore + int(nints,
int_8)
1374 DO WHILE (buffer_left > 0)
1375 buffer_size = min(buffer_left, cache_size)
1377 buffer_size, nbits, &
1378 integral_caches(nbits), &
1379 integral_containers(nbits), &
1380 eps_storage, pmax_entry, &
1381 memory_parameter%actual_memory_usage, &
1383 buffer_left = buffer_left - buffer_size
1384 buffer_start = buffer_start + buffer_size
1389 IF (treat_forces_in_core)
THEN
1391 primitive_forces(i) = primitive_forces(i)*pmax_entry
1392 IF (abs(primitive_forces(i)) > eps_storage)
THEN
1393 primitive_forces(i) = anint(primitive_forces(i)/eps_storage,
dp)*eps_storage/pmax_entry
1395 primitive_forces(i) = 0.0_dp
1401 IF (with_resp_density)
THEN
1402 CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1403 full_density_alpha(:, 1), pbd_buf, pbc_buf, pad_buf, pac_buf, &
1404 iatom, jatom, katom, latom, &
1405 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1406 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1407 atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1408 CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1409 full_density_resp, pbd_buf_resp, pbc_buf_resp, pad_buf_resp, pac_buf_resp, &
1410 iatom, jatom, katom, latom, &
1411 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1412 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1413 atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1415 CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1416 full_density_alpha(:, 1), pbd_buf, pbc_buf, pad_buf, pac_buf, &
1417 iatom, jatom, katom, latom, &
1418 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1419 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1420 atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1422 IF (my_nspins == 2)
THEN
1423 IF (with_resp_density)
THEN
1424 CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1425 full_density_beta(:, 1), pbd_buf_beta, pbc_buf_beta, &
1426 pad_buf_beta, pac_buf_beta, iatom, jatom, katom, latom, &
1427 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1428 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1429 atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1430 CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1431 full_density_resp_beta, pbd_buf_resp_beta, pbc_buf_resp_beta, &
1432 pad_buf_resp_beta, pac_buf_resp_beta, iatom, jatom, katom, latom, &
1433 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1434 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1435 atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1437 CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1438 full_density_beta(:, 1), pbd_buf_beta, pbc_buf_beta, pad_buf_beta, &
1439 pac_buf_beta, iatom, jatom, katom, latom, &
1440 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1441 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1442 atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1445 IF (spherical_estimate*pmax_entry >= eps_schwarz)
THEN
1447 t2 => primitive_forces((coord - 1)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset) + 1: &
1448 coord*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset))
1450 IF (with_resp_density)
THEN
1451 CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1452 pbd_buf, pbc_buf, pad_buf, pac_buf,
fac, &
1453 t2, force, forces_map, coord, &
1454 pbd_buf_resp, pbc_buf_resp, pad_buf_resp, pac_buf_resp, &
1457 CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1458 pbd_buf, pbc_buf, pad_buf, pac_buf,
fac, &
1459 t2, force, forces_map, coord)
1461 IF (my_nspins == 2)
THEN
1462 IF (with_resp_density)
THEN
1463 CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1464 pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
1465 fac, t2, force, forces_map, coord, &
1466 pbd_buf_resp_beta, pbc_buf_resp_beta, &
1467 pad_buf_resp_beta, pac_buf_resp_beta, &
1470 CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1471 pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta,
fac, &
1472 t2, force, forces_map, coord)
1477 IF (use_virial .AND. spherical_estimate_virial*pmax_entry >= eps_schwarz)
THEN
1480 t2 => primitive_forces_virial( &
1481 ((i - 1)*12 + coord - 1)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset) + 1: &
1482 ((i - 1)*12 + coord)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset))
1483 IF (with_resp_density)
THEN
1484 CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1485 pbd_buf, pbc_buf, pad_buf, pac_buf,
fac, &
1486 t2, tmp_virial, coord, i, &
1487 pbd_buf_resp, pbc_buf_resp, pad_buf_resp, &
1488 pac_buf_resp, my_resp_only)
1490 CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1491 pbd_buf, pbc_buf, pad_buf, pac_buf,
fac, &
1492 t2, tmp_virial, coord, i)
1494 IF (my_nspins == 2)
THEN
1495 IF (with_resp_density)
THEN
1496 CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1497 pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
1498 fac, t2, tmp_virial, coord, i, &
1499 pbd_buf_resp_beta, pbc_buf_resp_beta, &
1500 pad_buf_resp_beta, pac_buf_resp_beta, &
1503 CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1504 pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
1505 fac, t2, tmp_virial, coord, i)
1516 END DO atomic_blocks
1518 distribution_forces%time_forces = bintime_stop - bintime_start
1520 CALL timestop(handle_bin)
1526 do_print_load_balance_info = .false.
1528 "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO"),
cp_p_file)
1531 IF (do_print_load_balance_info)
THEN
1535 extension=
".scfLog")
1543 "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO")
1547 IF (use_virial)
THEN
1552 virial%pv_fock_4c(i, j) = virial%pv_fock_4c(i, j) + tmp_virial(i, k)*cell%hmat(j, k)
1561 shm_neris_total = shm_neris_total + neris_total
1563 shm_neris_onthefly = shm_neris_onthefly + neris_onthefly
1565 shm_nprim_ints = shm_nprim_ints + nprim_ints
1567 storage_counter_integrals = memory_parameter%actual_memory_usage* &
1568 memory_parameter%cache_size
1569 stor_count_max_val = max_val_memory*memory_parameter%cache_size
1571 shm_storage_counter_integrals = shm_storage_counter_integrals + storage_counter_integrals
1573 shm_neris_incore = shm_neris_incore + neris_incore
1575 shm_stor_count_max_val = shm_stor_count_max_val + stor_count_max_val
1586 IF (actual_x_data%memory_parameter%recalc_forces)
THEN
1587 tmp_i8(1:6) = (/shm_storage_counter_integrals, shm_neris_onthefly, shm_neris_incore, &
1588 shm_neris_total, shm_nprim_ints, shm_stor_count_max_val/)
1589 CALL para_env%sum(tmp_i8)
1590 shm_storage_counter_integrals = tmp_i8(1)
1591 shm_neris_onthefly = tmp_i8(2)
1592 shm_neris_incore = tmp_i8(3)
1593 shm_neris_total = tmp_i8(4)
1594 shm_nprim_ints = tmp_i8(5)
1595 shm_stor_count_max_val = tmp_i8(6)
1596 mem_eris = (shm_storage_counter_integrals + 128*1024 - 1)/1024/128
1597 compression_factor = real(shm_neris_incore,
dp)/real(shm_storage_counter_integrals,
dp)
1598 mem_max_val = (shm_stor_count_max_val + 128*1024 - 1)/1024/128
1600 IF (shm_neris_incore == 0)
THEN
1602 compression_factor = 0.0_dp
1608 extension=
".scfLog")
1610 WRITE (unit=iw, fmt=
"(/,(T3,A,T65,I16))") &
1611 "HFX_MEM_INFO| Number of cart. primitive DERIV's calculated: ", shm_nprim_ints
1613 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
1614 "HFX_MEM_INFO| Number of sph. DERIV's calculated: ", shm_neris_total
1616 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
1617 "HFX_MEM_INFO| Number of sph. DERIV's stored in-core: ", shm_neris_incore
1619 WRITE (unit=iw, fmt=
"((T3,A,T62,I19))") &
1620 "HFX_MEM_INFO| Number of sph. DERIV's calculated on the fly: ", shm_neris_onthefly
1622 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
1623 "HFX_MEM_INFO| Total memory consumption DERIV's RAM [MiB]: ", mem_eris
1625 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
1626 "HFX_MEM_INFO| Whereof max-vals [MiB]: ", mem_max_val
1628 WRITE (unit=iw, fmt=
"((T3,A,T60,F21.2),/)") &
1629 "HFX_MEM_INFO| Total compression factor DERIV's RAM: ", compression_factor
1638 IF (do_dynamic_load_balancing)
THEN
1639 my_bin_size =
SIZE(actual_x_data%distribution_forces)
1644 IF (actual_x_data%memory_parameter%recalc_forces)
THEN
1645 IF (treat_forces_in_core)
THEN
1646 DO bin = 1, my_bin_size
1647 maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
1648 maxval_container => actual_x_data%store_forces%maxval_container(bin)
1649 integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
1650 integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
1651 CALL hfx_flush_last_cache(bits_max_val, maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1655 memory_parameter%actual_memory_usage, .false.)
1661 IF (treat_forces_in_core)
THEN
1662 DO bin = 1, my_bin_size
1663 maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
1664 maxval_container => actual_x_data%store_forces%maxval_container(bin)
1665 integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
1666 integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
1671 memory_parameter%actual_memory_usage, &
1677 IF (actual_x_data%memory_parameter%recalc_forces)
THEN
1678 actual_x_data%memory_parameter%recalc_forces = .false.
1683 CALL timestop(handle_main)
1686 DEALLOCATE (last_sgf_global)
1688 DEALLOCATE (full_density_alpha)
1689 IF (with_resp_density)
THEN
1690 DEALLOCATE (full_density_resp)
1692 IF (my_nspins == 2)
THEN
1693 DEALLOCATE (full_density_beta)
1694 IF (with_resp_density)
THEN
1695 DEALLOCATE (full_density_resp_beta)
1698 IF (do_dynamic_load_balancing)
THEN
1699 DEALLOCATE (actual_x_data%task_list)
1702 DEALLOCATE (primitive_forces)
1703 DEALLOCATE (atom_of_kind, kind_of)
1704 DEALLOCATE (max_contraction)
1705 DEALLOCATE (pbd_buf)
1706 DEALLOCATE (pbc_buf)
1707 DEALLOCATE (pad_buf)
1708 DEALLOCATE (pac_buf)
1710 IF (with_resp_density)
THEN
1711 DEALLOCATE (pbd_buf_resp)
1712 DEALLOCATE (pbc_buf_resp)
1713 DEALLOCATE (pad_buf_resp)
1714 DEALLOCATE (pac_buf_resp)
1717 DO i = 1, max_pgf**2
1718 DEALLOCATE (pgf_list_ij(i)%image_list)
1719 DEALLOCATE (pgf_list_kl(i)%image_list)
1722 DEALLOCATE (pgf_list_ij)
1723 DEALLOCATE (pgf_list_kl)
1724 DEALLOCATE (pgf_product_list)
1726 DEALLOCATE (set_list_ij, set_list_kl)
1728 DEALLOCATE (primitive_forces_virial)
1730 DEALLOCATE (ede_work, ede_work2, ede_work_forces, ede_buffer1, ede_buffer2, ede_primitives_tmp)
1732 DEALLOCATE (ede_work_virial, ede_work2_virial, ede_primitives_tmp_virial)
1734 DEALLOCATE (nimages)
1736 IF (my_nspins == 2)
THEN
1737 DEALLOCATE (pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta)
1738 IF (with_resp_density)
THEN
1739 DEALLOCATE (pbd_buf_resp_beta)
1740 DEALLOCATE (pbc_buf_resp_beta)
1741 DEALLOCATE (pad_buf_resp_beta)
1742 DEALLOCATE (pac_buf_resp_beta)
1754 CALL timestop(handle)