115 irep, use_virial, adiabatic_rescale_factor, resp_only, &
120 TYPE(
dbcsr_p_type),
DIMENSION(:),
POINTER :: rho_ao_resp
123 INTEGER,
INTENT(IN) :: irep
124 LOGICAL,
INTENT(IN) :: use_virial
125 REAL(
dp),
INTENT(IN),
OPTIONAL :: adiabatic_rescale_factor
126 LOGICAL,
INTENT(IN),
OPTIONAL :: resp_only
127 TYPE(
hfx_type),
DIMENSION(:, :),
POINTER,
OPTIONAL :: external_x_data
129 CHARACTER(LEN=*),
PARAMETER :: routinen =
'derivatives_four_center'
131 INTEGER :: atomic_offset_ac, atomic_offset_ad, atomic_offset_bc, atomic_offset_bd, bin, &
132 bits_max_val, buffer_left, buffer_size, buffer_start, cache_size, coord, current_counter, &
133 forces_map(4, 2), handle, handle_bin, handle_getp, handle_load, handle_main, i, i_atom, &
134 i_list_ij, i_list_kl, i_set_list_ij, i_set_list_ij_start, i_set_list_ij_stop, &
135 i_set_list_kl, i_set_list_kl_start, i_set_list_kl_stop, i_thread, iatom, iatom_block, &
136 iatom_end, iatom_start, ikind, iset, iw, j, j_atom, jatom, jatom_block, jatom_end, &
137 jatom_start, jkind, jset, k, k_atom, katom, katom_block, katom_end, katom_start
138 INTEGER :: kind_kind_idx, kkind, kset, l_atom, l_max, latom, latom_block, latom_end, &
139 latom_start, lkind, lset, max_am, max_pgf, max_set, my_bin_id, my_bin_size, my_thread_id, &
140 n_threads, natom, nbits, ncob, ncos_max, nints, nkimages, nkind, nneighbors, nseta, &
141 nsetb, nsgf_max, nspins, sgfb, shm_task_counter, shm_total_bins, sphi_a_u1, sphi_a_u2, &
142 sphi_a_u3, sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, &
143 sphi_d_u2, sphi_d_u3, swap_id, tmp_i4, unit_id
144 INTEGER(int_8) :: atom_block, counter, estimate_to_store_int, max_val_memory, &
145 mem_compression_counter, mem_eris, mem_max_val, my_current_counter, my_istart, &
146 n_processes, nblocks, ncpu, neris_incore, neris_onthefly, neris_tmp, neris_total, &
147 nprim_ints, shm_neris_incore, shm_neris_onthefly, shm_neris_total, shm_nprim_ints, &
148 shm_stor_count_max_val, shm_storage_counter_integrals, stor_count_max_val, &
149 storage_counter_integrals, tmp_block, tmp_i8(6)
150 INTEGER(int_8),
ALLOCATABLE,
DIMENSION(:) :: tmp_task_list_cost
151 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: atom_of_kind, kind_of, last_sgf_global, &
153 INTEGER,
DIMENSION(:),
POINTER :: la_max, la_min, lb_max, lb_min, lc_max, lc_min, ld_max, &
154 ld_min, npgfa, npgfb, npgfc, npgfd, nsgfa, nsgfb, nsgfc, nsgfd, shm_block_offset
155 INTEGER,
DIMENSION(:, :),
POINTER :: first_sgfb, nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d, &
156 offset_ac_set, offset_ad_set, offset_bc_set, offset_bd_set, shm_atomic_block_offset
157 INTEGER,
DIMENSION(:, :),
POINTER,
SAVE :: shm_is_assoc_atomic_block
158 INTEGER,
DIMENSION(:, :, :, :),
POINTER :: shm_set_offset
159 INTEGER,
SAVE :: shm_number_of_p_entries
160 LOGICAL :: bins_left, buffer_overflow, do_dynamic_load_balancing, do_it, do_periodic, &
161 do_print_load_balance_info, is_anti_symmetric, my_resp_only, screen_pmat_forces, &
162 treat_forces_in_core, use_disk_storage, with_resp_density
163 LOGICAL,
DIMENSION(:, :),
POINTER :: shm_atomic_pair_list
164 REAL(
dp) :: bintime_start, bintime_stop, cartesian_estimate, cartesian_estimate_virial, &
165 compression_factor, eps_schwarz, eps_storage,
fac, hf_fraction, ln_10, log10_eps_schwarz, &
166 log10_pmax, log_2, max_contraction_val, max_val1, max_val2, max_val2_set, &
167 my_adiabatic_rescale_factor, pmax_atom, pmax_blocks, pmax_entry, pmax_tmp, ra(3), rab2, &
168 rb(3), rc(3), rcd2, rd(3), spherical_estimate, spherical_estimate_virial, symm_fac, &
170 REAL(
dp),
ALLOCATABLE,
DIMENSION(:) :: ede_buffer1, ede_buffer2, ede_primitives_tmp, &
171 ede_primitives_tmp_virial, ede_work, ede_work2, ede_work2_virial, ede_work_forces, &
172 ede_work_virial, pac_buf, pac_buf_beta, pac_buf_resp, pac_buf_resp_beta, pad_buf, &
173 pad_buf_beta, pad_buf_resp, pad_buf_resp_beta, pbc_buf, pbc_buf_beta, pbc_buf_resp, &
174 pbc_buf_resp_beta, pbd_buf, pbd_buf_beta, pbd_buf_resp, pbd_buf_resp_beta
175 REAL(
dp),
ALLOCATABLE,
DIMENSION(:),
TARGET :: primitive_forces, primitive_forces_virial
176 REAL(
dp),
DIMENSION(:),
POINTER :: full_density_resp, &
177 full_density_resp_beta, t2
178 REAL(
dp),
DIMENSION(:, :),
POINTER :: full_density_alpha, full_density_beta, &
179 max_contraction, ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, shm_pmax_atom, shm_pmax_block, &
180 sphi_b, zeta, zetb, zetc, zetd
181 REAL(
dp),
DIMENSION(:, :, :),
POINTER :: sphi_a_ext_set, sphi_b_ext_set, &
182 sphi_c_ext_set, sphi_d_ext_set
183 REAL(
dp),
DIMENSION(:, :, :, :),
POINTER :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
185 REAL(kind=
dp) :: coeffs_kind_max0
202 TYPE(
hfx_p_kind),
DIMENSION(:),
POINTER :: shm_initial_p
203 TYPE(
hfx_pgf_list),
ALLOCATABLE,
DIMENSION(:) :: pgf_list_ij, pgf_list_kl
205 DIMENSION(:) :: pgf_product_list
208 POINTER :: screen_coeffs_kind, tmp_r_1, tmp_r_2, &
209 tmp_screen_pgf1, tmp_screen_pgf2
211 DIMENSION(:, :, :, :),
POINTER :: screen_coeffs_set
213 DIMENSION(:, :, :, :, :, :),
POINTER :: radii_pgf, screen_coeffs_pgf
216 TYPE(
hfx_type),
POINTER :: actual_x_data
217 TYPE(
hfx_type),
DIMENSION(:, :),
POINTER :: my_x_data
220 DIMENSION(:) :: set_list_ij, set_list_kl
225 NULLIFY (dft_control, admm_env)
227 with_resp_density = .false.
228 IF (
ASSOCIATED(rho_ao_resp)) with_resp_density = .true.
230 my_resp_only = .false.
231 IF (
PRESENT(resp_only)) my_resp_only = resp_only
237 particle_set=particle_set, &
238 atomic_kind_set=atomic_kind_set, &
240 dft_control=dft_control)
242 nspins = dft_control%nspins
243 nkimages = dft_control%nimages
244 cpassert(nkimages == 1)
246 my_x_data => qs_env%x_data
247 IF (
PRESENT(external_x_data)) my_x_data => external_x_data
250 IF (
SIZE(particle_set, 1) == 1)
THEN
251 IF (.NOT. use_virial)
RETURN
254 CALL timeset(routinen, handle)
256 nkind =
SIZE(atomic_kind_set, 1)
259 l_max = max(l_max, maxval(my_x_data(1, 1)%basis_parameter(ikind)%lmax))
267 CALL open_file(unit_number=unit_id, file_name=my_x_data(1, 1)%potential_parameter%filename)
268 CALL init(l_max, unit_id, para_env%mepos, para_env)
279 shm_neris_onthefly = 0
280 shm_storage_counter_integrals = 0
282 shm_stor_count_max_val = 0
285 CALL get_qs_env(qs_env=qs_env, force=force, virial=virial)
287 my_adiabatic_rescale_factor = 1.0_dp
288 IF (
PRESENT(adiabatic_rescale_factor))
THEN
289 my_adiabatic_rescale_factor = adiabatic_rescale_factor
380 log_2 = log10(2.0_dp)
381 actual_x_data => my_x_data(irep, i_thread + 1)
382 do_periodic = actual_x_data%periodic_parameter%do_periodic
384 screening_parameter = actual_x_data%screening_parameter
385 general_parameter = actual_x_data%general_parameter
386 potential_parameter = actual_x_data%potential_parameter
387 basis_info => actual_x_data%basis_info
389 load_balance_parameter => actual_x_data%load_balance_parameter
390 basis_parameter => actual_x_data%basis_parameter
400 memory_parameter => actual_x_data%memory_parameter
402 cache_size = memory_parameter%cache_size
403 bits_max_val = memory_parameter%bits_max_val
405 use_disk_storage = .false.
408 atomic_kind_set=atomic_kind_set, &
409 particle_set=particle_set)
411 max_set = basis_info%max_set
412 max_am = basis_info%max_am
413 natom =
SIZE(particle_set, 1)
415 treat_forces_in_core = memory_parameter%treat_forces_in_core
417 hf_fraction = general_parameter%fraction
418 hf_fraction = hf_fraction*my_adiabatic_rescale_factor
419 eps_schwarz = screening_parameter%eps_schwarz_forces
420 IF (eps_schwarz < 0.0_dp)
THEN
424 IF (use_virial) eps_schwarz = eps_schwarz/actual_x_data%periodic_parameter%R_max_stress
425 log10_eps_schwarz = log10(eps_schwarz)
427 screen_pmat_forces = screening_parameter%do_p_screening_forces
429 IF (with_resp_density) screen_pmat_forces = .false.
431 eps_storage = eps_schwarz*memory_parameter%eps_storage_scaling
436 private_deriv = actual_x_data%lib_deriv
441 atom_of_kind=atom_of_kind, &
445 ALLOCATE (last_sgf_global(0:natom))
446 last_sgf_global(0) = 0
448 ikind = kind_of(iatom)
449 last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
452 ALLOCATE (max_contraction(max_set, natom))
454 max_contraction = 0.0_dp
457 jkind = kind_of(jatom)
458 lb_max => basis_parameter(jkind)%lmax
459 npgfb => basis_parameter(jkind)%npgf
460 nsetb = basis_parameter(jkind)%nset
461 first_sgfb => basis_parameter(jkind)%first_sgf
462 sphi_b => basis_parameter(jkind)%sphi
463 nsgfb => basis_parameter(jkind)%nsgf
466 ncob = npgfb(jset)*
ncoset(lb_max(jset))
467 sgfb = first_sgfb(1, jset)
470 max_contraction(jset, jatom) = maxval((/(sum(abs(sphi_b(1:ncob, i))), i=sgfb, sgfb + nsgfb(jset) - 1)/))
471 max_pgf = max(max_pgf, npgfb(jset))
475 ncpu = para_env%num_pe
476 n_processes = ncpu*n_threads
479 neris_total = 0_int_8
480 neris_incore = 0_int_8
481 neris_onthefly = 0_int_8
483 mem_max_val = 0_int_8
484 compression_factor = 0.0_dp
487 max_val_memory = 1_int_8
491 shm_is_assoc_atomic_block => actual_x_data%is_assoc_atomic_block
492 shm_number_of_p_entries = actual_x_data%number_of_p_entries
493 shm_atomic_block_offset => actual_x_data%atomic_block_offset
494 shm_set_offset => actual_x_data%set_offset
495 shm_block_offset => actual_x_data%block_offset
497 NULLIFY (full_density_alpha)
498 NULLIFY (full_density_beta)
499 ALLOCATE (full_density_alpha(shm_block_offset(ncpu + 1), nkimages))
502 CALL timeset(routinen//
"_getP", handle_getp)
503 CALL get_full_density(para_env, full_density_alpha(:, 1), rho_ao(1, 1)%matrix, shm_number_of_p_entries, &
505 kind_of, basis_parameter, get_max_vals_spin=.false., &
506 antisymmetric=is_anti_symmetric)
508 IF (with_resp_density)
THEN
509 NULLIFY (full_density_resp)
510 ALLOCATE (full_density_resp(shm_block_offset(ncpu + 1)))
511 CALL get_full_density(para_env, full_density_resp, rho_ao_resp(1)%matrix, shm_number_of_p_entries, &
513 kind_of, basis_parameter, get_max_vals_spin=.false., &
514 antisymmetric=is_anti_symmetric)
517 IF (nspins == 2)
THEN
518 ALLOCATE (full_density_beta(shm_block_offset(ncpu + 1), 1))
519 CALL get_full_density(para_env, full_density_beta(:, 1), rho_ao(2, 1)%matrix, shm_number_of_p_entries, &
521 kind_of, basis_parameter, get_max_vals_spin=.false., &
522 antisymmetric=is_anti_symmetric)
524 IF (with_resp_density)
THEN
525 NULLIFY (full_density_resp_beta)
526 ALLOCATE (full_density_resp_beta(shm_block_offset(ncpu + 1)))
527 CALL get_full_density(para_env, full_density_resp_beta, rho_ao_resp(2)%matrix, shm_number_of_p_entries, &
529 kind_of, basis_parameter, get_max_vals_spin=.false., &
530 antisymmetric=is_anti_symmetric)
534 CALL timestop(handle_getp)
538 IF (screen_pmat_forces)
THEN
539 NULLIFY (shm_initial_p)
540 shm_initial_p => actual_x_data%initial_p_forces
541 shm_pmax_atom => actual_x_data%pmax_atom_forces
542 IF (memory_parameter%recalc_forces)
THEN
544 actual_x_data%map_atom_to_kind_atom, &
545 actual_x_data%set_offset, &
546 actual_x_data%atomic_block_offset, &
548 full_density_alpha, full_density_beta, &
549 natom, kind_of, basis_parameter, &
550 nkind, actual_x_data%is_assoc_atomic_block)
556 IF (with_resp_density .AND. .NOT. my_resp_only)
THEN
557 full_density_alpha(:, 1) = full_density_alpha(:, 1) - full_density_resp
558 IF (nspins == 2)
THEN
559 full_density_beta(:, 1) = &
560 full_density_beta(:, 1) - full_density_resp_beta
565 screen_coeffs_set => actual_x_data%screen_funct_coeffs_set
566 screen_coeffs_kind => actual_x_data%screen_funct_coeffs_kind
567 screen_coeffs_pgf => actual_x_data%screen_funct_coeffs_pgf
568 radii_pgf => actual_x_data%pair_dist_radii_pgf
569 shm_pmax_block => actual_x_data%pmax_block
575 CALL timeset(routinen//
"_load", handle_load)
578 IF (actual_x_data%memory_parameter%recalc_forces)
THEN
579 IF (actual_x_data%b_first_load_balance_forces)
THEN
580 CALL hfx_load_balance(actual_x_data, eps_schwarz, particle_set, max_set, para_env, &
581 screen_coeffs_set, screen_coeffs_kind, &
582 shm_is_assoc_atomic_block, do_periodic, &
583 load_balance_parameter, kind_of, basis_parameter, shm_initial_p, &
584 shm_pmax_atom, i_thread, n_threads, &
585 cell, screen_pmat_forces, actual_x_data%map_atom_to_kind_atom, &
587 actual_x_data%b_first_load_balance_forces = .false.
590 load_balance_parameter, &
595 CALL timestop(handle_load)
602 ikind = kind_of(iatom)
603 nseta = basis_parameter(ikind)%nset
604 npgfa => basis_parameter(ikind)%npgf
605 la_max => basis_parameter(ikind)%lmax
606 nsgfa => basis_parameter(ikind)%nsgf
608 ncos_max = max(ncos_max,
ncoset(la_max(iset)))
609 nsgf_max = max(nsgf_max, nsgfa(iset))
614 ALLOCATE (primitive_forces(12*nsgf_max**4))
615 primitive_forces = 0.0_dp
618 nneighbors =
SIZE(actual_x_data%neighbor_cells)
619 ALLOCATE (pgf_list_ij(max_pgf**2))
620 ALLOCATE (pgf_list_kl(max_pgf**2))
623 ALLOCATE (pgf_product_list(tmp_i4))
624 ALLOCATE (nimages(max_pgf**2))
627 ALLOCATE (pgf_list_ij(i)%image_list(nneighbors))
628 ALLOCATE (pgf_list_kl(i)%image_list(nneighbors))
631 ALLOCATE (pbd_buf(nsgf_max**2))
632 ALLOCATE (pbc_buf(nsgf_max**2))
633 ALLOCATE (pad_buf(nsgf_max**2))
634 ALLOCATE (pac_buf(nsgf_max**2))
635 IF (with_resp_density)
THEN
636 ALLOCATE (pbd_buf_resp(nsgf_max**2))
637 ALLOCATE (pbc_buf_resp(nsgf_max**2))
638 ALLOCATE (pad_buf_resp(nsgf_max**2))
639 ALLOCATE (pac_buf_resp(nsgf_max**2))
641 IF (nspins == 2)
THEN
642 ALLOCATE (pbd_buf_beta(nsgf_max**2))
643 ALLOCATE (pbc_buf_beta(nsgf_max**2))
644 ALLOCATE (pad_buf_beta(nsgf_max**2))
645 ALLOCATE (pac_buf_beta(nsgf_max**2))
646 IF (with_resp_density)
THEN
647 ALLOCATE (pbd_buf_resp_beta(nsgf_max**2))
648 ALLOCATE (pbc_buf_resp_beta(nsgf_max**2))
649 ALLOCATE (pad_buf_resp_beta(nsgf_max**2))
650 ALLOCATE (pac_buf_resp_beta(nsgf_max**2))
654 ALLOCATE (ede_work(ncos_max**4*12))
655 ALLOCATE (ede_work2(ncos_max**4*12))
656 ALLOCATE (ede_work_forces(ncos_max**4*12))
657 ALLOCATE (ede_buffer1(ncos_max**4))
658 ALLOCATE (ede_buffer2(ncos_max**4))
659 ALLOCATE (ede_primitives_tmp(nsgf_max**4))
662 ALLOCATE (primitive_forces_virial(12*nsgf_max**4*3))
663 primitive_forces_virial = 0.0_dp
664 ALLOCATE (ede_work_virial(ncos_max**4*12*3))
665 ALLOCATE (ede_work2_virial(ncos_max**4*12*3))
666 ALLOCATE (ede_primitives_tmp_virial(nsgf_max**4))
670 ALLOCATE (primitive_forces_virial(1))
671 primitive_forces_virial = 0.0_dp
672 ALLOCATE (ede_work_virial(1))
673 ALLOCATE (ede_work2_virial(1))
674 ALLOCATE (ede_primitives_tmp_virial(1))
719 CALL timeset(routinen//
"_main", handle_main)
723 coeffs_kind_max0 = maxval(screen_coeffs_kind(:, :)%x(2))
724 ALLOCATE (set_list_ij((max_set*load_balance_parameter%block_size)**2))
725 ALLOCATE (set_list_kl((max_set*load_balance_parameter%block_size)**2))
729 IF (actual_x_data%memory_parameter%recalc_forces)
THEN
730 actual_x_data%distribution_forces%ram_counter = huge(distribution_forces%ram_counter)
731 memory_parameter%ram_counter_forces = huge(memory_parameter%ram_counter_forces)
734 IF (treat_forces_in_core)
THEN
735 buffer_overflow = .false.
737 buffer_overflow = .true.
740 do_dynamic_load_balancing = .true.
741 IF (n_threads == 1) do_dynamic_load_balancing = .false.
743 IF (do_dynamic_load_balancing)
THEN
744 my_bin_size =
SIZE(actual_x_data%distribution_forces)
749 IF (treat_forces_in_core)
THEN
751 IF (actual_x_data%memory_parameter%recalc_forces)
THEN
752 CALL dealloc_containers(actual_x_data%store_forces, memory_parameter%actual_memory_usage)
755 DO bin = 1, my_bin_size
756 maxval_container => actual_x_data%store_forces%maxval_container(bin)
757 integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
758 CALL hfx_init_container(maxval_container, memory_parameter%actual_memory_usage, .false.)
760 CALL hfx_init_container(integral_containers(i), memory_parameter%actual_memory_usage, .false.)
766 IF (.NOT. actual_x_data%memory_parameter%recalc_forces)
THEN
767 DO bin = 1, my_bin_size
768 maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
769 maxval_container => actual_x_data%store_forces%maxval_container(bin)
770 integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
771 integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
773 memory_parameter%actual_memory_usage, .false.)
776 memory_parameter%actual_memory_usage, .false.)
784 IF (do_dynamic_load_balancing)
THEN
788 shm_total_bins = shm_total_bins +
SIZE(my_x_data(irep, i)%distribution_forces)
790 ALLOCATE (tmp_task_list(shm_total_bins))
793 DO bin = 1,
SIZE(my_x_data(irep, i)%distribution_forces)
794 shm_task_counter = shm_task_counter + 1
795 tmp_task_list(shm_task_counter)%thread_id = i
796 tmp_task_list(shm_task_counter)%bin_id = bin
797 tmp_task_list(shm_task_counter)%cost = my_x_data(irep, i)%distribution_forces(bin)%cost
802 ALLOCATE (tmp_task_list_cost(shm_total_bins))
803 ALLOCATE (tmp_index(shm_total_bins))
805 DO i = 1, shm_total_bins
806 tmp_task_list_cost(i) = tmp_task_list(i)%cost
809 CALL sort(tmp_task_list_cost, shm_total_bins, tmp_index)
811 ALLOCATE (actual_x_data%task_list(shm_total_bins))
813 DO i = 1, shm_total_bins
814 actual_x_data%task_list(i) = tmp_task_list(tmp_index(shm_total_bins - i + 1))
817 shm_task_list => actual_x_data%task_list
820 DEALLOCATE (tmp_task_list_cost, tmp_index, tmp_task_list)
824 shm_pmax_block => actual_x_data%pmax_block
825 actual_x_data%pmax_block = 0.0_dp
826 IF (screen_pmat_forces)
THEN
827 DO iatom_block = 1,
SIZE(actual_x_data%blocks)
828 iatom_start = actual_x_data%blocks(iatom_block)%istart
829 iatom_end = actual_x_data%blocks(iatom_block)%iend
830 DO jatom_block = 1,
SIZE(actual_x_data%blocks)
831 jatom_start = actual_x_data%blocks(jatom_block)%istart
832 jatom_end = actual_x_data%blocks(jatom_block)%iend
833 shm_pmax_block(iatom_block, jatom_block) = maxval(shm_pmax_atom(iatom_start:iatom_end, jatom_start:jatom_end))
837 shm_atomic_pair_list => actual_x_data%atomic_pair_list_forces
838 IF (actual_x_data%memory_parameter%recalc_forces)
THEN
840 do_periodic, screen_coeffs_kind, coeffs_kind_max0, log10_eps_schwarz, cell, &
841 actual_x_data%blocks)
844 my_bin_size =
SIZE(actual_x_data%distribution_forces)
845 DO bin = 1, my_bin_size
846 actual_x_data%distribution_forces(bin)%time_forces = 0.0_dp
851 IF (treat_forces_in_core)
THEN
852 IF (my_bin_size > 0)
THEN
853 maxval_container => actual_x_data%store_forces%maxval_container(1)
854 maxval_cache => actual_x_data%store_forces%maxval_cache(1)
856 integral_containers => actual_x_data%store_forces%integral_containers(:, 1)
857 integral_caches => actual_x_data%store_forces%integral_caches(:, 1)
861 nblocks = load_balance_parameter%nblocks
867 IF (.NOT. do_dynamic_load_balancing)
THEN
869 IF (bin > my_bin_size)
THEN
875 distribution_forces => actual_x_data%distribution_forces(bin)
879 shm_task_counter = shm_task_counter + 1
880 IF (shm_task_counter <= shm_total_bins)
THEN
881 my_thread_id = shm_task_list(shm_task_counter)%thread_id
882 my_bin_id = shm_task_list(shm_task_counter)%bin_id
883 IF (treat_forces_in_core)
THEN
884 maxval_cache => my_x_data(irep, my_thread_id)%store_forces%maxval_cache(my_bin_id)
885 maxval_container => my_x_data(irep, my_thread_id)%store_forces%maxval_container(my_bin_id)
886 integral_caches => my_x_data(irep, my_thread_id)%store_forces%integral_caches(:, my_bin_id)
887 integral_containers => my_x_data(irep, my_thread_id)%store_forces%integral_containers(:, my_bin_id)
889 distribution_forces => my_x_data(irep, my_thread_id)%distribution_forces(my_bin_id)
892 IF (actual_x_data%memory_parameter%recalc_forces)
THEN
893 distribution_forces%ram_counter = huge(distribution_forces%ram_counter)
903 IF (.NOT. do_it) cycle
905 CALL timeset(routinen//
"_bin", handle_bin)
908 my_istart = distribution_forces%istart
909 my_current_counter = 0
910 IF (distribution_forces%number_of_atom_quartets == 0 .OR. &
911 my_istart == -1_int_8) my_istart = nblocks**4
912 atomic_blocks:
DO atom_block = my_istart, nblocks**4 - 1, n_processes
913 latom_block = int(
modulo(atom_block, nblocks)) + 1
914 tmp_block = atom_block/nblocks
915 katom_block = int(
modulo(tmp_block, nblocks)) + 1
916 IF (latom_block < katom_block) cycle
917 tmp_block = tmp_block/nblocks
918 jatom_block = int(
modulo(tmp_block, nblocks)) + 1
919 tmp_block = tmp_block/nblocks
920 iatom_block = int(
modulo(tmp_block, nblocks)) + 1
921 IF (jatom_block < iatom_block) cycle
922 my_current_counter = my_current_counter + 1
923 IF (my_current_counter > distribution_forces%number_of_atom_quartets)
EXIT atomic_blocks
925 iatom_start = actual_x_data%blocks(iatom_block)%istart
926 iatom_end = actual_x_data%blocks(iatom_block)%iend
927 jatom_start = actual_x_data%blocks(jatom_block)%istart
928 jatom_end = actual_x_data%blocks(jatom_block)%iend
929 katom_start = actual_x_data%blocks(katom_block)%istart
930 katom_end = actual_x_data%blocks(katom_block)%iend
931 latom_start = actual_x_data%blocks(latom_block)%istart
932 latom_end = actual_x_data%blocks(latom_block)%iend
934 pmax_blocks = max(shm_pmax_block(katom_block, iatom_block) + &
935 shm_pmax_block(latom_block, jatom_block), &
936 shm_pmax_block(latom_block, iatom_block) + &
937 shm_pmax_block(katom_block, jatom_block))
939 IF (2.0_dp*coeffs_kind_max0 + pmax_blocks < log10_eps_schwarz) cycle
941 CALL build_pair_list(natom, list_ij, set_list_ij, iatom_start, iatom_end, &
942 jatom_start, jatom_end, &
943 kind_of, basis_parameter, particle_set, &
944 do_periodic, screen_coeffs_set, screen_coeffs_kind, coeffs_kind_max0, &
945 log10_eps_schwarz, cell, pmax_blocks, shm_atomic_pair_list)
947 CALL build_pair_list(natom, list_kl, set_list_kl, katom_start, katom_end, &
948 latom_start, latom_end, &
949 kind_of, basis_parameter, particle_set, &
950 do_periodic, screen_coeffs_set, screen_coeffs_kind, coeffs_kind_max0, &
951 log10_eps_schwarz, cell, pmax_blocks, shm_atomic_pair_list)
953 DO i_list_ij = 1, list_ij%n_element
954 iatom = list_ij%elements(i_list_ij)%pair(1)
955 jatom = list_ij%elements(i_list_ij)%pair(2)
956 i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
957 i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
958 ikind = list_ij%elements(i_list_ij)%kind_pair(1)
959 jkind = list_ij%elements(i_list_ij)%kind_pair(2)
960 ra = list_ij%elements(i_list_ij)%r1
961 rb = list_ij%elements(i_list_ij)%r2
962 rab2 = list_ij%elements(i_list_ij)%dist2
964 la_max => basis_parameter(ikind)%lmax
965 la_min => basis_parameter(ikind)%lmin
966 npgfa => basis_parameter(ikind)%npgf
967 nseta = basis_parameter(ikind)%nset
968 zeta => basis_parameter(ikind)%zet
969 nsgfa => basis_parameter(ikind)%nsgf
970 sphi_a_ext => basis_parameter(ikind)%sphi_ext(:, :, :, :)
971 nsgfl_a => basis_parameter(ikind)%nsgfl
972 sphi_a_u1 = ubound(sphi_a_ext, 1)
973 sphi_a_u2 = ubound(sphi_a_ext, 2)
974 sphi_a_u3 = ubound(sphi_a_ext, 3)
976 lb_max => basis_parameter(jkind)%lmax
977 lb_min => basis_parameter(jkind)%lmin
978 npgfb => basis_parameter(jkind)%npgf
979 nsetb = basis_parameter(jkind)%nset
980 zetb => basis_parameter(jkind)%zet
981 nsgfb => basis_parameter(jkind)%nsgf
982 sphi_b_ext => basis_parameter(jkind)%sphi_ext(:, :, :, :)
983 nsgfl_b => basis_parameter(jkind)%nsgfl
984 sphi_b_u1 = ubound(sphi_b_ext, 1)
985 sphi_b_u2 = ubound(sphi_b_ext, 2)
986 sphi_b_u3 = ubound(sphi_b_ext, 3)
988 i_atom = atom_of_kind(iatom)
989 forces_map(1, 1) = ikind
990 forces_map(1, 2) = i_atom
991 j_atom = atom_of_kind(jatom)
992 forces_map(2, 1) = jkind
993 forces_map(2, 2) = j_atom
995 DO i_list_kl = 1, list_kl%n_element
996 katom = list_kl%elements(i_list_kl)%pair(1)
997 latom = list_kl%elements(i_list_kl)%pair(2)
1001 IF (.NOT. (katom + latom <= iatom + jatom)) cycle
1002 IF (((iatom + jatom) .EQ. (katom + latom)) .AND. (katom < iatom)) cycle
1004 i_set_list_kl_start = list_kl%elements(i_list_kl)%set_bounds(1)
1005 i_set_list_kl_stop = list_kl%elements(i_list_kl)%set_bounds(2)
1006 kkind = list_kl%elements(i_list_kl)%kind_pair(1)
1007 lkind = list_kl%elements(i_list_kl)%kind_pair(2)
1008 rc = list_kl%elements(i_list_kl)%r1
1009 rd = list_kl%elements(i_list_kl)%r2
1010 rcd2 = list_kl%elements(i_list_kl)%dist2
1012 IF (screen_pmat_forces)
THEN
1013 pmax_atom = max(shm_pmax_atom(katom, iatom) + &
1014 shm_pmax_atom(latom, jatom), &
1015 shm_pmax_atom(latom, iatom) + &
1016 shm_pmax_atom(katom, jatom))
1021 IF ((screen_coeffs_kind(jkind, ikind)%x(1)*rab2 + &
1022 screen_coeffs_kind(jkind, ikind)%x(2)) + &
1023 (screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
1024 screen_coeffs_kind(lkind, kkind)%x(2)) + pmax_atom < log10_eps_schwarz) cycle
1026 IF (.NOT. (shm_is_assoc_atomic_block(latom, iatom) >= 1 .AND. &
1027 shm_is_assoc_atomic_block(katom, iatom) >= 1 .AND. &
1028 shm_is_assoc_atomic_block(katom, jatom) >= 1 .AND. &
1029 shm_is_assoc_atomic_block(latom, jatom) >= 1)) cycle
1030 k_atom = atom_of_kind(katom)
1031 forces_map(3, 1) = kkind
1032 forces_map(3, 2) = k_atom
1034 l_atom = atom_of_kind(latom)
1035 forces_map(4, 1) = lkind
1036 forces_map(4, 2) = l_atom
1038 IF (nspins == 1)
THEN
1039 fac = 0.25_dp*hf_fraction
1041 fac = 0.5_dp*hf_fraction
1045 IF (iatom == jatom) symm_fac = symm_fac*2.0_dp
1046 IF (katom == latom) symm_fac = symm_fac*2.0_dp
1047 IF (iatom == katom .AND. jatom == latom .AND. &
1048 iatom /= jatom .AND. katom /= latom) symm_fac = symm_fac*2.0_dp
1049 IF (iatom == katom .AND. iatom == jatom .AND. &
1050 katom == latom) symm_fac = symm_fac*2.0_dp
1052 symm_fac = 1.0_dp/symm_fac
1055 lc_max => basis_parameter(kkind)%lmax
1056 lc_min => basis_parameter(kkind)%lmin
1057 npgfc => basis_parameter(kkind)%npgf
1058 zetc => basis_parameter(kkind)%zet
1059 nsgfc => basis_parameter(kkind)%nsgf
1060 sphi_c_ext => basis_parameter(kkind)%sphi_ext(:, :, :, :)
1061 nsgfl_c => basis_parameter(kkind)%nsgfl
1062 sphi_c_u1 = ubound(sphi_c_ext, 1)
1063 sphi_c_u2 = ubound(sphi_c_ext, 2)
1064 sphi_c_u3 = ubound(sphi_c_ext, 3)
1066 ld_max => basis_parameter(lkind)%lmax
1067 ld_min => basis_parameter(lkind)%lmin
1068 npgfd => basis_parameter(lkind)%npgf
1069 zetd => basis_parameter(lkind)%zet
1070 nsgfd => basis_parameter(lkind)%nsgf
1071 sphi_d_ext => basis_parameter(lkind)%sphi_ext(:, :, :, :)
1072 nsgfl_d => basis_parameter(lkind)%nsgfl
1073 sphi_d_u1 = ubound(sphi_d_ext, 1)
1074 sphi_d_u2 = ubound(sphi_d_ext, 2)
1075 sphi_d_u3 = ubound(sphi_d_ext, 3)
1077 atomic_offset_bd = shm_atomic_block_offset(jatom, latom)
1078 atomic_offset_bc = shm_atomic_block_offset(jatom, katom)
1079 atomic_offset_ad = shm_atomic_block_offset(iatom, latom)
1080 atomic_offset_ac = shm_atomic_block_offset(iatom, katom)
1082 IF (jatom < latom)
THEN
1083 offset_bd_set => shm_set_offset(:, :, lkind, jkind)
1085 offset_bd_set => shm_set_offset(:, :, jkind, lkind)
1087 IF (jatom < katom)
THEN
1088 offset_bc_set => shm_set_offset(:, :, kkind, jkind)
1090 offset_bc_set => shm_set_offset(:, :, jkind, kkind)
1092 IF (iatom < latom)
THEN
1093 offset_ad_set => shm_set_offset(:, :, lkind, ikind)
1095 offset_ad_set => shm_set_offset(:, :, ikind, lkind)
1097 IF (iatom < katom)
THEN
1098 offset_ac_set => shm_set_offset(:, :, kkind, ikind)
1100 offset_ac_set => shm_set_offset(:, :, ikind, kkind)
1103 IF (screen_pmat_forces)
THEN
1105 kind_kind_idx = int(get_1d_idx(kkind, ikind, int(nkind,
int_8)))
1106 IF (ikind >= kkind)
THEN
1107 ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1108 actual_x_data%map_atom_to_kind_atom(katom), &
1109 actual_x_data%map_atom_to_kind_atom(iatom))
1111 ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1112 actual_x_data%map_atom_to_kind_atom(iatom), &
1113 actual_x_data%map_atom_to_kind_atom(katom))
1114 swap_id = swap_id + 1
1116 kind_kind_idx = int(get_1d_idx(lkind, jkind, int(nkind,
int_8)))
1117 IF (jkind >= lkind)
THEN
1118 ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1119 actual_x_data%map_atom_to_kind_atom(latom), &
1120 actual_x_data%map_atom_to_kind_atom(jatom))
1122 ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1123 actual_x_data%map_atom_to_kind_atom(jatom), &
1124 actual_x_data%map_atom_to_kind_atom(latom))
1125 swap_id = swap_id + 2
1127 kind_kind_idx = int(get_1d_idx(lkind, ikind, int(nkind,
int_8)))
1128 IF (ikind >= lkind)
THEN
1129 ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1130 actual_x_data%map_atom_to_kind_atom(latom), &
1131 actual_x_data%map_atom_to_kind_atom(iatom))
1133 ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1134 actual_x_data%map_atom_to_kind_atom(iatom), &
1135 actual_x_data%map_atom_to_kind_atom(latom))
1136 swap_id = swap_id + 4
1138 kind_kind_idx = int(get_1d_idx(kkind, jkind, int(nkind,
int_8)))
1139 IF (jkind >= kkind)
THEN
1140 ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1141 actual_x_data%map_atom_to_kind_atom(katom), &
1142 actual_x_data%map_atom_to_kind_atom(jatom))
1144 ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1145 actual_x_data%map_atom_to_kind_atom(jatom), &
1146 actual_x_data%map_atom_to_kind_atom(katom))
1147 swap_id = swap_id + 8
1153 IF (actual_x_data%memory_parameter%recalc_forces)
THEN
1154 IF (treat_forces_in_core)
THEN
1161 mem_compression_counter = 0
1164 tmp_i4 = my_x_data(irep, i)%memory_parameter%actual_memory_usage
1165 mem_compression_counter = mem_compression_counter + &
1166 tmp_i4*memory_parameter%cache_size
1168 IF (mem_compression_counter + memory_parameter%final_comp_counter_energy &
1169 > memory_parameter%max_compression_counter)
THEN
1170 buffer_overflow = .true.
1171 IF (do_dynamic_load_balancing)
THEN
1172 distribution_forces%ram_counter = counter
1174 memory_parameter%ram_counter_forces = counter
1177 counter = counter + 1
1178 buffer_overflow = .false.
1182 IF (treat_forces_in_core)
THEN
1183 IF (do_dynamic_load_balancing)
THEN
1184 IF (distribution_forces%ram_counter == counter)
THEN
1185 buffer_overflow = .true.
1187 counter = counter + 1
1188 buffer_overflow = .false.
1191 IF (memory_parameter%ram_counter_forces == counter)
THEN
1192 buffer_overflow = .true.
1194 counter = counter + 1
1195 buffer_overflow = .false.
1201 DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
1202 iset = set_list_ij(i_set_list_ij)%pair(1)
1203 jset = set_list_ij(i_set_list_ij)%pair(2)
1205 ncob = npgfb(jset)*
ncoset(lb_max(jset))
1206 max_val1 = screen_coeffs_set(jset, iset, jkind, ikind)%x(1)*rab2 + &
1207 screen_coeffs_set(jset, iset, jkind, ikind)%x(2)
1209 IF (max_val1 + (screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
1210 screen_coeffs_kind(lkind, kkind)%x(2)) + pmax_atom < log10_eps_schwarz) cycle
1211 sphi_a_ext_set => sphi_a_ext(:, :, :, iset)
1212 sphi_b_ext_set => sphi_b_ext(:, :, :, jset)
1214 DO i_set_list_kl = i_set_list_kl_start, i_set_list_kl_stop
1215 kset = set_list_kl(i_set_list_kl)%pair(1)
1216 lset = set_list_kl(i_set_list_kl)%pair(2)
1218 max_val2_set = (screen_coeffs_set(lset, kset, lkind, kkind)%x(1)*rcd2 + &
1219 screen_coeffs_set(lset, kset, lkind, kkind)%x(2))
1220 max_val2 = max_val1 + max_val2_set
1223 IF (max_val2 + pmax_atom < log10_eps_schwarz) cycle
1224 sphi_c_ext_set => sphi_c_ext(:, :, :, kset)
1225 sphi_d_ext_set => sphi_d_ext(:, :, :, lset)
1227 IF (screen_pmat_forces)
THEN
1228 CALL get_pmax_val(ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, &
1229 iset, jset, kset, lset, &
1232 log10_pmax = log_2 + pmax_tmp
1237 max_val2 = max_val2 + log10_pmax
1238 IF (max_val2 < log10_eps_schwarz) cycle
1240 pmax_entry = exp(log10_pmax*ln_10)
1242 current_counter = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)*12
1243 IF (buffer_overflow)
THEN
1244 neris_onthefly = neris_onthefly + current_counter
1248 IF (.NOT. buffer_overflow .AND. .NOT. actual_x_data%memory_parameter%recalc_forces)
THEN
1249 nints = current_counter
1251 maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1253 spherical_estimate = set_exponent(1.0_dp, estimate_to_store_int + 1)
1255 IF (.NOT. use_virial)
THEN
1256 IF (spherical_estimate*pmax_entry < eps_schwarz) cycle
1258 nbits = exponent(anint(spherical_estimate*pmax_entry/eps_storage)) + 1
1261 neris_incore = neris_incore + int(nints,
int_8)
1262 DO WHILE (buffer_left > 0)
1263 buffer_size = min(buffer_left, cache_size)
1265 buffer_size, nbits, &
1266 integral_caches(nbits), &
1267 integral_containers(nbits), &
1268 eps_storage, pmax_entry, &
1269 memory_parameter%actual_memory_usage, &
1271 buffer_left = buffer_left - buffer_size
1272 buffer_start = buffer_start + buffer_size
1276 IF (actual_x_data%memory_parameter%recalc_forces .OR. buffer_overflow)
THEN
1277 max_contraction_val = max_contraction(iset, iatom)* &
1278 max_contraction(jset, jatom)* &
1279 max_contraction(kset, katom)* &
1280 max_contraction(lset, latom)* &
1282 tmp_r_1 => radii_pgf(:, :, jset, iset, jkind, ikind)
1283 tmp_r_2 => radii_pgf(:, :, lset, kset, lkind, kkind)
1284 tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
1285 tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
1287 CALL forces4(private_deriv, ra, rb, rc, rd, npgfa(iset), npgfb(jset), &
1288 npgfc(kset), npgfd(lset), &
1289 la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
1290 lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
1291 nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1292 sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1293 sphi_b_u1, sphi_b_u2, sphi_b_u3, &
1294 sphi_c_u1, sphi_c_u2, sphi_c_u3, &
1295 sphi_d_u1, sphi_d_u2, sphi_d_u3, &
1296 zeta(1:npgfa(iset), iset), zetb(1:npgfb(jset), jset), &
1297 zetc(1:npgfc(kset), kset), zetd(1:npgfd(lset), lset), &
1299 potential_parameter, &
1300 actual_x_data%neighbor_cells, &
1301 screen_coeffs_set(jset, iset, jkind, ikind)%x, &
1302 screen_coeffs_set(lset, kset, lkind, kkind)%x, eps_schwarz, &
1303 max_contraction_val, cartesian_estimate, cell, neris_tmp, &
1304 log10_pmax, log10_eps_schwarz, &
1305 tmp_r_1, tmp_r_2, tmp_screen_pgf1, tmp_screen_pgf2, &
1306 pgf_list_ij, pgf_list_kl, pgf_product_list, &
1307 nsgfl_a(:, iset), nsgfl_b(:, jset), &
1308 nsgfl_c(:, kset), nsgfl_d(:, lset), &
1309 sphi_a_ext_set, sphi_b_ext_set, sphi_c_ext_set, sphi_d_ext_set, &
1310 ede_work, ede_work2, ede_work_forces, &
1311 ede_buffer1, ede_buffer2, ede_primitives_tmp, &
1312 nimages, do_periodic, use_virial, ede_work_virial, ede_work2_virial, &
1313 ede_primitives_tmp_virial, primitive_forces_virial, &
1314 cartesian_estimate_virial)
1316 nints = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)*12
1317 neris_total = neris_total + nints
1318 nprim_ints = nprim_ints + neris_tmp
1337 spherical_estimate = 0.0_dp
1339 spherical_estimate = max(spherical_estimate, abs(primitive_forces(i)))
1342 IF (use_virial)
THEN
1343 spherical_estimate_virial = 0.0_dp
1345 spherical_estimate_virial = max(spherical_estimate_virial, abs(primitive_forces_virial(i)))
1349 IF (spherical_estimate == 0.0_dp) spherical_estimate = tiny(spherical_estimate)
1350 estimate_to_store_int = exponent(spherical_estimate)
1351 estimate_to_store_int = max(estimate_to_store_int, -15_int_8)
1352 IF (.NOT. buffer_overflow .AND. actual_x_data%memory_parameter%recalc_forces)
THEN
1354 maxval_cache, maxval_container, &
1355 memory_parameter%actual_memory_usage, &
1356 use_disk_storage, max_val_memory)
1358 spherical_estimate = set_exponent(1.0_dp, estimate_to_store_int + 1)
1359 IF (.NOT. use_virial)
THEN
1360 IF (spherical_estimate*pmax_entry < eps_schwarz) cycle
1362 IF (.NOT. buffer_overflow)
THEN
1363 nbits = exponent(anint(spherical_estimate*pmax_entry/eps_storage)) + 1
1367 neris_incore = neris_incore + int(nints,
int_8)
1369 DO WHILE (buffer_left > 0)
1370 buffer_size = min(buffer_left, cache_size)
1372 buffer_size, nbits, &
1373 integral_caches(nbits), &
1374 integral_containers(nbits), &
1375 eps_storage, pmax_entry, &
1376 memory_parameter%actual_memory_usage, &
1378 buffer_left = buffer_left - buffer_size
1379 buffer_start = buffer_start + buffer_size
1384 IF (treat_forces_in_core)
THEN
1386 primitive_forces(i) = primitive_forces(i)*pmax_entry
1387 IF (abs(primitive_forces(i)) > eps_storage)
THEN
1388 primitive_forces(i) = anint(primitive_forces(i)/eps_storage,
dp)*eps_storage/pmax_entry
1390 primitive_forces(i) = 0.0_dp
1396 IF (with_resp_density)
THEN
1397 CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1398 full_density_alpha(:, 1), pbd_buf, pbc_buf, pad_buf, pac_buf, &
1399 iatom, jatom, katom, latom, &
1400 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1401 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1402 atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1403 CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1404 full_density_resp, pbd_buf_resp, pbc_buf_resp, pad_buf_resp, pac_buf_resp, &
1405 iatom, jatom, katom, latom, &
1406 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1407 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1408 atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1410 CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1411 full_density_alpha(:, 1), pbd_buf, pbc_buf, pad_buf, pac_buf, &
1412 iatom, jatom, katom, latom, &
1413 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1414 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1415 atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1417 IF (nspins == 2)
THEN
1418 IF (with_resp_density)
THEN
1419 CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1420 full_density_beta(:, 1), pbd_buf_beta, pbc_buf_beta, &
1421 pad_buf_beta, pac_buf_beta, iatom, jatom, katom, latom, &
1422 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1423 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1424 atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1425 CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1426 full_density_resp_beta, pbd_buf_resp_beta, pbc_buf_resp_beta, &
1427 pad_buf_resp_beta, pac_buf_resp_beta, iatom, jatom, katom, latom, &
1428 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1429 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1430 atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1432 CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1433 full_density_beta(:, 1), pbd_buf_beta, pbc_buf_beta, pad_buf_beta, &
1434 pac_buf_beta, iatom, jatom, katom, latom, &
1435 iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1436 offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1437 atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1440 IF (spherical_estimate*pmax_entry >= eps_schwarz)
THEN
1442 t2 => primitive_forces((coord - 1)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset) + 1: &
1443 coord*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset))
1445 IF (with_resp_density)
THEN
1446 CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1447 pbd_buf, pbc_buf, pad_buf, pac_buf,
fac, &
1448 t2, force, forces_map, coord, &
1449 pbd_buf_resp, pbc_buf_resp, pad_buf_resp, pac_buf_resp, &
1452 CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1453 pbd_buf, pbc_buf, pad_buf, pac_buf,
fac, &
1454 t2, force, forces_map, coord)
1456 IF (nspins == 2)
THEN
1457 IF (with_resp_density)
THEN
1458 CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1459 pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
1460 fac, t2, force, forces_map, coord, &
1461 pbd_buf_resp_beta, pbc_buf_resp_beta, &
1462 pad_buf_resp_beta, pac_buf_resp_beta, &
1465 CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1466 pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta,
fac, &
1467 t2, force, forces_map, coord)
1472 IF (use_virial .AND. spherical_estimate_virial*pmax_entry >= eps_schwarz)
THEN
1475 t2 => primitive_forces_virial( &
1476 ((i - 1)*12 + coord - 1)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset) + 1: &
1477 ((i - 1)*12 + coord)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset))
1478 IF (with_resp_density)
THEN
1479 CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1480 pbd_buf, pbc_buf, pad_buf, pac_buf,
fac, &
1481 t2, tmp_virial, coord, i, &
1482 pbd_buf_resp, pbc_buf_resp, pad_buf_resp, &
1483 pac_buf_resp, my_resp_only)
1485 CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1486 pbd_buf, pbc_buf, pad_buf, pac_buf,
fac, &
1487 t2, tmp_virial, coord, i)
1489 IF (nspins == 2)
THEN
1490 IF (with_resp_density)
THEN
1491 CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1492 pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
1493 fac, t2, tmp_virial, coord, i, &
1494 pbd_buf_resp_beta, pbc_buf_resp_beta, &
1495 pad_buf_resp_beta, pac_buf_resp_beta, &
1498 CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1499 pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
1500 fac, t2, tmp_virial, coord, i)
1511 END DO atomic_blocks
1513 distribution_forces%time_forces = bintime_stop - bintime_start
1515 CALL timestop(handle_bin)
1521 do_print_load_balance_info = .false.
1523 "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO"),
cp_p_file)
1526 IF (do_print_load_balance_info)
THEN
1530 extension=
".scfLog")
1538 "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO")
1542 IF (use_virial)
THEN
1547 virial%pv_fock_4c(i, j) = virial%pv_fock_4c(i, j) + tmp_virial(i, k)*cell%hmat(j, k)
1556 shm_neris_total = shm_neris_total + neris_total
1558 shm_neris_onthefly = shm_neris_onthefly + neris_onthefly
1560 shm_nprim_ints = shm_nprim_ints + nprim_ints
1562 storage_counter_integrals = memory_parameter%actual_memory_usage* &
1563 memory_parameter%cache_size
1564 stor_count_max_val = max_val_memory*memory_parameter%cache_size
1566 shm_storage_counter_integrals = shm_storage_counter_integrals + storage_counter_integrals
1568 shm_neris_incore = shm_neris_incore + neris_incore
1570 shm_stor_count_max_val = shm_stor_count_max_val + stor_count_max_val
1581 IF (actual_x_data%memory_parameter%recalc_forces)
THEN
1582 tmp_i8(1:6) = (/shm_storage_counter_integrals, shm_neris_onthefly, shm_neris_incore, &
1583 shm_neris_total, shm_nprim_ints, shm_stor_count_max_val/)
1584 CALL para_env%sum(tmp_i8)
1585 shm_storage_counter_integrals = tmp_i8(1)
1586 shm_neris_onthefly = tmp_i8(2)
1587 shm_neris_incore = tmp_i8(3)
1588 shm_neris_total = tmp_i8(4)
1589 shm_nprim_ints = tmp_i8(5)
1590 shm_stor_count_max_val = tmp_i8(6)
1591 mem_eris = (shm_storage_counter_integrals + 128*1024 - 1)/1024/128
1592 compression_factor = real(shm_neris_incore,
dp)/real(shm_storage_counter_integrals,
dp)
1593 mem_max_val = (shm_stor_count_max_val + 128*1024 - 1)/1024/128
1595 IF (shm_neris_incore == 0)
THEN
1597 compression_factor = 0.0_dp
1603 extension=
".scfLog")
1605 WRITE (unit=iw, fmt=
"(/,(T3,A,T65,I16))") &
1606 "HFX_MEM_INFO| Number of cart. primitive DERIV's calculated: ", shm_nprim_ints
1608 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
1609 "HFX_MEM_INFO| Number of sph. DERIV's calculated: ", shm_neris_total
1611 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
1612 "HFX_MEM_INFO| Number of sph. DERIV's stored in-core: ", shm_neris_incore
1614 WRITE (unit=iw, fmt=
"((T3,A,T62,I19))") &
1615 "HFX_MEM_INFO| Number of sph. DERIV's calculated on the fly: ", shm_neris_onthefly
1617 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
1618 "HFX_MEM_INFO| Total memory consumption DERIV's RAM [MiB]: ", mem_eris
1620 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
1621 "HFX_MEM_INFO| Whereof max-vals [MiB]: ", mem_max_val
1623 WRITE (unit=iw, fmt=
"((T3,A,T60,F21.2),/)") &
1624 "HFX_MEM_INFO| Total compression factor DERIV's RAM: ", compression_factor
1633 IF (do_dynamic_load_balancing)
THEN
1634 my_bin_size =
SIZE(actual_x_data%distribution_forces)
1639 IF (actual_x_data%memory_parameter%recalc_forces)
THEN
1640 IF (treat_forces_in_core)
THEN
1641 DO bin = 1, my_bin_size
1642 maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
1643 maxval_container => actual_x_data%store_forces%maxval_container(bin)
1644 integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
1645 integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
1646 CALL hfx_flush_last_cache(bits_max_val, maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1650 memory_parameter%actual_memory_usage, .false.)
1656 IF (treat_forces_in_core)
THEN
1657 DO bin = 1, my_bin_size
1658 maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
1659 maxval_container => actual_x_data%store_forces%maxval_container(bin)
1660 integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
1661 integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
1666 memory_parameter%actual_memory_usage, &
1672 IF (actual_x_data%memory_parameter%recalc_forces)
THEN
1673 actual_x_data%memory_parameter%recalc_forces = .false.
1678 CALL timestop(handle_main)
1681 DEALLOCATE (last_sgf_global)
1683 DEALLOCATE (full_density_alpha)
1684 IF (with_resp_density)
THEN
1685 DEALLOCATE (full_density_resp)
1687 IF (nspins == 2)
THEN
1688 DEALLOCATE (full_density_beta)
1689 IF (with_resp_density)
THEN
1690 DEALLOCATE (full_density_resp_beta)
1693 IF (do_dynamic_load_balancing)
THEN
1694 DEALLOCATE (actual_x_data%task_list)
1697 DEALLOCATE (primitive_forces)
1698 DEALLOCATE (atom_of_kind, kind_of)
1699 DEALLOCATE (max_contraction)
1700 DEALLOCATE (pbd_buf)
1701 DEALLOCATE (pbc_buf)
1702 DEALLOCATE (pad_buf)
1703 DEALLOCATE (pac_buf)
1705 IF (with_resp_density)
THEN
1706 DEALLOCATE (pbd_buf_resp)
1707 DEALLOCATE (pbc_buf_resp)
1708 DEALLOCATE (pad_buf_resp)
1709 DEALLOCATE (pac_buf_resp)
1712 DO i = 1, max_pgf**2
1713 DEALLOCATE (pgf_list_ij(i)%image_list)
1714 DEALLOCATE (pgf_list_kl(i)%image_list)
1717 DEALLOCATE (pgf_list_ij)
1718 DEALLOCATE (pgf_list_kl)
1719 DEALLOCATE (pgf_product_list)
1721 DEALLOCATE (set_list_ij, set_list_kl)
1723 DEALLOCATE (primitive_forces_virial)
1725 DEALLOCATE (ede_work, ede_work2, ede_work_forces, ede_buffer1, ede_buffer2, ede_primitives_tmp)
1727 DEALLOCATE (ede_work_virial, ede_work2_virial, ede_primitives_tmp_virial)
1729 DEALLOCATE (nimages)
1731 IF (nspins == 2)
THEN
1732 DEALLOCATE (pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta)
1733 IF (with_resp_density)
THEN
1734 DEALLOCATE (pbd_buf_resp_beta)
1735 DEALLOCATE (pbc_buf_resp_beta)
1736 DEALLOCATE (pad_buf_resp_beta)
1737 DEALLOCATE (pac_buf_resp_beta)
1749 CALL timestop(handle)