135 geometry_did_change, irep, distribute_fock_matrix, &
139 TYPE(
hfx_type),
DIMENSION(:, :),
POINTER :: x_data
140 TYPE(
dbcsr_p_type),
DIMENSION(:, :),
POINTER :: ks_matrix
141 REAL(kind=
dp),
INTENT(OUT) :: ehfx
145 LOGICAL :: geometry_did_change
147 LOGICAL,
INTENT(IN) :: distribute_fock_matrix
148 INTEGER,
INTENT(IN) :: ispin
150 CHARACTER(LEN=*),
PARAMETER :: routinen =
'integrate_four_center'
152 CHARACTER(len=default_string_length) :: eps_scaling_str, eps_schwarz_min_str
153 INTEGER :: act_atomic_block_offset, act_set_offset, atomic_offset_ac, atomic_offset_ad, &
154 atomic_offset_bc, atomic_offset_bd, bin, bits_max_val, buffer_left, buffer_size, &
155 buffer_start, cache_size, current_counter, handle, handle_bin, handle_dist_ks, &
156 handle_getp, handle_load, handle_main, i, i_list_ij, i_list_kl, i_set_list_ij, &
157 i_set_list_ij_start, i_set_list_ij_stop, i_set_list_kl, i_set_list_kl_start, &
158 i_set_list_kl_stop, i_thread, iatom, iatom_block, iatom_end, iatom_start, ikind, img, &
159 iset, iw, j, jatom, jatom_block, jatom_end, jatom_start, jkind, jset, katom, katom_block, &
161 INTEGER :: katom_start, kind_kind_idx, kkind, kset, l_max, latom, latom_block, latom_end, &
162 latom_start, lkind, lset, ma, max_am, max_pgf, max_set, mb, my_bin_id, my_bin_size, &
163 my_thread_id, n_threads, natom, nbits, ncob, ncos_max, nints, nkimages, nkind, &
164 nneighbors, nseta, nsetb, nsgf_max, nspins, pa, sgfb, shm_task_counter, shm_total_bins, &
165 sphi_a_u1, sphi_a_u2, sphi_a_u3, sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, &
166 sphi_c_u3, sphi_d_u1, sphi_d_u2, sphi_d_u3, swap_id, tmp_i4, unit_id
167 INTEGER(int_8) :: atom_block, counter, estimate_to_store_int, max_val_memory, &
168 mb_size_buffers, mb_size_f, mb_size_p, mem_compression_counter, &
169 mem_compression_counter_disk, mem_eris, mem_eris_disk, mem_max_val, memsize_after, &
170 memsize_before, my_current_counter, my_istart, n_processes, nblocks, ncpu, neris_disk, &
171 neris_incore, neris_onthefly, neris_tmp, neris_total, nprim_ints, &
172 shm_mem_compression_counter, shm_neris_disk, shm_neris_incore, shm_neris_onthefly, &
173 shm_neris_total, shm_nprim_ints, shm_stor_count_int_disk, shm_stor_count_max_val, &
174 shm_storage_counter_integrals, stor_count_int_disk
175 INTEGER(int_8) :: stor_count_max_val, storage_counter_integrals, subtr_size_mb, tmp_block, &
177 INTEGER(int_8),
ALLOCATABLE,
DIMENSION(:) :: tmp_task_list_cost
178 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: kind_of, last_sgf_global, nimages, &
180 INTEGER,
DIMENSION(:),
POINTER :: la_max, la_min, lb_max, lb_min, lc_max, lc_min, ld_max, &
181 ld_min, npgfa, npgfb, npgfc, npgfd, nsgfa, nsgfb, nsgfc, nsgfd, shm_block_offset
182 INTEGER,
DIMENSION(:, :),
POINTER :: first_sgfb, nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d, &
183 offset_ac_set, offset_ad_set, offset_bc_set, offset_bd_set, shm_atomic_block_offset
184 INTEGER,
DIMENSION(:, :),
POINTER,
SAVE :: shm_is_assoc_atomic_block
185 INTEGER,
DIMENSION(:, :, :),
POINTER :: cell_to_index
186 INTEGER,
DIMENSION(:, :, :, :),
POINTER :: shm_set_offset
187 INTEGER,
SAVE :: shm_number_of_p_entries
188 LOGICAL :: bins_left, buffer_overflow, do_disk_storage, do_dynamic_load_balancing, do_it, &
189 do_kpoints, do_p_screening, do_periodic, do_print_load_balance_info, is_anti_symmetric, &
190 ks_fully_occ, my_geo_change, treat_lsd_in_core, use_disk_storage
191 LOGICAL,
DIMENSION(:, :),
POINTER :: shm_atomic_pair_list
192 REAL(
dp) :: afac, bintime_start, bintime_stop, cartesian_estimate, compression_factor, &
193 compression_factor_disk, ene_x_aa, ene_x_aa_diag, ene_x_bb, ene_x_bb_diag, eps_schwarz, &
194 eps_storage, etmp,
fac, hf_fraction, ln_10, log10_eps_schwarz, log10_pmax, &
195 max_contraction_val, max_val1, max_val2, max_val2_set, pmax_atom, pmax_blocks, &
196 pmax_entry, ra(3), rab2, rb(3), rc(3), rcd2, rd(3), screen_kind_ij, screen_kind_kl, &
197 spherical_estimate, symm_fac
198 REAL(
dp),
ALLOCATABLE,
DIMENSION(:) :: ee_buffer1, ee_buffer2, ee_primitives_tmp, ee_work, &
199 ee_work2, kac_buf, kad_buf, kbc_buf, kbd_buf, pac_buf, pad_buf, pbc_buf, pbd_buf, &
201 REAL(
dp),
DIMENSION(:),
POINTER :: p_work
202 REAL(
dp),
DIMENSION(:, :),
POINTER :: full_density_alpha, full_density_beta, full_ks_alpha, &
203 full_ks_beta, max_contraction, ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, shm_pmax_atom, &
204 shm_pmax_block, sphi_b, zeta, zetb, zetc, zetd
205 REAL(
dp),
DIMENSION(:, :, :),
POINTER :: sphi_a_ext_set, sphi_b_ext_set, &
206 sphi_c_ext_set, sphi_d_ext_set
207 REAL(
dp),
DIMENSION(:, :, :, :),
POINTER :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
209 REAL(kind=
dp) :: coeffs_kind_max0
214 TYPE(
dbcsr_p_type),
DIMENSION(:),
POINTER :: matrix_ks_aux_fit_hfx
218 TYPE(
hfx_cache_type),
DIMENSION(:),
POINTER :: integral_caches, integral_caches_disk
221 integral_containers_disk
227 TYPE(
hfx_p_kind),
DIMENSION(:),
POINTER :: shm_initial_p
228 TYPE(
hfx_pgf_list),
ALLOCATABLE,
DIMENSION(:) :: pgf_list_ij, pgf_list_kl
230 DIMENSION(:) :: pgf_product_list
233 POINTER :: screen_coeffs_kind, tmp_r_1, tmp_r_2, &
234 tmp_screen_pgf1, tmp_screen_pgf2
236 DIMENSION(:, :, :, :),
POINTER :: screen_coeffs_set
238 DIMENSION(:, :, :, :, :, :),
POINTER :: radii_pgf, screen_coeffs_pgf
241 TYPE(
hfx_type),
POINTER :: actual_x_data, shm_master_x_data
245 DIMENSION(:) :: set_list_ij, set_list_kl
249 NULLIFY (dft_control, matrix_ks_aux_fit_hfx)
251 CALL timeset(routinen, handle)
259 my_geo_change = geometry_did_change
260 IF (ispin == 2) my_geo_change = .false.
266 IF (my_geo_change)
THEN
268 CALL para_env%max(memsize_before)
272 WRITE (unit=iw, fmt=
"(/,(T3,A,T60,I21))") &
273 "HFX_MEM_INFO| Est. max. program size before HFX [MiB]:", memsize_before/(1024*1024)
280 CALL get_qs_env(qs_env=qs_env, atomic_kind_set=atomic_kind_set, cell=cell)
282 NULLIFY (cell_to_index)
283 CALL get_qs_env(qs_env=qs_env, do_kpoints=do_kpoints)
286 CALL get_ks_env(ks_env=ks_env, kpoints=kpoints)
291 nkind =
SIZE(atomic_kind_set, 1)
294 l_max = max(l_max, maxval(x_data(1, 1)%basis_parameter(ikind)%lmax))
302 IF (para_env%is_source())
THEN
303 CALL open_file(unit_number=unit_id, file_name=x_data(1, 1)%potential_parameter%filename)
305 CALL init(l_max, unit_id, para_env%mepos, para_env)
306 IF (para_env%is_source())
THEN
318 shm_neris_onthefly = 0
319 shm_storage_counter_integrals = 0
320 shm_stor_count_int_disk = 0
323 shm_stor_count_max_val = 0
413 actual_x_data => x_data(irep, i_thread + 1)
415 shm_master_x_data => x_data(irep, 1)
419 do_periodic = actual_x_data%periodic_parameter%do_periodic
421 IF (do_periodic)
THEN
423 actual_x_data%periodic_parameter%number_of_shells = actual_x_data%periodic_parameter%mode
428 screening_parameter = actual_x_data%screening_parameter
429 potential_parameter = actual_x_data%potential_parameter
431 general_parameter = actual_x_data%general_parameter
432 load_balance_parameter => actual_x_data%load_balance_parameter
433 memory_parameter => actual_x_data%memory_parameter
435 cache_size = memory_parameter%cache_size
436 bits_max_val = memory_parameter%bits_max_val
438 basis_parameter => actual_x_data%basis_parameter
439 basis_info => actual_x_data%basis_info
441 treat_lsd_in_core = general_parameter%treat_lsd_in_core
443 ncpu = para_env%num_pe
444 n_processes = ncpu*n_threads
447 neris_total = 0_int_8
448 neris_incore = 0_int_8
450 neris_onthefly = 0_int_8
452 mem_eris_disk = 0_int_8
453 mem_max_val = 0_int_8
454 compression_factor = 0.0_dp
455 compression_factor_disk = 0.0_dp
458 max_val_memory = 1_int_8
460 max_am = basis_info%max_am
463 atomic_kind_set=atomic_kind_set, &
464 particle_set=particle_set, &
465 dft_control=dft_control)
466 IF (dft_control%do_admm)
CALL get_admm_env(qs_env%admm_env, matrix_ks_aux_fit_hfx=matrix_ks_aux_fit_hfx)
468 do_p_screening = screening_parameter%do_initial_p_screening
470 IF (do_p_screening)
THEN
471 IF (
ASSOCIATED(qs_env%mp2_env))
THEN
472 IF ((qs_env%mp2_env%ri_grad%free_hfx_buffer))
THEN
473 do_p_screening = ((qs_env%mp2_env%p_screen) .AND. (qs_env%mp2_env%not_last_hfx))
475 do_p_screening = .false.
479 max_set = basis_info%max_set
480 natom =
SIZE(particle_set, 1)
483 nkimages = dft_control%nimages
484 cpassert(nkimages == 1)
492 ikind = kind_of(iatom)
493 nseta = basis_parameter(ikind)%nset
494 npgfa => basis_parameter(ikind)%npgf
495 la_max => basis_parameter(ikind)%lmax
496 nsgfa => basis_parameter(ikind)%nsgf
498 ncos_max = max(ncos_max,
ncoset(la_max(iset)))
499 nsgf_max = max(nsgf_max, nsgfa(iset))
503 ALLOCATE (primitive_integrals(nsgf_max**4))
504 primitive_integrals = 0.0_dp
506 ALLOCATE (pbd_buf(nsgf_max**2))
507 ALLOCATE (pbc_buf(nsgf_max**2))
508 ALLOCATE (pad_buf(nsgf_max**2))
509 ALLOCATE (pac_buf(nsgf_max**2))
510 ALLOCATE (kbd_buf(nsgf_max**2))
511 ALLOCATE (kbc_buf(nsgf_max**2))
512 ALLOCATE (kad_buf(nsgf_max**2))
513 ALLOCATE (kac_buf(nsgf_max**2))
514 ALLOCATE (ee_work(ncos_max**4))
515 ALLOCATE (ee_work2(ncos_max**4))
516 ALLOCATE (ee_buffer1(ncos_max**4))
517 ALLOCATE (ee_buffer2(ncos_max**4))
518 ALLOCATE (ee_primitives_tmp(nsgf_max**4))
520 nspins = dft_control%nspins
522 ALLOCATE (max_contraction(max_set, natom))
524 max_contraction = 0.0_dp
527 jkind = kind_of(jatom)
528 lb_max => basis_parameter(jkind)%lmax
529 nsetb = basis_parameter(jkind)%nset
530 npgfb => basis_parameter(jkind)%npgf
531 first_sgfb => basis_parameter(jkind)%first_sgf
532 sphi_b => basis_parameter(jkind)%sphi
533 nsgfb => basis_parameter(jkind)%nsgf
536 ncob = npgfb(jset)*
ncoset(lb_max(jset))
537 sgfb = first_sgfb(1, jset)
540 max_contraction(jset, jatom) = maxval((/(sum(abs(sphi_b(1:ncob, i))), i=sgfb, sgfb + nsgfb(jset) - 1)/))
541 max_pgf = max(max_pgf, npgfb(jset))
546 nneighbors =
SIZE(actual_x_data%neighbor_cells)
547 ALLOCATE (pgf_list_ij(max_pgf**2))
548 ALLOCATE (pgf_list_kl(max_pgf**2))
552 ALLOCATE (pgf_product_list(tmp_i4))
553 ALLOCATE (nimages(max_pgf**2))
556 ALLOCATE (pgf_list_ij(i)%image_list(nneighbors))
557 ALLOCATE (pgf_list_kl(i)%image_list(nneighbors))
562 IF (my_geo_change)
THEN
564 shm_master_x_data%is_assoc_atomic_block, &
565 shm_master_x_data%number_of_p_entries, &
567 shm_master_x_data%atomic_block_offset, &
568 shm_master_x_data%set_offset, &
569 shm_master_x_data%block_offset, &
570 shm_master_x_data%map_atoms_to_cpus, &
573 shm_is_assoc_atomic_block => shm_master_x_data%is_assoc_atomic_block
576 ks_fully_occ = .true.
577 outer:
DO iatom = 1, natom
578 DO jatom = iatom, natom
579 IF (shm_is_assoc_atomic_block(jatom, iatom) == 0)
THEN
580 ks_fully_occ = .false.
586 IF (.NOT. ks_fully_occ)
THEN
587 CALL cp_warn(__location__, &
588 "The Kohn Sham matrix is not 100% occupied. This "// &
589 "may result in incorrect Hartree-Fock results. Setting "// &
590 "MIN_PAIR_LIST_RADIUS to -1 in the QS section ensures a "// &
591 "fully occupied KS matrix. For more information "// &
592 "see FAQ: https://www.cp2k.org/faq:hfx_eps_warning")
597 shm_number_of_p_entries = shm_master_x_data%number_of_p_entries
598 shm_is_assoc_atomic_block => shm_master_x_data%is_assoc_atomic_block
599 shm_atomic_block_offset => shm_master_x_data%atomic_block_offset
600 shm_set_offset => shm_master_x_data%set_offset
601 shm_block_offset => shm_master_x_data%block_offset
607 subtr_size_mb = 2_int_8*shm_block_offset(ncpu + 1)
609 IF (.NOT. treat_lsd_in_core)
THEN
610 IF (nspins == 2) subtr_size_mb = subtr_size_mb*2_int_8
613 IF (do_p_screening .OR. screening_parameter%do_p_screening_forces)
THEN
614 subtr_size_mb = subtr_size_mb + memory_parameter%size_p_screen
617 IF (screening_parameter%do_p_screening_forces)
THEN
618 IF (memory_parameter%treat_forces_in_core)
THEN
619 subtr_size_mb = subtr_size_mb + memory_parameter%size_p_screen
623 subtr_size_mb = subtr_size_mb + nsgf_max**4*n_threads
625 subtr_size_mb = subtr_size_mb + 8_int_8*nsgf_max**2*n_threads
628 subtr_size_mb = subtr_size_mb + max_pgf**2*max_set**2*nkind**2
630 subtr_size_mb = subtr_size_mb + max_set**2*nkind**2
632 subtr_size_mb = subtr_size_mb + nkind**2
634 subtr_size_mb = subtr_size_mb + max_pgf**2*max_set**2*nkind**2
637 subtr_size_mb = subtr_size_mb + natom**2
640 IF (do_p_screening)
THEN
641 subtr_size_mb = subtr_size_mb + natom**2
643 IF (screening_parameter%do_p_screening_forces)
THEN
644 IF (memory_parameter%treat_forces_in_core)
THEN
645 subtr_size_mb = subtr_size_mb + natom**2
650 subtr_size_mb = subtr_size_mb*8_int_8/1024_int_8/1024_int_8
659 use_disk_storage = .false.
661 do_disk_storage = memory_parameter%do_disk_storage
662 IF (do_disk_storage)
THEN
663 maxval_container_disk => actual_x_data%store_ints%maxval_container_disk
664 maxval_cache_disk => actual_x_data%store_ints%maxval_cache_disk
666 integral_containers_disk => actual_x_data%store_ints%integral_containers_disk
667 integral_caches_disk => actual_x_data%store_ints%integral_caches_disk
670 IF (my_geo_change)
THEN
671 memory_parameter%ram_counter = huge(memory_parameter%ram_counter)
674 IF (my_geo_change)
THEN
675 memory_parameter%recalc_forces = .true.
677 IF (.NOT. memory_parameter%treat_forces_in_core) memory_parameter%recalc_forces = .true.
681 eps_schwarz = screening_parameter%eps_schwarz
682 IF (eps_schwarz <= 0.0_dp)
THEN
685 log10_eps_schwarz = log10(eps_schwarz)
688 eps_storage = eps_schwarz*memory_parameter%eps_storage_scaling
691 hf_fraction = general_parameter%fraction
696 potential_parameter = actual_x_data%potential_parameter
700 IF (.NOT. memory_parameter%do_all_on_the_fly)
THEN
701 buffer_overflow = .false.
703 buffer_overflow = .true.
707 private_lib = actual_x_data%lib
710 ALLOCATE (last_sgf_global(0:natom))
711 last_sgf_global(0) = 0
713 ikind = kind_of(iatom)
714 last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
720 NULLIFY (full_density_alpha, full_density_beta)
721 ALLOCATE (full_density_alpha(shm_block_offset(ncpu + 1), nkimages))
722 IF (.NOT. treat_lsd_in_core .OR. nspins == 1)
THEN
723 CALL timeset(routinen//
"_getP", handle_getp)
725 CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(ispin, img)%matrix, shm_number_of_p_entries, &
726 shm_master_x_data%block_offset, &
727 kind_of, basis_parameter, get_max_vals_spin=.false., antisymmetric=is_anti_symmetric)
730 IF (nspins == 2)
THEN
731 ALLOCATE (full_density_beta(shm_block_offset(ncpu + 1), nkimages))
733 CALL get_full_density(para_env, full_density_beta(:, img), rho_ao(2, img)%matrix, shm_number_of_p_entries, &
734 shm_master_x_data%block_offset, &
735 kind_of, basis_parameter, get_max_vals_spin=.false., antisymmetric=is_anti_symmetric)
738 CALL timestop(handle_getp)
743 NULLIFY (shm_initial_p)
744 IF (do_p_screening)
THEN
745 shm_initial_p => shm_master_x_data%initial_p
746 shm_pmax_atom => shm_master_x_data%pmax_atom
747 IF (my_geo_change)
THEN
749 shm_master_x_data%map_atom_to_kind_atom, &
750 shm_master_x_data%set_offset, &
751 shm_master_x_data%atomic_block_offset, &
753 full_density_alpha, full_density_beta, &
754 natom, kind_of, basis_parameter, &
755 nkind, shm_master_x_data%is_assoc_atomic_block)
759 IF (do_p_screening)
THEN
760 CALL timeset(routinen//
"_getP", handle_getp)
762 CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(1, img)%matrix, shm_number_of_p_entries, &
763 shm_master_x_data%block_offset, &
764 kind_of, basis_parameter, get_max_vals_spin=.true., &
765 rho_beta=rho_ao(2, img)%matrix, antisymmetric=is_anti_symmetric)
767 CALL timestop(handle_getp)
772 NULLIFY (shm_initial_p)
773 shm_initial_p => actual_x_data%initial_p
774 shm_pmax_atom => shm_master_x_data%pmax_atom
775 IF (my_geo_change)
THEN
777 shm_master_x_data%map_atom_to_kind_atom, &
778 shm_master_x_data%set_offset, &
779 shm_master_x_data%atomic_block_offset, &
781 full_density_alpha, full_density_beta, &
782 natom, kind_of, basis_parameter, &
783 nkind, shm_master_x_data%is_assoc_atomic_block)
788 CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(ispin, img)%matrix, shm_number_of_p_entries, &
789 shm_master_x_data%block_offset, &
790 kind_of, basis_parameter, get_max_vals_spin=.false., &
791 antisymmetric=is_anti_symmetric)
795 NULLIFY (full_ks_alpha, full_ks_beta)
796 ALLOCATE (shm_master_x_data%full_ks_alpha(shm_block_offset(ncpu + 1), nkimages))
797 full_ks_alpha => shm_master_x_data%full_ks_alpha
798 full_ks_alpha = 0.0_dp
800 IF (.NOT. treat_lsd_in_core)
THEN
801 IF (nspins == 2)
THEN
802 ALLOCATE (shm_master_x_data%full_ks_beta(shm_block_offset(ncpu + 1), nkimages))
803 full_ks_beta => shm_master_x_data%full_ks_beta
804 full_ks_beta = 0.0_dp
816 IF (.NOT. shm_master_x_data%screen_funct_is_initialized)
THEN
818 shm_master_x_data%pair_dist_radii_pgf, max_set, max_pgf, eps_schwarz, &
822 shm_master_x_data%screen_funct_coeffs_set, &
823 shm_master_x_data%screen_funct_coeffs_kind, &
824 shm_master_x_data%screen_funct_coeffs_pgf, &
825 shm_master_x_data%pair_dist_radii_pgf, &
826 max_set, max_pgf, n_threads, i_thread, p_work)
829 shm_master_x_data%screen_funct_is_initialized = .true.
835 screen_coeffs_set => shm_master_x_data%screen_funct_coeffs_set
836 screen_coeffs_kind => shm_master_x_data%screen_funct_coeffs_kind
837 screen_coeffs_pgf => shm_master_x_data%screen_funct_coeffs_pgf
838 radii_pgf => shm_master_x_data%pair_dist_radii_pgf
843 IF (nspins == 1)
THEN
844 fac = 0.5_dp*hf_fraction
846 fac = 1.0_dp*hf_fraction
853 CALL timeset(routinen//
"_load", handle_load)
856 IF (my_geo_change)
THEN
857 IF (actual_x_data%b_first_load_balance_energy)
THEN
858 CALL hfx_load_balance(actual_x_data, eps_schwarz, particle_set, max_set, para_env, &
859 screen_coeffs_set, screen_coeffs_kind, &
860 shm_is_assoc_atomic_block, do_periodic, load_balance_parameter, &
861 kind_of, basis_parameter, shm_initial_p, shm_pmax_atom, i_thread, n_threads, &
862 cell, do_p_screening, actual_x_data%map_atom_to_kind_atom, &
864 actual_x_data%b_first_load_balance_energy = .false.
867 load_balance_parameter, &
873 CALL timestop(handle_load)
915 do_dynamic_load_balancing = .true.
917 IF (n_threads == 1 .OR. do_disk_storage) do_dynamic_load_balancing = .false.
919 IF (do_dynamic_load_balancing)
THEN
920 my_bin_size =
SIZE(actual_x_data%distribution_energy)
925 IF (.NOT. memory_parameter%do_all_on_the_fly)
THEN
927 IF (my_geo_change)
THEN
928 CALL dealloc_containers(actual_x_data%store_ints, memory_parameter%actual_memory_usage)
931 DO bin = 1, my_bin_size
932 maxval_container => actual_x_data%store_ints%maxval_container(bin)
933 integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
934 CALL hfx_init_container(maxval_container, memory_parameter%actual_memory_usage, .false.)
936 CALL hfx_init_container(integral_containers(i), memory_parameter%actual_memory_usage, .false.)
942 IF (.NOT. my_geo_change)
THEN
943 DO bin = 1, my_bin_size
944 maxval_cache => actual_x_data%store_ints%maxval_cache(bin)
945 maxval_container => actual_x_data%store_ints%maxval_container(bin)
946 integral_caches => actual_x_data%store_ints%integral_caches(:, bin)
947 integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
949 memory_parameter%actual_memory_usage, .false.)
952 memory_parameter%actual_memory_usage, .false.)
961 IF (do_disk_storage)
THEN
963 IF (my_geo_change)
THEN
964 CALL hfx_init_container(maxval_container_disk, memory_parameter%actual_memory_usage_disk, do_disk_storage)
966 CALL hfx_init_container(integral_containers_disk(i), memory_parameter%actual_memory_usage_disk, do_disk_storage)
970 IF (.NOT. my_geo_change)
THEN
972 memory_parameter%actual_memory_usage_disk, .true.)
975 memory_parameter%actual_memory_usage_disk, .true.)
984 IF (do_dynamic_load_balancing)
THEN
988 shm_total_bins = shm_total_bins +
SIZE(x_data(irep, i)%distribution_energy)
990 ALLOCATE (tmp_task_list(shm_total_bins))
993 DO bin = 1,
SIZE(x_data(irep, i)%distribution_energy)
994 shm_task_counter = shm_task_counter + 1
995 tmp_task_list(shm_task_counter)%thread_id = i
996 tmp_task_list(shm_task_counter)%bin_id = bin
997 tmp_task_list(shm_task_counter)%cost = x_data(irep, i)%distribution_energy(bin)%cost
1002 ALLOCATE (tmp_task_list_cost(shm_total_bins))
1003 ALLOCATE (tmp_index(shm_total_bins))
1005 DO i = 1, shm_total_bins
1006 tmp_task_list_cost(i) = tmp_task_list(i)%cost
1009 CALL sort(tmp_task_list_cost, shm_total_bins, tmp_index)
1011 ALLOCATE (shm_master_x_data%task_list(shm_total_bins))
1013 DO i = 1, shm_total_bins
1014 shm_master_x_data%task_list(i) = tmp_task_list(tmp_index(shm_total_bins - i + 1))
1017 shm_task_list => shm_master_x_data%task_list
1018 shm_task_counter = 0
1020 DEALLOCATE (tmp_task_list_cost, tmp_index, tmp_task_list)
1025 IF (my_bin_size > 0)
THEN
1026 maxval_container => actual_x_data%store_ints%maxval_container(1)
1027 maxval_cache => actual_x_data%store_ints%maxval_cache(1)
1029 integral_containers => actual_x_data%store_ints%integral_containers(:, 1)
1030 integral_caches => actual_x_data%store_ints%integral_caches(:, 1)
1035 CALL timeset(routinen//
"_main", handle_main)
1039 coeffs_kind_max0 = maxval(screen_coeffs_kind(:, :)%x(2))
1040 ALLOCATE (set_list_ij((max_set*load_balance_parameter%block_size)**2))
1041 ALLOCATE (set_list_kl((max_set*load_balance_parameter%block_size)**2))
1047 actual_x_data%pmax_block = 0.0_dp
1048 shm_pmax_block => actual_x_data%pmax_block
1049 IF (do_p_screening)
THEN
1050 DO iatom_block = 1,
SIZE(actual_x_data%blocks)
1051 iatom_start = actual_x_data%blocks(iatom_block)%istart
1052 iatom_end = actual_x_data%blocks(iatom_block)%iend
1053 DO jatom_block = 1,
SIZE(actual_x_data%blocks)
1054 jatom_start = actual_x_data%blocks(jatom_block)%istart
1055 jatom_end = actual_x_data%blocks(jatom_block)%iend
1056 shm_pmax_block(iatom_block, jatom_block) = maxval(shm_pmax_atom(iatom_start:iatom_end, jatom_start:jatom_end))
1060 shm_atomic_pair_list => actual_x_data%atomic_pair_list
1061 IF (my_geo_change)
THEN
1063 do_periodic, screen_coeffs_kind, coeffs_kind_max0, log10_eps_schwarz, cell, &
1064 actual_x_data%blocks)
1067 my_bin_size =
SIZE(actual_x_data%distribution_energy)
1069 IF (my_geo_change)
THEN
1070 DO bin = 1, my_bin_size
1071 actual_x_data%distribution_energy(bin)%time_first_scf = 0.0_dp
1072 actual_x_data%distribution_energy(bin)%time_other_scf = 0.0_dp
1073 actual_x_data%distribution_energy(bin)%time_forces = 0.0_dp
1079 my_bin_size =
SIZE(actual_x_data%distribution_energy)
1080 nblocks = load_balance_parameter%nblocks
1085 DO WHILE (bins_left)
1086 IF (.NOT. do_dynamic_load_balancing)
THEN
1088 IF (bin > my_bin_size)
THEN
1094 distribution_energy => actual_x_data%distribution_energy(bin)
1098 shm_task_counter = shm_task_counter + 1
1099 IF (shm_task_counter <= shm_total_bins)
THEN
1100 my_thread_id = shm_task_list(shm_task_counter)%thread_id
1101 my_bin_id = shm_task_list(shm_task_counter)%bin_id
1102 IF (.NOT. memory_parameter%do_all_on_the_fly)
THEN
1103 maxval_cache => x_data(irep, my_thread_id)%store_ints%maxval_cache(my_bin_id)
1104 maxval_container => x_data(irep, my_thread_id)%store_ints%maxval_container(my_bin_id)
1105 integral_caches => x_data(irep, my_thread_id)%store_ints%integral_caches(:, my_bin_id)
1106 integral_containers => x_data(irep, my_thread_id)%store_ints%integral_containers(:, my_bin_id)
1108 distribution_energy => x_data(irep, my_thread_id)%distribution_energy(my_bin_id)
1111 IF (my_geo_change)
THEN
1112 distribution_energy%ram_counter = huge(distribution_energy%ram_counter)
1122 IF (.NOT. do_it) cycle
1124 CALL timeset(routinen//
"_bin", handle_bin)
1127 my_istart = distribution_energy%istart
1128 my_current_counter = 0
1129 IF (distribution_energy%number_of_atom_quartets == 0 .OR. &
1130 my_istart == -1_int_8) my_istart = nblocks**4
1131 atomic_blocks:
DO atom_block = my_istart, nblocks**4 - 1, n_processes
1132 latom_block = int(
modulo(atom_block, nblocks)) + 1
1133 tmp_block = atom_block/nblocks
1134 katom_block = int(
modulo(tmp_block, nblocks)) + 1
1135 IF (latom_block < katom_block) cycle
1136 tmp_block = tmp_block/nblocks
1137 jatom_block = int(
modulo(tmp_block, nblocks)) + 1
1138 tmp_block = tmp_block/nblocks
1139 iatom_block = int(
modulo(tmp_block, nblocks)) + 1
1140 IF (jatom_block < iatom_block) cycle
1141 my_current_counter = my_current_counter + 1
1142 IF (my_current_counter > distribution_energy%number_of_atom_quartets)
EXIT atomic_blocks
1144 iatom_start = actual_x_data%blocks(iatom_block)%istart
1145 iatom_end = actual_x_data%blocks(iatom_block)%iend
1146 jatom_start = actual_x_data%blocks(jatom_block)%istart
1147 jatom_end = actual_x_data%blocks(jatom_block)%iend
1148 katom_start = actual_x_data%blocks(katom_block)%istart
1149 katom_end = actual_x_data%blocks(katom_block)%iend
1150 latom_start = actual_x_data%blocks(latom_block)%istart
1151 latom_end = actual_x_data%blocks(latom_block)%iend
1153 pmax_blocks = max(shm_pmax_block(katom_block, iatom_block), &
1154 shm_pmax_block(latom_block, jatom_block), &
1155 shm_pmax_block(latom_block, iatom_block), &
1156 shm_pmax_block(katom_block, jatom_block))
1158 IF (2.0_dp*coeffs_kind_max0 + pmax_blocks < log10_eps_schwarz) cycle
1160 CALL build_pair_list(natom, list_ij, set_list_ij, iatom_start, iatom_end, &
1161 jatom_start, jatom_end, &
1162 kind_of, basis_parameter, particle_set, &
1163 do_periodic, screen_coeffs_set, screen_coeffs_kind, &
1164 coeffs_kind_max0, log10_eps_schwarz, cell, pmax_blocks, &
1165 shm_atomic_pair_list)
1167 CALL build_pair_list(natom, list_kl, set_list_kl, katom_start, katom_end, &
1168 latom_start, latom_end, &
1169 kind_of, basis_parameter, particle_set, &
1170 do_periodic, screen_coeffs_set, screen_coeffs_kind, &
1171 coeffs_kind_max0, log10_eps_schwarz, cell, pmax_blocks, &
1172 shm_atomic_pair_list)
1174 DO i_list_ij = 1, list_ij%n_element
1176 iatom = list_ij%elements(i_list_ij)%pair(1)
1177 jatom = list_ij%elements(i_list_ij)%pair(2)
1178 i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
1179 i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
1180 ikind = list_ij%elements(i_list_ij)%kind_pair(1)
1181 jkind = list_ij%elements(i_list_ij)%kind_pair(2)
1182 ra = list_ij%elements(i_list_ij)%r1
1183 rb = list_ij%elements(i_list_ij)%r2
1184 rab2 = list_ij%elements(i_list_ij)%dist2
1186 la_max => basis_parameter(ikind)%lmax
1187 la_min => basis_parameter(ikind)%lmin
1188 npgfa => basis_parameter(ikind)%npgf
1189 nseta = basis_parameter(ikind)%nset
1190 zeta => basis_parameter(ikind)%zet
1191 nsgfa => basis_parameter(ikind)%nsgf
1192 sphi_a_ext => basis_parameter(ikind)%sphi_ext(:, :, :, :)
1193 nsgfl_a => basis_parameter(ikind)%nsgfl
1194 sphi_a_u1 = ubound(sphi_a_ext, 1)
1195 sphi_a_u2 = ubound(sphi_a_ext, 2)
1196 sphi_a_u3 = ubound(sphi_a_ext, 3)
1198 lb_max => basis_parameter(jkind)%lmax
1199 lb_min => basis_parameter(jkind)%lmin
1200 npgfb => basis_parameter(jkind)%npgf
1201 nsetb = basis_parameter(jkind)%nset
1202 zetb => basis_parameter(jkind)%zet
1203 nsgfb => basis_parameter(jkind)%nsgf
1204 sphi_b_ext => basis_parameter(jkind)%sphi_ext(:, :, :, :)
1205 nsgfl_b => basis_parameter(jkind)%nsgfl
1206 sphi_b_u1 = ubound(sphi_b_ext, 1)
1207 sphi_b_u2 = ubound(sphi_b_ext, 2)
1208 sphi_b_u3 = ubound(sphi_b_ext, 3)
1210 DO i_list_kl = 1, list_kl%n_element
1211 katom = list_kl%elements(i_list_kl)%pair(1)
1212 latom = list_kl%elements(i_list_kl)%pair(2)
1214 IF (.NOT. (katom + latom <= iatom + jatom)) cycle
1215 IF (((iatom + jatom) .EQ. (katom + latom)) .AND. (katom < iatom)) cycle
1216 i_set_list_kl_start = list_kl%elements(i_list_kl)%set_bounds(1)
1217 i_set_list_kl_stop = list_kl%elements(i_list_kl)%set_bounds(2)
1218 kkind = list_kl%elements(i_list_kl)%kind_pair(1)
1219 lkind = list_kl%elements(i_list_kl)%kind_pair(2)
1220 rc = list_kl%elements(i_list_kl)%r1
1221 rd = list_kl%elements(i_list_kl)%r2
1222 rcd2 = list_kl%elements(i_list_kl)%dist2
1224 IF (do_p_screening)
THEN
1225 pmax_atom = max(shm_pmax_atom(katom, iatom), &
1226 shm_pmax_atom(latom, jatom), &
1227 shm_pmax_atom(latom, iatom), &
1228 shm_pmax_atom(katom, jatom))
1233 screen_kind_ij = screen_coeffs_kind(jkind, ikind)%x(1)*rab2 + &
1234 screen_coeffs_kind(jkind, ikind)%x(2)
1235 screen_kind_kl = screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
1236 screen_coeffs_kind(lkind, kkind)%x(2)
1238 IF (screen_kind_ij + screen_kind_kl + pmax_atom < log10_eps_schwarz) cycle
1242 IF (.NOT. (shm_is_assoc_atomic_block(latom, iatom) >= 1 .AND. &
1243 shm_is_assoc_atomic_block(katom, iatom) >= 1 .AND. &
1244 shm_is_assoc_atomic_block(katom, jatom) >= 1 .AND. &
1245 shm_is_assoc_atomic_block(latom, jatom) >= 1)) cycle
1249 IF (iatom == jatom) symm_fac = symm_fac*2.0_dp
1250 IF (katom == latom) symm_fac = symm_fac*2.0_dp
1251 IF (iatom == katom .AND. jatom == latom .AND. iatom /= jatom .AND. katom /= latom) symm_fac = symm_fac*2.0_dp
1252 IF (iatom == katom .AND. iatom == jatom .AND. katom == latom) symm_fac = symm_fac*2.0_dp
1253 symm_fac = 1.0_dp/symm_fac
1255 lc_max => basis_parameter(kkind)%lmax
1256 lc_min => basis_parameter(kkind)%lmin
1257 npgfc => basis_parameter(kkind)%npgf
1258 zetc => basis_parameter(kkind)%zet
1259 nsgfc => basis_parameter(kkind)%nsgf
1260 sphi_c_ext => basis_parameter(kkind)%sphi_ext(:, :, :, :)
1261 nsgfl_c => basis_parameter(kkind)%nsgfl
1262 sphi_c_u1 = ubound(sphi_c_ext, 1)
1263 sphi_c_u2 = ubound(sphi_c_ext, 2)
1264 sphi_c_u3 = ubound(sphi_c_ext, 3)
1266 ld_max => basis_parameter(lkind)%lmax
1267 ld_min => basis_parameter(lkind)%lmin
1268 npgfd => basis_parameter(lkind)%npgf
1269 zetd => basis_parameter(lkind)%zet
1270 nsgfd => basis_parameter(lkind)%nsgf
1271 sphi_d_ext => basis_parameter(lkind)%sphi_ext(:, :, :, :)
1272 nsgfl_d => basis_parameter(lkind)%nsgfl
1273 sphi_d_u1 = ubound(sphi_d_ext, 1)
1274 sphi_d_u2 = ubound(sphi_d_ext, 2)
1275 sphi_d_u3 = ubound(sphi_d_ext, 3)
1277 atomic_offset_bd = shm_atomic_block_offset(jatom, latom)
1278 atomic_offset_bc = shm_atomic_block_offset(jatom, katom)
1279 atomic_offset_ad = shm_atomic_block_offset(iatom, latom)
1280 atomic_offset_ac = shm_atomic_block_offset(iatom, katom)
1282 IF (jatom < latom)
THEN
1283 offset_bd_set => shm_set_offset(:, :, lkind, jkind)
1285 offset_bd_set => shm_set_offset(:, :, jkind, lkind)
1287 IF (jatom < katom)
THEN
1288 offset_bc_set => shm_set_offset(:, :, kkind, jkind)
1290 offset_bc_set => shm_set_offset(:, :, jkind, kkind)
1292 IF (iatom < latom)
THEN
1293 offset_ad_set => shm_set_offset(:, :, lkind, ikind)
1295 offset_ad_set => shm_set_offset(:, :, ikind, lkind)
1297 IF (iatom < katom)
THEN
1298 offset_ac_set => shm_set_offset(:, :, kkind, ikind)
1300 offset_ac_set => shm_set_offset(:, :, ikind, kkind)
1303 IF (do_p_screening)
THEN
1305 kind_kind_idx = int(get_1d_idx(kkind, ikind, int(nkind,
int_8)))
1306 IF (ikind >= kkind)
THEN
1307 ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1308 actual_x_data%map_atom_to_kind_atom(katom), &
1309 actual_x_data%map_atom_to_kind_atom(iatom))
1311 ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1312 actual_x_data%map_atom_to_kind_atom(iatom), &
1313 actual_x_data%map_atom_to_kind_atom(katom))
1314 swap_id = swap_id + 1
1316 kind_kind_idx = int(get_1d_idx(lkind, jkind, int(nkind,
int_8)))
1317 IF (jkind >= lkind)
THEN
1318 ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1319 actual_x_data%map_atom_to_kind_atom(latom), &
1320 actual_x_data%map_atom_to_kind_atom(jatom))
1322 ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1323 actual_x_data%map_atom_to_kind_atom(jatom), &
1324 actual_x_data%map_atom_to_kind_atom(latom))
1325 swap_id = swap_id + 2
1327 kind_kind_idx = int(get_1d_idx(lkind, ikind, int(nkind,
int_8)))
1328 IF (ikind >= lkind)
THEN
1329 ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1330 actual_x_data%map_atom_to_kind_atom(latom), &
1331 actual_x_data%map_atom_to_kind_atom(iatom))
1333 ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1334 actual_x_data%map_atom_to_kind_atom(iatom), &
1335 actual_x_data%map_atom_to_kind_atom(latom))
1336 swap_id = swap_id + 4
1338 kind_kind_idx = int(get_1d_idx(kkind, jkind, int(nkind,
int_8)))
1339 IF (jkind >= kkind)
THEN
1340 ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1341 actual_x_data%map_atom_to_kind_atom(katom), &
1342 actual_x_data%map_atom_to_kind_atom(jatom))
1344 ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1345 actual_x_data%map_atom_to_kind_atom(jatom), &
1346 actual_x_data%map_atom_to_kind_atom(katom))
1347 swap_id = swap_id + 8
1353 IF (my_geo_change)
THEN
1354 IF (.NOT. memory_parameter%do_all_on_the_fly)
THEN
1361 mem_compression_counter = 0
1364 tmp_i4 = x_data(irep, i)%memory_parameter%actual_memory_usage
1365 mem_compression_counter = mem_compression_counter + &
1366 tmp_i4*memory_parameter%cache_size
1368 IF (mem_compression_counter > memory_parameter%max_compression_counter)
THEN
1369 buffer_overflow = .true.
1370 IF (do_dynamic_load_balancing)
THEN
1371 distribution_energy%ram_counter = counter
1373 memory_parameter%ram_counter = counter
1376 counter = counter + 1
1377 buffer_overflow = .false.
1381 IF (.NOT. memory_parameter%do_all_on_the_fly)
THEN
1382 IF (do_dynamic_load_balancing)
THEN
1383 IF (distribution_energy%ram_counter == counter)
THEN
1384 buffer_overflow = .true.
1386 counter = counter + 1
1387 buffer_overflow = .false.
1391 IF (memory_parameter%ram_counter == counter)
THEN
1392 buffer_overflow = .true.
1394 counter = counter + 1
1395 buffer_overflow = .false.
1401 IF (buffer_overflow .AND. do_disk_storage)
THEN
1402 use_disk_storage = .true.
1403 buffer_overflow = .false.
1406 IF (use_disk_storage)
THEN
1408 tmp_i4 = memory_parameter%actual_memory_usage_disk
1409 mem_compression_counter_disk = tmp_i4*memory_parameter%cache_size
1410 IF (mem_compression_counter_disk > memory_parameter%max_compression_counter_disk)
THEN
1411 buffer_overflow = .true.
1412 use_disk_storage = .false.
1416 DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
1417 iset = set_list_ij(i_set_list_ij)%pair(1)
1418 jset = set_list_ij(i_set_list_ij)%pair(2)
1420 ncob = npgfb(jset)*
ncoset(lb_max(jset))
1421 max_val1 = screen_coeffs_set(jset, iset, jkind, ikind)%x(1)*rab2 + &
1422 screen_coeffs_set(jset, iset, jkind, ikind)%x(2)
1424 IF (max_val1 + screen_kind_kl + pmax_atom < log10_eps_schwarz) cycle
1426 sphi_a_ext_set => sphi_a_ext(:, :, :, iset)
1427 sphi_b_ext_set => sphi_b_ext(:, :, :, jset)
1428 DO i_set_list_kl = i_set_list_kl_start, i_set_list_kl_stop
1429 kset = set_list_kl(i_set_list_kl)%pair(1)
1430 lset = set_list_kl(i_set_list_kl)%pair(2)
1432 max_val2_set = (screen_coeffs_set(lset, kset, lkind, kkind)%x(1)*rcd2 + &
1433 screen_coeffs_set(lset, kset, lkind, kkind)%x(2))
1434 max_val2 = max_val1 + max_val2_set
1437 IF (max_val2 + pmax_atom < log10_eps_schwarz) cycle
1438 sphi_c_ext_set => sphi_c_ext(:, :, :, kset)
1439 sphi_d_ext_set => sphi_d_ext(:, :, :, lset)
1441 IF (do_p_screening)
THEN
1442 CALL get_pmax_val(ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, &
1443 iset, jset, kset, lset, &
1444 pmax_entry, swap_id)
1448 log10_pmax = pmax_entry
1449 max_val2 = max_val2 + log10_pmax
1450 IF (max_val2 < log10_eps_schwarz) cycle
1451 pmax_entry = exp(log10_pmax*ln_10)
1454 current_counter = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)
1455 IF (buffer_overflow)
THEN
1456 neris_onthefly = neris_onthefly + current_counter
1460 IF (.NOT. buffer_overflow .AND. .NOT. my_geo_change)
THEN
1461 nints = current_counter
1462 IF (.NOT. use_disk_storage)
THEN
1464 estimate_to_store_int, 6, &
1465 maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1469 estimate_to_store_int, 6, &
1470 maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
1473 spherical_estimate = set_exponent(1.0_dp, estimate_to_store_int + 1)
1474 IF (spherical_estimate*pmax_entry < eps_schwarz) cycle
1475 nbits = exponent(anint(spherical_estimate*pmax_entry/eps_storage)) + 1
1478 IF (.NOT. use_disk_storage)
THEN
1479 neris_incore = neris_incore + int(nints,
int_8)
1481 neris_disk = neris_disk + int(nints,
int_8)
1483 DO WHILE (buffer_left > 0)
1484 buffer_size = min(buffer_left, cache_size)
1485 IF (.NOT. use_disk_storage)
THEN
1487 buffer_size, nbits, &
1488 integral_caches(nbits), &
1489 integral_containers(nbits), &
1490 eps_storage, pmax_entry, &
1491 memory_parameter%actual_memory_usage, &
1495 buffer_size, nbits, &
1496 integral_caches_disk(nbits), &
1497 integral_containers_disk(nbits), &
1498 eps_storage, pmax_entry, &
1499 memory_parameter%actual_memory_usage_disk, &
1502 buffer_left = buffer_left - buffer_size
1503 buffer_start = buffer_start + buffer_size
1507 IF (my_geo_change .OR. buffer_overflow)
THEN
1508 max_contraction_val = max_contraction(iset, iatom)* &
1509 max_contraction(jset, jatom)* &
1510 max_contraction(kset, katom)* &
1511 max_contraction(lset, latom)*pmax_entry
1512 tmp_r_1 => radii_pgf(:, :, jset, iset, jkind, ikind)
1513 tmp_r_2 => radii_pgf(:, :, lset, kset, lkind, kkind)
1514 tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
1515 tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
1517 CALL coulomb4(private_lib, ra, rb, rc, rd, npgfa(iset), npgfb(jset), npgfc(kset), npgfd(lset), &
1518 la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
1519 lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
1520 nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1521 sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1522 sphi_b_u1, sphi_b_u2, sphi_b_u3, &
1523 sphi_c_u1, sphi_c_u2, sphi_c_u3, &
1524 sphi_d_u1, sphi_d_u2, sphi_d_u3, &
1525 zeta(1:npgfa(iset), iset), zetb(1:npgfb(jset), jset), &
1526 zetc(1:npgfc(kset), kset), zetd(1:npgfd(lset), lset), &
1527 primitive_integrals, &
1528 potential_parameter, &
1529 actual_x_data%neighbor_cells, screen_coeffs_set(jset, iset, jkind, ikind)%x, &
1530 screen_coeffs_set(lset, kset, lkind, kkind)%x, eps_schwarz, &
1531 max_contraction_val, cartesian_estimate, cell, neris_tmp, &
1532 log10_pmax, log10_eps_schwarz, &
1533 tmp_r_1, tmp_r_2, tmp_screen_pgf1, tmp_screen_pgf2, &
1534 pgf_list_ij, pgf_list_kl, pgf_product_list, &
1535 nsgfl_a(:, iset), nsgfl_b(:, jset), &
1536 nsgfl_c(:, kset), nsgfl_d(:, lset), &
1541 ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
1542 nimages, do_periodic, p_work)
1544 nints = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)
1545 neris_total = neris_total + nints
1546 nprim_ints = nprim_ints + neris_tmp
1570 spherical_estimate = 0.0_dp
1572 spherical_estimate = max(spherical_estimate, abs(primitive_integrals(i)))
1575 IF (spherical_estimate == 0.0_dp) spherical_estimate = tiny(spherical_estimate)
1576 estimate_to_store_int = exponent(spherical_estimate)
1577 estimate_to_store_int = max(estimate_to_store_int, -15_int_8)
1579 IF (.NOT. buffer_overflow .AND. my_geo_change)
THEN
1580 IF (.NOT. use_disk_storage)
THEN
1582 estimate_to_store_int, 6, &
1583 maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1584 use_disk_storage, max_val_memory)
1587 estimate_to_store_int, 6, &
1588 maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
1592 spherical_estimate = set_exponent(1.0_dp, estimate_to_store_int + 1)
1593 IF (spherical_estimate*pmax_entry < eps_schwarz) cycle
1594 IF (.NOT. buffer_overflow)
THEN
1595 nbits = exponent(anint(spherical_estimate*pmax_entry/eps_storage)) + 1
1608 IF (nbits > 63)
THEN
1609 WRITE (eps_schwarz_min_str,
'(ES10.3E2)') &
1610 spherical_estimate*pmax_entry/ &
1611 (set_exponent(1.0_dp, 63)*memory_parameter%eps_storage_scaling)
1613 WRITE (eps_scaling_str,
'(ES10.3E2)') &
1614 spherical_estimate*pmax_entry/(set_exponent(1.0_dp, 63)*eps_schwarz)
1616 CALL cp_abort(__location__, &
1617 "Overflow during ERI's compression. Please use a larger "// &
1618 "EPS_SCHWARZ threshold (above "//trim(adjustl(eps_schwarz_min_str))// &
1619 ") or increase the EPS_STORAGE_SCALING factor above "// &
1620 trim(adjustl(eps_scaling_str))//
".")
1625 IF (.NOT. use_disk_storage)
THEN
1626 neris_incore = neris_incore + int(nints,
int_8)
1629 neris_disk = neris_disk + int(nints,
int_8)
1632 DO WHILE (buffer_left > 0)
1633 buffer_size = min(buffer_left, cache_size)
1634 IF (.NOT. use_disk_storage)
THEN
1636 buffer_size, nbits, &
1637 integral_caches(nbits), &
1638 integral_containers(nbits), &
1639 eps_storage, pmax_entry, &
1640 memory_parameter%actual_memory_usage, &
1644 buffer_size, nbits, &
1645 integral_caches_disk(nbits), &
1646 integral_containers_disk(nbits), &
1647 eps_storage, pmax_entry, &
1648 memory_parameter%actual_memory_usage_disk, &
1651 buffer_left = buffer_left - buffer_size
1652 buffer_start = buffer_start + buffer_size
1657 primitive_integrals(i) = primitive_integrals(i)*pmax_entry
1658 IF (abs(primitive_integrals(i)) > eps_storage)
THEN
1659 primitive_integrals(i) = anint(primitive_integrals(i)/eps_storage,
dp)*eps_storage/pmax_entry
1661 primitive_integrals(i) = 0.0_dp
1668 CALL print_integrals( &
1669 iatom, jatom, katom, latom, shm_set_offset, shm_atomic_block_offset, &
1670 iset, jset, kset, lset, nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), primitive_integrals)
1672 IF (.NOT. is_anti_symmetric)
THEN
1674 CALL update_fock_matrix( &
1675 nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1676 fac, symm_fac, full_density_alpha(:, 1), full_ks_alpha(:, 1), &
1677 primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1678 kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1679 iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1680 atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1681 IF (.NOT. treat_lsd_in_core)
THEN
1682 IF (nspins == 2)
THEN
1683 CALL update_fock_matrix( &
1684 nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1685 fac, symm_fac, full_density_beta(:, 1), full_ks_beta(:, 1), &
1686 primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1687 kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1688 iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1689 atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1694 CALL update_fock_matrix_as( &
1695 nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1696 fac, symm_fac, full_density_alpha(:, 1), full_ks_alpha(:, 1), &
1697 primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1698 kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1699 iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1700 atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1701 IF (.NOT. treat_lsd_in_core)
THEN
1702 IF (nspins == 2)
THEN
1703 CALL update_fock_matrix_as( &
1704 nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1705 fac, symm_fac, full_density_beta(:, 1), full_ks_beta(:, 1), &
1706 primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1707 kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1708 iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1709 atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1715 IF (do_disk_storage)
THEN
1716 buffer_overflow = .true.
1720 END DO atomic_blocks
1722 IF (my_geo_change)
THEN
1723 distribution_energy%time_first_scf = bintime_stop - bintime_start
1725 distribution_energy%time_other_scf = &
1726 distribution_energy%time_other_scf + bintime_stop - bintime_start
1729 CALL timestop(handle_bin)
1735 do_print_load_balance_info = .false.
1737 "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO"),
cp_p_file)
1740 IF (do_print_load_balance_info)
THEN
1744 extension=
".scfLog")
1752 "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO")
1762 DEALLOCATE (primitive_integrals)
1766 shm_neris_total = shm_neris_total + neris_total
1768 shm_neris_onthefly = shm_neris_onthefly + neris_onthefly
1770 shm_nprim_ints = shm_nprim_ints + nprim_ints
1772 storage_counter_integrals = memory_parameter%actual_memory_usage* &
1773 memory_parameter%cache_size
1774 stor_count_int_disk = memory_parameter%actual_memory_usage_disk* &
1775 memory_parameter%cache_size
1776 stor_count_max_val = max_val_memory*memory_parameter%cache_size
1778 shm_storage_counter_integrals = shm_storage_counter_integrals + storage_counter_integrals
1780 shm_stor_count_int_disk = shm_stor_count_int_disk + stor_count_int_disk
1782 shm_neris_incore = shm_neris_incore + neris_incore
1784 shm_neris_disk = shm_neris_disk + neris_disk
1786 shm_stor_count_max_val = shm_stor_count_max_val + stor_count_max_val
1791 shm_mem_compression_counter = 0
1794 tmp_i4 = x_data(irep, i)%memory_parameter%actual_memory_usage
1795 shm_mem_compression_counter = shm_mem_compression_counter + &
1796 tmp_i4*memory_parameter%cache_size
1800 actual_x_data%memory_parameter%final_comp_counter_energy = shm_mem_compression_counter
1807 mb_size_p = shm_block_offset(ncpu + 1)/1024/128
1808 mb_size_f = shm_block_offset(ncpu + 1)/1024/128
1809 IF (.NOT. treat_lsd_in_core)
THEN
1810 IF (nspins == 2)
THEN
1811 mb_size_f = mb_size_f*2
1812 mb_size_p = mb_size_p*2
1816 mb_size_buffers = int(nsgf_max,
int_8)**4*n_threads
1818 mb_size_buffers = mb_size_buffers + int(nsgf_max,
int_8)**2*n_threads
1819 subtr_size_mb = subtr_size_mb + 8_int_8*nsgf_max**2*n_threads
1821 mb_size_buffers = mb_size_buffers + max_pgf**2*max_set**2*nkind**2 &
1822 + max_set**2*nkind**2 &
1824 + max_pgf**2*max_set**2*nkind**2
1826 mb_size_buffers = mb_size_buffers + natom**2
1828 IF (do_p_screening)
THEN
1829 mb_size_buffers = mb_size_buffers + natom**2
1831 IF (screening_parameter%do_p_screening_forces)
THEN
1832 IF (memory_parameter%treat_forces_in_core)
THEN
1833 mb_size_buffers = mb_size_buffers + natom**2
1837 IF (do_p_screening .OR. screening_parameter%do_p_screening_forces)
THEN
1838 mb_size_buffers = mb_size_buffers + memory_parameter%size_p_screen
1841 IF (screening_parameter%do_p_screening_forces)
THEN
1842 IF (memory_parameter%treat_forces_in_core)
THEN
1843 mb_size_buffers = mb_size_buffers + memory_parameter%size_p_screen
1848 mb_size_buffers = mb_size_buffers/1024/128
1851 IF (is_anti_symmetric) afac = -1.0_dp
1852 CALL timestop(handle_main)
1853 ene_x_aa_diag = 0.0_dp
1854 ene_x_bb_diag = 0.0_dp
1856 ikind = kind_of(iatom)
1857 nseta = basis_parameter(ikind)%nset
1858 nsgfa => basis_parameter(ikind)%nsgf
1860 jkind = kind_of(jatom)
1861 nsetb = basis_parameter(jkind)%nset
1862 nsgfb => basis_parameter(jkind)%nsgf
1863 act_atomic_block_offset = shm_atomic_block_offset(jatom, iatom)
1864 DO img = 1, nkimages
1867 act_set_offset = shm_set_offset(jset, iset, jkind, ikind)
1868 i = act_set_offset + act_atomic_block_offset - 1
1869 DO ma = 1, nsgfa(iset)
1870 j = shm_set_offset(iset, jset, jkind, ikind) + act_atomic_block_offset - 1 + ma - 1
1871 DO mb = 1, nsgfb(jset)
1873 full_ks_alpha(i, img) = (full_ks_alpha(i, img) + full_ks_alpha(j, img)*afac)
1874 full_ks_alpha(j, img) = full_ks_alpha(i, img)*afac
1875 IF (.NOT. treat_lsd_in_core .AND. nspins == 2)
THEN
1876 full_ks_beta(i, img) = (full_ks_beta(i, img) + full_ks_beta(j, img)*afac)
1877 full_ks_beta(j, img) = full_ks_beta(i, img)*afac
1882 ene_x_aa_diag = ene_x_aa_diag + full_ks_alpha(i, img)*full_density_alpha(i, img)
1883 IF (.NOT. treat_lsd_in_core .AND. nspins == 2)
THEN
1884 ene_x_bb_diag = ene_x_bb_diag + full_ks_beta(i, img)*full_density_beta(i, img)
1896 CALL para_env%sync()
1898 IF (is_anti_symmetric) afac = 0._dp
1899 IF (distribute_fock_matrix)
THEN
1901 CALL timeset(routinen//
"_dist_KS", handle_dist_ks)
1902 DO img = 1, nkimages
1903 CALL distribute_ks_matrix(para_env, full_ks_alpha(:, img), ks_matrix(ispin, img)%matrix, shm_number_of_p_entries, &
1904 shm_block_offset, kind_of, basis_parameter, &
1905 off_diag_fac=0.5_dp, diag_fac=afac)
1908 NULLIFY (full_ks_alpha)
1909 DEALLOCATE (shm_master_x_data%full_ks_alpha)
1910 IF (.NOT. treat_lsd_in_core)
THEN
1911 IF (nspins == 2)
THEN
1912 DO img = 1, nkimages
1913 CALL distribute_ks_matrix(para_env, full_ks_beta(:, img), ks_matrix(2, img)%matrix, shm_number_of_p_entries, &
1914 shm_block_offset, kind_of, basis_parameter, &
1915 off_diag_fac=0.5_dp, diag_fac=afac)
1917 NULLIFY (full_ks_beta)
1918 DEALLOCATE (shm_master_x_data%full_ks_beta)
1921 CALL timestop(handle_dist_ks)
1924 IF (distribute_fock_matrix)
THEN
1927 DO img = 1, nkimages
1929 ene_x_aa = ene_x_aa + etmp
1932 IF (dft_control%do_admm)
THEN
1933 cpassert(nkimages == 1)
1934 CALL dbcsr_copy(matrix_ks_aux_fit_hfx(ispin)%matrix, ks_matrix(ispin, 1)%matrix, &
1935 name=
"HF exch. part of matrix_ks_aux_fit for ADMMS")
1939 IF (nspins == 2 .AND. .NOT. treat_lsd_in_core)
THEN
1940 DO img = 1, nkimages
1942 ene_x_bb = ene_x_bb + etmp
1945 IF (dft_control%do_admm)
THEN
1946 cpassert(nkimages == 1)
1947 CALL dbcsr_copy(matrix_ks_aux_fit_hfx(2)%matrix, ks_matrix(2, 1)%matrix, &
1948 name=
"HF exch. part of matrix_ks_aux_fit for ADMMS")
1953 ehfx = 0.5_dp*(ene_x_aa + ene_x_bb)
1957 DO img = 1, nkimages
1958 DO pa = 1,
SIZE(full_ks_alpha, 1)
1959 ene_x_aa = ene_x_aa + full_ks_alpha(pa, img)*full_density_alpha(pa, img)
1963 ene_x_aa = (ene_x_aa + ene_x_aa_diag)*0.5_dp
1964 IF (nspins == 2)
THEN
1965 DO img = 1, nkimages
1966 DO pa = 1,
SIZE(full_ks_beta, 1)
1967 ene_x_bb = ene_x_bb + full_ks_beta(pa, img)*full_density_beta(pa, img)
1971 ene_x_bb = (ene_x_bb + ene_x_bb_diag)*0.5_dp
1973 CALL para_env%sum(ene_x_aa)
1974 IF (nspins == 2)
CALL para_env%sum(ene_x_bb)
1975 ehfx = 0.5_dp*(ene_x_aa + ene_x_bb)
1979 IF (my_geo_change)
THEN
1980 tmp_i8(1:8) = (/shm_storage_counter_integrals, shm_neris_onthefly, shm_neris_incore, shm_neris_disk, &
1981 shm_neris_total, shm_stor_count_int_disk, shm_nprim_ints, shm_stor_count_max_val/)
1982 CALL para_env%sum(tmp_i8)
1983 shm_storage_counter_integrals = tmp_i8(1)
1984 shm_neris_onthefly = tmp_i8(2)
1985 shm_neris_incore = tmp_i8(3)
1986 shm_neris_disk = tmp_i8(4)
1987 shm_neris_total = tmp_i8(5)
1988 shm_stor_count_int_disk = tmp_i8(6)
1989 shm_nprim_ints = tmp_i8(7)
1990 shm_stor_count_max_val = tmp_i8(8)
1991 CALL para_env%max(memsize_after)
1992 mem_eris = (shm_storage_counter_integrals + 128*1024 - 1)/1024/128
1993 compression_factor = real(shm_neris_incore,
dp)/real(shm_storage_counter_integrals,
dp)
1994 mem_eris_disk = (shm_stor_count_int_disk + 128*1024 - 1)/1024/128
1995 compression_factor_disk = real(shm_neris_disk,
dp)/real(shm_stor_count_int_disk,
dp)
1996 mem_max_val = (shm_stor_count_max_val + 128*1024 - 1)/1024/128
1998 IF (shm_neris_incore == 0)
THEN
2000 compression_factor = 0.0_dp
2002 IF (shm_neris_disk == 0)
THEN
2004 compression_factor_disk = 0.0_dp
2008 extension=
".scfLog")
2010 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
2011 "HFX_MEM_INFO| Number of cart. primitive ERI's calculated: ", shm_nprim_ints
2013 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
2014 "HFX_MEM_INFO| Number of sph. ERI's calculated: ", shm_neris_total
2016 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
2017 "HFX_MEM_INFO| Number of sph. ERI's stored in-core: ", shm_neris_incore
2019 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
2020 "HFX_MEM_INFO| Number of sph. ERI's stored on disk: ", shm_neris_disk
2022 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
2023 "HFX_MEM_INFO| Number of sph. ERI's calculated on the fly: ", shm_neris_onthefly
2025 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
2026 "HFX_MEM_INFO| Total memory consumption ERI's RAM [MiB]: ", mem_eris
2028 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
2029 "HFX_MEM_INFO| Whereof max-vals [MiB]: ", mem_max_val
2031 WRITE (unit=iw, fmt=
"((T3,A,T60,F21.2))") &
2032 "HFX_MEM_INFO| Total compression factor ERI's RAM: ", compression_factor
2034 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
2035 "HFX_MEM_INFO| Total memory consumption ERI's disk [MiB]: ", mem_eris_disk
2037 WRITE (unit=iw, fmt=
"((T3,A,T60,F21.2))") &
2038 "HFX_MEM_INFO| Total compression factor ERI's disk: ", compression_factor_disk
2040 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
2041 "HFX_MEM_INFO| Size of density/Fock matrix [MiB]: ", 2_int_8*mb_size_p
2043 IF (do_periodic)
THEN
2044 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
2045 "HFX_MEM_INFO| Size of buffers [MiB]: ", mb_size_buffers
2046 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
2047 "HFX_MEM_INFO| Number of periodic image cells considered: ",
SIZE(shm_master_x_data%neighbor_cells)
2049 WRITE (unit=iw, fmt=
"((T3,A,T60,I21))") &
2050 "HFX_MEM_INFO| Size of buffers [MiB]: ", mb_size_buffers
2052 WRITE (unit=iw, fmt=
"((T3,A,T60,I21),/)") &
2053 "HFX_MEM_INFO| Est. max. program size after HFX [MiB]:", memsize_after/(1024*1024)
2063 IF (do_dynamic_load_balancing)
THEN
2064 my_bin_size =
SIZE(actual_x_data%distribution_energy)
2069 IF (my_geo_change)
THEN
2070 IF (.NOT. memory_parameter%do_all_on_the_fly)
THEN
2071 DO bin = 1, my_bin_size
2072 maxval_cache => actual_x_data%store_ints%maxval_cache(bin)
2073 maxval_container => actual_x_data%store_ints%maxval_container(bin)
2074 integral_caches => actual_x_data%store_ints%integral_caches(:, bin)
2075 integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
2076 CALL hfx_flush_last_cache(bits_max_val, maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
2080 memory_parameter%actual_memory_usage, .false.)
2086 IF (.NOT. memory_parameter%do_all_on_the_fly)
THEN
2087 DO bin = 1, my_bin_size
2088 maxval_cache => actual_x_data%store_ints%maxval_cache(bin)
2089 maxval_container => actual_x_data%store_ints%maxval_container(bin)
2090 integral_caches => actual_x_data%store_ints%integral_caches(:, bin)
2091 integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
2096 memory_parameter%actual_memory_usage, &
2104 IF (do_disk_storage)
THEN
2106 IF (my_geo_change)
THEN
2108 memory_parameter%actual_memory_usage_disk, .true.)
2111 memory_parameter%actual_memory_usage_disk, .true.)
2119 memory_parameter%actual_memory_usage_disk, do_disk_storage)
2125 DEALLOCATE (last_sgf_global)
2127 DEALLOCATE (full_density_alpha)
2128 IF (.NOT. treat_lsd_in_core)
THEN
2129 IF (nspins == 2)
THEN
2130 DEALLOCATE (full_density_beta)
2133 IF (do_dynamic_load_balancing)
THEN
2134 DEALLOCATE (shm_master_x_data%task_list)
2137 DEALLOCATE (pbd_buf, pbc_buf, pad_buf, pac_buf)
2138 DEALLOCATE (kbd_buf, kbc_buf, kad_buf, kac_buf)
2139 DEALLOCATE (set_list_ij, set_list_kl)
2141 DO i = 1, max_pgf**2
2142 DEALLOCATE (pgf_list_ij(i)%image_list)
2143 DEALLOCATE (pgf_list_kl(i)%image_list)
2146 DEALLOCATE (pgf_list_ij)
2147 DEALLOCATE (pgf_list_kl)
2148 DEALLOCATE (pgf_product_list)
2150 DEALLOCATE (max_contraction, kind_of)
2152 DEALLOCATE (ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp)
2154 DEALLOCATE (nimages)
2159 CALL timestop(handle)