28 dbcsr_type_no_symmetry
56#include "./base/base_uses.f90"
60 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'cp_dbcsr_cp2k_link'
70 INTEGER,
PARAMETER :: mm_driver_auto = 0
71 INTEGER,
PARAMETER :: mm_driver_matmul = 1
72 INTEGER,
PARAMETER :: mm_driver_blas = 2
73 INTEGER,
PARAMETER :: mm_driver_smm = 3
74 INTEGER,
PARAMETER :: mm_driver_xsmm = 4
76 CHARACTER(len=*),
PARAMETER :: mm_name_auto =
"AUTO", &
77 mm_name_blas =
"BLAS", &
78 mm_name_matmul =
"MATMUL", &
79 mm_name_smm =
"SMM", &
94 REAL(kind=
dp) :: rdefault
98 cpassert(.NOT.
ASSOCIATED(section))
100 description=
"Configuration options for the DBCSR library.", &
101 n_keywords=1, n_subsections=0, repeats=.false., &
106 CALL keyword_create(keyword, __location__, name=
"mm_stack_size", &
107 description=
"Size of multiplication parameter stack." &
108 //
" A negative value leaves the decision up to DBCSR.", &
109 usage=
"mm_stack_size 1000", &
115 description=
"Select which backend to use preferably "// &
116 "for matrix block multiplications on the host.", &
117 usage=
"mm_driver blas", &
118 default_i_val=mm_driver_auto, &
119 enum_c_vals=
s2a(
"AUTO",
"BLAS",
"MATMUL",
"SMM",
"XSMM"), &
120 enum_i_vals=(/mm_driver_auto, mm_driver_blas, mm_driver_matmul, mm_driver_smm, &
122 enum_desc=
s2a(
"Choose automatically the best available driver", &
123 "BLAS (requires the BLAS library at link time)", &
125 "Library optimised for Small Matrix Multiplies "// &
126 "(requires the SMM library at link time)", &
132 CALL dbcsr_get_default_config(avg_elements_images=idefault)
133 CALL keyword_create(keyword, __location__, name=
"avg_elements_images", &
134 description=
"Average number of elements (dense limit)" &
135 //
" for each image, which also corresponds to" &
136 //
" the average number of elements exchanged between MPI processes" &
137 //
" during the operations." &
138 //
" A negative or zero value means unlimited.", &
139 usage=
"avg_elements_images 10000", &
140 default_i_val=idefault)
144 CALL dbcsr_get_default_config(num_mult_images=idefault)
145 CALL keyword_create(keyword, __location__, name=
"num_mult_images", &
146 description=
"Multiplicative factor for number of virtual images.", &
147 usage=
"num_mult_images 2", &
148 default_i_val=idefault)
152 CALL dbcsr_get_default_config(use_mpi_allocator=ldefault)
153 CALL keyword_create(keyword, __location__, name=
"use_mpi_allocator", &
154 description=
"Use MPI allocator" &
155 //
" to allocate buffers used in MPI communications.", &
156 usage=
"use_mpi_allocator T", &
157 default_l_val=ldefault)
161 CALL dbcsr_get_default_config(use_mpi_rma=ldefault)
163 description=
"Use RMA for MPI communications" &
164 //
" for each image, which also corresponds to" &
165 //
" the number of elements exchanged between MPI processes" &
166 //
" during the operations.", &
167 usage=
"use_mpi_rma F", &
168 default_l_val=ldefault)
172 CALL dbcsr_get_default_config(num_layers_3d=idefault)
173 CALL keyword_create(keyword, __location__, name=
"num_layers_3D", &
174 description=
"Number of layers for the 3D multplication algorithm.", &
175 usage=
"num_layers_3D 1", &
176 default_i_val=idefault)
180 CALL dbcsr_get_default_config(nstacks=idefault)
181 CALL keyword_create(keyword, __location__, name=
"n_size_mnk_stacks", &
182 description=
"Number of stacks to use for distinct atomic sizes" &
183 //
" (e.g., 2 for a system of mostly waters). ", &
184 usage=
"n_size_mnk_stacks 2", &
185 default_i_val=idefault)
189 CALL dbcsr_get_default_config(use_comm_thread=ldefault)
190 CALL keyword_create(keyword, __location__, name=
"use_comm_thread", &
191 description=
"During multiplication, use a thread to periodically poll" &
192 //
" MPI to progress outstanding message completions. This is" &
193 //
" beneficial on systems without a DMA-capable network adapter" &
194 //
" e.g. Cray XE6.", &
195 usage=
"use_comm_thread T", &
196 default_l_val=ldefault)
200 CALL keyword_create(keyword, __location__, name=
"MAX_ELEMENTS_PER_BLOCK", &
201 description=
"Default block size for turning dense matrices in blocked ones", &
202 usage=
"MAX_ELEMENTS_PER_BLOCK 32", &
207 CALL keyword_create(keyword, __location__, name=
"comm_thread_load", &
208 description=
"If a communications thread is used, specify how much " &
209 //
"multiplication workload (%) the thread should perform in " &
210 //
"addition to communication tasks. " &
211 //
"A negative value leaves the decision up to DBCSR.", &
212 usage=
"comm_thread_load 50", &
217 CALL dbcsr_get_default_config(multrec_limit=idefault)
218 CALL keyword_create(keyword, __location__, name=
"multrec_limit", &
219 description=
"Recursion limit of cache oblivious multrec algorithm.", &
220 default_i_val=idefault)
224 CALL dbcsr_get_default_config(use_mempools_cpu=ldefault)
225 CALL keyword_create(keyword, __location__, name=
"use_mempools_cpu", &
226 description=
"Enable memory pools on the CPU.", &
227 default_l_val=ldefault)
233 description=
"Configuration options for Tensors.", &
234 n_keywords=1, n_subsections=0, repeats=.false.)
236 CALL dbcsr_get_default_config(tas_split_factor=rdefault)
237 CALL keyword_create(keyword, __location__, name=
"TAS_SPLIT_FACTOR", &
238 description=
"Parameter for hybrid DBCSR-TAS matrix-matrix multiplication algorithm: "// &
239 "a TAS matrix is split into s submatrices with s = N_max/(N_min*f) with f "// &
240 "given by this parameter and N_max/N_min the max/min occupancies of the matrices "// &
241 "involved in a multiplication. A large value makes the multiplication Cannon-based "// &
242 "(s=1) and a small value (> 0) makes the multiplication based on TAS algorithm "// &
243 "(s=number of MPI ranks)", &
244 default_r_val=rdefault)
254 description=
"Configuration options for the ACC-Driver.", &
255 n_keywords=1, n_subsections=0, repeats=.false.)
257 CALL dbcsr_get_default_config(accdrv_thread_buffers=idefault)
258 CALL keyword_create(keyword, __location__, name=
"thread_buffers", &
259 description=
"Number of transfer-buffers associated with each thread and corresponding stream.", &
260 default_i_val=idefault)
264 CALL dbcsr_get_default_config(accdrv_avoid_after_busy=ldefault)
265 CALL keyword_create(keyword, __location__, name=
"avoid_after_busy", &
266 description=
"If enabled, stacks are not processed by the acc-driver " &
267 //
"after it has signaled congestion during a round of flushing. " &
268 //
"For the next round of flusing the driver is used again.", &
269 default_l_val=ldefault)
273 CALL dbcsr_get_default_config(accdrv_min_flop_process=idefault)
274 CALL keyword_create(keyword, __location__, name=
"min_flop_process", &
275 description=
"Only process stacks with more than the given number of " &
276 //
"floating-point operations per stack-entry (2*m*n*k).", &
277 default_i_val=idefault)
281 CALL dbcsr_get_default_config(accdrv_stack_sort=ldefault)
283 description=
"Sort multiplication stacks according to C-access.", &
284 default_l_val=ldefault)
288 CALL dbcsr_get_default_config(accdrv_min_flop_sort=idefault)
289 CALL keyword_create(keyword, __location__, name=
"min_flop_sort", &
290 description=
"Only sort stacks with more than the given number of " &
291 //
"floating-point operations per stack-entry (2*m*n*k). " &
292 //
"Alternatively, the stacks are roughly ordered through a " &
293 //
"binning-scheme by Peter Messmer. (Depends on ACC%STACK_SORT)", &
294 default_i_val=idefault)
298 CALL dbcsr_get_default_config(accdrv_do_inhomogenous=ldefault)
299 CALL keyword_create(keyword, __location__, name=
"process_inhomogenous", &
300 description=
"If enabled, inhomogenous stacks are also processed by the acc driver.", &
301 default_l_val=ldefault)
305 CALL dbcsr_get_default_config(accdrv_binning_nbins=idefault)
306 CALL keyword_create(keyword, __location__, name=
"binning_nbins", &
307 description=
"Number of bins used when ordering " &
308 //
"the stacks with the binning-scheme.", &
309 default_i_val=idefault)
313 CALL dbcsr_get_default_config(accdrv_binning_binsize=idefault)
314 CALL keyword_create(keyword, __location__, name=
"binning_binsize", &
315 description=
"Size of bins used when ordering " &
316 //
"the stacks with the binning-scheme.", &
317 default_i_val=idefault)
333 CHARACTER(len=*),
PARAMETER :: routinen =
'cp_dbcsr_config'
335 INTEGER :: handle, ival
340 CALL timeset(routinen, handle)
348 CALL dbcsr_set_config(mm_stack_size=ival)
353 CALL dbcsr_set_config(avg_elements_images=ival)
356 CALL dbcsr_set_config(num_mult_images=ival)
359 CALL dbcsr_set_config(nstacks=ival)
362 CALL dbcsr_set_config(use_mpi_allocator=lval)
365 CALL dbcsr_set_config(use_mpi_rma=lval)
368 CALL dbcsr_set_config(num_layers_3d=ival)
371 CALL dbcsr_set_config(use_comm_thread=lval)
374 CALL dbcsr_set_config(comm_thread_load=ival)
377 CALL dbcsr_set_config(multrec_limit=ival)
380 CALL dbcsr_set_config(use_mempools_cpu=lval)
383 CALL dbcsr_set_config(tas_split_factor=rval)
386 CALL dbcsr_set_config(accdrv_thread_buffers=ival)
389 CALL dbcsr_set_config(accdrv_min_flop_process=ival)
392 CALL dbcsr_set_config(accdrv_stack_sort=lval)
395 CALL dbcsr_set_config(accdrv_min_flop_sort=ival)
398 CALL dbcsr_set_config(accdrv_do_inhomogenous=lval)
401 CALL dbcsr_set_config(accdrv_avoid_after_busy=lval)
404 CALL dbcsr_set_config(accdrv_binning_nbins=ival)
407 CALL dbcsr_set_config(accdrv_binning_binsize=ival)
411 CASE (mm_driver_auto)
412 CALL dbcsr_set_config(mm_driver=
"AUTO")
413#if defined(__LIBXSMM)
416 CASE (mm_driver_blas)
417 CALL dbcsr_set_config(mm_driver=
"BLAS")
418 CASE (mm_driver_matmul)
419 CALL dbcsr_set_config(mm_driver=
"MATMUL")
421 CALL dbcsr_set_config(mm_driver=
"SMM")
422 CASE (mm_driver_xsmm)
423 CALL dbcsr_set_config(mm_driver=
"XSMM")
426 cpabort(
"Unknown mm_driver")
429 CALL timestop(handle)
449 LOGICAL,
INTENT(IN),
OPTIONAL :: desymmetrize
451 CHARACTER(LEN=*),
PARAMETER :: routinen =
'cp_dbcsr_alloc_block_from_nbl'
453 CHARACTER(LEN=1) :: symmetry
454 INTEGER :: blk_cnt, handle, iatom, icol, inode, &
455 irow, jatom, last_jatom, nadd
456 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: cols, rows, tmp
457 LOGICAL :: alloc_full, is_symmetric, new_atom_b
459 DIMENSION(:),
POINTER :: nl_iterator
461 CALL timeset(routinen, handle)
465 cpassert(
ASSOCIATED(sab_orb))
469 IF (
PRESENT(desymmetrize))
THEN
470 IF (desymmetrize .AND. (symmetry == dbcsr_type_no_symmetry))
THEN
471 IF (is_symmetric) alloc_full = .true.
476 ALLOCATE (rows(3), cols(3))
479 IF (alloc_full) nadd = 2
484 IF (inode == 1) last_jatom = 0
485 IF (jatom /= last_jatom)
THEN
492 IF (blk_cnt + nadd .GT.
SIZE(rows))
THEN
493 ALLOCATE (tmp(blk_cnt + nadd))
494 tmp(1:blk_cnt) = rows(1:blk_cnt)
496 ALLOCATE (rows((blk_cnt + nadd)*2))
497 rows(1:blk_cnt) = tmp(1:blk_cnt)
498 tmp(1:blk_cnt) = cols(1:blk_cnt)
500 ALLOCATE (cols((blk_cnt + nadd)*2))
501 cols(1:blk_cnt) = tmp(1:blk_cnt)
505 blk_cnt = blk_cnt + 1
506 rows(blk_cnt) = iatom
507 cols(blk_cnt) = jatom
508 IF (iatom /= jatom)
THEN
509 blk_cnt = blk_cnt + 1
510 rows(blk_cnt) = jatom
511 cols(blk_cnt) = iatom
514 blk_cnt = blk_cnt + 1
515 IF (symmetry == dbcsr_type_no_symmetry)
THEN
516 rows(blk_cnt) = iatom
517 cols(blk_cnt) = jatom
519 IF (iatom <= jatom)
THEN
540 CALL timestop(handle)
561 TYPE(
dbcsr_type),
INTENT(INOUT) :: csr_sparsity
563 CHARACTER(len=*),
PARAMETER :: routinen =
'cp_dbcsr_to_csr_screening'
565 INTEGER :: atom_a, atom_b, handle, iatom, icol, ikind, ipgf, irow, iset, isgf, ishell, &
566 jatom, jkind, jpgf, jset, jsgf, jshell, nkind, nset_a, nset_b
567 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: atom_of_kind
568 INTEGER,
DIMENSION(:),
POINTER :: npgf_a, npgf_b, nshell_a, nshell_b
569 INTEGER,
DIMENSION(:, :),
POINTER :: l_a, l_b
570 LOGICAL :: do_symmetric, found
571 REAL(kind=
dp) :: dab, eps_pgf_orb, r_a, r_b
572 REAL(kind=
dp),
DIMENSION(3) :: rab
573 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: rpgfa, rpgfb, zet_a, zet_b
574 REAL(kind=
dp),
DIMENSION(:, :, :),
POINTER :: gcc_a, gcc_b
575 REAL(kind=
real_8),
DIMENSION(:, :),
POINTER :: screen_blk
581 DIMENSION(:),
POINTER :: nl_iterator
583 POINTER :: neighbour_list
584 TYPE(
qs_kind_type),
DIMENSION(:),
POINTER :: qs_kind_set
586 NULLIFY (screen_blk, atomic_kind_set, basis_set_list_a, &
587 basis_set_list_b, basis_set_a, basis_set_b, nl_iterator, &
588 qs_kind_set, dft_control)
590 CALL timeset(routinen, handle)
595 sab_orb=neighbour_list, &
596 atomic_kind_set=atomic_kind_set, &
597 qs_kind_set=qs_kind_set, &
598 dft_control=dft_control)
600 eps_pgf_orb = dft_control%qs_control%eps_pgf_orb
602 nkind =
SIZE(qs_kind_set)
604 cpassert(
SIZE(neighbour_list) > 0)
606 cpassert(do_symmetric)
607 ALLOCATE (basis_set_list_a(nkind), basis_set_list_b(nkind))
620 ikind=ikind, jkind=jkind, &
621 iatom=iatom, jatom=jatom, &
624 basis_set_a => basis_set_list_a(ikind)%gto_basis_set
625 IF (.NOT.
ASSOCIATED(basis_set_a)) cycle
626 basis_set_b => basis_set_list_b(jkind)%gto_basis_set
627 IF (.NOT.
ASSOCIATED(basis_set_b)) cycle
629 atom_a = atom_of_kind(iatom)
630 atom_b = atom_of_kind(jatom)
632 nset_a = basis_set_a%nset
633 nset_b = basis_set_b%nset
634 npgf_a => basis_set_a%npgf
635 npgf_b => basis_set_b%npgf
636 nshell_a => basis_set_a%nshell
637 nshell_b => basis_set_b%nshell
641 gcc_a => basis_set_a%gcc
642 gcc_b => basis_set_b%gcc
643 zet_a => basis_set_a%zet
644 zet_b => basis_set_b%zet
646 rpgfa => basis_set_a%pgf_radius
647 rpgfb => basis_set_b%pgf_radius
649 IF (iatom <= jatom)
THEN
658 block=screen_blk, found=found)
663 dab = sqrt(rab(1)**2 + rab(2)**2 + rab(3)**2)
671 DO ishell = 1, nshell_a(iset)
674 DO jshell = 1, nshell_b(jset)
675 gto_loop:
DO ipgf = 1, npgf_a(iset)
676 DO jpgf = 1, npgf_b(jset)
677 IF (rpgfa(ipgf, iset) + rpgfb(jpgf, jset) .GE. dab)
THEN
682 gcc_a(ipgf, ishell, iset))
686 gcc_b(jpgf, jshell, jset))
687 IF (r_a + r_b .GE. dab)
THEN
688 IF (irow .EQ. iatom)
THEN
689 screen_blk(isgf + 1:isgf +
nso(l_a(ishell, iset)), &
690 jsgf + 1:jsgf +
nso(l_b(jshell, jset))) = 1.0_dp
692 screen_blk(jsgf + 1:jsgf +
nso(l_b(jshell, jset)), &
693 isgf + 1:isgf +
nso(l_a(ishell, iset))) = 1.0_dp
700 jsgf = jsgf +
nso(l_b(jshell, jset))
703 isgf = isgf +
nso(l_a(ishell, iset))
709 DEALLOCATE (basis_set_list_a, basis_set_list_b)
711 CALL timestop(handle)
All kinds of helpful little routines.
real(kind=dp) function, public exp_radius(l, alpha, threshold, prefactor, epsabs, epsrel, rlow)
The radius of a primitive Gaussian function for a given threshold is calculated. g(r) = prefactor*r**...
Define the atomic kind types and their sub types.
subroutine, public get_atomic_kind_set(atomic_kind_set, atom_of_kind, kind_of, natom_of_kind, maxatom, natom, nshell, fist_potential_present, shell_present, shell_adiabatic, shell_check_distance, damping_present)
Get attributes of an atomic kind set.
Collects all references to literature in CP2K as new algorithms / methods are included from literature...
integer, save, public heinecke2016
integer, save, public borstnik2014
integer, save, public schuett2016
Defines control structures, which contain the parameters and the settings for the DFT-based calculati...
logical function, public dbcsr_has_symmetry(matrix)
...
character function, public dbcsr_get_matrix_type(matrix)
...
subroutine, public dbcsr_get_block_p(matrix, row, col, block, found, row_size, col_size)
...
subroutine, public dbcsr_reserve_blocks(matrix, rows, cols)
...
subroutine, public dbcsr_finalize(matrix)
...
subroutine, public dbcsr_set(matrix, alpha)
...
Routines that link DBCSR and CP2K concepts together.
subroutine, public cp_dbcsr_to_csr_screening(ks_env, csr_sparsity)
Apply distance screening to refine sparsity pattern of matrices in CSR format (using eps_pgf_orb)....
subroutine, public cp_dbcsr_config(root_section)
Configures options for DBCSR.
subroutine, public create_dbcsr_section(section)
Creates the dbcsr section for configuring DBCSR.
subroutine, public cp_dbcsr_alloc_block_from_nbl(matrix, sab_orb, desymmetrize)
Allocates the blocks of a DBCSR matrix based on the neighbor list.
DBCSR operations in CP2K.
integer, save, public max_elements_per_block
Defines the basic variable types.
integer, parameter, public dp
integer, parameter, public real_8
Provides Cartesian and spherical orbital pointers and indices.
integer, dimension(:), allocatable, public nso
Some utility functions for the calculation of integrals.
subroutine, public basis_set_list_setup(basis_set_list, basis_type, qs_kind_set)
Set up an easily accessible list of the basis sets for all kinds.
Define the quickstep kind type and their sub types.
subroutine, public get_ks_env(ks_env, v_hartree_rspace, s_mstruct_changed, rho_changed, potential_changed, forces_up_to_date, complex_ks, matrix_h, matrix_h_im, matrix_ks, matrix_ks_im, matrix_vxc, kinetic, matrix_s, matrix_s_ri_aux, matrix_w, matrix_p_mp2, matrix_p_mp2_admm, matrix_h_kp, matrix_h_im_kp, matrix_ks_kp, matrix_vxc_kp, kinetic_kp, matrix_s_kp, matrix_w_kp, matrix_s_ri_aux_kp, matrix_ks_im_kp, rho, rho_xc, vppl, rho_core, rho_nlcc, rho_nlcc_g, vee, neighbor_list_id, sab_orb, sab_all, sac_ae, sac_ppl, sac_lri, sap_ppnl, sap_oce, sab_lrc, sab_se, sab_xtbe, sab_tbe, sab_core, sab_xb, sab_xtb_pp, sab_xtb_nonbond, sab_vdw, sab_scp, sab_almo, sab_kp, sab_kp_nosym, task_list, task_list_soft, kpoints, do_kpoints, atomic_kind_set, qs_kind_set, cell, cell_ref, use_ref_cell, particle_set, energy, force, local_particles, local_molecules, molecule_kind_set, molecule_set, subsys, cp_subsys, virial, results, atprop, nkind, natom, dft_control, dbcsr_dist, distribution_2d, pw_env, para_env, blacs_env, nelectron_total, nelectron_spin)
...
Define the neighbor list data types and the corresponding functionality.
subroutine, public neighbor_list_iterator_create(iterator_set, nl, search, nthread)
Neighbor list iterator functions.
subroutine, public neighbor_list_iterator_release(iterator_set)
...
subroutine, public get_neighbor_list_set_p(neighbor_list_sets, nlist, symmetric)
Return the components of the first neighbor list set.
integer function, public neighbor_list_iterate(iterator_set, mepos)
...
subroutine, public get_iterator_info(iterator_set, mepos, ikind, jkind, nkind, ilist, nlist, inode, nnode, iatom, jatom, r, cell)
...
Utilities for string manipulations.
Provides all information about an atomic kind.
Provides all information about a quickstep kind.
Calculation environment to calculate the KS matrix; holds all the needed vars. Assumes that the core ...