72#include "./base/base_uses.f90"
77 LOGICAL,
PRIVATE,
PARAMETER :: debug_this_module = .true.
78 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'input_cp2k_global'
92 INTEGER :: default_dgemm
97 cpassert(.NOT.
ASSOCIATED(section))
99 description=
"Section with general information regarding which kind "// &
100 "of simulation to perform an parameters for the whole PROGRAM", &
101 n_keywords=7, n_subsections=0, repeats=.false.)
105 description=
"how to distribute the processors on the 2d grid needed "// &
106 "by BLACS (and thus SCALAPACK)", usage=
"BLACS_GRID SQUARE", &
108 enum_desc=
s2a(
"Distribution by matrix blocks",
"Distribution by matrix rows", &
109 "Distribution by matrix columns"), &
114 CALL keyword_create(keyword, __location__, name=
"BLACS_REPEATABLE", &
115 description=
"Use a topology for BLACS collectives that is guaranteed to be repeatable "// &
116 "on homogeneous architectures", &
117 usage=
"BLACS_REPEATABLE", &
118 default_l_val=.false., lone_keyword_l_val=.true.)
122 CALL keyword_create(keyword, __location__, name=
"PREFERRED_DIAG_LIBRARY", &
123 description=
"Specifies the diagonalization library to be used. If not available, "// &
124 "the ScaLAPACK library is used", &
125 usage=
"PREFERRED_DIAG_LIBRARY ELPA", &
131 enum_c_vals=
s2a(
"ELPA",
"ScaLAPACK",
"SL",
"CUSOLVER",
"DLAF"), &
132 enum_desc=
s2a(
"ELPA library", &
133 "ScaLAPACK library", &
134 "ScaLAPACK library (shorthand)", &
135 "cuSOLVER (CUDA GPU library)", &
136 "DLA-Future (CUDA/HIP GPU library)"), &
141 CALL keyword_create(keyword, __location__, name=
"PREFERRED_CHOLESKY_LIBRARY", &
142 description=
"Specifies Cholesky decomposition library to be used. If not available, "// &
143 "the ScaLAPACK library is used", &
144 usage=
"PREFERRED_CHOLESKY_LIBRARY DLAF", &
148 enum_c_vals=
s2a(
"ScaLAPACK",
"SL",
"DLAF"), &
149 enum_desc=
s2a(
"ScaLAPACK library", &
150 "ScaLAPACK library (shorthand)", &
151 "DLA-Future (CUDA/HIP GPU library)"), &
156#if defined(__SPLA) && defined(__OFFLOAD_GEMM)
161 CALL keyword_create(keyword, __location__, name=
"PREFERRED_DGEMM_LIBRARY", &
162 description=
"Specifies the DGEMM library to be used. If not available, "// &
163 "the BLAS routine is used. This keyword affects some DGEMM calls in the WFC code and turns on their "// &
164 "acceleration with SpLA. This keyword affects only local DGEMM calls, not the calls to PDGEMM "// &
165 "(see keyword FM%TYPE_OF_MATRIX_MULTIPLICATION).", &
166 usage=
"PREFERRED_DGEMM_LIBRARY SPLA", &
167 default_i_val=default_dgemm, &
169 enum_c_vals=
s2a(
"SPLA",
"BLAS"), &
170 enum_desc=
s2a(
"SPLA library",
"BLAS library"))
174 CALL keyword_create(keyword, __location__, name=
"EPS_CHECK_DIAG", &
175 description=
"Check that the orthonormality of the eigenvectors after a diagonalization "// &
176 "fulfills the specified numerical accuracy. A negative threshold value disables the check.", &
177 usage=
"EPS_CHECK_DIAG 1.0E-14", &
178 default_r_val=-1.0_dp)
183 description=
"Specifies the kernel to be used when ELPA is in use", &
191 CALL keyword_create(keyword, __location__, name=
"ELPA_NEIGVEC_MIN", &
192 description=
"Minimum number of eigenvectors for the use of the eigensolver from "// &
193 "the ELPA library. The eigensolver from the ScaLAPACK library is used as fallback "// &
194 "for all smaller cases", &
195 usage=
"ELPA_NEIGVEC_MIN 32", &
201 description=
"For ELPA, enable a blocked QR step when reducing the input matrix "// &
202 "to banded form in preparation for the actual diagonalization step. "// &
203 "See implementation paper for more details. Requires ELPA version 201505 or newer, "// &
204 "automatically deactivated otherwise. If true, QR is activated only when the "// &
205 "the size of the diagonalized matrix is suitable. Print key PRINT_ELPA is "// &
206 "useful in determining which matrices are suitable for QR. Might accelerate the "// &
207 "diagonalization of suitable matrices.", &
209 default_l_val=.false., lone_keyword_l_val=.true.)
213 CALL keyword_create(keyword, __location__, name=
"ELPA_QR_UNSAFE", &
214 description=
"For ELPA, disable block size limitations when used together with ELPA_QR. "// &
215 "Keyword relevant only with ELPA versions 201605 or newer. Use keyword with caution, "// &
216 "as it might result in wrong eigenvalues with some matrix orders/block sizes "// &
217 "when the number of MPI processes is varied. If the print key PRINT_ELPA is "// &
218 "active the validity of the eigenvalues is checked against values calculated without "// &
220 usage=
"ELPA_QR_UNSAFE", &
221 default_l_val=.false., lone_keyword_l_val=.true.)
226 description=
"Controls the printing of ELPA diagonalization information. "// &
227 "Useful for testing purposes, especially together with keyword ELPA_QR.", &
228 filename=
"__STD_OUT__")
232 CALL keyword_create(keyword, __location__, name=
"DLAF_NEIGVEC_MIN", &
233 description=
"Minimum number of eigenvectors for the use of the eigensolver from "// &
234 "the DLA-Future library. The eigensolver from the ScaLAPACK library is used as fallback "// &
235 "for all smaller cases", &
236 usage=
"DLAF_NEIGVEC_MIN 512", &
241 CALL keyword_create(keyword, __location__, name=
"DLAF_CHOLESKY_N_MIN", &
242 description=
"Minimum matrix size for the use of the Cholesky decomposition from "// &
243 "the DLA-Future library. The Cholesky decomposition from the ScaLAPACK library is used as fallback "// &
244 "for all smaller cases", &
245 usage=
"DLAF_CHOLESKY_N_MIN 512", &
251 keyword, __location__, name=
"PREFERRED_FFT_LIBRARY", &
252 description=
"Specifies the FFT library which should be preferred. "// &
253 "If it is not available, use FFTW3 if this is linked in, if FFTW3 is not available use FFTSG. "// &
254 "Improved performance with FFTW3 can be obtained specifying a proper value for FFTW_PLAN_TYPE. "// &
255 "Contrary to earlier CP2K versions, all libraries will result in the same grids, "// &
256 "i.e. the subset of grids which all FFT libraries can transform. "// &
257 "See EXTENDED_FFT_LENGTHS if larger FFTs or grids that more precisely match a given cutoff are needed, "// &
258 "or older results need to be reproduced. "// &
259 "FFTW3 is often (close to) optimal, and well tested with CP2K.", &
260 usage=
"PREFERRED_FFT_LIBRARY FFTW3", &
264 enum_c_vals=
s2a(
"FFTSG",
"FFTW3",
"FFTW"), &
265 enum_desc=
s2a(
"Stefan Goedecker's FFT (FFTSG), always available, "// &
266 "will be used in case a FFT library is specified and not available.", &
267 "a fast portable FFT library. Recommended. "// &
268 "See also the FFTW_PLAN_TYPE, and FFTW_WISDOM_FILE_NAME keywords.", &
269 "Same as FFTW3 (for compatibility with CP2K 2.3)"))
273 CALL keyword_create(keyword, __location__, name=
"FFTW_WISDOM_FILE_NAME", &
274 description=
"The name of the file that contains wisdom (pre-planned FFTs) for use with FFTW3. "// &
275 "Using wisdom can significantly speed up the FFTs (see the FFTW homepage for details). "// &
276 "Note that wisdom is not transferable between different computer (architectures). "// &
277 "Wisdom can be generated using the fftw-wisdom tool that is part of the fftw installation. "// &
278 "cp2k/tools/cp2k-wisdom is a script that contains some additional info, and can help "// &
279 "to generate a useful default for /etc/fftw/wisdom or particular values for a given simulation.", &
280 usage=
"FFTW_WISDOM_FILE_NAME wisdom.dat", default_lc_val=
"/etc/fftw/wisdom")
284 CALL keyword_create(keyword, __location__, name=
"FFTW_PLAN_TYPE", &
285 description=
"FFTW can have improved performance if it is allowed to plan with "// &
286 "explicit measurements which strategy is best for a given FFT. "// &
287 "While a plan based on measurements is generally faster, "// &
288 "differences in machine load will lead to different plans for the same input file, "// &
289 "and thus numerics for the FFTs will be slightly different from run to run. "// &
290 "PATIENT planning is recommended for long ab initio MD runs.", &
291 usage=
"FFTW_PLAN_TYPE PATIENT", &
295 enum_c_vals=
s2a(
"ESTIMATE", &
299 enum_desc=
s2a(
"Quick estimate, no runtime measurements.", &
300 "Quick measurement, somewhat faster FFTs.", &
301 "Measurements trying a wider range of possibilities.", &
302 "Measurements trying all possibilities - use with caution."))
306 CALL keyword_create(keyword, __location__, name=
"EXTENDED_FFT_LENGTHS", &
307 description=
"Use fft library specific values for the allows number of points in FFTs. "// &
308 "The default is to use the internal FFT lengths. For external fft libraries this may "// &
309 "create an error at the external library level, because the length provided by cp2k is "// &
310 "not supported by the external library. In this case switch on this keyword "// &
311 "to obtain, with certain fft libraries, lengths matching the external fft library lengths, or "// &
312 "larger allowed grids, or grids that more precisely match a given cutoff. "// &
313 "IMPORTANT NOTE: in this case, the actual grids used in CP2K depends on the FFT library. "// &
314 "A change of FFT library must therefore be considered equivalent to a change of basis, "// &
315 "which implies a change of total energy.", &
316 usage=
"EXTENDED_FFT_LENGTHS", &
317 default_l_val=.false., lone_keyword_l_val=.true.)
321 CALL keyword_create(keyword, __location__, name=
"FFT_POOL_SCRATCH_LIMIT", &
322 description=
"Limits the memory usage of the FFT scratch pool, potentially reducing efficiency a bit", &
323 usage=
"FFT_POOL_SCRATCH_LIMIT {INTEGER}", default_i_val=15)
328 description=
"All-to-all communication (FFT) should use single precision", &
329 usage=
"ALLTOALL_SGL YES", &
330 default_l_val=.false., lone_keyword_l_val=.true.)
335 variants=(/
"IOLEVEL"/), &
336 description=
"How much output is written out.", &
337 usage=
"PRINT_LEVEL HIGH", &
339 s2a(
"SILENT",
"LOW",
"MEDIUM",
"HIGH",
"DEBUG"), &
340 enum_desc=
s2a(
"Almost no output", &
341 "Little output",
"Quite some output",
"Lots of output", &
342 "Everything is written out, useful for debugging purposes only"), &
349 keyword, __location__, name=
"PROGRAM_NAME", &
350 variants=(/
"PROGRAM"/), &
351 description=
"Which program should be run", &
352 usage=
"PROGRAM_NAME {STRING}", &
353 enum_c_vals=
s2a(
"ATOM",
"FARMING",
"TEST",
"CP2K",
"OPTIMIZE_INPUT",
"OPTIMIZE_BASIS",
"TMC",
"MC_ANALYSIS",
"SWARM"), &
354 enum_desc=
s2a(
"Runs single atom calculations", &
355 "Runs N independent jobs in a single run", &
356 "Do some benchmarking and testing", &
357 "Runs one of the CP2K package", &
358 "A tool to optimize parameters in a CP2K input", &
359 "A tool to create a MOLOPT or ADMM basis for a given set"// &
360 " of training structures", &
361 "Runs Tree Monte Carlo algorithm using additional input file(s)", &
362 "Runs (Tree) Monte Carlo trajectory file analysis", &
363 "Runs swarm based calculation"), &
371 variants=(/
"PROJECT"/), &
372 description=
"Name of the project (used to build the name of the "// &
373 "trajectory, and other files generated by the program)", &
374 usage=
"PROJECT_NAME {STRING}", &
375 default_c_val=
"PROJECT")
379 CALL keyword_create(keyword, __location__, name=
"OUTPUT_FILE_NAME", &
380 description=
"Name of the output file. "// &
381 "Relevant only if automatically started (through farming for example). "// &
382 "If empty uses the project name as basis for it.", &
383 usage=
"OUTPUT_FILE_NAME {filename}", default_lc_val=
"")
388 keyword, __location__, name=
"RUN_TYPE", &
389 description=
"Type of run that you want to perform Geometry "// &
390 "optimization, md, montecarlo,...", &
391 usage=
"RUN_TYPE MD", &
394 enum_c_vals=
s2a(
"NONE",
"ENERGY",
"ENERGY_FORCE",
"MD",
"GEO_OPT", &
395 "MC",
"DEBUG",
"BSSE",
"LR",
"PINT",
"VIBRATIONAL_ANALYSIS", &
396 "BAND",
"CELL_OPT",
"WFN_OPT",
"WAVEFUNCTION_OPTIMIZATION", &
397 "MOLECULAR_DYNAMICS",
"GEOMETRY_OPTIMIZATION",
"MONTECARLO", &
398 "LINEAR_RESPONSE",
"NORMAL_MODES",
"RT_PROPAGATION", &
399 "EHRENFEST_DYN",
"TAMC",
"TMC",
"DRIVER",
"NEGF"), &
406 enum_desc=
s2a(
"Perform no tasks",
"Computes energy",
"Computes energy and forces", &
407 "Molecular Dynamics",
"Geometry Optimization",
"Monte Carlo", &
408 "Performs a Debug analysis",
"Basis set superposition error",
"Linear Response", &
409 "Path integral",
"Vibrational analysis",
"Band methods", &
410 "Cell optimization. Both cell vectors and atomic positions are optimised.", &
411 "Alias for ENERGY",
"Alias for ENERGY",
"Alias for MD",
"Alias for GEO_OPT", &
412 "Alias for MC",
"Alias for LR",
"Alias for VIBRATIONAL_ANALYSIS", &
413 "Real Time propagation run (fixed ionic positions)", &
414 "Ehrenfest dynamics (using real time propagation of the wavefunction)", &
415 "Temperature Accelerated Monte Carlo (TAMC)", &
416 "Tree Monte Carlo (TMC), a pre-sampling MC algorithm", &
417 "i-PI driver mode", &
418 "Non-equilibrium Green's function method"))
423 variants=(/
"WALLTI"/), &
424 description=
"Maximum execution time for this run. Time in seconds or in HH:MM:SS.", &
425 usage=
"WALLTIME {real} or {HH:MM:SS}", default_lc_val=
"")
430 description=
"If the input should be echoed to the output with all the "// &
431 "defaults made explicit", &
432 usage=
"ECHO_INPUT NO", default_l_val=.false., lone_keyword_l_val=.true.)
436 CALL keyword_create(keyword, __location__, name=
"ECHO_ALL_HOSTS", &
437 description=
"Echo a list of hostname and pid for all MPI processes.", &
438 usage=
"ECHO_ALL_HOSTS NO", default_l_val=.false., lone_keyword_l_val=.true.)
442 CALL keyword_create(keyword, __location__, name=
"ENABLE_MPI_IO", &
443 description=
"Enable MPI parallelization for all supported I/O routines "// &
444 "Currently, only cube file writer/reader routines use MPI I/O. Disabling "// &
445 "this flag might speed up calculations dominated by I/O.", &
446 usage=
"ENABLE_MPI_IO FALSE", default_l_val=.true., lone_keyword_l_val=.true.)
451 description=
"If a debug trace of the execution of the program should be written", &
453 default_l_val=.false., lone_keyword_l_val=.true.)
458 description=
"For parallel TRACEd runs: only the master node writes output.", &
459 usage=
"TRACE_MASTER", &
460 default_l_val=.true., lone_keyword_l_val=.true.)
465 keyword, __location__, name=
"TRACE_MAX", &
466 description=
"Limit the total number a given subroutine is printed in the trace. Accounting is not influenced.", &
467 usage=
"TRACE_MAX 100", default_i_val=huge(0))
472 keyword, __location__, name=
"TRACE_ROUTINES", &
473 description=
"A list of routines to trace. If left empty all routines are traced. Accounting is not influenced.", &
474 usage=
"TRACE_ROUTINES {routine_name1} {routine_name2} ...", type_of_var=
char_t, &
480 keyword, __location__, name=
"FLUSH_SHOULD_FLUSH", &
481 description=
"Flush output regularly, enabling this option might degrade performance significantly on certain machines.", &
482 usage=
"FLUSH_SHOULD_FLUSH", &
483 default_l_val=.true., lone_keyword_l_val=.true.)
488 description=
"At the end of the run write a callgraph to file, "// &
489 "which contains detailed timing informations. "// &
490 "This callgraph can be viewed e.g. with the open-source program kcachegrind.", &
491 usage=
"CALLGRAPH {NONE|MASTER|ALL}", &
493 enum_c_vals=
s2a(
"NONE",
"MASTER",
"ALL"), &
494 enum_desc=
s2a(
"No callgraph gets written", &
495 "Only the master process writes his callgraph", &
496 "All processes write their callgraph (into a separate files)."), &
501 CALL keyword_create(keyword, __location__, name=
"CALLGRAPH_FILE_NAME", &
502 description=
"Name of the callgraph file, which is written at the end of the run. "// &
503 "If not specified the project name will be used as filename.", &
504 usage=
"CALLGRAPH_FILE_NAME {filename}", default_lc_val=
"")
509 description=
"Initial seed for the global (pseudo)random number generator "// &
510 "to create a stream of normally Gaussian distributed random numbers. "// &
511 "Exactly 1 or 6 positive integer values are expected. A single value is "// &
512 "replicated to fill up the full seed array with 6 numbers.", &
515 usage=
"SEED {INTEGER} .. {INTEGER}", &
516 default_i_vals=(/2000/))
521 description=
"Some sections of the input structure are deallocated when not needed,"// &
522 " and reallocated only when used. This reduces the required maximum memory.", &
524 default_l_val=.false., lone_keyword_l_val=.true.)
529 "Controls the printing of the timing report at the end of CP2K execution", &
533 description=
"Specify % of CPUTIME above which the contribution will be inserted in the"// &
534 " final timing report (e.g. 0.02 = 2%)", &
535 usage=
"THRESHOLD {REAL}", &
536 default_r_val=0.02_dp)
540 CALL keyword_create(keyword, __location__, name=
"SORT_BY_SELF_TIME", &
541 description=
"Sort the final timing report by the average self (exclusive) time instead of the "// &
542 "total (inclusive) time of a routine", &
543 usage=
"SORT_BY_SELF_TIME on", &
544 default_l_val=.false., lone_keyword_l_val=.true.)
548 CALL keyword_create(keyword, __location__, name=
"REPORT_MAXLOC", &
549 description=
"Report the rank with the slowest maximum self timing."// &
550 " Can be used to debug hard- or software."// &
551 " Also enables ECHO_ALL_HOSTS to link rank to hostname.", &
552 usage=
"REPORT_MAXLOC on", &
553 default_l_val=.false., lone_keyword_l_val=.true.)
558 description=
"Include message_passing calls in the timing report (useful with CALLGRAPH).", &
559 usage=
"TIME_MPI .FALSE.", &
560 default_l_val=.true., lone_keyword_l_val=.true.)
564 CALL keyword_create(keyword, __location__, name=
"TIMINGS_LEVEL", &
565 description=
"Specify the level of timings report. "// &
566 "Possible values are: 0 (report only CP2K root timer), 1 (all timers).", &
567 usage=
"TIMINGS_LEVEL 1", &
576 "Controls the printing of the references relevant to the calculations performed", &
582 description=
"controls the printing of initialization controlled by the global section", &
588 "controls the printing of physical and mathematical constants", &
591 CALL keyword_create(keyword, __location__, name=
"BASIC_DATA_TYPES", &
592 description=
"Controls the printing of the basic data types.", &
593 default_l_val=.false., lone_keyword_l_val=.true.)
597 description=
"if the printkey is active prints the physical constants", &
598 default_l_val=.true., lone_keyword_l_val=.true.)
601 CALL keyword_create(keyword, __location__, name=
"SPHERICAL_HARMONICS", &
602 description=
"if the printkey is active prints the spherical harmonics", &
607 description=
"Prints the transformation matrices used by the random number generator", &
608 default_l_val=.false., &
609 lone_keyword_l_val=.true.)
613 description=
"Performs a check of the global (pseudo)random "// &
614 "number generator (RNG) and prints the result", &
615 default_l_val=.false., &
616 lone_keyword_l_val=.true.)
619 CALL keyword_create(keyword, __location__, name=
"GLOBAL_GAUSSIAN_RNG", &
620 description=
"Prints the initial status of the global Gaussian "// &
621 "(pseudo)random number stream which is mostly used for "// &
622 "the velocity initialization", &
623 default_l_val=.false., &
624 lone_keyword_l_val=.true.)
630 NULLIFY (sub_section)
632 CALL create_fm_section(sub_section)
640 CALL create_fm_diag_rules_section(sub_section)
644 CALL create_grid_section(sub_section)
! **************************************************************************************************
!> \brief Builds the input section described as "Configuration options for the full matrices."
!>        Visible keywords: rows/columns per ScaLAPACK block, FORCE_BLOCK_SIZE and
!>        TYPE_OF_MATRIX_MULTIPLICATION.
!> \param section the section to create; must not be associated on entry (asserted below)
! **************************************************************************************************
656 SUBROUTINE create_fm_section(section)
! Passed as default_i_val of TYPE_OF_MATRIX_MULTIPLICATION; its assignment is not visible here.
659 INTEGER :: default_matmul
! Guard: refuse to build into a section pointer that is already associated.
662 cpassert(.NOT.
ASSOCIATED(section))
664 description=
"Configuration options for the full matrices.", &
665 n_keywords=1, n_subsections=0, repeats=.false.)
670 description=
"Defines the number of rows per scalapack block in "// &
671 "the creation of block cyclic dense matrices. "// &
672 "Use an internal default if zero or negative.", &
678 description=
"Defines the number of columns per scalapack block in "// &
679 "the creation of block cyclic dense matrices. "// &
680 "Use an internal default if zero or negative.", &
! Logical switch: off by default, on when the keyword is given without a value.
685 CALL keyword_create(keyword, __location__, name=
"FORCE_BLOCK_SIZE", &
686 description=
"Ensure for small matrices that the layout is compatible "// &
687 "with bigger ones, i.e. no subdivision is performed (can break LAPACK).", &
688 usage=
"FORCE_BLOCK_SIZE", &
689 default_l_val=.false., lone_keyword_l_val=.true.)
! Enumerated choice of the matrix-multiplication backend; PDGEMM is an alias for SCALAPACK.
699 CALL keyword_create(keyword, __location__, name=
"TYPE_OF_MATRIX_MULTIPLICATION", &
700 description=
"Allows to switch between scalapack pxgemm and COSMA pxgemm. "// &
701 "COSMA reduces the communication costs but increases the memory demands. "// &
702 "The performance of Scalapack's pxgemm on GPU's depends "// &
703 "crucially on the BLOCK_SIZES. Make sure optimized kernels are available.", &
704 default_i_val=default_matmul, &
706 enum_c_vals=
s2a(
"SCALAPACK",
"PDGEMM",
"COSMA"), &
707 enum_desc=
s2a(
"Standard ScaLAPACK pdgemm", &
708 "Alias for ScaLAPACK", &
709 "COSMA is employed. See <https://github.com/eth-cscs/COSMA>."))
714 END SUBROUTINE create_fm_section
! **************************************************************************************************
!> \brief Builds the FM_DIAG_SETTINGS input section: heuristic rules for choosing the number of
!>        CPUs, M, used to diagonalize a full matrix distributed on N processors.
!>        Visible keywords: PARAMETER_A, PARAMETER_X, PRINT_FM_REDISTRIBUTE and
!>        ELPA_FORCE_REDISTRIBUTE.
!> \param section the section to create; must not be associated on entry (asserted below)
! **************************************************************************************************
721 SUBROUTINE create_fm_diag_rules_section(section)
! Guard: refuse to build into a section pointer that is already associated.
726 cpassert(.NOT.
ASSOCIATED(section))
727 CALL section_create(section, __location__, name=
"FM_DIAG_SETTINGS", &
728 description=
"This section defines a set of heuristic rules which are "// &
729 "used to calculate the optimal number of CPUs, M, needed to diagonalize a "// &
730 "full matrix distributed on N processors (FM type). If M < N, the matrix "// &
731 "is redistributed onto M processors before it is diagonalized. "// &
732 "The optimal value is calculated according to M = ((K+a*x-1)/(a*x))*a, "// &
733 "where K is the size of the matrix, and {a, x} are integers defined below. "// &
734 "The default values have been selected based on timings on a Cray XE6. "// &
735 "Supports diagonalization libraries SL and ELPA (see keyword ELPA_FORCE_REDISTRIBUTE).", &
736 n_keywords=3, n_subsections=0, repeats=.false.)
741 description=
"Parameter used for defining the rule which determines the optimal "// &
742 "number of CPUs needed to diagonalize a full distributed matrix. The optimal "// &
743 "number of CPUs will be an integer multiple of this variable.", &
744 usage=
"PARAMETER_A 4", type_of_var=
integer_t, &
750 description=
"Parameter used for defining the rule which determines the optimal "// &
751 "number of CPUs needed to diagonalize a full distributed matrix. The optimal "// &
752 "number of CPUs will be roughly proportional to this value.", &
753 usage=
"PARAMETER_X 60", type_of_var=
integer_t, &
! Logical switch: off by default, on when the keyword is given without a value.
758 CALL keyword_create(keyword, __location__, name=
"PRINT_FM_REDISTRIBUTE", &
759 description=
"Controls printing of information related to this section. For each "// &
760 "diagonalized matrix, prints the size of the matrix, the optimal number of CPUs, "// &
761 "as well as notifies if the matrix was redistributed. Useful for testing.", &
762 usage=
"PRINT_FM_REDISTRIBUTE", type_of_var=
logical_t, &
763 default_l_val=.false., lone_keyword_l_val=.true.)
! Logical switch that defaults to .true.; it must be set false explicitly to turn it off.
767 CALL keyword_create(keyword, __location__, name=
"ELPA_FORCE_REDISTRIBUTE", &
768 description=
"Controls how to perform redistribution when ELPA is used for diagonalization. "// &
769 "By default, redistribution is always performed using the defined rules. "// &
770 "By turning off this keyword, matrices are redistributed only to prevent crashes in the ELPA "// &
771 "library which happens when the original matrix is distributed over too many processors.", &
772 usage=
"ELPA_FORCE_REDISTRIBUTE", type_of_var=
logical_t, &
773 default_l_val=.true., lone_keyword_l_val=.true.)
777 END SUBROUTINE create_fm_diag_rules_section
! **************************************************************************************************
!> \brief Builds the input section holding the configuration options of the grid library
!>        (backend selection, validation against the reference backend, spherical cutoff).
!> \param section the section to create; must not be associated on entry (asserted below)
! **************************************************************************************************
784 SUBROUTINE create_grid_section(section)
! Guard: refuse to build into a section pointer that is already associated.
789 cpassert(.NOT.
ASSOCIATED(section))
791 description=
"Configuration options for the grid library, "// &
792 "which performs e.g. the collocate and integrate of the GPW method.", &
793 n_keywords=1, n_subsections=0, repeats=.false.)
797 description=
"Selects the backend used by the grid library.", &
801 enum_c_vals=
s2a(
"AUTO",
"REFERENCE",
"CPU",
"DGEMM",
"GPU",
"HIP"), &
802 enum_desc=
s2a(
"Let the grid library pick the backend automatically", &
803 "Reference backend implementation", &
804 "Optimized CPU backend", &
805 "Alternative CPU backend based on DGEMM", &
806 "GPU backend optimized for CUDA that also supports HIP", &
807 "HIP backend optimized for ROCm"))
812 description=
"When enabled the reference backend is run in shadow mode "// &
813 "and its results are compared with those from the selected backend. "// &
814 "If the two results differ by too much then the calculation is aborted.", &
815 default_l_val=.false., lone_keyword_l_val=.true.)
820 description=
"When enabled the cpu backend "// &
821 "applies a spherical cutoff on the top of the cube. "// &
822 "There is a performance penalty using it in "// &
823 "combination with the cpu backend but it is on by "// &
824 "default for the regtests", default_l_val=.true., &
825 lone_keyword_l_val=.true.)
829 END SUBROUTINE create_grid_section
collects all references to literature in CP2K as new algorithms / methods are included from literature...
integer, save, public schonherr2014
integer, save, public frigo2005
integer, save, public ceriotti2014
methods related to the blacs parallel environment
integer, parameter, public blacs_grid_row
integer, parameter, public blacs_grid_col
integer, parameter, public blacs_grid_square
Routines that link DBCSR and CP2K concepts together.
subroutine, public create_dbcsr_section(section)
Creates the dbcsr section for configuring DBCSR.
various cholesky decomposition related routines
integer, parameter, public fm_cholesky_type_dlaf
integer, parameter, public fm_cholesky_type_default
integer, parameter, public fm_cholesky_type_scalapack
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
integer, parameter, public fm_diag_type_cusolver
integer, parameter, public fm_diag_type_dlaf
integer, parameter, public fm_diag_type_scalapack
integer, parameter, public fm_diag_type_default
integer, parameter, public fm_diag_type_elpa
character(len=14), dimension(1), parameter, public elpa_kernel_names
character(len=44), dimension(1), parameter, public elpa_kernel_descriptions
integer, dimension(1), parameter, public elpa_kernel_ids
represent the structure of a full matrix
integer function, public cp_fm_struct_get_nrow_block()
...
integer function, public cp_fm_struct_get_ncol_block()
...
routines to handle the output. The idea is to remove the decision of whether to output and what to out...
integer, parameter, public debug_print_level
integer, parameter, public low_print_level
integer, parameter, public medium_print_level
integer, parameter, public high_print_level
integer, parameter, public add_last_numeric
integer, parameter, public silent_print_level
subroutine, public cp_print_key_section_create(print_key_section, location, name, description, print_level, each_iter_names, each_iter_values, add_last, filename, common_iter_levels, citations, unit_str)
creates a print_key section
Fortran API for the grid package, which is written in C.
integer, parameter, public grid_backend_auto
integer, parameter, public grid_backend_gpu
integer, parameter, public grid_backend_hip
integer, parameter, public grid_backend_dgemm
integer, parameter, public grid_backend_cpu
integer, parameter, public grid_backend_ref
Defines the basic variable types.
integer, parameter, public dp
Utilities for string manipulations.
Timing routines for accounting.
integer, parameter, public default_timings_level