72#include "./base/base_uses.f90" 
   ! Module-private compile-time flag, fixed to .true. here.
   ! NOTE(review): presumably gates extra debug checks in this module - confirm where it is read.
   77   LOGICAL, 
PRIVATE, 
PARAMETER :: debug_this_module = .true.
 
   ! Module-private constant holding this module's name as a string.
   ! NOTE(review): presumably used when building routine names for messages - confirm.
   78   CHARACTER(len=*), 
PARAMETER, 
PRIVATE :: moduleN = 
'input_cp2k_global' 
   92      INTEGER                                            :: default_dgemm
 
   97      cpassert(.NOT. 
ASSOCIATED(section))
 
   99                          description=
"Section with general information regarding which kind "// &
 
  100                          "of simulation to perform an parameters for the whole PROGRAM", &
 
  101                          n_keywords=7, n_subsections=0, repeats=.false.)
 
  105                          description=
"how to distribute the processors on the 2d grid needed "// &
 
  106                          "by BLACS (and thus SCALAPACK)", usage=
"BLACS_GRID SQUARE", &
 
  108                          enum_desc=
s2a(
"Distribution by matrix blocks", 
"Distribution by matrix rows", &
 
  109                                        "Distribution by matrix columns"), &
 
  114      CALL keyword_create(keyword, __location__, name=
"BLACS_REPEATABLE", &
 
  115                          description=
"Use a topology for BLACS collectives that is guaranteed to be repeatable "// &
 
  116                          "on homogeneous architectures", &
 
  117                          usage=
"BLACS_REPEATABLE", &
 
  118                          default_l_val=.false., lone_keyword_l_val=.true.)
 
  122      CALL keyword_create(keyword, __location__, name=
"PREFERRED_DIAG_LIBRARY", &
 
  123                          description=
"Specifies the diagonalization library to be used. If not available, "// &
 
  124                          "the ScaLAPACK library is used", &
 
  125                          usage=
"PREFERRED_DIAG_LIBRARY ELPA", &
 
  131                          enum_c_vals=
s2a(
"ELPA", 
"ScaLAPACK", 
"SL", 
"CUSOLVER", 
"DLAF"), &
 
  132                          enum_desc=
s2a(
"ELPA library", &
 
  133                                        "ScaLAPACK library", &
 
  134                                        "ScaLAPACK library (shorthand)", &
 
  135                                        "cuSOLVER (CUDA GPU library)", &
 
  136                                        "DLA-Future (CUDA/HIP GPU library)"), &
 
  141      CALL keyword_create(keyword, __location__, name=
"PREFERRED_CHOLESKY_LIBRARY", &
 
  142                          description=
"Specifies Cholesky decomposition library to be used. If not available, "// &
 
  143                          "the ScaLAPACK library is used", &
 
  144                          usage=
"PREFERRED_CHOLESKY_LIBRARY DLAF", &
 
  148                          enum_c_vals=
s2a(
"ScaLAPACK", 
"SL", 
"DLAF"), &
 
  149                          enum_desc=
s2a(
"ScaLAPACK library", &
 
  150                                        "ScaLAPACK library (shorthand)", &
 
  151                                        "DLA-Future (CUDA/HIP GPU library)"), &
 
  156#if defined(__SPLA) && defined(__OFFLOAD_GEMM) 
  161      CALL keyword_create(keyword, __location__, name=
"PREFERRED_DGEMM_LIBRARY", &
 
  162                          description=
"Specifies the DGEMM library to be used. If not available, "// &
 
  163                          "the BLAS routine is used. This keyword affects some DGEMM calls in the WFC code and turns on their "// &
 
  164                          "acceleration with SpLA. This keyword affects only local DGEMM calls, not the calls to PDGEMM "// &
 
  165                          "(see keyword FM%TYPE_OF_MATRIX_MULTIPLICATION).", &
 
  166                          usage=
"PREFERRED_DGEMM_LIBRARY SPLA", &
 
  167                          default_i_val=default_dgemm, &
 
  169                          enum_c_vals=
s2a(
"SPLA", 
"BLAS"), &
 
  170                          enum_desc=
s2a(
"SPLA library", 
"BLAS library"))
 
  174      CALL keyword_create(keyword, __location__, name=
"EPS_CHECK_DIAG", &
 
  175                          description=
"Check that the orthonormality of the eigenvectors after a diagonalization "// &
 
  176                          "fulfills the specified numerical accuracy. A negative threshold value disables the check.", &
 
  177                          usage=
"EPS_CHECK_DIAG 1.0E-14", &
 
  178                          default_r_val=-1.0_dp)
 
  183                          description=
"Specifies the kernel to be used when ELPA is in use", &
 
  191      CALL keyword_create(keyword, __location__, name=
"ELPA_NEIGVEC_MIN", &
 
  192                          description=
"Minimum number of eigenvectors for the use of the eigensolver from "// &
 
  193                          "the ELPA library. The eigensolver from the ScaLAPACK library is used as fallback "// &
 
  194                          "for all smaller cases", &
 
  195                          usage=
"ELPA_NEIGVEC_MIN 32", &
 
  201                          description=
"For ELPA, enable a blocked QR step when reducing the input matrix "// &
 
  202                          "to banded form in preparation for the actual diagonalization step. "// &
 
  203                          "See implementation paper for more details. Requires ELPA version 201505 or newer, "// &
 
  204                          "automatically deactivated otherwise. If true, QR is activated only when the "// &
 
  205                          "the size of the diagonalized matrix is suitable. Print key PRINT_ELPA is "// &
 
  206                          "useful in determining which matrices are suitable for QR. Might accelerate the "// &
 
  207                          "diagonalization of suitable matrices.", &
 
  209                          default_l_val=.false., lone_keyword_l_val=.true.)
 
  213      CALL keyword_create(keyword, __location__, name=
"ELPA_QR_UNSAFE", &
 
  214                          description=
"For ELPA, disable block size limitations when used together with ELPA_QR. "// &
 
  215                          "Keyword relevant only with ELPA versions 201605 or newer. Use keyword with caution, "// &
 
  216                          "as it might result in wrong eigenvalues with some matrix orders/block sizes "// &
 
  217                          "when the number of MPI processes is varied. If the print key PRINT_ELPA is "// &
 
  218                          "active the validity of the eigenvalues is checked against values calculated without "// &
 
  220                          usage=
"ELPA_QR_UNSAFE", &
 
  221                          default_l_val=.false., lone_keyword_l_val=.true.)
 
  226                                       description=
"Controls the printing of ELPA diagonalization information. "// &
 
  227                                       "Useful for testing purposes, especially together with keyword ELPA_QR.", &
 
  228                                       filename=
"__STD_OUT__")
 
  232      CALL keyword_create(keyword, __location__, name=
"DLAF_NEIGVEC_MIN", &
 
  233                          description=
"Minimum number of eigenvectors for the use of the eigensolver from "// &
 
  234                          "the DLA-Future library. The eigensolver from the ScaLAPACK library is used as fallback "// &
 
  235                          "for all smaller cases", &
 
  236                          usage=
"DLAF_NEIGVEC_MIN 512", &
 
  241      CALL keyword_create(keyword, __location__, name=
"DLAF_CHOLESKY_N_MIN", &
 
  242                          description=
"Minimum matrix size for the use of the Cholesky decomposition from "// &
 
  243                          "the DLA-Future library. The Cholesky decomposition from the ScaLAPACK library is used as fallback "// &
 
  244                          "for all smaller cases", &
 
  245                          usage=
"DLAF_CHOLESKY_N_MIN 512", &
 
  251         keyword, __location__, name=
"PREFERRED_FFT_LIBRARY", &
 
  252         description=
"Specifies the FFT library which should be preferred. "// &
 
  253         "If it is not available, use FFTW3 if this is linked in, if FFTW3 is not available use FFTSG. "// &
 
  254         "Improved performance with FFTW3 can be obtained specifying a proper value for FFTW_PLAN_TYPE. "// &
 
  255         "Contrary to earlier CP2K versions, all libraries will result in the same grids, "// &
 
  256         "i.e. the subset of grids which all FFT libraries can transform. "// &
 
  257         "See EXTENDED_FFT_LENGTHS if larger FFTs or grids that more precisely match a given cutoff are needed, "// &
 
  258         "or older results need to be reproduced. "// &
 
  259         "FFTW3 is often (close to) optimal, and well tested with CP2K.", &
 
  260         usage=
"PREFERRED_FFT_LIBRARY FFTW3", &
 
  264         enum_c_vals=
s2a(
"FFTSG", 
"FFTW3", 
"FFTW"), &
 
  265         enum_desc=
s2a(
"Stefan Goedecker's FFT (FFTSG), always available, "// &
 
  266                       "will be used in case a FFT library is specified and not available.", &
 
  267                       "a fast portable FFT library. Recommended. "// &
 
  268                       "See also the FFTW_PLAN_TYPE, and FFTW_WISDOM_FILE_NAME keywords.", &
 
  269                       "Same as FFTW3 (for compatibility with CP2K 2.3)"))
 
  273      CALL keyword_create(keyword, __location__, name=
"FFTW_WISDOM_FILE_NAME", &
 
  274                          description=
"The name of the file that contains wisdom (pre-planned FFTs) for use with FFTW3. "// &
 
  275                          "Using wisdom can significantly speed up the FFTs (see the FFTW homepage for details). "// &
 
  276                          "Note that wisdom is not transferable between different computer (architectures). "// &
 
  277                          "Wisdom can be generated using the fftw-wisdom tool that is part of the fftw installation. "// &
 
  278                          "cp2k/tools/cp2k-wisdom is a script that contains some additional info, and can help "// &
 
  279                          "to generate a useful default for /etc/fftw/wisdom or particular values for a given simulation.", &
 
  280                          usage=
"FFTW_WISDOM_FILE_NAME wisdom.dat", default_lc_val=
"/etc/fftw/wisdom")
 
  284      CALL keyword_create(keyword, __location__, name=
"FFTW_PLAN_TYPE", &
 
  285                          description=
"FFTW can have improved performance if it is allowed to plan with "// &
 
  286                          "explicit measurements which strategy is best for a given FFT. "// &
 
  287                          "While a plan based on measurements is generally faster, "// &
 
  288                          "differences in machine load will lead to different plans for the same input file, "// &
 
  289                          "and thus numerics for the FFTs will be slightly different from run to run. "// &
 
  290                          "PATIENT planning is recommended for long ab initio MD runs.", &
 
  291                          usage=
"FFTW_PLAN_TYPE PATIENT", &
 
  295                          enum_c_vals=
s2a(
"ESTIMATE", &
 
  299                          enum_desc=
s2a(
"Quick estimate, no runtime measurements.", &
 
  300                                        "Quick measurement, somewhat faster FFTs.", &
 
  301                                        "Measurements trying a wider range of possibilities.", &
 
  302                                        "Measurements trying all possibilities - use with caution."))
 
  306      CALL keyword_create(keyword, __location__, name=
"EXTENDED_FFT_LENGTHS", &
 
  307                          description=
"Use fft library specific values for the allows number of points in FFTs. "// &
 
  308                          "The default is to use the internal FFT lengths. For external fft libraries this may "// &
 
  309                          "create an error at the external library level, because the length provided by cp2k is "// &
 
  310                          "not supported by the external library. In this case switch on this keyword "// &
 
  311                          "to obtain, with certain fft libraries, lengths matching the external fft library lengths, or "// &
 
  312                          "larger allowed grids, or grids that more precisely match a given cutoff. "// &
 
  313                          "IMPORTANT NOTE: in this case, the actual grids used in CP2K depends on the FFT library. "// &
 
  314                          "A change of FFT library must therefore be considered equivalent to a change of basis, "// &
 
  315                          "which implies a change of total energy.", &
 
  316                          usage=
"EXTENDED_FFT_LENGTHS", &
 
  317                          default_l_val=.false., lone_keyword_l_val=.true.)
 
  321      CALL keyword_create(keyword, __location__, name=
"FFT_POOL_SCRATCH_LIMIT", &
 
  322                          description=
"Limits the memory usage of the FFT scratch pool, potentially reducing efficiency a bit", &
 
  323                          usage=
"FFT_POOL_SCRATCH_LIMIT {INTEGER}", default_i_val=15)
 
  328                          description=
"All-to-all communication (FFT) should use single precision", &
 
  329                          usage=
"ALLTOALL_SGL YES", &
 
  330                          default_l_val=.false., lone_keyword_l_val=.true.)
 
  335                          variants=(/
"IOLEVEL"/), &
 
  336                          description=
"How much output is written out.", &
 
  337                          usage=
"PRINT_LEVEL HIGH", &
 
  339                          s2a(
"SILENT", 
"LOW", 
"MEDIUM", 
"HIGH", 
"DEBUG"), &
 
  340                          enum_desc=
s2a(
"Almost no output", &
 
  341                                        "Little output", 
"Quite some output", 
"Lots of output", &
 
  342                                        "Everything is written out, useful for debugging purposes only"), &
 
  349         keyword, __location__, name=
"PROGRAM_NAME", &
 
  350         variants=(/
"PROGRAM"/), &
 
  351         description=
"Which program should be run", &
 
  352         usage=
"PROGRAM_NAME {STRING}", &
 
  353         enum_c_vals=
s2a(
"ATOM", 
"FARMING", 
"TEST", 
"CP2K", 
"OPTIMIZE_INPUT", 
"OPTIMIZE_BASIS", 
"TMC", 
"MC_ANALYSIS", 
"SWARM"), &
 
  354         enum_desc=
s2a(
"Runs single atom calculations", &
 
  355                       "Runs N independent jobs in a single run", &
 
  356                       "Do some benchmarking and testing", &
 
  357                       "Runs one of the CP2K package", &
 
  358                       "A tool to optimize parameters in a CP2K input", &
 
  359                       "A tool to create a MOLOPT or ADMM basis for a given set"// &
 
  360                       " of training structures", &
 
  361                       "Runs Tree Monte Carlo algorithm using additional input file(s)", &
 
  362                       "Runs (Tree) Monte Carlo trajectory file analysis", &
 
  363                       "Runs swarm based calculation"), &
 
  371                          variants=(/
"PROJECT"/), &
 
  372                          description=
"Name of the project (used to build the name of the "// &
 
  373                          "trajectory, and other files generated by the program)", &
 
  374                          usage=
"PROJECT_NAME {STRING}", &
 
  375                          default_c_val=
"PROJECT")
 
  379      CALL keyword_create(keyword, __location__, name=
"OUTPUT_FILE_NAME", &
 
  380                          description=
"Name of the output file. "// &
 
  381                          "Relevant only if automatically started (through farming for example). "// &
 
  382                          "If empty uses the project name as basis for it.", &
 
  383                          usage=
"OUTPUT_FILE_NAME {filename}", default_lc_val=
"")
 
  388         keyword, __location__, name=
"RUN_TYPE", &
 
  389         description=
"Type of run that you want to perform Geometry "// &
 
  390         "optimization, md, montecarlo,...", &
 
  391         usage=
"RUN_TYPE MD", &
 
  394         enum_c_vals=
s2a(
"NONE", 
"ENERGY", 
"ENERGY_FORCE", 
"MD", 
"GEO_OPT", &
 
  395                         "MC", 
"DEBUG", 
"BSSE", 
"LR", 
"PINT", 
"VIBRATIONAL_ANALYSIS", &
 
  396                         "BAND", 
"CELL_OPT", 
"WFN_OPT", 
"WAVEFUNCTION_OPTIMIZATION", &
 
  397                         "MOLECULAR_DYNAMICS", 
"GEOMETRY_OPTIMIZATION", 
"MONTECARLO", &
 
  398                         "LINEAR_RESPONSE", 
"NORMAL_MODES", 
"RT_PROPAGATION", &
 
  399                         "EHRENFEST_DYN", 
"TAMC", 
"TMC", 
"DRIVER", 
"NEGF"), &
 
  406         enum_desc=
s2a(
"Perform no tasks", 
"Computes energy", 
"Computes energy and forces", &
 
  407                       "Molecular Dynamics", 
"Geometry Optimization", 
"Monte Carlo", &
 
  408                       "Performs a Debug analysis", 
"Basis set superposition error", 
"Linear Response", &
 
  409                       "Path integral", 
"Vibrational analysis", 
"Band methods", &
 
  410                       "Cell optimization. Both cell vectors and atomic positions are optimised.", &
 
  411                       "Alias for ENERGY", 
"Alias for ENERGY", 
"Alias for MD", 
"Alias for GEO_OPT", &
 
  412                       "Alias for MC", 
"Alias for LR", 
"Alias for VIBRATIONAL_ANALYSIS", &
 
  413                       "Real Time propagation run (fixed ionic positions)", &
 
  414                       "Ehrenfest dynamics (using real time propagation of the wavefunction)", &
 
  415                       "Temperature Accelerated Monte Carlo (TAMC)", &
 
  416                       "Tree Monte Carlo (TMC), a pre-sampling MC algorithm", &
 
  417                       "i-PI driver mode", &
 
  418                       "Non-equilibrium Green's function method"))
 
  423                          variants=(/
"WALLTI"/), &
 
  424                          description=
"Maximum execution time for this run. Time in seconds or in HH:MM:SS.", &
 
  425                          usage=
"WALLTIME {real} or {HH:MM:SS}", default_lc_val=
"")
 
  430                          description=
"If the input should be echoed to the output with all the "// &
 
  431                          "defaults made explicit", &
 
  432                          usage=
"ECHO_INPUT NO", default_l_val=.false., lone_keyword_l_val=.true.)
 
  436      CALL keyword_create(keyword, __location__, name=
"ECHO_ALL_HOSTS", &
 
  437                          description=
"Echo a list of hostname and pid for all MPI processes.", &
 
  438                          usage=
"ECHO_ALL_HOSTS NO", default_l_val=.false., lone_keyword_l_val=.true.)
 
  442      CALL keyword_create(keyword, __location__, name=
"ENABLE_MPI_IO", &
 
  443                          description=
"Enable MPI parallelization for all supported I/O routines "// &
 
  444                          "Currently, only cube file writer/reader routines use MPI I/O. Disabling "// &
 
  445                          "this flag might speed up calculations dominated by I/O.", &
 
  446                          usage=
"ENABLE_MPI_IO FALSE", default_l_val=.true., lone_keyword_l_val=.true.)
 
  451                          description=
"If a debug trace of the execution of the program should be written", &
 
  453                          default_l_val=.false., lone_keyword_l_val=.true.)
 
  458                          description=
"For parallel TRACEd runs: only the master node writes output.", &
 
  459                          usage=
"TRACE_MASTER", &
 
  460                          default_l_val=.true., lone_keyword_l_val=.true.)
 
  465         keyword, __location__, name=
"TRACE_MAX", &
 
  466         description=
"Limit the total number a given subroutine is printed in the trace. Accounting is not influenced.", &
 
  467         usage=
"TRACE_MAX 100", default_i_val=huge(0))
 
  472         keyword, __location__, name=
"TRACE_ROUTINES", &
 
  473         description=
"A list of routines to trace. If left empty all routines are traced. Accounting is not influenced.", &
 
  474         usage=
"TRACE_ROUTINES {routine_name1} {routine_name2} ...", type_of_var=
char_t, &
 
  480         keyword, __location__, name=
"FLUSH_SHOULD_FLUSH", &
 
  481         description=
"Flush output regularly, enabling this option might degrade performance significantly on certain machines.", &
 
  482         usage=
"FLUSH_SHOULD_FLUSH", &
 
  483         default_l_val=.true., lone_keyword_l_val=.true.)
 
  488                          description=
"At the end of the run write a callgraph to file, "// &
 
  489                          "which contains detailed timing informations. "// &
 
  490                          "This callgraph can be viewed e.g. with the open-source program kcachegrind.", &
 
  491                          usage=
"CALLGRAPH {NONE|MASTER|ALL}", &
 
  493                          enum_c_vals=
s2a(
"NONE", 
"MASTER", 
"ALL"), &
 
  494                          enum_desc=
s2a(
"No callgraph gets written", &
 
  495                                        "Only the master process writes his callgraph", &
 
  496                                        "All processes write their callgraph (into a separate files)."), &
 
  501      CALL keyword_create(keyword, __location__, name=
"CALLGRAPH_FILE_NAME", &
 
  502                          description=
"Name of the callgraph file, which is written at the end of the run. "// &
 
  503                          "If not specified the project name will be used as filename.", &
 
  504                          usage=
"CALLGRAPH_FILE_NAME {filename}", default_lc_val=
"")
 
  509                          description=
"Initial seed for the global (pseudo)random number generator "// &
 
  510                          "to create a stream of normally Gaussian distributed random numbers. "// &
 
  511                          "Exactly 1 or 6 positive integer values are expected. A single value is "// &
 
  512                          "replicated to fill up the full seed array with 6 numbers.", &
 
  515                          usage=
"SEED {INTEGER} .. {INTEGER}", &
 
  516                          default_i_vals=(/2000/))
 
  521                          description=
"Some sections of the input structure are deallocated when not needed,"// &
 
  522                          " and reallocated only when used. This reduces the required maximum memory.", &
 
  524                          default_l_val=.false., lone_keyword_l_val=.true.)
 
  529                                       "Controls the printing of the timing report at the end of CP2K execution", &
 
  533                          description=
"Specify % of CPUTIME above which the contribution will be inserted in the"// &
 
  534                          " final timing report (e.g. 0.02 = 2%)", &
 
  535                          usage=
"THRESHOLD {REAL}", &
 
  536                          default_r_val=0.02_dp)
 
  540      CALL keyword_create(keyword, __location__, name=
"SORT_BY_SELF_TIME", &
 
  541                          description=
"Sort the final timing report by the average self (exclusive) time instead of the "// &
 
  542                          "total (inclusive) time of a routine", &
 
  543                          usage=
"SORT_BY_SELF_TIME on", &
 
  544                          default_l_val=.false., lone_keyword_l_val=.true.)
 
  548      CALL keyword_create(keyword, __location__, name=
"REPORT_MAXLOC", &
 
  549                          description=
"Report the rank with the slowest maximum self timing."// &
 
  550                          " Can be used to debug hard- or software."// &
 
  551                          " Also enables ECHO_ALL_HOSTS to link rank to hostname.", &
 
  552                          usage=
"REPORT_MAXLOC on", &
 
  553                          default_l_val=.false., lone_keyword_l_val=.true.)
 
  558                          description=
"Include message_passing calls in the timing report (useful with CALLGRAPH).", &
 
  559                          usage=
"TIME_MPI .FALSE.", &
 
  560                          default_l_val=.true., lone_keyword_l_val=.true.)
 
  564      CALL keyword_create(keyword, __location__, name=
"TIMINGS_LEVEL", &
 
  565                          description=
"Specify the level of timings report. "// &
 
  566                          "Possible values are: 0 (report only CP2K root timer), 1 (all timers).", &
 
  567                          usage=
"TIMINGS_LEVEL 1", &
 
  576                                       "Controls the printing of the references relevant to the calculations performed", &
 
  582                                       description=
"controls the printing of  initialization controlled by the global section", &
 
  588                                       "controls the printing of physical and mathematical constants", &
 
  591      CALL keyword_create(keyword, __location__, name=
"BASIC_DATA_TYPES", &
 
  592                          description=
"Controls the printing of the basic data types.", &
 
  593                          default_l_val=.false., lone_keyword_l_val=.true.)
 
  597                          description=
"if the printkey is active prints the physical constants", &
 
  598                          default_l_val=.true., lone_keyword_l_val=.true.)
 
  601      CALL keyword_create(keyword, __location__, name=
"SPHERICAL_HARMONICS", &
 
  602                          description=
"if the printkey is active prints the spherical harmonics", &
 
  607                          description=
"Prints the transformation matrices used by the  random number generator", &
 
  608                          default_l_val=.false., &
 
  609                          lone_keyword_l_val=.true.)
 
  613                          description=
"Performs a check of the global (pseudo)random "// &
 
  614                          "number generator (RNG) and prints the result", &
 
  615                          default_l_val=.false., &
 
  616                          lone_keyword_l_val=.true.)
 
  619      CALL keyword_create(keyword, __location__, name=
"GLOBAL_GAUSSIAN_RNG", &
 
  620                          description=
"Prints the initial status of the global Gaussian "// &
 
  621                          "(pseudo)random number stream which is mostly used for "// &
 
  622                          "the velocity initialization", &
 
  623                          default_l_val=.false., &
 
  624                          lone_keyword_l_val=.true.)
 
  630      NULLIFY (sub_section)
 
  632      CALL create_fm_section(sub_section)
 
  640      CALL create_fm_diag_rules_section(sub_section)
 
  644      CALL create_grid_section(sub_section)
 
 
  !> \brief Builds the input section whose description reads
  !>        "Configuration options for the full matrices."
  !> \param section the section to create; it must NOT be associated on entry
  !>        (enforced by the assertion below)
  !> \note Keywords defined here, as far as visible: a rows-per-block size, a
  !>       columns-per-block size, FORCE_BLOCK_SIZE (logical, default .false.)
  !>       and TYPE_OF_MATRIX_MULTIPLICATION (enum SCALAPACK | PDGEMM | COSMA).
  !> \note Fixed the description of the column block-size keyword, which read
  !>       "vlock cyclic" instead of "block cyclic".
  656   SUBROUTINE create_fm_section(section)
 
  ! Default for TYPE_OF_MATRIX_MULTIPLICATION.
  ! NOTE(review): expected to be assigned before it is passed as default_i_val - confirm.
  659      INTEGER                                            :: default_matmul
 
  ! Guard: the caller must hand in an unassociated pointer, otherwise an existing section would be overwritten.
  662      cpassert(.NOT. 
ASSOCIATED(section))
 
  ! Arguments of the section creation call (section description and size hints).
  664                          description=
"Configuration options for the full matrices.", &
 
  665                          n_keywords=1, n_subsections=0, repeats=.false.)
 
  ! Description of the row block-size keyword (presumably NROW_BLOCKS - confirm).
  670                          description=
"Defines the number of rows per scalapack block in "// &
 
  671                          "the creation of block cyclic dense matrices. "// &
 
  672                          "Use an internal default if zero or negative.", &
 
  ! Description of the column block-size keyword (presumably NCOL_BLOCKS - confirm).
  678                          description=
"Defines the number of columns per scalapack block in "// &
 
  679                          "the creation of block cyclic dense matrices. "// &
 
  680                          "Use an internal default if zero or negative.", &
 
  ! Logical switch: off by default, on when the keyword is given without a value.
  685      CALL keyword_create(keyword, __location__, name=
"FORCE_BLOCK_SIZE", &
 
  686                          description=
"Ensure for small matrices that the layout is compatible "// &
 
  687                          "with bigger ones, i.e. no subdivision is performed (can break LAPACK).", &
 
  688                          usage=
"FORCE_BLOCK_SIZE", &
 
  689                          default_l_val=.false., lone_keyword_l_val=.true.)
 
  ! Enumerated choice of the parallel matrix-multiplication backend; PDGEMM is an alias for SCALAPACK.
  699      CALL keyword_create(keyword, __location__, name=
"TYPE_OF_MATRIX_MULTIPLICATION", &
 
  700                          description=
"Allows to switch between scalapack pxgemm and COSMA pxgemm. "// &
 
  701                          "COSMA reduces the communication costs but increases the memory demands. "// &
 
  702                          "The performance of Scalapack's pxgemm on GPU's depends "// &
 
  703                          "crucially on the BLOCK_SIZES. Make sure optimized kernels are available.", &
 
  704                          default_i_val=default_matmul, &
 
  706                          enum_c_vals=
s2a(
"SCALAPACK", 
"PDGEMM", 
"COSMA"), &
 
  707                          enum_desc=
s2a(
"Standard ScaLAPACK pdgemm", &
 
  708                                        "Alias for ScaLAPACK", &
 
  709                                        "COSMA is employed. See <https://github.com/eth-cscs/COSMA>."))
 
  714   END SUBROUTINE create_fm_section
 
  !> \brief Builds the FM_DIAG_SETTINGS input section, which holds the heuristic
  !>        rule M = ((K+a*x-1)/(a*x))*a used to pick the number of CPUs for
  !>        diagonalizing a full (FM) matrix of size K.
  !> \param section the section to create; it must NOT be associated on entry
  !>        (enforced by the assertion below)
  !> \note Keywords defined here, as far as visible: the integer parameters a and x
  !>       of the rule (usage strings "PARAMETER_A 4" and "PARAMETER_X 60"),
  !>       PRINT_FM_REDISTRIBUTE (logical, default .false.) and
  !>       ELPA_FORCE_REDISTRIBUTE (logical, default .true.).
  721   SUBROUTINE create_fm_diag_rules_section(section)
 
  ! Guard: the caller must hand in an unassociated pointer.
  726      cpassert(.NOT. 
ASSOCIATED(section))
 
  ! Create the section itself; the long description documents the redistribution rule.
  ! NOTE(review): n_keywords=3 is passed although four keywords appear below - presumably only a size hint; confirm.
  727      CALL section_create(section, __location__, name=
"FM_DIAG_SETTINGS", &
 
  728                          description=
"This section defines a set of heuristic rules which are "// &
 
  729                          "used to calculate the optimal number of CPUs, M, needed to diagonalize a "// &
 
  730                          "full matrix distributed on N processors (FM type). If M < N, the matrix "// &
 
  731                          "is redistributed onto M processors before it is diagonalized. "// &
 
  732                          "The optimal value is calculate according to M = ((K+a*x-1)/(a*x))*a, "// &
 
  733                          "where K is the size of the matrix, and {a, x} are integers defined below. "// &
 
  734                          "The default values have been selected based on timings on a Cray XE6. "// &
 
  735                          "Supports diagonalization libraries SL and ELPA (see keyword ELPA_FORCE_REDISTRIBUTE).", &
 
  736                          n_keywords=3, n_subsections=0, repeats=.false.)
 
  ! Integer keyword for parameter "a": the CPU count is an integer multiple of it.
  741                          description=
"Parameter used for defining the rule which determines the optimal "// &
 
  742                          "number of CPUs needed to diagonalize a full distributed matrix. The optimal "// &
 
  743                          "number of CPUs will be an integer multiple of this variable.", &
 
  744                          usage=
"PARAMETER_A 4", type_of_var=
integer_t, &
 
  ! Integer keyword for parameter "x": the CPU count is roughly proportional to it.
  750                          description=
"Parameter used for defining the rule which determines the optimal "// &
 
  751                          "number of CPUs needed to diagonalize a full distributed matrix. The optimal "// &
 
  752                          "number of CPUs will be roughly proportional to this value.", &
 
  753                          usage=
"PARAMETER_X 60", type_of_var=
integer_t, &
 
  ! Logical switch for diagnostic printing: off by default, on when given without a value.
  758      CALL keyword_create(keyword, __location__, name=
"PRINT_FM_REDISTRIBUTE", &
 
  759                          description=
"Controls printing of information related to this section. For each "// &
 
  760                          "diagonalized matrix, prints the size of the matrix, the optimal number of CPUs, "// &
 
  761                          "as well as notifies if the matrix was redistributed. Useful for testing.", &
 
  762                          usage=
"PRINT_FM_REDISTRIBUTE", type_of_var=
logical_t, &
 
  763                          default_l_val=.false., lone_keyword_l_val=.true.)
 
  ! Logical switch controlling redistribution with ELPA: on by default.
  767      CALL keyword_create(keyword, __location__, name=
"ELPA_FORCE_REDISTRIBUTE", &
 
  768                          description=
"Controls how to perform redistribution when ELPA is used for diagonalization. "// &
 
  769                          "By default, redistribution is always performed using the defined rules. "// &
 
  770                          "By turning off this keyword, matrices are redistributed only to prevent crashes in the ELPA "// &
 
  771                          "library which happens when the original matrix is distributed over too many processors.", &
 
  772                          usage=
"ELPA_FORCE_REDISTRIBUTE", type_of_var=
logical_t, &
 
  773                          default_l_val=.true., lone_keyword_l_val=.true.)
 
  777   END SUBROUTINE create_fm_diag_rules_section
 
  784   SUBROUTINE create_grid_section(section)
 
  789      cpassert(.NOT. 
ASSOCIATED(section))
 
  791                          description=
"Configuration options for the grid library, "// &
 
  792                          "which performs e.g. the collocate and integrate of the GPW method.", &
 
  793                          n_keywords=1, n_subsections=0, repeats=.false.)
 
  797                          description=
"Selects the backend used by the grid library.", &
 
  801                          enum_c_vals=
s2a(
"AUTO", 
"REFERENCE", 
"CPU", 
"DGEMM", 
"GPU", 
"HIP"), &
 
  802                          enum_desc=
s2a(
"Let the grid library pick the backend automatically", &
 
  803                                        "Reference backend implementation", &
 
  804                                        "Optimized CPU backend", &
 
  805                                        "Alternative CPU backend based on DGEMM", &
 
  806                                        "GPU backend optimized for CUDA that also supports HIP", &
 
  807                                        "HIP backend optimized for ROCm"))
 
  812                          description=
"When enabled the reference backend is run in shadow mode "// &
 
  813                          "and its results are compared with those from the selected backend. "// &
 
  814                          "If the two results differ by too much then the calculation is aborted.", &
 
  815                          default_l_val=.false., lone_keyword_l_val=.true.)
 
  820                          description=
"When enabled the cpu backend "// &
 
  821                          "applies a spherical cutoff on the top of the cube. "// &
 
  822                          "There is a performance penalty using it in "// &
 
  823                          "combination with the cpu backend but it is on by "// &
 
  824                          "default for the regtests", default_l_val=.true., &
 
  825                          lone_keyword_l_val=.true.)
 
  829   END SUBROUTINE create_grid_section
 
collects all references to literature in CP2K as new algorithms / method are included from literature...
integer, save, public schonherr2014
integer, save, public frigo2005
integer, save, public ceriotti2014
methods related to the blacs parallel environment
integer, parameter, public blacs_grid_row
integer, parameter, public blacs_grid_col
integer, parameter, public blacs_grid_square
Routines that link DBCSR and CP2K concepts together.
subroutine, public create_dbcsr_section(section)
Creates the dbcsr section for configuring DBCSR.
various cholesky decomposition related routines
integer, parameter, public fm_cholesky_type_dlaf
integer, parameter, public fm_cholesky_type_default
integer, parameter, public fm_cholesky_type_scalapack
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
integer, parameter, public fm_diag_type_cusolver
integer, parameter, public fm_diag_type_dlaf
integer, parameter, public fm_diag_type_scalapack
integer, parameter, public fm_diag_type_default
integer, parameter, public fm_diag_type_elpa
character(len=14), dimension(1), parameter, public elpa_kernel_names
character(len=44), dimension(1), parameter, public elpa_kernel_descriptions
integer, dimension(1), parameter, public elpa_kernel_ids
represent the structure of a full matrix
integer function, public cp_fm_struct_get_nrow_block()
...
integer function, public cp_fm_struct_get_ncol_block()
...
Routines to handle the output. The idea is to remove the decision of whether to output and what to out...
integer, parameter, public debug_print_level
integer, parameter, public low_print_level
integer, parameter, public medium_print_level
integer, parameter, public high_print_level
integer, parameter, public add_last_numeric
integer, parameter, public silent_print_level
subroutine, public cp_print_key_section_create(print_key_section, location, name, description, print_level, each_iter_names, each_iter_values, add_last, filename, common_iter_levels, citations, unit_str)
creates a print_key section
Fortran API for the grid package, which is written in C.
integer, parameter, public grid_backend_auto
integer, parameter, public grid_backend_gpu
integer, parameter, public grid_backend_hip
integer, parameter, public grid_backend_dgemm
integer, parameter, public grid_backend_cpu
integer, parameter, public grid_backend_ref
Defines the basic variable types.
integer, parameter, public dp
Utilities for string manipulations.
Timing routines for accounting.
integer, parameter, public default_timings_level