(git:ccc2433)
library_tests.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \brief Performance tests for basic tasks like matrix multiplies, copy, fft.
10 !> \par History
11 !> 30-Nov-2000 (JGH) added input
12 !> 02-Jan-2001 (JGH) Parallel FFT
13 !> 28-Feb-2002 (JGH) Clebsch-Gordon Coefficients
14 !> 06-Jun-2003 (JGH) Real space grid test
15 !> Eigensolver test (29.08.05,MK)
16 !> \author JGH 6-NOV-2000
17 ! **************************************************************************************************
19 
20  USE ai_coulomb_test, ONLY: eri_test
21  USE cell_methods, ONLY: cell_create,&
22  init_cell
23  USE cell_types, ONLY: cell_release,&
24  cell_type
25  USE cg_test, ONLY: clebsch_gordon_test
28  cp_blacs_env_type
30  USE cp_files, ONLY: close_file,&
31  open_file
33  USE cp_fm_diag, ONLY: cp_fm_syevd,&
38  cp_fm_struct_type
39  USE cp_fm_types, ONLY: cp_fm_create,&
41  cp_fm_release,&
44  cp_fm_to_fm,&
45  cp_fm_type
47  cp_logger_type
51  USE dbcsr_api, ONLY: dbcsr_reset_randmat_seed,&
52  dbcsr_run_tests
53  USE dbm_tests, ONLY: dbm_run_tests
54  USE fft_tools, ONLY: bwfft,&
56  fwfft,&
57  fft3d,&
59  finalize_fft,&
60  init_fft
61  USE global_types, ONLY: global_environment_type
62  USE input_constants, ONLY: do_diag_syevd,&
65  do_mat_read,&
71  section_vals_type,&
73  USE kinds, ONLY: dp
74  USE machine, ONLY: m_flush,&
76  USE message_passing, ONLY: mp_para_env_type
79  USE mp_perf_test, ONLY: mpi_perf_test
80  USE parallel_gemm_api, ONLY: parallel_gemm
81  USE parallel_rng_types, ONLY: uniform,&
82  rng_stream_type
83  USE pw_grid_types, ONLY: fullspace,&
84  halfspace,&
85  pw_grid_type
86  USE pw_grids, ONLY: pw_grid_create,&
89  USE pw_methods, ONLY: pw_transfer,&
90  pw_zero
91  USE pw_types, ONLY: pw_c1d_gs_type,&
92  pw_c3d_rs_type,&
93  pw_r3d_rs_type
94  USE realspace_grid_types, ONLY: &
95  realspace_grid_desc_type, realspace_grid_input_type, realspace_grid_type, rs_grid_create, &
99 #include "./base/base_uses.f90"
100 
101  IMPLICIT NONE
102 
103  PRIVATE
104  PUBLIC :: lib_test
105 
106  INTEGER :: runtest(100)
107  REAL(KIND=dp) :: max_memory
108  REAL(KIND=dp), PARAMETER :: threshold = 1.0e-8_dp
109  CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'library_tests'
110 
111 CONTAINS
112 
113 ! **************************************************************************************************
114 !> \brief Master routine for tests
115 !> \param root_section ...
116 !> \param para_env ...
117 !> \param globenv ...
118 !> \par History
119 !> none
120 !> \author JGH 6-NOV-2000
121 ! **************************************************************************************************
122  SUBROUTINE lib_test(root_section, para_env, globenv)
123 
124  TYPE(section_vals_type), POINTER :: root_section
125  TYPE(mp_para_env_type), POINTER :: para_env
126  TYPE(global_environment_type), POINTER :: globenv
127 
128  CHARACTER(LEN=*), PARAMETER :: routinen = 'lib_test'
129 
130  INTEGER :: handle, iw
131  LOGICAL :: explicit
132  TYPE(cp_logger_type), POINTER :: logger
133  TYPE(section_vals_type), POINTER :: cp_dbcsr_test_section, cp_fm_gemm_test_section, &
134  dbm_test_section, eigensolver_section, eri_mme_test_section, pw_transfer_section, &
135  rs_pw_transfer_section, shg_integrals_test_section
136 
137  CALL timeset(routinen, handle)
138 
139  logger => cp_get_default_logger()
140  iw = cp_print_key_unit_nr(logger, root_section, "TEST%PROGRAM_RUN_INFO", extension=".log")
141 
142  IF (iw > 0) THEN
143  WRITE (iw, '(T2,79("*"))')
144  WRITE (iw, '(A,T31,A,T80,A)') ' *', ' PERFORMANCE TESTS ', '*'
145  WRITE (iw, '(T2,79("*"))')
146  END IF
147  !
148  CALL test_input(root_section, para_env)
149  !
150  IF (runtest(1) /= 0) CALL copy_test(para_env, iw)
151  !
152  IF (runtest(2) /= 0) CALL matmul_test(para_env, test_matmul=.true., test_dgemm=.false., iw=iw)
153  IF (runtest(5) /= 0) CALL matmul_test(para_env, test_matmul=.false., test_dgemm=.true., iw=iw)
154  !
155  IF (runtest(3) /= 0) CALL fft_test(para_env, iw, globenv%fftw_plan_type, &
156  globenv%fftw_wisdom_file_name)
157  !
158  IF (runtest(4) /= 0) CALL eri_test(iw)
159  !
160  IF (runtest(6) /= 0) CALL clebsch_gordon_test()
161  !
162  ! runtest 7 has been deleted and can be recycled
163  !
164  IF (runtest(8) /= 0) CALL mpi_perf_test(para_env, runtest(8), iw)
165  !
166  IF (runtest(10) /= 0) CALL validate_exp_minimax(runtest(10), iw)
167  !
168  IF (runtest(11) /= 0) CALL test_least_square_ft(runtest(11), iw)
169  !
170 
171  rs_pw_transfer_section => section_vals_get_subs_vals(root_section, "TEST%RS_PW_TRANSFER")
172  CALL section_vals_get(rs_pw_transfer_section, explicit=explicit)
173  IF (explicit) THEN
174  CALL rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
175  END IF
176 
177  pw_transfer_section => section_vals_get_subs_vals(root_section, "TEST%PW_TRANSFER")
178  CALL section_vals_get(pw_transfer_section, explicit=explicit)
179  IF (explicit) THEN
180  CALL pw_fft_test(para_env, iw, globenv, pw_transfer_section)
181  END IF
182 
183  cp_fm_gemm_test_section => section_vals_get_subs_vals(root_section, "TEST%CP_FM_GEMM")
184  CALL section_vals_get(cp_fm_gemm_test_section, explicit=explicit)
185  IF (explicit) THEN
186  CALL cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
187  END IF
188 
189  eigensolver_section => section_vals_get_subs_vals(root_section, "TEST%EIGENSOLVER")
190  CALL section_vals_get(eigensolver_section, explicit=explicit)
191  IF (explicit) THEN
192  CALL eigensolver_test(para_env, iw, eigensolver_section)
193  END IF
194 
195  eri_mme_test_section => section_vals_get_subs_vals(root_section, "TEST%ERI_MME_TEST")
196  CALL section_vals_get(eri_mme_test_section, explicit=explicit)
197  IF (explicit) THEN
198  CALL cp_eri_mme_perf_acc_test(para_env, iw, eri_mme_test_section)
199  END IF
200 
201  shg_integrals_test_section => section_vals_get_subs_vals(root_section, "TEST%SHG_INTEGRALS_TEST")
202  CALL section_vals_get(shg_integrals_test_section, explicit=explicit)
203  IF (explicit) THEN
204  CALL shg_integrals_perf_acc_test(iw, shg_integrals_test_section)
205  END IF
206 
207  ! DBCSR tests
208  cp_dbcsr_test_section => section_vals_get_subs_vals(root_section, "TEST%CP_DBCSR")
209  CALL section_vals_get(cp_dbcsr_test_section, explicit=explicit)
210  IF (explicit) THEN
211  CALL cp_dbcsr_tests(para_env, iw, cp_dbcsr_test_section)
212  END IF
213 
214  ! DBM tests
215  dbm_test_section => section_vals_get_subs_vals(root_section, "TEST%DBM")
216  CALL section_vals_get(dbm_test_section, explicit=explicit)
217  IF (explicit) THEN
218  CALL run_dbm_tests(para_env, iw, dbm_test_section)
219  END IF
220 
221  CALL cp_print_key_finished_output(iw, logger, root_section, "TEST%PROGRAM_RUN_INFO")
222 
223  CALL timestop(handle)
224 
225  END SUBROUTINE lib_test
226 
227 ! **************************************************************************************************
228 !> \brief Reads input section &TEST ... &END
229 !> \param root_section ...
230 !> \param para_env ...
231 !> \author JGH 30-NOV-2000
232 !> \note
233 !> I---------------------------------------------------------------------------I
234 !> I SECTION: &TEST ... &END I
235 !> I I
236 !> I MEMORY max_memory I
237 !> I COPY n I
238 !> I MATMUL n I
239 !> I FFT n I
240 !> I ERI n I
241 !> I PW_FFT n I
242 !> I Clebsch-Gordon n I
243 !> I RS_GRIDS n I
244 !> I MPI n I
245 !> I RNG n -> Parallel random number generator I
246 !> I---------------------------------------------------------------------------I
247 ! **************************************************************************************************
248  SUBROUTINE test_input(root_section, para_env)
249  TYPE(section_vals_type), POINTER :: root_section
250  TYPE(mp_para_env_type), POINTER :: para_env
251 
252  TYPE(section_vals_type), POINTER :: test_section
253 
254 !
255 !..defaults
256 ! using this style is not recommended, introduce sections instead (see e.g. cp_fm_gemm)
257 
258  runtest = 0
259  test_section => section_vals_get_subs_vals(root_section, "TEST")
260  CALL section_vals_val_get(test_section, "MEMORY", r_val=max_memory)
261  CALL section_vals_val_get(test_section, 'COPY', i_val=runtest(1))
262  CALL section_vals_val_get(test_section, 'MATMUL', i_val=runtest(2))
263  CALL section_vals_val_get(test_section, 'DGEMM', i_val=runtest(5))
264  CALL section_vals_val_get(test_section, 'FFT', i_val=runtest(3))
265  CALL section_vals_val_get(test_section, 'ERI', i_val=runtest(4))
266  CALL section_vals_val_get(test_section, 'CLEBSCH_GORDON', i_val=runtest(6))
267  CALL section_vals_val_get(test_section, 'MPI', i_val=runtest(8))
268  CALL section_vals_val_get(test_section, 'MINIMAX', i_val=runtest(10))
269  CALL section_vals_val_get(test_section, 'LEAST_SQ_FT', i_val=runtest(11))
270 
271  CALL para_env%sync()
272  END SUBROUTINE test_input
273 
274 ! **************************************************************************************************
275 !> \brief Tests the performance to copy two vectors.
276 !> \param para_env ...
277 !> \param iw ...
278 !> \par History
279 !> none
280 !> \author JGH 6-NOV-2000
281 !> \note
282 !> The results of these tests allow to determine the size of the cache
283 !> of the CPU. This can be used to optimize the performance of the
284 !> FFTSG library.
285 ! **************************************************************************************************
286  SUBROUTINE copy_test(para_env, iw)
287  TYPE(mp_para_env_type), POINTER :: para_env
288  INTEGER :: iw
289 
290  INTEGER :: i, ierr, j, len, ntim, siz
291  REAL(kind=dp) :: perf, t, tend, tstart
292  REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: ca, cb
293 
294 ! test for copy --> Cache size
295 
296  siz = abs(runtest(1))
297  IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of copy ( F95 ) "
298  DO i = 6, 24
299  len = 2**i
300  IF (8.0_dp*real(len, kind=dp) > max_memory*0.5_dp) EXIT
301  ALLOCATE (ca(len), stat=ierr)
302  IF (ierr /= 0) EXIT
303  ALLOCATE (cb(len), stat=ierr)
304  IF (ierr /= 0) EXIT
305 
306  CALL random_number(ca)
307  ntim = nint(1.e7_dp/real(len, kind=dp))
308  ntim = max(ntim, 1)
309  ntim = min(ntim, siz*10000)
310 
311  tstart = m_walltime()
312  DO j = 1, ntim
313  cb(:) = ca(:)
314  ca(1) = real(j, kind=dp)
315  END DO
316  tend = m_walltime()
317  t = tend - tstart + threshold
318  IF (t > 0.0_dp) THEN
319  perf = real(ntim, kind=dp)*real(len, kind=dp)*1.e-6_dp/t
320  ELSE
321  perf = 0.0_dp
322  END IF
323 
324  IF (para_env%is_source()) THEN
325  WRITE (iw, '(A,i2,i10,A,T59,F14.4,A)') " Copy test: Size = 2^", i, &
326  len/1024, " Kwords", perf, " Mcopy/s"
327  END IF
328 
329  DEALLOCATE (ca)
330  DEALLOCATE (cb)
331  END DO
332  CALL para_env%sync()
333  END SUBROUTINE copy_test
334 
335 ! **************************************************************************************************
336 !> \brief Tests the performance of different kinds of matrix matrix multiply
337 !> kernels for the BLAS and F95 intrinsic matmul.
338 !> \param para_env ...
339 !> \param test_matmul ...
340 !> \param test_dgemm ...
341 !> \param iw ...
342 !> \par History
343 !> none
344 !> \author JGH 6-NOV-2000
345 ! **************************************************************************************************
346  SUBROUTINE matmul_test(para_env, test_matmul, test_dgemm, iw)
347  TYPE(mp_para_env_type), POINTER :: para_env
348  LOGICAL :: test_matmul, test_dgemm
349  INTEGER :: iw
350 
351  INTEGER :: i, ierr, j, len, ntim, siz
352  REAL(kind=dp) :: perf, t, tend, tstart, xdum
353  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: ma, mb, mc
354 
355 ! test for matrix multpies
356 
357  IF (test_matmul) THEN
358  siz = abs(runtest(2))
359  IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of matmul ( F95 ) "
360  DO i = 5, siz, 2
361  len = 2**i + 1
362  IF (8.0_dp*real(len*len, kind=dp) > max_memory*0.3_dp) EXIT
363  ALLOCATE (ma(len, len), stat=ierr)
364  IF (ierr /= 0) EXIT
365  ALLOCATE (mb(len, len), stat=ierr)
366  IF (ierr /= 0) EXIT
367  ALLOCATE (mc(len, len), stat=ierr)
368  IF (ierr /= 0) EXIT
369  mc = 0.0_dp
370 
371  CALL random_number(xdum)
372  ma = xdum
373  CALL random_number(xdum)
374  mb = xdum
375  ntim = nint(1.e8_dp/(2.0_dp*real(len, kind=dp)**3))
376  ntim = max(ntim, 1)
377  ntim = min(ntim, siz*200)
378  tstart = m_walltime()
379  DO j = 1, ntim
380  mc(:, :) = matmul(ma, mb)
381  ma(1, 1) = real(j, kind=dp)
382  END DO
383  tend = m_walltime()
384  t = tend - tstart + threshold
385  perf = real(ntim, kind=dp)*2.0_dp*real(len, kind=dp)**3*1.e-6_dp/t
386  IF (para_env%is_source()) THEN
387  WRITE (iw, '(A,i6,T59,F14.4,A)') &
388  " Matrix multiply test: c = a * b Size = ", len, perf, " Mflop/s"
389  END IF
390  tstart = m_walltime()
391  DO j = 1, ntim
392  mc(:, :) = mc + matmul(ma, mb)
393  ma(1, 1) = real(j, kind=dp)
394  END DO
395  tend = m_walltime()
396  t = tend - tstart + threshold
397  IF (t > 0.0_dp) THEN
398  perf = real(ntim, kind=dp)*2.0_dp*real(len, kind=dp)**3*1.e-6_dp/t
399  ELSE
400  perf = 0.0_dp
401  END IF
402 
403  IF (para_env%is_source()) THEN
404  WRITE (iw, '(A,i6,T59,F14.4,A)') &
405  " Matrix multiply test: a = a * b Size = ", len, perf, " Mflop/s"
406  END IF
407 
408  tstart = m_walltime()
409  DO j = 1, ntim
410  mc(:, :) = mc + matmul(ma, transpose(mb))
411  ma(1, 1) = real(j, kind=dp)
412  END DO
413  tend = m_walltime()
414  t = tend - tstart + threshold
415  IF (t > 0.0_dp) THEN
416  perf = real(ntim, kind=dp)*2.0_dp*real(len, kind=dp)**3*1.e-6_dp/t
417  ELSE
418  perf = 0.0_dp
419  END IF
420 
421  IF (para_env%is_source()) THEN
422  WRITE (iw, '(A,i6,T59,F14.4,A)') &
423  " Matrix multiply test: c = a * b(T) Size = ", len, perf, " Mflop/s"
424  END IF
425 
426  tstart = m_walltime()
427  DO j = 1, ntim
428  mc(:, :) = mc + matmul(transpose(ma), mb)
429  ma(1, 1) = real(j, kind=dp)
430  END DO
431  tend = m_walltime()
432  t = tend - tstart + threshold
433  IF (t > 0.0_dp) THEN
434  perf = real(ntim, kind=dp)*2.0_dp*real(len, kind=dp)**3*1.e-6_dp/t
435  ELSE
436  perf = 0.0_dp
437  END IF
438 
439  IF (para_env%is_source()) THEN
440  WRITE (iw, '(A,i6,T59,F14.4,A)') &
441  " Matrix multiply test: c = a(T) * b Size = ", len, perf, " Mflop/s"
442  END IF
443 
444  DEALLOCATE (ma)
445  DEALLOCATE (mb)
446  DEALLOCATE (mc)
447  END DO
448  END IF
449 
450  ! test for matrix multpies
451  IF (test_dgemm) THEN
452  siz = abs(runtest(5))
453  IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of matmul ( BLAS ) "
454  DO i = 5, siz, 2
455  len = 2**i + 1
456  IF (8.0_dp*real(len*len, kind=dp) > max_memory*0.3_dp) EXIT
457  ALLOCATE (ma(len, len), stat=ierr)
458  IF (ierr /= 0) EXIT
459  ALLOCATE (mb(len, len), stat=ierr)
460  IF (ierr /= 0) EXIT
461  ALLOCATE (mc(len, len), stat=ierr)
462  IF (ierr /= 0) EXIT
463  mc = 0.0_dp
464 
465  CALL random_number(xdum)
466  ma = xdum
467  CALL random_number(xdum)
468  mb = xdum
469  ntim = nint(1.e8_dp/(2.0_dp*real(len, kind=dp)**3))
470  ntim = max(ntim, 1)
471  ntim = min(ntim, 1000)
472 
473  tstart = m_walltime()
474  DO j = 1, ntim
475  CALL dgemm("N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
476  END DO
477  tend = m_walltime()
478  t = tend - tstart + threshold
479  IF (t > 0.0_dp) THEN
480  perf = real(ntim, kind=dp)*2.0_dp*real(len, kind=dp)**3*1.e-6_dp/t
481  ELSE
482  perf = 0.0_dp
483  END IF
484 
485  IF (para_env%is_source()) THEN
486  WRITE (iw, '(A,i6,T59,F14.4,A)') &
487  " Matrix multiply test: c = a * b Size = ", len, perf, " Mflop/s"
488  END IF
489 
490  tstart = m_walltime()
491  DO j = 1, ntim
492  CALL dgemm("N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
493  END DO
494  tend = m_walltime()
495  t = tend - tstart + threshold
496  IF (t > 0.0_dp) THEN
497  perf = real(ntim, kind=dp)*2.0_dp*real(len, kind=dp)**3*1.e-6_dp/t
498  ELSE
499  perf = 0.0_dp
500  END IF
501 
502  IF (para_env%is_source()) THEN
503  WRITE (iw, '(A,i6,T59,F14.4,A)') &
504  " Matrix multiply test: a = a * b Size = ", len, perf, " Mflop/s"
505  END IF
506 
507  tstart = m_walltime()
508  DO j = 1, ntim
509  CALL dgemm("N", "T", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
510  END DO
511  tend = m_walltime()
512  t = tend - tstart + threshold
513  IF (t > 0.0_dp) THEN
514  perf = real(ntim, kind=dp)*2.0_dp*real(len, kind=dp)**3*1.e-6_dp/t
515  ELSE
516  perf = 0.0_dp
517  END IF
518 
519  IF (para_env%is_source()) THEN
520  WRITE (iw, '(A,i6,T59,F14.4,A)') &
521  " Matrix multiply test: c = a * b(T) Size = ", len, perf, " Mflop/s"
522  END IF
523 
524  tstart = m_walltime()
525  DO j = 1, ntim
526  CALL dgemm("T", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
527  END DO
528  tend = m_walltime()
529  t = tend - tstart + threshold
530  IF (t > 0.0_dp) THEN
531  perf = real(ntim, kind=dp)*2.0_dp*real(len, kind=dp)**3*1.e-6_dp/t
532  ELSE
533  perf = 0.0_dp
534  END IF
535 
536  IF (para_env%is_source()) THEN
537  WRITE (iw, '(A,i6,T59,F14.4,A)') &
538  " Matrix multiply test: c = a(T) * b Size = ", len, perf, " Mflop/s"
539  END IF
540 
541  DEALLOCATE (ma)
542  DEALLOCATE (mb)
543  DEALLOCATE (mc)
544  END DO
545  END IF
546 
547  CALL para_env%sync()
548 
549  END SUBROUTINE matmul_test
550 
551 ! **************************************************************************************************
552 !> \brief Tests the performance of all available FFT libraries for 3D FFTs
553 !> \param para_env ...
554 !> \param iw ...
555 !> \param fftw_plan_type ...
556 !> \param wisdom_file where FFTW3 should look to save/load wisdom
557 !> \par History
558 !> none
559 !> \author JGH 6-NOV-2000
560 ! **************************************************************************************************
561  SUBROUTINE fft_test(para_env, iw, fftw_plan_type, wisdom_file)
562 
563  TYPE(mp_para_env_type), POINTER :: para_env
564  INTEGER :: iw, fftw_plan_type
565  CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
566 
567  INTEGER, PARAMETER :: ndate(3) = (/12, 48, 96/)
568 
569  INTEGER :: iall, ierr, it, j, len, n(3), ntim, &
570  radix_in, radix_out, siz, stat
571  COMPLEX(KIND=dp), DIMENSION(4, 4, 4) :: zz
572  COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: ca, cb, cc
573  CHARACTER(LEN=7) :: method
574  REAL(kind=dp) :: flops, perf, scale, t, tdiff, tend, &
575  tstart
576  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: ra
577 
578 ! test for 3d FFT
579 
580  IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of 3D-FFT "
581  siz = abs(runtest(3))
582 
583  DO iall = 1, 100
584  SELECT CASE (iall)
585  CASE DEFAULT
586  EXIT
587  CASE (1)
588  CALL init_fft("FFTSG", alltoall=.false., fftsg_sizes=.true., wisdom_file=wisdom_file, &
589  pool_limit=10, plan_style=fftw_plan_type)
590  method = "FFTSG "
591  CASE (2)
592  cycle
593  CASE (3)
594  CALL init_fft("FFTW3", alltoall=.false., fftsg_sizes=.true., wisdom_file=wisdom_file, &
595  pool_limit=10, plan_style=fftw_plan_type)
596  method = "FFTW3 "
597  END SELECT
598  n = 4
599  zz = 0.0_dp
600  CALL fft3d(1, n, zz, status=stat)
601  IF (stat == 0) THEN
602  DO it = 1, 3
603  radix_in = ndate(it)
604  CALL fft_radix_operations(radix_in, radix_out, fft_radix_closest)
605  len = radix_out
606  n = len
607  IF (16.0_dp*real(len*len*len, kind=dp) > max_memory*0.5_dp) EXIT
608  ALLOCATE (ra(len, len, len), stat=ierr)
609  ALLOCATE (ca(len, len, len), stat=ierr)
610  CALL random_number(ra)
611  ca(:, :, :) = ra
612  CALL random_number(ra)
613  ca(:, :, :) = ca + cmplx(0.0_dp, 1.0_dp, kind=dp)*ra
614  flops = real(len**3, kind=dp)*15.0_dp*log(real(len, kind=dp))
615  ntim = nint(siz*1.e7_dp/flops)
616  ntim = max(ntim, 1)
617  ntim = min(ntim, 200)
618  scale = 1.0_dp/real(len**3, kind=dp)
619  tstart = m_walltime()
620  DO j = 1, ntim
621  CALL fft3d(fwfft, n, ca)
622  CALL fft3d(bwfft, n, ca, scale=scale)
623  END DO
624  tend = m_walltime()
625  t = tend - tstart + threshold
626  IF (t > 0.0_dp) THEN
627  perf = real(ntim, kind=dp)*2.0_dp*flops*1.e-6_dp/t
628  ELSE
629  perf = 0.0_dp
630  END IF
631 
632  IF (para_env%is_source()) THEN
633  WRITE (iw, '(T2,A,A,i6,T59,F14.4,A)') &
634  adjustr(method), " test (in-place) Size = ", len, perf, " Mflop/s"
635  END IF
636  DEALLOCATE (ca)
637  DEALLOCATE (ra)
638  END DO
639  IF (para_env%is_source()) WRITE (iw, *)
640  ! test if input data is preserved
641  len = 24
642  n = len
643  ALLOCATE (ra(len, len, len))
644  ALLOCATE (ca(len, len, len))
645  ALLOCATE (cb(len, len, len))
646  ALLOCATE (cc(len, len, len))
647  CALL random_number(ra)
648  ca(:, :, :) = ra
649  CALL random_number(ra)
650  ca(:, :, :) = ca + cmplx(0.0_dp, 1.0_dp, kind=dp)*ra
651  cc(:, :, :) = ca
652  CALL fft3d(fwfft, n, ca, cb)
653  tdiff = maxval(abs(ca - cc))
654  IF (tdiff > 1.0e-12_dp) THEN
655  IF (para_env%is_source()) &
656  WRITE (iw, '(T2,A,A,A)') adjustr(method), " FWFFT ", &
657  " Input array is changed in out-of-place FFT !"
658  ELSE
659  IF (para_env%is_source()) &
660  WRITE (iw, '(T2,A,A,A)') adjustr(method), " FWFFT ", &
661  " Input array is not changed in out-of-place FFT !"
662  END IF
663  ca(:, :, :) = cc
664  CALL fft3d(bwfft, n, ca, cb)
665  tdiff = maxval(abs(ca - cc))
666  IF (tdiff > 1.0e-12_dp) THEN
667  IF (para_env%is_source()) &
668  WRITE (iw, '(T2,A,A,A)') adjustr(method), " BWFFT ", &
669  " Input array is changed in out-of-place FFT !"
670  ELSE
671  IF (para_env%is_source()) &
672  WRITE (iw, '(T2,A,A,A)') adjustr(method), " BWFFT ", &
673  " Input array is not changed in out-of-place FFT !"
674  END IF
675  IF (para_env%is_source()) WRITE (iw, *)
676 
677  DEALLOCATE (ra)
678  DEALLOCATE (ca)
679  DEALLOCATE (cb)
680  DEALLOCATE (cc)
681  END IF
682  CALL finalize_fft(para_env, wisdom_file=wisdom_file)
683  END DO
684 
685  END SUBROUTINE fft_test
686 
687 ! **************************************************************************************************
688 !> \brief test rs_pw_transfer performance
689 !> \param para_env ...
690 !> \param iw ...
691 !> \param globenv ...
692 !> \param rs_pw_transfer_section ...
693 !> \author Joost VandeVondele
694 !> 9.2008 Randomise rs grid [Iain Bethune]
695 !> (c) The Numerical Algorithms Group (NAG) Ltd, 2008 on behalf of the HECToR project
696 ! **************************************************************************************************
697  SUBROUTINE rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
698 
699  TYPE(mp_para_env_type), POINTER :: para_env
700  INTEGER :: iw
701  TYPE(global_environment_type), POINTER :: globenv
702  TYPE(section_vals_type), POINTER :: rs_pw_transfer_section
703 
704  CHARACTER(LEN=*), PARAMETER :: routinen = 'rs_pw_transfer_test'
705 
706  INTEGER :: halo_size, handle, i_loop, n_loop, ns_max
707  INTEGER, DIMENSION(3) :: no, np
708  INTEGER, DIMENSION(:), POINTER :: i_vals
709  LOGICAL :: do_rs2pw
710  REAL(kind=dp) :: tend, tstart
711  TYPE(cell_type), POINTER :: box
712  TYPE(pw_grid_type), POINTER :: grid
713  TYPE(pw_r3d_rs_type) :: ca
714  TYPE(realspace_grid_desc_type), POINTER :: rs_desc
715  TYPE(realspace_grid_input_type) :: input_settings
716  TYPE(realspace_grid_type) :: rs_grid
717  TYPE(section_vals_type), POINTER :: rs_grid_section
718 
719  CALL timeset(routinen, handle)
720 
721  !..set fft lib
722  CALL init_fft(globenv%default_fft_library, alltoall=.false., fftsg_sizes=.true., &
723  pool_limit=globenv%fft_pool_scratch_limit, &
724  wisdom_file=globenv%fftw_wisdom_file_name, &
725  plan_style=globenv%fftw_plan_type)
726 
727  ! .. set cell (should otherwise be irrelevant)
728  NULLIFY (box)
729  CALL cell_create(box)
730  box%hmat = reshape((/20.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 20.0_dp, 0.0_dp, &
731  0.0_dp, 0.0_dp, 20.0_dp/), (/3, 3/))
732  CALL init_cell(box)
733 
734  ! .. grid type and pw_grid
735  NULLIFY (grid)
736  CALL section_vals_val_get(rs_pw_transfer_section, "GRID", i_vals=i_vals)
737  np = i_vals
738  CALL pw_grid_create(grid, para_env)
739  CALL pw_grid_setup(box%hmat, grid, grid_span=fullspace, npts=np, fft_usage=.true., iounit=iw)
740  no = grid%npts
741 
742  CALL ca%create(grid)
743  CALL pw_zero(ca)
744 
745  ! .. rs input setting type
746  CALL section_vals_val_get(rs_pw_transfer_section, "HALO_SIZE", i_val=halo_size)
747  rs_grid_section => section_vals_get_subs_vals(rs_pw_transfer_section, "RS_GRID")
748  ns_max = 2*halo_size + 1
749  CALL init_input_type(input_settings, ns_max, rs_grid_section, 1, (/-1, -1, -1/))
750 
751  ! .. rs type
752  NULLIFY (rs_desc)
753  CALL rs_grid_create_descriptor(rs_desc, pw_grid=grid, input_settings=input_settings)
754  CALL rs_grid_create(rs_grid, rs_desc)
755  CALL rs_grid_print(rs_grid, iw)
756  CALL rs_grid_zero(rs_grid)
757 
758  ! Put random values on the grid, so summation check will pick up errors
759  CALL random_number(rs_grid%r)
760 
761  CALL section_vals_val_get(rs_pw_transfer_section, "N_loop", i_val=n_loop)
762  CALL section_vals_val_get(rs_pw_transfer_section, "RS2PW", l_val=do_rs2pw)
763 
764  ! go for the real loops, sync to get max timings
765  IF (para_env%is_source()) THEN
766  WRITE (iw, '(T2,A)') ""
767  WRITE (iw, '(T2,A)') "Timing rs_pw_transfer routine"
768  WRITE (iw, '(T2,A)') ""
769  WRITE (iw, '(T2,A)') "iteration time[s]"
770  END IF
771  DO i_loop = 1, n_loop
772  CALL para_env%sync()
773  tstart = m_walltime()
774  IF (do_rs2pw) THEN
775  CALL transfer_rs2pw(rs_grid, ca)
776  ELSE
777  CALL transfer_pw2rs(rs_grid, ca)
778  END IF
779  CALL para_env%sync()
780  tend = m_walltime()
781  IF (para_env%is_source()) THEN
782  WRITE (iw, '(T2,I9,1X,F12.6)') i_loop, tend - tstart
783  END IF
784  END DO
785 
786  !cleanup
787  CALL rs_grid_release(rs_grid)
788  CALL rs_grid_release_descriptor(rs_desc)
789  CALL ca%release()
790  CALL pw_grid_release(grid)
791  CALL cell_release(box)
792  CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
793 
794  CALL timestop(handle)
795 
796  END SUBROUTINE rs_pw_transfer_test
797 
798 ! **************************************************************************************************
799 !> \brief Tests the performance of PW calls to FFT routines
800 !> \param para_env ...
801 !> \param iw ...
802 !> \param globenv ...
803 !> \param pw_transfer_section ...
804 !> \par History
805 !> JGH 6-Feb-2001 : Test and performance code
806 !> Made input sensitive [Joost VandeVondele]
807 !> \author JGH 1-JAN-2001
808 ! **************************************************************************************************
809  SUBROUTINE pw_fft_test(para_env, iw, globenv, pw_transfer_section)
810 
811  TYPE(mp_para_env_type), POINTER :: para_env
812  INTEGER :: iw
813  TYPE(global_environment_type), POINTER :: globenv
814  TYPE(section_vals_type), POINTER :: pw_transfer_section
815 
816  REAL(kind=dp), PARAMETER :: toler = 1.e-11_dp
817 
818  INTEGER :: blocked_id, grid_span, i_layout, i_rep, &
819  ig, ip, itmp, n_loop, n_rep, nn, p, q
820  INTEGER, ALLOCATABLE, DIMENSION(:, :) :: layouts
821  INTEGER, DIMENSION(2) :: distribution_layout
822  INTEGER, DIMENSION(3) :: no, np
823  INTEGER, DIMENSION(:), POINTER :: i_vals
824  LOGICAL :: debug, is_fullspace, odd, &
825  pw_grid_layout_all, spherical
826  REAL(kind=dp) :: em, et, flops, gsq, perf, t, t_max, &
827  t_min, tend, tstart
828  REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: t_end, t_start
829  TYPE(cell_type), POINTER :: box
830  TYPE(pw_c1d_gs_type) :: ca, cc
831  TYPE(pw_c3d_rs_type) :: cb
832  TYPE(pw_grid_type), POINTER :: grid
833 
834 !..set fft lib
835 
836  CALL init_fft(globenv%default_fft_library, alltoall=.false., fftsg_sizes=.true., &
837  pool_limit=globenv%fft_pool_scratch_limit, &
838  wisdom_file=globenv%fftw_wisdom_file_name, &
839  plan_style=globenv%fftw_plan_type)
840 
841  !..the unit cell (should not really matter, the number of grid points do)
842  NULLIFY (box, grid)
843  CALL cell_create(box)
844  box%hmat = reshape((/10.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 8.0_dp, 0.0_dp, &
845  0.0_dp, 0.0_dp, 7.0_dp/), (/3, 3/))
846  CALL init_cell(box)
847 
848  CALL section_vals_get(pw_transfer_section, n_repetition=n_rep)
849  DO i_rep = 1, n_rep
850 
851  ! how often should we do the transfer
852  CALL section_vals_val_get(pw_transfer_section, "N_loop", i_rep_section=i_rep, i_val=n_loop)
853  ALLOCATE (t_start(n_loop))
854  ALLOCATE (t_end(n_loop))
855 
856  ! setup of the grids
857  CALL section_vals_val_get(pw_transfer_section, "GRID", i_rep_section=i_rep, i_vals=i_vals)
858  np = i_vals
859 
860  CALL section_vals_val_get(pw_transfer_section, "PW_GRID_BLOCKED", i_rep_section=i_rep, i_val=blocked_id)
861  CALL section_vals_val_get(pw_transfer_section, "DEBUG", i_rep_section=i_rep, l_val=debug)
862 
863  CALL section_vals_val_get(pw_transfer_section, "PW_GRID_LAYOUT_ALL", i_rep_section=i_rep, &
864  l_val=pw_grid_layout_all)
865 
866  ! prepare to loop over all or a specific layout
867  IF (pw_grid_layout_all) THEN
868  ! count layouts that fit
869  itmp = 0
870  ! start from 2, (/1,para_env%num_pe/) is not supported
871  DO p = 2, para_env%num_pe
872  q = para_env%num_pe/p
873  IF (p*q == para_env%num_pe) THEN
874  itmp = itmp + 1
875  END IF
876  END DO
877  ! build list
878  ALLOCATE (layouts(2, itmp))
879  itmp = 0
880  DO p = 2, para_env%num_pe
881  q = para_env%num_pe/p
882  IF (p*q == para_env%num_pe) THEN
883  itmp = itmp + 1
884  layouts(:, itmp) = (/p, q/)
885  END IF
886  END DO
887  ELSE
888  CALL section_vals_val_get(pw_transfer_section, "PW_GRID_LAYOUT", i_rep_section=i_rep, i_vals=i_vals)
889  ALLOCATE (layouts(2, 1))
890  layouts(:, 1) = i_vals
891  END IF
892 
893  DO i_layout = 1, SIZE(layouts, 2)
894 
895  distribution_layout = layouts(:, i_layout)
896 
897  CALL pw_grid_create(grid, para_env)
898 
899  CALL section_vals_val_get(pw_transfer_section, "PW_GRID", i_rep_section=i_rep, i_val=itmp)
900 
901  ! from cp_control_utils
902  SELECT CASE (itmp)
903  CASE (do_pwgrid_spherical)
904  spherical = .true.
905  is_fullspace = .false.
907  spherical = .false.
908  is_fullspace = .true.
910  spherical = .false.
911  is_fullspace = .false.
912  END SELECT
913 
914  ! from pw_env_methods
915  IF (spherical) THEN
916  grid_span = halfspace
917  spherical = .true.
918  odd = .true.
919  ELSE IF (is_fullspace) THEN
920  grid_span = fullspace
921  spherical = .false.
922  odd = .false.
923  ELSE
924  grid_span = halfspace
925  spherical = .false.
926  odd = .true.
927  END IF
928 
929  ! actual setup
930  CALL pw_grid_setup(box%hmat, grid, grid_span=grid_span, odd=odd, spherical=spherical, &
931  blocked=blocked_id, npts=np, fft_usage=.true., &
932  rs_dims=distribution_layout, iounit=iw)
933 
934  IF (iw > 0) CALL m_flush(iw)
935 
936  ! note that the number of grid points might be different from what the user requested (fft-able needed)
937  no = grid%npts
938 
939  CALL ca%create(grid)
940  CALL cb%create(grid)
941  CALL cc%create(grid)
942 
943  ! initialize data
944  CALL pw_zero(ca)
945  CALL pw_zero(cb)
946  CALL pw_zero(cc)
947  nn = SIZE(ca%array)
948  DO ig = 1, nn
949  gsq = grid%gsq(ig)
950  ca%array(ig) = exp(-gsq)
951  END DO
952 
953  flops = product(no)*30.0_dp*log(real(maxval(no), kind=dp))
954  tstart = m_walltime()
955  DO ip = 1, n_loop
956  CALL para_env%sync()
957  t_start(ip) = m_walltime()
958  CALL pw_transfer(ca, cb, debug)
959  CALL pw_transfer(cb, cc, debug)
960  CALL para_env%sync()
961  t_end(ip) = m_walltime()
962  END DO
963  tend = m_walltime()
964  t = tend - tstart + threshold
965  IF (t > 0.0_dp) THEN
966  perf = real(n_loop, kind=dp)*2.0_dp*flops*1.e-6_dp/t
967  ELSE
968  perf = 0.0_dp
969  END IF
970 
971  em = maxval(abs(ca%array(:) - cc%array(:)))
972  CALL para_env%max(em)
973  et = sum(abs(ca%array(:) - cc%array(:)))
974  CALL para_env%sum(et)
975  t_min = minval(t_end - t_start)
976  t_max = maxval(t_end - t_start)
977 
978  IF (para_env%is_source()) THEN
979  WRITE (iw, *)
980  WRITE (iw, '(A,T67,E14.6)') " Parallel FFT Tests: Maximal Error ", em
981  WRITE (iw, '(A,T67,E14.6)') " Parallel FFT Tests: Total Error ", et
982  WRITE (iw, '(A,T67,F14.0)') &
983  " Parallel FFT Tests: Performance [Mflops] ", perf
984  WRITE (iw, '(A,T67,F14.6)') " Best time : ", t_min
985  WRITE (iw, '(A,T67,F14.6)') " Worst time: ", t_max
986  IF (iw > 0) CALL m_flush(iw)
987  END IF
988 
989  ! need debugging ???
990  IF (em > toler .OR. et > toler) THEN
991  cpwarn("The FFT results are not accurate ... starting debug pw_transfer")
992  CALL pw_transfer(ca, cb, .true.)
993  CALL pw_transfer(cb, cc, .true.)
994  END IF
995 
996  ! done with these grids
997  CALL ca%release()
998  CALL cb%release()
999  CALL cc%release()
1000  CALL pw_grid_release(grid)
1001 
1002  END DO
1003 
1004  ! local arrays
1005  DEALLOCATE (layouts)
1006  DEALLOCATE (t_start)
1007  DEALLOCATE (t_end)
1008 
1009  END DO
1010 
1011  ! cleanup
1012  CALL cell_release(box)
1013  CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
1014 
1015  END SUBROUTINE pw_fft_test
1016 
1017 ! **************************************************************************************************
1018 !> \brief Tests the eigensolver library routines
1019 !> \param para_env ...
1020 !> \param iw ...
1021 !> \param eigensolver_section ...
1022 !> \par History
1023 !> JGH 6-Feb-2001 : Test and performance code
1024 !> \author JGH 1-JAN-2001
1025 ! **************************************************************************************************
1026  SUBROUTINE eigensolver_test(para_env, iw, eigensolver_section)
1027 
1028  TYPE(mp_para_env_type), POINTER :: para_env
1029  INTEGER :: iw
1030  TYPE(section_vals_type), POINTER :: eigensolver_section
1031 
1032  INTEGER :: diag_method, i, i_loop, i_rep, &
1033  init_method, j, n, n_loop, n_rep, &
1034  neig, unit_number
1035  REAL(kind=dp) :: t1, t2
1036  REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: eigenvalues
1037  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: buffer
1038  TYPE(cp_blacs_env_type), POINTER :: blacs_env
1039  TYPE(cp_fm_struct_type), POINTER :: fmstruct
1040  TYPE(cp_fm_type) :: eigenvectors, matrix, work
1041  TYPE(rng_stream_type), ALLOCATABLE :: rng_stream
1042 
1043  IF (iw > 0) THEN
1044  WRITE (unit=iw, fmt="(/,/,T2,A,/)") "EIGENSOLVER TEST"
1045  END IF
1046 
1047  ! create blacs env corresponding to para_env
1048  NULLIFY (blacs_env)
1049  CALL cp_blacs_env_create(blacs_env=blacs_env, &
1050  para_env=para_env)
1051 
1052  ! loop over all tests
1053  CALL section_vals_get(eigensolver_section, n_repetition=n_rep)
1054  DO i_rep = 1, n_rep
1055 
1056  ! parse section
1057  CALL section_vals_val_get(eigensolver_section, "N", i_rep_section=i_rep, i_val=n)
1058  CALL section_vals_val_get(eigensolver_section, "EIGENVALUES", i_rep_section=i_rep, i_val=neig)
1059  CALL section_vals_val_get(eigensolver_section, "DIAG_METHOD", i_rep_section=i_rep, i_val=diag_method)
1060  CALL section_vals_val_get(eigensolver_section, "INIT_METHOD", i_rep_section=i_rep, i_val=init_method)
1061  CALL section_vals_val_get(eigensolver_section, "N_loop", i_rep_section=i_rep, i_val=n_loop)
1062 
1063  ! proper number of eigs
1064  IF (neig < 0) neig = n
1065  neig = min(neig, n)
1066 
1067  ! report
1068  IF (iw > 0) THEN
1069  WRITE (iw, *) "Matrix size", n
1070  WRITE (iw, *) "Number of eigenvalues", neig
1071  WRITE (iw, *) "Timing loops", n_loop
1072  SELECT CASE (diag_method)
1073  CASE (do_diag_syevd)
1074  WRITE (iw, *) "Diag using syevd"
1075  CASE (do_diag_syevx)
1076  WRITE (iw, *) "Diag using syevx"
1077  CASE DEFAULT
1078  ! stop
1079  END SELECT
1080 
1081  SELECT CASE (init_method)
1082  CASE (do_mat_random)
1083  WRITE (iw, *) "using random matrix"
1084  CASE (do_mat_read)
1085  WRITE (iw, *) "reading from file"
1086  CASE DEFAULT
1087  ! stop
1088  END SELECT
1089  END IF
1090 
1091  ! create matrix struct type
1092  NULLIFY (fmstruct)
1093  CALL cp_fm_struct_create(fmstruct=fmstruct, &
1094  para_env=para_env, &
1095  context=blacs_env, &
1096  nrow_global=n, &
1097  ncol_global=n)
1098 
1099  ! create all needed matrices, and buffers for the eigenvalues
1100  CALL cp_fm_create(matrix=matrix, &
1101  matrix_struct=fmstruct, &
1102  name="MATRIX")
1103  CALL cp_fm_set_all(matrix, 0.0_dp)
1104 
1105  CALL cp_fm_create(matrix=eigenvectors, &
1106  matrix_struct=fmstruct, &
1107  name="EIGENVECTORS")
1108  CALL cp_fm_set_all(eigenvectors, 0.0_dp)
1109 
1110  CALL cp_fm_create(matrix=work, &
1111  matrix_struct=fmstruct, &
1112  name="WORK")
1113  CALL cp_fm_set_all(matrix, 0.0_dp)
1114 
1115  ALLOCATE (eigenvalues(n))
1116  eigenvalues = 0.0_dp
1117  ALLOCATE (buffer(1, n))
1118 
1119  ! generate initial matrix, either by reading a file, or using random numbers
1120  IF (para_env%is_source()) THEN
1121  SELECT CASE (init_method)
1122  CASE (do_mat_random)
1123  rng_stream = rng_stream_type( &
1124  name="rng_stream", &
1125  distribution_type=uniform, &
1126  extended_precision=.true.)
1127  CASE (do_mat_read)
1128  CALL open_file(file_name="MATRIX", &
1129  file_action="READ", &
1130  file_form="FORMATTED", &
1131  file_status="OLD", &
1132  unit_number=unit_number)
1133  END SELECT
1134  END IF
1135 
1136  DO i = 1, n
1137  IF (para_env%is_source()) THEN
1138  SELECT CASE (init_method)
1139  CASE (do_mat_random)
1140  DO j = i, n
1141  buffer(1, j) = rng_stream%next() - 0.5_dp
1142  END DO
1143  !MK activate/modify for a diagonal dominant symmetric matrix:
1144  !MK buffer(1,i) = 10.0_dp*buffer(1,i)
1145  CASE (do_mat_read)
1146  READ (unit=unit_number, fmt=*) buffer(1, 1:n)
1147  END SELECT
1148  END IF
1149  CALL para_env%bcast(buffer)
1150  SELECT CASE (init_method)
1151  CASE (do_mat_random)
1152  CALL cp_fm_set_submatrix(fm=matrix, &
1153  new_values=buffer, &
1154  start_row=i, &
1155  start_col=i, &
1156  n_rows=1, &
1157  n_cols=n - i + 1, &
1158  alpha=1.0_dp, &
1159  beta=0.0_dp, &
1160  transpose=.false.)
1161  CALL cp_fm_set_submatrix(fm=matrix, &
1162  new_values=buffer, &
1163  start_row=i, &
1164  start_col=i, &
1165  n_rows=n - i + 1, &
1166  n_cols=1, &
1167  alpha=1.0_dp, &
1168  beta=0.0_dp, &
1169  transpose=.true.)
1170  CASE (do_mat_read)
1171  CALL cp_fm_set_submatrix(fm=matrix, &
1172  new_values=buffer, &
1173  start_row=i, &
1174  start_col=1, &
1175  n_rows=1, &
1176  n_cols=n, &
1177  alpha=1.0_dp, &
1178  beta=0.0_dp, &
1179  transpose=.false.)
1180  END SELECT
1181  END DO
1182 
1183  DEALLOCATE (buffer)
1184 
1185  IF (para_env%is_source()) THEN
1186  SELECT CASE (init_method)
1187  CASE (do_mat_read)
1188  CALL close_file(unit_number=unit_number)
1189  END SELECT
1190  END IF
1191 
1192  DO i_loop = 1, n_loop
1193  eigenvalues = 0.0_dp
1194  CALL cp_fm_set_all(eigenvectors, 0.0_dp)
1195  CALL cp_fm_to_fm(source=matrix, &
1196  destination=work)
1197 
1198  ! DONE, now testing
1199  t1 = m_walltime()
1200  SELECT CASE (diag_method)
1201  CASE (do_diag_syevd)
1202  CALL cp_fm_syevd(matrix=work, &
1203  eigenvectors=eigenvectors, &
1204  eigenvalues=eigenvalues)
1205  CASE (do_diag_syevx)
1206  CALL cp_fm_syevx(matrix=work, &
1207  eigenvectors=eigenvectors, &
1208  eigenvalues=eigenvalues, &
1209  neig=neig, &
1210  work_syevx=1.0_dp)
1211  END SELECT
1212  t2 = m_walltime()
1213  IF (iw > 0) WRITE (iw, *) "Timing for loop ", i_loop, " : ", t2 - t1
1214  END DO
1215 
1216  IF (iw > 0) THEN
1217  WRITE (iw, *) "Eigenvalues: "
1218  WRITE (unit=iw, fmt="(T3,5F14.6)") eigenvalues(1:neig)
1219  WRITE (unit=iw, fmt="(T3,A4,F16.6)") "Sum:", sum(eigenvalues(1:neig))
1220  WRITE (iw, *) ""
1221  END IF
1222 
1223  ! Clean up
1224  DEALLOCATE (eigenvalues)
1225  CALL cp_fm_release(matrix=work)
1226  CALL cp_fm_release(matrix=eigenvectors)
1227  CALL cp_fm_release(matrix=matrix)
1228  CALL cp_fm_struct_release(fmstruct=fmstruct)
1229 
1230  END DO
1231 
1232  CALL cp_blacs_env_release(blacs_env=blacs_env)
1233 
1234  END SUBROUTINE eigensolver_test
1235 
1236 ! **************************************************************************************************
1237 !> \brief Tests the parallel matrix multiply
1238 !> \param para_env ...
1239 !> \param iw ...
1240 !> \param cp_fm_gemm_test_section ...
1241 ! **************************************************************************************************
1242  SUBROUTINE cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
1243 
1244  TYPE(mp_para_env_type), POINTER :: para_env
1245  INTEGER :: iw
1246  TYPE(section_vals_type), POINTER :: cp_fm_gemm_test_section
1247 
1248  CHARACTER(LEN=1) :: transa, transb
1249  INTEGER :: i_loop, i_rep, k, m, n, n_loop, n_rep, ncol_block, ncol_block_actual, &
1250  ncol_global, np, nrow_block, nrow_block_actual, nrow_global
1251  INTEGER, DIMENSION(:), POINTER :: grid_2d
1252  LOGICAL :: force_blocksize, row_major, transa_p, &
1253  transb_p
1254  REAL(kind=dp) :: t1, t2, t3, t4
1255  TYPE(cp_blacs_env_type), POINTER :: blacs_env
1256  TYPE(cp_fm_struct_type), POINTER :: fmstruct_a, fmstruct_b, fmstruct_c
1257  TYPE(cp_fm_type) :: matrix_a, matrix_b, matrix_c
1258 
1259  CALL section_vals_get(cp_fm_gemm_test_section, n_repetition=n_rep)
1260  DO i_rep = 1, n_rep
1261 
1262  ! how often should we do the multiply
1263  CALL section_vals_val_get(cp_fm_gemm_test_section, "N_loop", i_rep_section=i_rep, i_val=n_loop)
1264 
1265  ! matrices def.
1266  CALL section_vals_val_get(cp_fm_gemm_test_section, "K", i_rep_section=i_rep, i_val=k)
1267  CALL section_vals_val_get(cp_fm_gemm_test_section, "N", i_rep_section=i_rep, i_val=n)
1268  CALL section_vals_val_get(cp_fm_gemm_test_section, "M", i_rep_section=i_rep, i_val=m)
1269  CALL section_vals_val_get(cp_fm_gemm_test_section, "transa", i_rep_section=i_rep, l_val=transa_p)
1270  CALL section_vals_val_get(cp_fm_gemm_test_section, "transb", i_rep_section=i_rep, l_val=transb_p)
1271  CALL section_vals_val_get(cp_fm_gemm_test_section, "nrow_block", i_rep_section=i_rep, i_val=nrow_block)
1272  CALL section_vals_val_get(cp_fm_gemm_test_section, "ncol_block", i_rep_section=i_rep, i_val=ncol_block)
1273  CALL section_vals_val_get(cp_fm_gemm_test_section, "ROW_MAJOR", i_rep_section=i_rep, l_val=row_major)
1274  CALL section_vals_val_get(cp_fm_gemm_test_section, "GRID_2D", i_rep_section=i_rep, i_vals=grid_2d)
1275  CALL section_vals_val_get(cp_fm_gemm_test_section, "FORCE_BLOCKSIZE", i_rep_section=i_rep, l_val=force_blocksize)
1276  transa = "N"
1277  transb = "N"
1278  IF (transa_p) transa = "T"
1279  IF (transb_p) transb = "T"
1280 
1281  IF (iw > 0) THEN
1282  WRITE (iw, '(T2,A)') "----------- TESTING PARALLEL MATRIX MULTIPLY -------------"
1283  WRITE (iw, '(T2,A)', advance="NO") "C = "
1284  IF (transa_p) THEN
1285  WRITE (iw, '(A)', advance="NO") "TRANSPOSE(A) x"
1286  ELSE
1287  WRITE (iw, '(A)', advance="NO") "A x "
1288  END IF
1289  IF (transb_p) THEN
1290  WRITE (iw, '(A)') "TRANSPOSE(B) "
1291  ELSE
1292  WRITE (iw, '(A)') "B "
1293  END IF
1294  WRITE (iw, '(T2,A,T50,I5,A,I5)') 'requested block size', nrow_block, ' by ', ncol_block
1295  WRITE (iw, '(T2,A,T50,I5)') 'number of repetitions of cp_fm_gemm ', n_loop
1296  WRITE (iw, '(T2,A,T50,L5)') 'Row Major', row_major
1297  WRITE (iw, '(T2,A,T50,2I7)') 'GRID_2D ', grid_2d
1298  WRITE (iw, '(T2,A,T50,L5)') 'Force blocksize ', force_blocksize
1299  ! check the return value of pilaenv, too small values limit the performance (assuming pdgemm is the vanilla variant)
1300  np = cp_fm_pilaenv(0, 'D')
1301  IF (np > 0) THEN
1302  WRITE (iw, '(T2,A,T50,I5)') 'PILAENV blocksize', np
1303  END IF
1304  END IF
1305 
1306  NULLIFY (blacs_env)
1307  CALL cp_blacs_env_create(blacs_env=blacs_env, &
1308  para_env=para_env, &
1309  row_major=row_major, &
1310  grid_2d=grid_2d)
1311 
1312  NULLIFY (fmstruct_a)
1313  IF (transa_p) THEN
1314  nrow_global = m; ncol_global = k
1315  ELSE
1316  nrow_global = k; ncol_global = m
1317  END IF
1318  CALL cp_fm_struct_create(fmstruct=fmstruct_a, para_env=para_env, context=blacs_env, &
1319  nrow_global=nrow_global, ncol_global=ncol_global, &
1320  nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1321  CALL cp_fm_struct_get(fmstruct_a, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1322  IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix A ', nrow_global, " by ", ncol_global, &
1323  ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
1324 
1325  IF (transb_p) THEN
1326  nrow_global = n; ncol_global = m
1327  ELSE
1328  nrow_global = m; ncol_global = n
1329  END IF
1330  NULLIFY (fmstruct_b)
1331  CALL cp_fm_struct_create(fmstruct=fmstruct_b, para_env=para_env, context=blacs_env, &
1332  nrow_global=nrow_global, ncol_global=ncol_global, &
1333  nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1334  CALL cp_fm_struct_get(fmstruct_b, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1335  IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix B ', nrow_global, " by ", ncol_global, &
1336  ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
1337 
1338  NULLIFY (fmstruct_c)
1339  nrow_global = k
1340  ncol_global = n
1341  CALL cp_fm_struct_create(fmstruct=fmstruct_c, para_env=para_env, context=blacs_env, &
1342  nrow_global=nrow_global, ncol_global=ncol_global, &
1343  nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1344  CALL cp_fm_struct_get(fmstruct_c, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1345  IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix C ', nrow_global, " by ", ncol_global, &
1346  ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
1347 
1348  CALL cp_fm_create(matrix=matrix_a, matrix_struct=fmstruct_a, name="MATRIX A")
1349  CALL cp_fm_create(matrix=matrix_b, matrix_struct=fmstruct_b, name="MATRIX B")
1350  CALL cp_fm_create(matrix=matrix_c, matrix_struct=fmstruct_c, name="MATRIX C")
1351 
1352  CALL random_number(matrix_a%local_data)
1353  CALL random_number(matrix_b%local_data)
1354  CALL random_number(matrix_c%local_data)
1355 
1356  IF (iw > 0) CALL m_flush(iw)
1357 
1358  t1 = m_walltime()
1359  DO i_loop = 1, n_loop
1360  t3 = m_walltime()
1361  CALL parallel_gemm(transa, transb, k, n, m, 1.0_dp, matrix_a, matrix_b, 0.0_dp, matrix_c)
1362  t4 = m_walltime()
1363  IF (iw > 0) THEN
1364  WRITE (iw, '(T2,A,T50,F12.6)') "cp_fm_gemm timing: ", (t4 - t3)
1365  CALL m_flush(iw)
1366  END IF
1367  END DO
1368  t2 = m_walltime()
1369 
1370  IF (iw > 0) THEN
1371  WRITE (iw, '(T2,A,T50,F12.6)') "average cp_fm_gemm timing: ", (t2 - t1)/n_loop
1372  IF (t2 > t1) THEN
1373  WRITE (iw, '(T2,A,T50,F12.6)') "cp_fm_gemm Gflops per MPI task: ", &
1374  2*real(m, kind=dp)*real(n, kind=dp)*real(k, kind=dp)*n_loop/max(0.001_dp, t2 - t1)/1.0e9_dp/para_env%num_pe
1375  END IF
1376  END IF
1377 
1378  CALL cp_fm_release(matrix=matrix_a)
1379  CALL cp_fm_release(matrix=matrix_b)
1380  CALL cp_fm_release(matrix=matrix_c)
1381  CALL cp_fm_struct_release(fmstruct=fmstruct_a)
1382  CALL cp_fm_struct_release(fmstruct=fmstruct_b)
1383  CALL cp_fm_struct_release(fmstruct=fmstruct_c)
1384  CALL cp_blacs_env_release(blacs_env=blacs_env)
1385 
1386  END DO
1387 
1388  END SUBROUTINE cp_fm_gemm_test
1389 
1390 ! **************************************************************************************************
1391 !> \brief Tests the DBCSR interface.
1392 !> \param para_env ...
1393 !> \param iw ...
1394 !> \param input_section ...
1395 ! **************************************************************************************************
1396  SUBROUTINE cp_dbcsr_tests(para_env, iw, input_section)
1397 
1398  TYPE(mp_para_env_type), POINTER :: para_env
1399  INTEGER :: iw
1400  TYPE(section_vals_type), POINTER :: input_section
1401 
1402  CHARACTER, DIMENSION(3) :: types
1403  INTEGER :: data_type, i_rep, k, m, n, n_loop, &
1404  n_rep, test_type
1405  INTEGER, DIMENSION(:), POINTER :: bs_k, bs_m, bs_n, nproc
1406  LOGICAL :: always_checksum, retain_sparsity, &
1407  transa_p, transb_p
1408  REAL(kind=dp) :: alpha, beta, filter_eps, s_a, s_b, s_c
1409 
1410 ! ---------------------------------------------------------------------------
1411 
1412  NULLIFY (bs_m, bs_n, bs_k)
1413  CALL section_vals_get(input_section, n_repetition=n_rep)
1414  CALL dbcsr_reset_randmat_seed()
1415  DO i_rep = 1, n_rep
1416  ! how often should we do the multiply
1417  CALL section_vals_val_get(input_section, "N_loop", i_rep_section=i_rep, i_val=n_loop)
1418 
1419  ! matrices def.
1420  CALL section_vals_val_get(input_section, "TEST_TYPE", i_rep_section=i_rep, i_val=test_type)
1421  CALL section_vals_val_get(input_section, "DATA_TYPE", i_rep_section=i_rep, i_val=data_type)
1422  CALL section_vals_val_get(input_section, "K", i_rep_section=i_rep, i_val=k)
1423  CALL section_vals_val_get(input_section, "N", i_rep_section=i_rep, i_val=n)
1424  CALL section_vals_val_get(input_section, "M", i_rep_section=i_rep, i_val=m)
1425  CALL section_vals_val_get(input_section, "transa", i_rep_section=i_rep, l_val=transa_p)
1426  CALL section_vals_val_get(input_section, "transb", i_rep_section=i_rep, l_val=transb_p)
1427  CALL section_vals_val_get(input_section, "bs_m", i_rep_section=i_rep, &
1428  i_vals=bs_m)
1429  CALL section_vals_val_get(input_section, "bs_n", i_rep_section=i_rep, &
1430  i_vals=bs_n)
1431  CALL section_vals_val_get(input_section, "bs_k", i_rep_section=i_rep, &
1432  i_vals=bs_k)
1433  CALL section_vals_val_get(input_section, "keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
1434  CALL section_vals_val_get(input_section, "asparsity", i_rep_section=i_rep, r_val=s_a)
1435  CALL section_vals_val_get(input_section, "bsparsity", i_rep_section=i_rep, r_val=s_b)
1436  CALL section_vals_val_get(input_section, "csparsity", i_rep_section=i_rep, r_val=s_c)
1437  CALL section_vals_val_get(input_section, "alpha", i_rep_section=i_rep, r_val=alpha)
1438  CALL section_vals_val_get(input_section, "beta", i_rep_section=i_rep, r_val=beta)
1439  CALL section_vals_val_get(input_section, "nproc", i_rep_section=i_rep, &
1440  i_vals=nproc)
1441  CALL section_vals_val_get(input_section, "atype", i_rep_section=i_rep, &
1442  c_val=types(1))
1443  CALL section_vals_val_get(input_section, "btype", i_rep_section=i_rep, &
1444  c_val=types(2))
1445  CALL section_vals_val_get(input_section, "ctype", i_rep_section=i_rep, &
1446  c_val=types(3))
1447  CALL section_vals_val_get(input_section, "filter_eps", &
1448  i_rep_section=i_rep, r_val=filter_eps)
1449  CALL section_vals_val_get(input_section, "ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
1450 
1451  CALL dbcsr_run_tests(para_env%get_handle(), iw, nproc, &
1452  (/m, n, k/), &
1453  (/transa_p, transb_p/), &
1454  bs_m, bs_n, bs_k, &
1455  (/s_a, s_b, s_c/), &
1456  alpha, beta, &
1457  data_type=data_type, &
1458  test_type=test_type, &
1459  n_loops=n_loop, eps=filter_eps, retain_sparsity=retain_sparsity, &
1460  always_checksum=always_checksum)
1461  END DO
1462  END SUBROUTINE cp_dbcsr_tests
1463 
1464 ! **************************************************************************************************
1465 !> \brief Tests the DBM library.
1466 !> \param para_env ...
1467 !> \param iw ...
1468 !> \param input_section ...
1469 ! **************************************************************************************************
1470  SUBROUTINE run_dbm_tests(para_env, iw, input_section)
1471 
1472  TYPE(mp_para_env_type), POINTER :: para_env
1473  INTEGER :: iw
1474  TYPE(section_vals_type), POINTER :: input_section
1475 
1476  INTEGER :: i_rep, k, m, n, n_loop, n_rep
1477  INTEGER, DIMENSION(:), POINTER :: bs_k, bs_m, bs_n
1478  LOGICAL :: always_checksum, retain_sparsity, &
1479  transa_p, transb_p
1480  REAL(kind=dp) :: alpha, beta, filter_eps, s_a, s_b, s_c
1481 
1482 ! ---------------------------------------------------------------------------
1483 
1484  NULLIFY (bs_m, bs_n, bs_k)
1485  CALL section_vals_get(input_section, n_repetition=n_rep)
1486  CALL dbcsr_reset_randmat_seed()
1487  DO i_rep = 1, n_rep
1488  CALL section_vals_val_get(input_section, "N_loop", i_rep_section=i_rep, i_val=n_loop)
1489  CALL section_vals_val_get(input_section, "K", i_rep_section=i_rep, i_val=k)
1490  CALL section_vals_val_get(input_section, "N", i_rep_section=i_rep, i_val=n)
1491  CALL section_vals_val_get(input_section, "M", i_rep_section=i_rep, i_val=m)
1492  CALL section_vals_val_get(input_section, "transa", i_rep_section=i_rep, l_val=transa_p)
1493  CALL section_vals_val_get(input_section, "transb", i_rep_section=i_rep, l_val=transb_p)
1494  CALL section_vals_val_get(input_section, "bs_m", i_rep_section=i_rep, i_vals=bs_m)
1495  CALL section_vals_val_get(input_section, "bs_n", i_rep_section=i_rep, i_vals=bs_n)
1496  CALL section_vals_val_get(input_section, "bs_k", i_rep_section=i_rep, i_vals=bs_k)
1497  CALL section_vals_val_get(input_section, "keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
1498  CALL section_vals_val_get(input_section, "asparsity", i_rep_section=i_rep, r_val=s_a)
1499  CALL section_vals_val_get(input_section, "bsparsity", i_rep_section=i_rep, r_val=s_b)
1500  CALL section_vals_val_get(input_section, "csparsity", i_rep_section=i_rep, r_val=s_c)
1501  CALL section_vals_val_get(input_section, "alpha", i_rep_section=i_rep, r_val=alpha)
1502  CALL section_vals_val_get(input_section, "beta", i_rep_section=i_rep, r_val=beta)
1503  CALL section_vals_val_get(input_section, "filter_eps", i_rep_section=i_rep, r_val=filter_eps)
1504  CALL section_vals_val_get(input_section, "ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
1505 
1506  CALL dbm_run_tests(mp_group=para_env, &
1507  io_unit=iw, &
1508  matrix_sizes=(/m, n, k/), &
1509  trs=(/transa_p, transb_p/), &
1510  bs_m=bs_m, &
1511  bs_n=bs_n, &
1512  bs_k=bs_k, &
1513  sparsities=(/s_a, s_b, s_c/), &
1514  alpha=alpha, &
1515  beta=beta, &
1516  n_loops=n_loop, &
1517  eps=filter_eps, &
1518  retain_sparsity=retain_sparsity, &
1519  always_checksum=always_checksum)
1520  END DO
1521  END SUBROUTINE run_dbm_tests
1522 
1523 END MODULE library_tests
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
Test of Electron Repulsion Routines (ERI)
subroutine, public eri_test(iw)
...
Handles all functions related to the CELL.
Definition: cell_methods.F:15
subroutine, public init_cell(cell, hmat, periodic)
Initialise/readjust a simulation cell after hmat has been changed.
Definition: cell_methods.F:117
subroutine, public cell_create(cell, hmat, periodic, tag)
allocates and initializes a cell
Definition: cell_methods.F:85
Handles all functions related to the CELL.
Definition: cell_types.F:15
subroutine, public cell_release(cell)
releases the given cell (see doc/ReferenceCounting.html)
Definition: cell_types.F:559
Test of Clebsch-Gordon Coefficients.
Definition: cg_test.F:14
subroutine, public clebsch_gordon_test()
...
Definition: cg_test.F:42
methods related to the blacs parallel environment
Definition: cp_blacs_env.F:15
subroutine, public cp_blacs_env_release(blacs_env)
releases the given blacs_env
Definition: cp_blacs_env.F:282
subroutine, public cp_blacs_env_create(blacs_env, para_env, blacs_grid_layout, blacs_repeatable, row_major, grid_2d)
allocates and initializes a type that represent a blacs context
Definition: cp_blacs_env.F:123
Interface to Minimax-Ewald method for periodic ERI's to be used in CP2K.
subroutine, public cp_eri_mme_perf_acc_test(para_env, iw, eri_mme_test_section)
...
Utility routines to open and close files. Tracking of preconnections.
Definition: cp_files.F:16
subroutine, public open_file(file_name, file_status, file_form, file_action, file_position, file_pad, unit_number, debug, skip_get_unit_number, file_access)
Opens the requested file using a free unit number.
Definition: cp_files.F:308
subroutine, public close_file(unit_number, file_status, keep_preconnection)
Close an open file given by its logical unit number. Optionally, keep the file and unit preconnected.
Definition: cp_files.F:119
basic linear algebra operations for full matrices
subroutine, public cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
Definition: cp_fm_diag.F:17
subroutine, public cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
Computes all eigenvalues and vectors of a real symmetric matrix significantly faster than syevx,...
Definition: cp_fm_diag.F:413
subroutine, public cp_fm_syevx(matrix, eigenvectors, eigenvalues, neig, work_syevx)
compute eigenvalues and optionally eigenvectors of a real symmetric matrix using scalapack....
Definition: cp_fm_diag.F:657
represent the structure of a full matrix
Definition: cp_fm_struct.F:14
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
Definition: cp_fm_struct.F:125
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
Definition: cp_fm_struct.F:409
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
Definition: cp_fm_struct.F:320
represent a full matrix distributed on many processors
Definition: cp_fm_types.F:15
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
Definition: cp_fm_types.F:768
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
Definition: cp_fm_types.F:535
integer function, public cp_fm_pilaenv(ictxt, prec)
...
Definition: cp_fm_types.F:2635
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
Definition: cp_fm_types.F:167
various routines to log and control the output. The idea is that decisions about where to log should ...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
routines to handle the output, The idea is to remove the decision of wheter to output and what to out...
integer function, public cp_print_key_unit_nr(logger, basis_section, print_key_path, extension, middle_name, local, log_filename, ignore_should_output, file_form, file_position, file_action, file_status, do_backup, on_file, is_new_file, mpi_io, fout)
...
subroutine, public cp_print_key_finished_output(unit_nr, logger, basis_section, print_key_path, local, ignore_should_output, on_file, mpi_io)
should be called after you finish working with a unit obtained with cp_print_key_unit_nr,...
subroutine, public init_input_type(input_settings, nsmax, rs_grid_section, ilevel, higher_grid_layout)
parses an input section to assign the proper values to the input type
subroutine, public dbm_run_tests(mp_group, io_unit, matrix_sizes, trs, bs_m, bs_n, bs_k, sparsities, alpha, beta, n_loops, eps, retain_sparsity, always_checksum)
Tests the DBM library.
Definition: dbm_tests.F:54
subroutine, public fft_radix_operations(radix_in, radix_out, operation)
Determine the allowed lengths of FFT's '''.
Definition: fft_tools.F:239
integer, parameter, public bwfft
Definition: fft_tools.F:145
integer, parameter, public fft_radix_closest
Definition: fft_tools.F:146
subroutine, public init_fft(fftlib, alltoall, fftsg_sizes, pool_limit, wisdom_file, plan_style)
...
Definition: fft_tools.F:185
integer, parameter, public fwfft
Definition: fft_tools.F:145
subroutine, public finalize_fft(para_env, wisdom_file)
does whatever is needed to finalize the current fft setup
Definition: fft_tools.F:216
Define type storing the global information of a run. Keep the amount of stored data small....
Definition: global_types.F:21
collects all constants needed in input so that they can be used without circular dependencies
integer, parameter, public do_diag_syevd
integer, parameter, public do_mat_read
integer, parameter, public do_pwgrid_ns_fullspace
integer, parameter, public do_diag_syevx
integer, parameter, public do_pwgrid_spherical
integer, parameter, public do_mat_random
integer, parameter, public do_pwgrid_ns_halfspace
objects that represent the structure of input sections and the data contained in an input section
recursive type(section_vals_type) function, pointer, public section_vals_get_subs_vals(section_vals, subsection_name, i_rep_section, can_return_null)
returns the values of the requested subsection
subroutine, public section_vals_get(section_vals, ref_count, n_repetition, n_subs_vals_rep, section, explicit)
returns various attributes about the section_vals
subroutine, public section_vals_val_get(section_vals, keyword_name, i_rep_section, i_rep_val, n_rep_val, val, l_val, i_val, r_val, c_val, l_vals, i_vals, r_vals, c_vals, explicit)
returns the requested value
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public dp
Definition: kinds.F:34
Performance tests for basic tasks like matrix multiplies, copy, fft.
Definition: library_tests.F:18
subroutine, public lib_test(root_section, para_env, globenv)
Master routine for tests.
Machine interface based on Fortran 2003 and POSIX.
Definition: machine.F:17
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition: machine.F:106
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition: machine.F:123
Interface to the message passing library MPI.
Routines to calculate the minimax coefficients in order to approximate 1/x as a sum over exponential ...
Definition: minimax_exp.F:29
subroutine, public validate_exp_minimax(n_R, iw)
Unit test checking that numerical error of minimax approximations generated using any k15 or k53 coef...
Definition: minimax_exp.F:246
Routines to calculate frequency and time grids (integration points and weights) for correlation metho...
Definition: mp2_grids.F:14
subroutine, public test_least_square_ft(nR, iw)
test the singular value decomposition for the computation of integration weights for the Fourier tran...
Definition: mp2_grids.F:1162
Interface to the message passing library MPI.
Definition: mp_perf_test.F:23
subroutine, public mpi_perf_test(comm, npow, output_unit)
Tests the MPI library.
Definition: mp_perf_test.F:55
basic linear algebra operations for full matrixes
Parallel (pseudo)random number generator (RNG) for multiple streams and substreams of random numbers.
integer, parameter, public uniform
integer, parameter, public halfspace
Definition: pw_grid_types.F:28
integer, parameter, public fullspace
Definition: pw_grid_types.F:28
This module defines the grid data type and some basic operations on it.
Definition: pw_grids.F:36
subroutine, public pw_grid_release(pw_grid)
releases the given pw grid
Definition: pw_grids.F:2133
subroutine, public pw_grid_setup(cell_hmat, pw_grid, grid_span, cutoff, bounds, bounds_local, npts, spherical, odd, fft_usage, ncommensurate, icommensurate, blocked, ref_grid, rs_dims, iounit)
sets up a pw_grid
Definition: pw_grids.F:286
subroutine, public pw_grid_create(pw_grid, pe_group, local)
Initialize a PW grid with all defaults.
Definition: pw_grids.F:93
subroutine, public rs_grid_print(rs, iounit)
Print information on grids to output.
subroutine, public rs_grid_create(rs, desc)
...
subroutine, public rs_grid_create_descriptor(desc, pw_grid, input_settings, border_points)
Determine the setup of real space grids - this is divided up into the creation of a descriptor and th...
subroutine, public transfer_pw2rs(rs, pw)
...
subroutine, public rs_grid_release_descriptor(rs_desc)
releases the given rs grid descriptor (see doc/ReferenceCounting.html)
subroutine, public transfer_rs2pw(rs, pw)
...
subroutine, public rs_grid_release(rs_grid)
releases the given rs grid (see doc/ReferenceCounting.html)
subroutine, public rs_grid_zero(rs)
Initialize grid to zero.
Calculates 2-center integrals for different r12 operators comparing the Solid harmonic Gaussian integ...
subroutine, public shg_integrals_perf_acc_test(iw, shg_integrals_test_section)
Unit test for performance and accuracy of the SHG integrals.