99#include "./base/base_uses.f90"
106 INTEGER :: runtest(100)
107 REAL(KIND=
dp) :: max_memory
109 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'library_tests'
122 SUBROUTINE lib_test(root_section, para_env, globenv)
128 CHARACTER(LEN=*),
PARAMETER :: routinen =
'lib_test'
130 INTEGER :: handle, iw
133 TYPE(
section_vals_type),
POINTER :: cp_dbcsr_test_section, cp_fm_gemm_test_section, &
134 dbm_test_section, eigensolver_section, eri_mme_test_section, pw_transfer_section, &
135 rs_pw_transfer_section, shg_integrals_test_section
137 CALL timeset(routinen, handle)
143 WRITE (iw,
'(T2,79("*"))')
144 WRITE (iw,
'(A,T31,A,T80,A)')
' *',
' PERFORMANCE TESTS ',
'*'
145 WRITE (iw,
'(T2,79("*"))')
148 CALL test_input(root_section, para_env)
150 IF (runtest(1) /= 0)
CALL copy_test(para_env, iw)
152 IF (runtest(2) /= 0)
CALL matmul_test(para_env, test_matmul=.true., test_dgemm=.false., iw=iw)
153 IF (runtest(5) /= 0)
CALL matmul_test(para_env, test_matmul=.false., test_dgemm=.true., iw=iw)
155 IF (runtest(3) /= 0)
CALL fft_test(para_env, iw, globenv%fftw_plan_type, &
156 globenv%fftw_wisdom_file_name)
158 IF (runtest(4) /= 0)
CALL eri_test(iw)
164 IF (runtest(8) /= 0)
CALL mpi_perf_test(para_env, runtest(8), iw)
174 CALL rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
180 CALL pw_fft_test(para_env, iw, globenv, pw_transfer_section)
186 CALL cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
192 CALL eigensolver_test(para_env, iw, eigensolver_section)
211 CALL cp_dbcsr_tests(para_env, iw, cp_dbcsr_test_section)
218 CALL run_dbm_tests(para_env, iw, dbm_test_section)
223 CALL timestop(handle)
248 SUBROUTINE test_input(root_section, para_env)
272 END SUBROUTINE test_input
286 SUBROUTINE copy_test(para_env, iw)
290 INTEGER :: i, ierr, j, len, ntim, siz
291 REAL(kind=
dp) :: perf, t, tend, tstart
292 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: ca, cb
296 siz = abs(runtest(1))
297 IF (para_env%is_source())
WRITE (iw,
'(//,A,/)')
" Test of copy ( F95 ) "
300 IF (8.0_dp*real(len, kind=
dp) > max_memory*0.5_dp)
EXIT
301 ALLOCATE (ca(len), stat=ierr)
303 ALLOCATE (cb(len), stat=ierr)
306 CALL random_number(ca)
307 ntim = nint(1.e7_dp/real(len, kind=
dp))
309 ntim = min(ntim, siz*10000)
314 ca(1) = real(j, kind=
dp)
319 perf = real(ntim, kind=
dp)*real(len, kind=
dp)*1.e-6_dp/t
324 IF (para_env%is_source())
THEN
325 WRITE (iw,
'(A,i2,i10,A,T59,F14.4,A)')
" Copy test: Size = 2^", i, &
326 len/1024,
" Kwords", perf,
" Mcopy/s"
333 END SUBROUTINE copy_test
346 SUBROUTINE matmul_test(para_env, test_matmul, test_dgemm, iw)
348 LOGICAL :: test_matmul, test_dgemm
351 INTEGER :: i, ierr, j, len, ntim, siz
352 REAL(kind=
dp) :: perf, t, tend, tstart, xdum
353 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: ma, mb, mc
357 IF (test_matmul)
THEN
358 siz = abs(runtest(2))
359 IF (para_env%is_source())
WRITE (iw,
'(//,A,/)')
" Test of matmul ( F95 ) "
362 IF (8.0_dp*real(len*len, kind=
dp) > max_memory*0.3_dp)
EXIT
363 ALLOCATE (ma(len, len), stat=ierr)
365 ALLOCATE (mb(len, len), stat=ierr)
367 ALLOCATE (mc(len, len), stat=ierr)
371 CALL random_number(xdum)
373 CALL random_number(xdum)
375 ntim = nint(1.e8_dp/(2.0_dp*real(len, kind=
dp)**3))
377 ntim = min(ntim, siz*200)
380 mc(:, :) = matmul(ma, mb)
381 ma(1, 1) = real(j, kind=
dp)
385 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
386 IF (para_env%is_source())
THEN
387 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
388 " Matrix multiply test: c = a * b Size = ", len, perf,
" Mflop/s"
392 mc(:, :) = mc + matmul(ma, mb)
393 ma(1, 1) = real(j, kind=
dp)
398 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
403 IF (para_env%is_source())
THEN
404 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
405 " Matrix multiply test: a = a * b Size = ", len, perf,
" Mflop/s"
410 mc(:, :) = mc + matmul(ma, transpose(mb))
411 ma(1, 1) = real(j, kind=
dp)
416 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
421 IF (para_env%is_source())
THEN
422 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
423 " Matrix multiply test: c = a * b(T) Size = ", len, perf,
" Mflop/s"
428 mc(:, :) = mc + matmul(transpose(ma), mb)
429 ma(1, 1) = real(j, kind=
dp)
434 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
439 IF (para_env%is_source())
THEN
440 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
441 " Matrix multiply test: c = a(T) * b Size = ", len, perf,
" Mflop/s"
452 siz = abs(runtest(5))
453 IF (para_env%is_source())
WRITE (iw,
'(//,A,/)')
" Test of matmul ( BLAS ) "
456 IF (8.0_dp*real(len*len, kind=
dp) > max_memory*0.3_dp)
EXIT
457 ALLOCATE (ma(len, len), stat=ierr)
459 ALLOCATE (mb(len, len), stat=ierr)
461 ALLOCATE (mc(len, len), stat=ierr)
465 CALL random_number(xdum)
467 CALL random_number(xdum)
469 ntim = nint(1.e8_dp/(2.0_dp*real(len, kind=
dp)**3))
471 ntim = min(ntim, 1000)
475 CALL dgemm(
"N",
"N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
480 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
485 IF (para_env%is_source())
THEN
486 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
487 " Matrix multiply test: c = a * b Size = ", len, perf,
" Mflop/s"
492 CALL dgemm(
"N",
"N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
497 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
502 IF (para_env%is_source())
THEN
503 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
504 " Matrix multiply test: a = a * b Size = ", len, perf,
" Mflop/s"
509 CALL dgemm(
"N",
"T", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
514 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
519 IF (para_env%is_source())
THEN
520 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
521 " Matrix multiply test: c = a * b(T) Size = ", len, perf,
" Mflop/s"
526 CALL dgemm(
"T",
"N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
531 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
536 IF (para_env%is_source())
THEN
537 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
538 " Matrix multiply test: c = a(T) * b Size = ", len, perf,
" Mflop/s"
549 END SUBROUTINE matmul_test
561 SUBROUTINE fft_test(para_env, iw, fftw_plan_type, wisdom_file)
564 INTEGER :: iw, fftw_plan_type
565 CHARACTER(LEN=*),
INTENT(IN) :: wisdom_file
567 INTEGER,
PARAMETER :: ndate(3) = (/12, 48, 96/)
569 INTEGER :: iall, ierr, it, j, len, n(3), ntim, &
570 radix_in, radix_out, siz, stat
571 COMPLEX(KIND=dp),
DIMENSION(4, 4, 4) :: zz
572 COMPLEX(KIND=dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: ca, cb, cc
573 CHARACTER(LEN=7) :: method
574 REAL(kind=
dp) :: flops, perf, scale, t, tdiff, tend, &
576 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: ra
580 IF (para_env%is_source())
WRITE (iw,
'(//,A,/)')
" Test of 3D-FFT "
581 siz = abs(runtest(3))
588 CALL init_fft(
"FFTSG", alltoall=.false., fftsg_sizes=.true., wisdom_file=wisdom_file, &
589 pool_limit=10, plan_style=fftw_plan_type)
594 CALL init_fft(
"FFTW3", alltoall=.false., fftsg_sizes=.true., wisdom_file=wisdom_file, &
595 pool_limit=10, plan_style=fftw_plan_type)
607 IF (16.0_dp*real(len*len*len, kind=
dp) > max_memory*0.5_dp)
EXIT
608 ALLOCATE (ra(len, len, len), stat=ierr)
609 ALLOCATE (ca(len, len, len), stat=ierr)
610 CALL random_number(ra)
612 CALL random_number(ra)
613 ca(:, :, :) = ca +
gaussi*ra
614 flops = real(len**3, kind=
dp)*15.0_dp*log(real(len, kind=
dp))
615 ntim = nint(siz*1.e7_dp/flops)
617 ntim = min(ntim, 200)
618 scale = 1.0_dp/real(len**3, kind=
dp)
627 perf = real(ntim, kind=
dp)*2.0_dp*flops*1.e-6_dp/t
632 IF (para_env%is_source())
THEN
633 WRITE (iw,
'(T2,A,A,i6,T59,F14.4,A)') &
634 adjustr(method),
" test (in-place) Size = ", len, perf,
" Mflop/s"
639 IF (para_env%is_source())
WRITE (iw, *)
643 ALLOCATE (ra(len, len, len))
644 ALLOCATE (ca(len, len, len))
645 ALLOCATE (cb(len, len, len))
646 ALLOCATE (cc(len, len, len))
647 CALL random_number(ra)
649 CALL random_number(ra)
650 ca(:, :, :) = ca +
gaussi*ra
653 tdiff = maxval(abs(ca - cc))
654 IF (tdiff > 1.0e-12_dp)
THEN
655 IF (para_env%is_source()) &
656 WRITE (iw,
'(T2,A,A,A)') adjustr(method),
" FWFFT ", &
657 " Input array is changed in out-of-place FFT !"
659 IF (para_env%is_source()) &
660 WRITE (iw,
'(T2,A,A,A)') adjustr(method),
" FWFFT ", &
661 " Input array is not changed in out-of-place FFT !"
665 tdiff = maxval(abs(ca - cc))
666 IF (tdiff > 1.0e-12_dp)
THEN
667 IF (para_env%is_source()) &
668 WRITE (iw,
'(T2,A,A,A)') adjustr(method),
" BWFFT ", &
669 " Input array is changed in out-of-place FFT !"
671 IF (para_env%is_source()) &
672 WRITE (iw,
'(T2,A,A,A)') adjustr(method),
" BWFFT ", &
673 " Input array is not changed in out-of-place FFT !"
675 IF (para_env%is_source())
WRITE (iw, *)
685 END SUBROUTINE fft_test
697 SUBROUTINE rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
704 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rs_pw_transfer_test'
706 INTEGER :: halo_size, handle, i_loop, n_loop, ns_max
707 INTEGER,
DIMENSION(3) :: no, np
708 INTEGER,
DIMENSION(:),
POINTER :: i_vals
710 REAL(kind=
dp) :: tend, tstart
719 CALL timeset(routinen, handle)
722 CALL init_fft(globenv%default_fft_library, alltoall=.false., fftsg_sizes=.true., &
723 pool_limit=globenv%fft_pool_scratch_limit, &
724 wisdom_file=globenv%fftw_wisdom_file_name, &
725 plan_style=globenv%fftw_plan_type)
730 box%hmat = reshape((/20.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 20.0_dp, 0.0_dp, &
731 0.0_dp, 0.0_dp, 20.0_dp/), (/3, 3/))
747 ns_max = 2*halo_size + 1
748 CALL init_input_type(input_settings, ns_max, rs_grid_section, 1, (/-1, -1, -1/))
758 CALL random_number(rs_grid%r)
764 IF (para_env%is_source())
THEN
765 WRITE (iw,
'(T2,A)')
""
766 WRITE (iw,
'(T2,A)')
"Timing rs_pw_transfer routine"
767 WRITE (iw,
'(T2,A)')
""
768 WRITE (iw,
'(T2,A)')
"iteration time[s]"
770 DO i_loop = 1, n_loop
780 IF (para_env%is_source())
THEN
781 WRITE (iw,
'(T2,I9,1X,F12.6)') i_loop, tend - tstart
791 CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
793 CALL timestop(handle)
795 END SUBROUTINE rs_pw_transfer_test
808 SUBROUTINE pw_fft_test(para_env, iw, globenv, pw_transfer_section)
815 REAL(kind=
dp),
PARAMETER :: toler = 1.e-11_dp
817 INTEGER :: blocked_id, grid_span, i_layout, i_rep, &
818 ig, ip, itmp, n_loop, n_rep, nn, p, q
819 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: layouts
820 INTEGER,
DIMENSION(2) :: distribution_layout
821 INTEGER,
DIMENSION(3) :: no, np
822 INTEGER,
DIMENSION(:),
POINTER :: i_vals
823 LOGICAL :: debug, is_fullspace, odd, &
824 pw_grid_layout_all, spherical
825 REAL(kind=
dp) :: em, et, flops, gsq, perf, t, t_max, &
827 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: t_end, t_start
835 CALL init_fft(globenv%default_fft_library, alltoall=.false., fftsg_sizes=.true., &
836 pool_limit=globenv%fft_pool_scratch_limit, &
837 wisdom_file=globenv%fftw_wisdom_file_name, &
838 plan_style=globenv%fftw_plan_type)
843 box%hmat = reshape((/10.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 8.0_dp, 0.0_dp, &
844 0.0_dp, 0.0_dp, 7.0_dp/), (/3, 3/))
852 ALLOCATE (t_start(n_loop))
853 ALLOCATE (t_end(n_loop))
859 CALL section_vals_val_get(pw_transfer_section,
"PW_GRID_BLOCKED", i_rep_section=i_rep, i_val=blocked_id)
863 l_val=pw_grid_layout_all)
866 IF (pw_grid_layout_all)
THEN
870 DO p = 2, para_env%num_pe
871 q = para_env%num_pe/p
872 IF (p*q == para_env%num_pe)
THEN
877 ALLOCATE (layouts(2, itmp))
879 DO p = 2, para_env%num_pe
880 q = para_env%num_pe/p
881 IF (p*q == para_env%num_pe)
THEN
883 layouts(:, itmp) = (/p, q/)
887 CALL section_vals_val_get(pw_transfer_section,
"PW_GRID_LAYOUT", i_rep_section=i_rep, i_vals=i_vals)
888 ALLOCATE (layouts(2, 1))
889 layouts(:, 1) = i_vals
892 DO i_layout = 1,
SIZE(layouts, 2)
894 distribution_layout = layouts(:, i_layout)
902 is_fullspace = .false.
905 is_fullspace = .true.
908 is_fullspace = .false.
916 ELSE IF (is_fullspace)
THEN
927 CALL pw_grid_create(grid, para_env, box%hmat, grid_span=grid_span, odd=odd, spherical=spherical, &
928 blocked=blocked_id, npts=np, fft_usage=.true., &
929 rs_dims=distribution_layout, iounit=iw)
947 ca%array(ig) = exp(-gsq)
950 flops = product(no)*30.0_dp*log(real(maxval(no), kind=
dp))
963 perf = real(n_loop, kind=
dp)*2.0_dp*flops*1.e-6_dp/t
968 em = maxval(abs(ca%array(:) - cc%array(:)))
969 CALL para_env%max(em)
970 et = sum(abs(ca%array(:) - cc%array(:)))
971 CALL para_env%sum(et)
972 t_min = minval(t_end - t_start)
973 t_max = maxval(t_end - t_start)
975 IF (para_env%is_source())
THEN
977 WRITE (iw,
'(A,T67,E14.6)')
" Parallel FFT Tests: Maximal Error ", em
978 WRITE (iw,
'(A,T67,E14.6)')
" Parallel FFT Tests: Total Error ", et
979 WRITE (iw,
'(A,T67,F14.0)') &
980 " Parallel FFT Tests: Performance [Mflops] ", perf
981 WRITE (iw,
'(A,T67,F14.6)')
" Best time : ", t_min
982 WRITE (iw,
'(A,T67,F14.6)')
" Worst time: ", t_max
987 IF (em > toler .OR. et > toler)
THEN
988 cpwarn(
"The FFT results are not accurate ... starting debug pw_transfer")
1002 DEALLOCATE (layouts)
1003 DEALLOCATE (t_start)
1010 CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
1012 END SUBROUTINE pw_fft_test
1023 SUBROUTINE eigensolver_test(para_env, iw, eigensolver_section)
1029 INTEGER :: diag_method, i, i_loop, i_rep, &
1030 init_method, j, n, n_loop, n_rep, &
1032 REAL(kind=
dp) :: t1, t2
1033 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: eigenvalues
1034 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: buffer
1037 TYPE(
cp_fm_type) :: eigenvectors, matrix, work
1041 WRITE (unit=iw, fmt=
"(/,/,T2,A,/)")
"EIGENSOLVER TEST"
1056 CALL section_vals_val_get(eigensolver_section,
"DIAG_METHOD", i_rep_section=i_rep, i_val=diag_method)
1057 CALL section_vals_val_get(eigensolver_section,
"INIT_METHOD", i_rep_section=i_rep, i_val=init_method)
1061 IF (neig < 0) neig = n
1066 WRITE (iw, *)
"Matrix size", n
1067 WRITE (iw, *)
"Number of eigenvalues", neig
1068 WRITE (iw, *)
"Timing loops", n_loop
1069 SELECT CASE (diag_method)
1071 WRITE (iw, *)
"Diag using syevd"
1073 WRITE (iw, *)
"Diag using syevx"
1078 SELECT CASE (init_method)
1080 WRITE (iw, *)
"using random matrix"
1082 WRITE (iw, *)
"reading from file"
1091 para_env=para_env, &
1092 context=blacs_env, &
1098 matrix_struct=fmstruct, &
1103 matrix_struct=fmstruct, &
1104 name=
"EIGENVECTORS")
1108 matrix_struct=fmstruct, &
1112 ALLOCATE (eigenvalues(n))
1113 eigenvalues = 0.0_dp
1114 ALLOCATE (buffer(1, n))
1117 IF (para_env%is_source())
THEN
1118 SELECT CASE (init_method)
1121 name=
"rng_stream", &
1123 extended_precision=.true.)
1126 file_action=
"READ", &
1127 file_form=
"FORMATTED", &
1128 file_status=
"OLD", &
1129 unit_number=unit_number)
1134 IF (para_env%is_source())
THEN
1135 SELECT CASE (init_method)
1138 buffer(1, j) = rng_stream%next() - 0.5_dp
1143 READ (unit=unit_number, fmt=*) buffer(1, 1:n)
1146 CALL para_env%bcast(buffer)
1147 SELECT CASE (init_method)
1150 new_values=buffer, &
1159 new_values=buffer, &
1169 new_values=buffer, &
1182 IF (para_env%is_source())
THEN
1183 SELECT CASE (init_method)
1189 DO i_loop = 1, n_loop
1190 eigenvalues = 0.0_dp
1197 SELECT CASE (diag_method)
1200 eigenvectors=eigenvectors, &
1201 eigenvalues=eigenvalues)
1204 eigenvectors=eigenvectors, &
1205 eigenvalues=eigenvalues, &
1210 IF (iw > 0)
WRITE (iw, *)
"Timing for loop ", i_loop,
" : ", t2 - t1
1214 WRITE (iw, *)
"Eigenvalues: "
1215 WRITE (unit=iw, fmt=
"(T3,5F14.6)") eigenvalues(1:neig)
1216 WRITE (unit=iw, fmt=
"(T3,A4,F16.6)")
"Sum:", sum(eigenvalues(1:neig))
1221 DEALLOCATE (eigenvalues)
1231 END SUBROUTINE eigensolver_test
1239 SUBROUTINE cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
1245 CHARACTER(LEN=1) :: transa, transb
1246 INTEGER :: i_loop, i_rep, k, m, n, n_loop, n_rep, ncol_block, ncol_block_actual, &
1247 ncol_global, np, nrow_block, nrow_block_actual, nrow_global
1248 INTEGER,
DIMENSION(:),
POINTER :: grid_2d
1249 LOGICAL :: force_blocksize, row_major, transa_p, &
1251 REAL(kind=
dp) :: t1, t2, t3, t4
1254 TYPE(
cp_fm_type) :: matrix_a, matrix_b, matrix_c
1260 CALL section_vals_val_get(cp_fm_gemm_test_section,
"N_loop", i_rep_section=i_rep, i_val=n_loop)
1266 CALL section_vals_val_get(cp_fm_gemm_test_section,
"transa", i_rep_section=i_rep, l_val=transa_p)
1267 CALL section_vals_val_get(cp_fm_gemm_test_section,
"transb", i_rep_section=i_rep, l_val=transb_p)
1268 CALL section_vals_val_get(cp_fm_gemm_test_section,
"nrow_block", i_rep_section=i_rep, i_val=nrow_block)
1269 CALL section_vals_val_get(cp_fm_gemm_test_section,
"ncol_block", i_rep_section=i_rep, i_val=ncol_block)
1270 CALL section_vals_val_get(cp_fm_gemm_test_section,
"ROW_MAJOR", i_rep_section=i_rep, l_val=row_major)
1271 CALL section_vals_val_get(cp_fm_gemm_test_section,
"GRID_2D", i_rep_section=i_rep, i_vals=grid_2d)
1272 CALL section_vals_val_get(cp_fm_gemm_test_section,
"FORCE_BLOCKSIZE", i_rep_section=i_rep, l_val=force_blocksize)
1275 IF (transa_p) transa =
"T"
1276 IF (transb_p) transb =
"T"
1279 WRITE (iw,
'(T2,A)')
"----------- TESTING PARALLEL MATRIX MULTIPLY -------------"
1280 WRITE (iw,
'(T2,A)',
advance=
"NO")
"C = "
1282 WRITE (iw,
'(A)',
advance=
"NO")
"TRANSPOSE(A) x"
1284 WRITE (iw,
'(A)',
advance=
"NO")
"A x "
1287 WRITE (iw,
'(A)')
"TRANSPOSE(B) "
1289 WRITE (iw,
'(A)')
"B "
1291 WRITE (iw,
'(T2,A,T50,I5,A,I5)')
'requested block size', nrow_block,
' by ', ncol_block
1292 WRITE (iw,
'(T2,A,T50,I5)')
'number of repetitions of cp_fm_gemm ', n_loop
1293 WRITE (iw,
'(T2,A,T50,L5)')
'Row Major', row_major
1294 WRITE (iw,
'(T2,A,T50,2I7)')
'GRID_2D ', grid_2d
1295 WRITE (iw,
'(T2,A,T50,L5)')
'Force blocksize ', force_blocksize
1299 WRITE (iw,
'(T2,A,T50,I5)')
'PILAENV blocksize', np
1305 para_env=para_env, &
1306 row_major=row_major, &
1309 NULLIFY (fmstruct_a)
1311 nrow_global = m; ncol_global = k
1313 nrow_global = k; ncol_global = m
1316 nrow_global=nrow_global, ncol_global=ncol_global, &
1317 nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1318 CALL cp_fm_struct_get(fmstruct_a, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1319 IF (iw > 0)
WRITE (iw,
'(T2,A,I9,A,I9,A,I5,A,I5)')
'matrix A ', nrow_global,
" by ", ncol_global, &
1320 ' using blocks of ', nrow_block_actual,
' by ', ncol_block_actual
1323 nrow_global = n; ncol_global = m
1325 nrow_global = m; ncol_global = n
1327 NULLIFY (fmstruct_b)
1329 nrow_global=nrow_global, ncol_global=ncol_global, &
1330 nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1331 CALL cp_fm_struct_get(fmstruct_b, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1332 IF (iw > 0)
WRITE (iw,
'(T2,A,I9,A,I9,A,I5,A,I5)')
'matrix B ', nrow_global,
" by ", ncol_global, &
1333 ' using blocks of ', nrow_block_actual,
' by ', ncol_block_actual
1335 NULLIFY (fmstruct_c)
1339 nrow_global=nrow_global, ncol_global=ncol_global, &
1340 nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1341 CALL cp_fm_struct_get(fmstruct_c, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1342 IF (iw > 0)
WRITE (iw,
'(T2,A,I9,A,I9,A,I5,A,I5)')
'matrix C ', nrow_global,
" by ", ncol_global, &
1343 ' using blocks of ', nrow_block_actual,
' by ', ncol_block_actual
1345 CALL cp_fm_create(matrix=matrix_a, matrix_struct=fmstruct_a, name=
"MATRIX A")
1346 CALL cp_fm_create(matrix=matrix_b, matrix_struct=fmstruct_b, name=
"MATRIX B")
1347 CALL cp_fm_create(matrix=matrix_c, matrix_struct=fmstruct_c, name=
"MATRIX C")
1349 CALL random_number(matrix_a%local_data)
1350 CALL random_number(matrix_b%local_data)
1351 CALL random_number(matrix_c%local_data)
1356 DO i_loop = 1, n_loop
1358 CALL parallel_gemm(transa, transb, k, n, m, 1.0_dp, matrix_a, matrix_b, 0.0_dp, matrix_c)
1361 WRITE (iw,
'(T2,A,T50,F12.6)')
"cp_fm_gemm timing: ", (t4 - t3)
1368 WRITE (iw,
'(T2,A,T50,F12.6)')
"average cp_fm_gemm timing: ", (t2 - t1)/n_loop
1370 WRITE (iw,
'(T2,A,T50,F12.6)')
"cp_fm_gemm Gflops per MPI task: ", &
1371 2*real(m, kind=
dp)*real(n, kind=
dp)*real(k, kind=
dp)*n_loop/max(0.001_dp, t2 - t1)/1.0e9_dp/para_env%num_pe
1385 END SUBROUTINE cp_fm_gemm_test
1393 SUBROUTINE cp_dbcsr_tests(para_env, iw, input_section)
1399 CHARACTER,
DIMENSION(3) :: types
1400 INTEGER :: data_type, i_rep, k, m, n, n_loop, &
1402 INTEGER,
DIMENSION(:),
POINTER :: bs_k, bs_m, bs_n, nproc
1403 LOGICAL :: always_checksum, retain_sparsity, &
1405 REAL(kind=
dp) :: alpha, beta, filter_eps, s_a, s_b, s_c
1409 NULLIFY (bs_m, bs_n, bs_k)
1411 CALL dbcsr_reset_randmat_seed()
1430 CALL section_vals_val_get(input_section,
"keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
1445 i_rep_section=i_rep, r_val=filter_eps)
1446 CALL section_vals_val_get(input_section,
"ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
1448 CALL dbcsr_run_tests(para_env%get_handle(), iw, nproc, &
1450 (/transa_p, transb_p/), &
1452 (/s_a, s_b, s_c/), &
1454 data_type=data_type, &
1455 test_type=test_type, &
1456 n_loops=n_loop, eps=filter_eps, retain_sparsity=retain_sparsity, &
1457 always_checksum=always_checksum)
1459 END SUBROUTINE cp_dbcsr_tests
1467 SUBROUTINE run_dbm_tests(para_env, iw, input_section)
1473 INTEGER :: i_rep, k, m, n, n_loop, n_rep
1474 INTEGER,
DIMENSION(:),
POINTER :: bs_k, bs_m, bs_n
1475 LOGICAL :: always_checksum, retain_sparsity, &
1477 REAL(kind=
dp) :: alpha, beta, filter_eps, s_a, s_b, s_c
1481 NULLIFY (bs_m, bs_n, bs_k)
1483 CALL dbcsr_reset_randmat_seed()
1494 CALL section_vals_val_get(input_section,
"keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
1501 CALL section_vals_val_get(input_section,
"ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
1505 matrix_sizes=(/m, n, k/), &
1506 trs=(/transa_p, transb_p/), &
1510 sparsities=(/s_a, s_b, s_c/), &
1515 retain_sparsity=retain_sparsity, &
1516 always_checksum=always_checksum)
1518 END SUBROUTINE run_dbm_tests
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
Test of Electron Repulsion Routines (ERI)
real(kind=dp), parameter threshold
subroutine, public eri_test(iw)
...
Handles all functions related to the CELL.
subroutine, public init_cell(cell, hmat, periodic)
Initialise/readjust a simulation cell after hmat has been changed.
subroutine, public cell_create(cell, hmat, periodic, tag)
allocates and initializes a cell
Handles all functions related to the CELL.
subroutine, public cell_release(cell)
releases the given cell (see doc/ReferenceCounting.html)
Test of Clebsch-Gordon Coefficients.
subroutine, public clebsch_gordon_test()
...
methods related to the blacs parallel environment
subroutine, public cp_blacs_env_release(blacs_env)
releases the given blacs_env
subroutine, public cp_blacs_env_create(blacs_env, para_env, blacs_grid_layout, blacs_repeatable, row_major, grid_2d)
allocates and initializes a type that represent a blacs context
Interface to Minimax-Ewald method for periodic ERI's to be used in CP2K.
subroutine, public cp_eri_mme_perf_acc_test(para_env, iw, eri_mme_test_section)
...
Utility routines to open and close files. Tracking of preconnections.
subroutine, public open_file(file_name, file_status, file_form, file_action, file_position, file_pad, unit_number, debug, skip_get_unit_number, file_access)
Opens the requested file using a free unit number.
subroutine, public close_file(unit_number, file_status, keep_preconnection)
Close an open file given by its logical unit number. Optionally, keep the file and unit preconnected.
Basic linear algebra operations for full matrices.
subroutine, public cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
subroutine, public cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
Computes all eigenvalues and vectors of a real symmetric matrix significantly faster than syevx,...
subroutine, public cp_fm_syevx(matrix, eigenvectors, eigenvalues, neig, work_syevx)
compute eigenvalues and optionally eigenvectors of a real symmetric matrix using scalapack....
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
integer function, public cp_fm_pilaenv(ictxt, prec)
...
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
various routines to log and control the output. The idea is that decisions about where to log should ...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
routines to handle the output, The idea is to remove the decision of wheter to output and what to out...
integer function, public cp_print_key_unit_nr(logger, basis_section, print_key_path, extension, middle_name, local, log_filename, ignore_should_output, file_form, file_position, file_action, file_status, do_backup, on_file, is_new_file, mpi_io, fout)
...
subroutine, public cp_print_key_finished_output(unit_nr, logger, basis_section, print_key_path, local, ignore_should_output, on_file, mpi_io)
should be called after you finish working with a unit obtained with cp_print_key_unit_nr,...
subroutine, public init_input_type(input_settings, nsmax, rs_grid_section, ilevel, higher_grid_layout)
parses an input section to assign the proper values to the input type
subroutine, public dbm_run_tests(mp_group, io_unit, matrix_sizes, trs, bs_m, bs_n, bs_k, sparsities, alpha, beta, n_loops, eps, retain_sparsity, always_checksum)
Tests the DBM library.
Define type storing the global information of a run. Keep the amount of stored data small....
Defines the basic variable types.
integer, parameter, public dp
Performance tests for basic tasks like matrix multiplies, copy, fft.
subroutine, public lib_test(root_section, para_env, globenv)
Master routine for tests.
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition of mathematical constants and functions.
complex(kind=dp), parameter, public gaussi
Interface to the message passing library MPI.
Routines to calculate the minimax coefficients in order to approximate 1/x as a sum over exponential ...
subroutine, public validate_exp_minimax(n_r, iw)
Unit test checking that numerical error of minimax approximations generated using any k15 or k53 coef...
Routines to calculate frequency and time grids (integration points and weights) for correlation metho...
subroutine, public test_least_square_ft(nr, iw)
test the singular value decomposition for the computation of integration weights for the Fourier tran...
Interface to the message passing library MPI.
subroutine, public mpi_perf_test(comm, npow, output_unit)
Tests the MPI library.
basic linear algebra operations for full matrixes
Parallel (pseudo)random number generator (RNG) for multiple streams and substreams of random numbers.
subroutine advance(self, e, c)
Advance the state by n steps, i.e. jump n steps forward, if n > 0, or backward if n < 0.
integer, parameter, public uniform
integer, parameter, public halfspace
integer, parameter, public fullspace
This module defines the grid data type and some basic operations on it.
subroutine, public pw_grid_release(pw_grid)
releases the given pw grid
subroutine, public rs_grid_print(rs, iounit)
Print information on grids to output.
subroutine, public rs_grid_create(rs, desc)
...
subroutine, public rs_grid_create_descriptor(desc, pw_grid, input_settings, border_points)
Determine the setup of real space grids - this is divided up into the creation of a descriptor and th...
subroutine, public transfer_pw2rs(rs, pw)
...
subroutine, public rs_grid_release_descriptor(rs_desc)
releases the given rs grid descriptor (see doc/ReferenceCounting.html)
subroutine, public transfer_rs2pw(rs, pw)
...
subroutine, public rs_grid_release(rs_grid)
releases the given rs grid (see doc/ReferenceCounting.html)
subroutine, public rs_grid_zero(rs)
Initialize grid to zero.
Calculates 2-center integrals for different r12 operators comparing the Solid harmonic Gaussian integ...
subroutine, public shg_integrals_perf_acc_test(iw, shg_integrals_test_section)
Unit test for performance and accuracy of the SHG integrals.
Type defining parameters related to the simulation cell.
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
keeps the information about the structure of a full matrix
type of a logger, at the moment it contains just a print level starting at which level it should be l...
contains the initially parsed file and the initial parallel environment
stores all the informations relevant to an mpi environment