98#include "./base/base_uses.f90"
105 INTEGER :: runtest(100)
106 REAL(KIND=
dp) :: max_memory
108 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'library_tests'
121 SUBROUTINE lib_test(root_section, para_env, globenv)
127 CHARACTER(LEN=*),
PARAMETER :: routinen =
'lib_test'
129 INTEGER :: handle, iw
132 TYPE(
section_vals_type),
POINTER :: cp_dbcsr_test_section, cp_fm_gemm_test_section, &
133 dbm_test_section, eigensolver_section, eri_mme_test_section, pw_transfer_section, &
134 rs_pw_transfer_section, shg_integrals_test_section
136 CALL timeset(routinen, handle)
142 WRITE (iw,
'(T2,79("*"))')
143 WRITE (iw,
'(A,T31,A,T80,A)')
' *',
' PERFORMANCE TESTS ',
'*'
144 WRITE (iw,
'(T2,79("*"))')
147 CALL test_input(root_section, para_env)
149 IF (runtest(1) /= 0)
CALL copy_test(para_env, iw)
151 IF (runtest(2) /= 0)
CALL matmul_test(para_env, test_matmul=.true., test_dgemm=.false., iw=iw)
152 IF (runtest(5) /= 0)
CALL matmul_test(para_env, test_matmul=.false., test_dgemm=.true., iw=iw)
154 IF (runtest(3) /= 0)
CALL fft_test(para_env, iw, globenv%fftw_plan_type, &
155 globenv%fftw_wisdom_file_name)
157 IF (runtest(4) /= 0)
CALL eri_test(iw)
163 IF (runtest(8) /= 0)
CALL mpi_perf_test(para_env, runtest(8), iw)
173 CALL rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
179 CALL pw_fft_test(para_env, iw, globenv, pw_transfer_section)
185 CALL cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
191 CALL eigensolver_test(para_env, iw, eigensolver_section)
210 CALL cp_dbcsr_tests(para_env, iw, cp_dbcsr_test_section)
217 CALL run_dbm_tests(para_env, iw, dbm_test_section)
222 CALL timestop(handle)
247 SUBROUTINE test_input(root_section, para_env)
271 END SUBROUTINE test_input
285 SUBROUTINE copy_test(para_env, iw)
289 INTEGER :: i, ierr, j, len, ntim, siz
290 REAL(kind=
dp) :: perf, t, tend, tstart
291 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: ca, cb
295 siz = abs(runtest(1))
296 IF (para_env%is_source())
WRITE (iw,
'(//,A,/)')
" Test of copy ( F95 ) "
299 IF (8.0_dp*real(len, kind=
dp) > max_memory*0.5_dp)
EXIT
300 ALLOCATE (ca(len), stat=ierr)
302 ALLOCATE (cb(len), stat=ierr)
305 CALL random_number(ca)
306 ntim = nint(1.e7_dp/real(len, kind=
dp))
308 ntim = min(ntim, siz*10000)
313 ca(1) = real(j, kind=
dp)
318 perf = real(ntim, kind=
dp)*real(len, kind=
dp)*1.e-6_dp/t
323 IF (para_env%is_source())
THEN
324 WRITE (iw,
'(A,i2,i10,A,T59,F14.4,A)')
" Copy test: Size = 2^", i, &
325 len/1024,
" Kwords", perf,
" Mcopy/s"
332 END SUBROUTINE copy_test
345 SUBROUTINE matmul_test(para_env, test_matmul, test_dgemm, iw)
347 LOGICAL :: test_matmul, test_dgemm
350 INTEGER :: i, ierr, j, len, ntim, siz
351 REAL(kind=
dp) :: perf, t, tend, tstart, xdum
352 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: ma, mb, mc
356 IF (test_matmul)
THEN
357 siz = abs(runtest(2))
358 IF (para_env%is_source())
WRITE (iw,
'(//,A,/)')
" Test of matmul ( F95 ) "
361 IF (8.0_dp*real(len*len, kind=
dp) > max_memory*0.3_dp)
EXIT
362 ALLOCATE (ma(len, len), stat=ierr)
364 ALLOCATE (mb(len, len), stat=ierr)
366 ALLOCATE (mc(len, len), stat=ierr)
370 CALL random_number(xdum)
372 CALL random_number(xdum)
374 ntim = nint(1.e8_dp/(2.0_dp*real(len, kind=
dp)**3))
376 ntim = min(ntim, siz*200)
379 mc(:, :) = matmul(ma, mb)
380 ma(1, 1) = real(j, kind=
dp)
384 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
385 IF (para_env%is_source())
THEN
386 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
387 " Matrix multiply test: c = a * b Size = ", len, perf,
" Mflop/s"
391 mc(:, :) = mc + matmul(ma, mb)
392 ma(1, 1) = real(j, kind=
dp)
397 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
402 IF (para_env%is_source())
THEN
403 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
404 " Matrix multiply test: a = a * b Size = ", len, perf,
" Mflop/s"
409 mc(:, :) = mc + matmul(ma, transpose(mb))
410 ma(1, 1) = real(j, kind=
dp)
415 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
420 IF (para_env%is_source())
THEN
421 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
422 " Matrix multiply test: c = a * b(T) Size = ", len, perf,
" Mflop/s"
427 mc(:, :) = mc + matmul(transpose(ma), mb)
428 ma(1, 1) = real(j, kind=
dp)
433 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
438 IF (para_env%is_source())
THEN
439 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
440 " Matrix multiply test: c = a(T) * b Size = ", len, perf,
" Mflop/s"
451 siz = abs(runtest(5))
452 IF (para_env%is_source())
WRITE (iw,
'(//,A,/)')
" Test of matmul ( BLAS ) "
455 IF (8.0_dp*real(len*len, kind=
dp) > max_memory*0.3_dp)
EXIT
456 ALLOCATE (ma(len, len), stat=ierr)
458 ALLOCATE (mb(len, len), stat=ierr)
460 ALLOCATE (mc(len, len), stat=ierr)
464 CALL random_number(xdum)
466 CALL random_number(xdum)
468 ntim = nint(1.e8_dp/(2.0_dp*real(len, kind=
dp)**3))
470 ntim = min(ntim, 1000)
474 CALL dgemm(
"N",
"N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
479 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
484 IF (para_env%is_source())
THEN
485 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
486 " Matrix multiply test: c = a * b Size = ", len, perf,
" Mflop/s"
491 CALL dgemm(
"N",
"N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
496 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
501 IF (para_env%is_source())
THEN
502 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
503 " Matrix multiply test: a = a * b Size = ", len, perf,
" Mflop/s"
508 CALL dgemm(
"N",
"T", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
513 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
518 IF (para_env%is_source())
THEN
519 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
520 " Matrix multiply test: c = a * b(T) Size = ", len, perf,
" Mflop/s"
525 CALL dgemm(
"T",
"N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
530 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
535 IF (para_env%is_source())
THEN
536 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
537 " Matrix multiply test: c = a(T) * b Size = ", len, perf,
" Mflop/s"
548 END SUBROUTINE matmul_test
560 SUBROUTINE fft_test(para_env, iw, fftw_plan_type, wisdom_file)
563 INTEGER :: iw, fftw_plan_type
564 CHARACTER(LEN=*),
INTENT(IN) :: wisdom_file
566 INTEGER,
PARAMETER :: ndate(3) = (/12, 48, 96/)
568 INTEGER :: iall, ierr, it, j, len, n(3), ntim, &
569 radix_in, radix_out, siz, stat
570 COMPLEX(KIND=dp),
DIMENSION(4, 4, 4) :: zz
571 COMPLEX(KIND=dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: ca, cb, cc
572 CHARACTER(LEN=7) :: method
573 REAL(kind=
dp) :: flops, perf, scale, t, tdiff, tend, &
575 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: ra
579 IF (para_env%is_source())
WRITE (iw,
'(//,A,/)')
" Test of 3D-FFT "
580 siz = abs(runtest(3))
587 CALL init_fft(
"FFTSG", alltoall=.false., fftsg_sizes=.true., wisdom_file=wisdom_file, &
588 pool_limit=10, plan_style=fftw_plan_type)
593 CALL init_fft(
"FFTW3", alltoall=.false., fftsg_sizes=.true., wisdom_file=wisdom_file, &
594 pool_limit=10, plan_style=fftw_plan_type)
606 IF (16.0_dp*real(len*len*len, kind=
dp) > max_memory*0.5_dp)
EXIT
607 ALLOCATE (ra(len, len, len), stat=ierr)
608 ALLOCATE (ca(len, len, len), stat=ierr)
609 CALL random_number(ra)
611 CALL random_number(ra)
612 ca(:, :, :) = ca + cmplx(0.0_dp, 1.0_dp, kind=
dp)*ra
613 flops = real(len**3, kind=
dp)*15.0_dp*log(real(len, kind=
dp))
614 ntim = nint(siz*1.e7_dp/flops)
616 ntim = min(ntim, 200)
617 scale = 1.0_dp/real(len**3, kind=
dp)
626 perf = real(ntim, kind=
dp)*2.0_dp*flops*1.e-6_dp/t
631 IF (para_env%is_source())
THEN
632 WRITE (iw,
'(T2,A,A,i6,T59,F14.4,A)') &
633 adjustr(method),
" test (in-place) Size = ", len, perf,
" Mflop/s"
638 IF (para_env%is_source())
WRITE (iw, *)
642 ALLOCATE (ra(len, len, len))
643 ALLOCATE (ca(len, len, len))
644 ALLOCATE (cb(len, len, len))
645 ALLOCATE (cc(len, len, len))
646 CALL random_number(ra)
648 CALL random_number(ra)
649 ca(:, :, :) = ca + cmplx(0.0_dp, 1.0_dp, kind=
dp)*ra
652 tdiff = maxval(abs(ca - cc))
653 IF (tdiff > 1.0e-12_dp)
THEN
654 IF (para_env%is_source()) &
655 WRITE (iw,
'(T2,A,A,A)') adjustr(method),
" FWFFT ", &
656 " Input array is changed in out-of-place FFT !"
658 IF (para_env%is_source()) &
659 WRITE (iw,
'(T2,A,A,A)') adjustr(method),
" FWFFT ", &
660 " Input array is not changed in out-of-place FFT !"
664 tdiff = maxval(abs(ca - cc))
665 IF (tdiff > 1.0e-12_dp)
THEN
666 IF (para_env%is_source()) &
667 WRITE (iw,
'(T2,A,A,A)') adjustr(method),
" BWFFT ", &
668 " Input array is changed in out-of-place FFT !"
670 IF (para_env%is_source()) &
671 WRITE (iw,
'(T2,A,A,A)') adjustr(method),
" BWFFT ", &
672 " Input array is not changed in out-of-place FFT !"
674 IF (para_env%is_source())
WRITE (iw, *)
684 END SUBROUTINE fft_test
696 SUBROUTINE rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
703 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rs_pw_transfer_test'
705 INTEGER :: halo_size, handle, i_loop, n_loop, ns_max
706 INTEGER,
DIMENSION(3) :: no, np
707 INTEGER,
DIMENSION(:),
POINTER :: i_vals
709 REAL(kind=
dp) :: tend, tstart
718 CALL timeset(routinen, handle)
721 CALL init_fft(globenv%default_fft_library, alltoall=.false., fftsg_sizes=.true., &
722 pool_limit=globenv%fft_pool_scratch_limit, &
723 wisdom_file=globenv%fftw_wisdom_file_name, &
724 plan_style=globenv%fftw_plan_type)
729 box%hmat = reshape((/20.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 20.0_dp, 0.0_dp, &
730 0.0_dp, 0.0_dp, 20.0_dp/), (/3, 3/))
746 ns_max = 2*halo_size + 1
747 CALL init_input_type(input_settings, ns_max, rs_grid_section, 1, (/-1, -1, -1/))
757 CALL random_number(rs_grid%r)
763 IF (para_env%is_source())
THEN
764 WRITE (iw,
'(T2,A)')
""
765 WRITE (iw,
'(T2,A)')
"Timing rs_pw_transfer routine"
766 WRITE (iw,
'(T2,A)')
""
767 WRITE (iw,
'(T2,A)')
"iteration time[s]"
769 DO i_loop = 1, n_loop
779 IF (para_env%is_source())
THEN
780 WRITE (iw,
'(T2,I9,1X,F12.6)') i_loop, tend - tstart
790 CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
792 CALL timestop(handle)
794 END SUBROUTINE rs_pw_transfer_test
807 SUBROUTINE pw_fft_test(para_env, iw, globenv, pw_transfer_section)
814 REAL(kind=
dp),
PARAMETER :: toler = 1.e-11_dp
816 INTEGER :: blocked_id, grid_span, i_layout, i_rep, &
817 ig, ip, itmp, n_loop, n_rep, nn, p, q
818 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: layouts
819 INTEGER,
DIMENSION(2) :: distribution_layout
820 INTEGER,
DIMENSION(3) :: no, np
821 INTEGER,
DIMENSION(:),
POINTER :: i_vals
822 LOGICAL :: debug, is_fullspace, odd, &
823 pw_grid_layout_all, spherical
824 REAL(kind=
dp) :: em, et, flops, gsq, perf, t, t_max, &
826 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: t_end, t_start
834 CALL init_fft(globenv%default_fft_library, alltoall=.false., fftsg_sizes=.true., &
835 pool_limit=globenv%fft_pool_scratch_limit, &
836 wisdom_file=globenv%fftw_wisdom_file_name, &
837 plan_style=globenv%fftw_plan_type)
842 box%hmat = reshape((/10.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 8.0_dp, 0.0_dp, &
843 0.0_dp, 0.0_dp, 7.0_dp/), (/3, 3/))
851 ALLOCATE (t_start(n_loop))
852 ALLOCATE (t_end(n_loop))
858 CALL section_vals_val_get(pw_transfer_section,
"PW_GRID_BLOCKED", i_rep_section=i_rep, i_val=blocked_id)
862 l_val=pw_grid_layout_all)
865 IF (pw_grid_layout_all)
THEN
869 DO p = 2, para_env%num_pe
870 q = para_env%num_pe/p
871 IF (p*q == para_env%num_pe)
THEN
876 ALLOCATE (layouts(2, itmp))
878 DO p = 2, para_env%num_pe
879 q = para_env%num_pe/p
880 IF (p*q == para_env%num_pe)
THEN
882 layouts(:, itmp) = (/p, q/)
886 CALL section_vals_val_get(pw_transfer_section,
"PW_GRID_LAYOUT", i_rep_section=i_rep, i_vals=i_vals)
887 ALLOCATE (layouts(2, 1))
888 layouts(:, 1) = i_vals
891 DO i_layout = 1,
SIZE(layouts, 2)
893 distribution_layout = layouts(:, i_layout)
901 is_fullspace = .false.
904 is_fullspace = .true.
907 is_fullspace = .false.
915 ELSE IF (is_fullspace)
THEN
926 CALL pw_grid_create(grid, para_env, box%hmat, grid_span=grid_span, odd=odd, spherical=spherical, &
927 blocked=blocked_id, npts=np, fft_usage=.true., &
928 rs_dims=distribution_layout, iounit=iw)
946 ca%array(ig) = exp(-gsq)
949 flops = product(no)*30.0_dp*log(real(maxval(no), kind=
dp))
962 perf = real(n_loop, kind=
dp)*2.0_dp*flops*1.e-6_dp/t
967 em = maxval(abs(ca%array(:) - cc%array(:)))
968 CALL para_env%max(em)
969 et = sum(abs(ca%array(:) - cc%array(:)))
970 CALL para_env%sum(et)
971 t_min = minval(t_end - t_start)
972 t_max = maxval(t_end - t_start)
974 IF (para_env%is_source())
THEN
976 WRITE (iw,
'(A,T67,E14.6)')
" Parallel FFT Tests: Maximal Error ", em
977 WRITE (iw,
'(A,T67,E14.6)')
" Parallel FFT Tests: Total Error ", et
978 WRITE (iw,
'(A,T67,F14.0)') &
979 " Parallel FFT Tests: Performance [Mflops] ", perf
980 WRITE (iw,
'(A,T67,F14.6)')
" Best time : ", t_min
981 WRITE (iw,
'(A,T67,F14.6)')
" Worst time: ", t_max
986 IF (em > toler .OR. et > toler)
THEN
987 cpwarn(
"The FFT results are not accurate ... starting debug pw_transfer")
1001 DEALLOCATE (layouts)
1002 DEALLOCATE (t_start)
1009 CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
1011 END SUBROUTINE pw_fft_test
1022 SUBROUTINE eigensolver_test(para_env, iw, eigensolver_section)
1028 INTEGER :: diag_method, i, i_loop, i_rep, &
1029 init_method, j, n, n_loop, n_rep, &
1031 REAL(kind=
dp) :: t1, t2
1032 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: eigenvalues
1033 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: buffer
1036 TYPE(
cp_fm_type) :: eigenvectors, matrix, work
1040 WRITE (unit=iw, fmt=
"(/,/,T2,A,/)")
"EIGENSOLVER TEST"
1055 CALL section_vals_val_get(eigensolver_section,
"DIAG_METHOD", i_rep_section=i_rep, i_val=diag_method)
1056 CALL section_vals_val_get(eigensolver_section,
"INIT_METHOD", i_rep_section=i_rep, i_val=init_method)
1060 IF (neig < 0) neig = n
1065 WRITE (iw, *)
"Matrix size", n
1066 WRITE (iw, *)
"Number of eigenvalues", neig
1067 WRITE (iw, *)
"Timing loops", n_loop
1068 SELECT CASE (diag_method)
1070 WRITE (iw, *)
"Diag using syevd"
1072 WRITE (iw, *)
"Diag using syevx"
1077 SELECT CASE (init_method)
1079 WRITE (iw, *)
"using random matrix"
1081 WRITE (iw, *)
"reading from file"
1090 para_env=para_env, &
1091 context=blacs_env, &
1097 matrix_struct=fmstruct, &
1102 matrix_struct=fmstruct, &
1103 name=
"EIGENVECTORS")
1107 matrix_struct=fmstruct, &
1111 ALLOCATE (eigenvalues(n))
1112 eigenvalues = 0.0_dp
1113 ALLOCATE (buffer(1, n))
1116 IF (para_env%is_source())
THEN
1117 SELECT CASE (init_method)
1120 name=
"rng_stream", &
1122 extended_precision=.true.)
1125 file_action=
"READ", &
1126 file_form=
"FORMATTED", &
1127 file_status=
"OLD", &
1128 unit_number=unit_number)
1133 IF (para_env%is_source())
THEN
1134 SELECT CASE (init_method)
1137 buffer(1, j) = rng_stream%next() - 0.5_dp
1142 READ (unit=unit_number, fmt=*) buffer(1, 1:n)
1145 CALL para_env%bcast(buffer)
1146 SELECT CASE (init_method)
1149 new_values=buffer, &
1158 new_values=buffer, &
1168 new_values=buffer, &
1181 IF (para_env%is_source())
THEN
1182 SELECT CASE (init_method)
1188 DO i_loop = 1, n_loop
1189 eigenvalues = 0.0_dp
1196 SELECT CASE (diag_method)
1199 eigenvectors=eigenvectors, &
1200 eigenvalues=eigenvalues)
1203 eigenvectors=eigenvectors, &
1204 eigenvalues=eigenvalues, &
1209 IF (iw > 0)
WRITE (iw, *)
"Timing for loop ", i_loop,
" : ", t2 - t1
1213 WRITE (iw, *)
"Eigenvalues: "
1214 WRITE (unit=iw, fmt=
"(T3,5F14.6)") eigenvalues(1:neig)
1215 WRITE (unit=iw, fmt=
"(T3,A4,F16.6)")
"Sum:", sum(eigenvalues(1:neig))
1220 DEALLOCATE (eigenvalues)
1230 END SUBROUTINE eigensolver_test
1238 SUBROUTINE cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
1244 CHARACTER(LEN=1) :: transa, transb
1245 INTEGER :: i_loop, i_rep, k, m, n, n_loop, n_rep, ncol_block, ncol_block_actual, &
1246 ncol_global, np, nrow_block, nrow_block_actual, nrow_global
1247 INTEGER,
DIMENSION(:),
POINTER :: grid_2d
1248 LOGICAL :: force_blocksize, row_major, transa_p, &
1250 REAL(kind=
dp) :: t1, t2, t3, t4
1253 TYPE(
cp_fm_type) :: matrix_a, matrix_b, matrix_c
1259 CALL section_vals_val_get(cp_fm_gemm_test_section,
"N_loop", i_rep_section=i_rep, i_val=n_loop)
1265 CALL section_vals_val_get(cp_fm_gemm_test_section,
"transa", i_rep_section=i_rep, l_val=transa_p)
1266 CALL section_vals_val_get(cp_fm_gemm_test_section,
"transb", i_rep_section=i_rep, l_val=transb_p)
1267 CALL section_vals_val_get(cp_fm_gemm_test_section,
"nrow_block", i_rep_section=i_rep, i_val=nrow_block)
1268 CALL section_vals_val_get(cp_fm_gemm_test_section,
"ncol_block", i_rep_section=i_rep, i_val=ncol_block)
1269 CALL section_vals_val_get(cp_fm_gemm_test_section,
"ROW_MAJOR", i_rep_section=i_rep, l_val=row_major)
1270 CALL section_vals_val_get(cp_fm_gemm_test_section,
"GRID_2D", i_rep_section=i_rep, i_vals=grid_2d)
1271 CALL section_vals_val_get(cp_fm_gemm_test_section,
"FORCE_BLOCKSIZE", i_rep_section=i_rep, l_val=force_blocksize)
1274 IF (transa_p) transa =
"T"
1275 IF (transb_p) transb =
"T"
1278 WRITE (iw,
'(T2,A)')
"----------- TESTING PARALLEL MATRIX MULTIPLY -------------"
1279 WRITE (iw,
'(T2,A)',
advance=
"NO")
"C = "
1281 WRITE (iw,
'(A)',
advance=
"NO")
"TRANSPOSE(A) x"
1283 WRITE (iw,
'(A)',
advance=
"NO")
"A x "
1286 WRITE (iw,
'(A)')
"TRANSPOSE(B) "
1288 WRITE (iw,
'(A)')
"B "
1290 WRITE (iw,
'(T2,A,T50,I5,A,I5)')
'requested block size', nrow_block,
' by ', ncol_block
1291 WRITE (iw,
'(T2,A,T50,I5)')
'number of repetitions of cp_fm_gemm ', n_loop
1292 WRITE (iw,
'(T2,A,T50,L5)')
'Row Major', row_major
1293 WRITE (iw,
'(T2,A,T50,2I7)')
'GRID_2D ', grid_2d
1294 WRITE (iw,
'(T2,A,T50,L5)')
'Force blocksize ', force_blocksize
1298 WRITE (iw,
'(T2,A,T50,I5)')
'PILAENV blocksize', np
1304 para_env=para_env, &
1305 row_major=row_major, &
1308 NULLIFY (fmstruct_a)
1310 nrow_global = m; ncol_global = k
1312 nrow_global = k; ncol_global = m
1315 nrow_global=nrow_global, ncol_global=ncol_global, &
1316 nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1317 CALL cp_fm_struct_get(fmstruct_a, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1318 IF (iw > 0)
WRITE (iw,
'(T2,A,I9,A,I9,A,I5,A,I5)')
'matrix A ', nrow_global,
" by ", ncol_global, &
1319 ' using blocks of ', nrow_block_actual,
' by ', ncol_block_actual
1322 nrow_global = n; ncol_global = m
1324 nrow_global = m; ncol_global = n
1326 NULLIFY (fmstruct_b)
1328 nrow_global=nrow_global, ncol_global=ncol_global, &
1329 nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1330 CALL cp_fm_struct_get(fmstruct_b, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1331 IF (iw > 0)
WRITE (iw,
'(T2,A,I9,A,I9,A,I5,A,I5)')
'matrix B ', nrow_global,
" by ", ncol_global, &
1332 ' using blocks of ', nrow_block_actual,
' by ', ncol_block_actual
1334 NULLIFY (fmstruct_c)
1338 nrow_global=nrow_global, ncol_global=ncol_global, &
1339 nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1340 CALL cp_fm_struct_get(fmstruct_c, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1341 IF (iw > 0)
WRITE (iw,
'(T2,A,I9,A,I9,A,I5,A,I5)')
'matrix C ', nrow_global,
" by ", ncol_global, &
1342 ' using blocks of ', nrow_block_actual,
' by ', ncol_block_actual
1344 CALL cp_fm_create(matrix=matrix_a, matrix_struct=fmstruct_a, name=
"MATRIX A")
1345 CALL cp_fm_create(matrix=matrix_b, matrix_struct=fmstruct_b, name=
"MATRIX B")
1346 CALL cp_fm_create(matrix=matrix_c, matrix_struct=fmstruct_c, name=
"MATRIX C")
1348 CALL random_number(matrix_a%local_data)
1349 CALL random_number(matrix_b%local_data)
1350 CALL random_number(matrix_c%local_data)
1355 DO i_loop = 1, n_loop
1357 CALL parallel_gemm(transa, transb, k, n, m, 1.0_dp, matrix_a, matrix_b, 0.0_dp, matrix_c)
1360 WRITE (iw,
'(T2,A,T50,F12.6)')
"cp_fm_gemm timing: ", (t4 - t3)
1367 WRITE (iw,
'(T2,A,T50,F12.6)')
"average cp_fm_gemm timing: ", (t2 - t1)/n_loop
1369 WRITE (iw,
'(T2,A,T50,F12.6)')
"cp_fm_gemm Gflops per MPI task: ", &
1370 2*real(m, kind=
dp)*real(n, kind=
dp)*real(k, kind=
dp)*n_loop/max(0.001_dp, t2 - t1)/1.0e9_dp/para_env%num_pe
1384 END SUBROUTINE cp_fm_gemm_test
1392 SUBROUTINE cp_dbcsr_tests(para_env, iw, input_section)
1398 CHARACTER,
DIMENSION(3) :: types
1399 INTEGER :: data_type, i_rep, k, m, n, n_loop, &
1401 INTEGER,
DIMENSION(:),
POINTER :: bs_k, bs_m, bs_n, nproc
1402 LOGICAL :: always_checksum, retain_sparsity, &
1404 REAL(kind=
dp) :: alpha, beta, filter_eps, s_a, s_b, s_c
1408 NULLIFY (bs_m, bs_n, bs_k)
1410 CALL dbcsr_reset_randmat_seed()
1429 CALL section_vals_val_get(input_section,
"keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
1444 i_rep_section=i_rep, r_val=filter_eps)
1445 CALL section_vals_val_get(input_section,
"ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
1447 CALL dbcsr_run_tests(para_env%get_handle(), iw, nproc, &
1449 (/transa_p, transb_p/), &
1451 (/s_a, s_b, s_c/), &
1453 data_type=data_type, &
1454 test_type=test_type, &
1455 n_loops=n_loop, eps=filter_eps, retain_sparsity=retain_sparsity, &
1456 always_checksum=always_checksum)
1458 END SUBROUTINE cp_dbcsr_tests
1466 SUBROUTINE run_dbm_tests(para_env, iw, input_section)
1472 INTEGER :: i_rep, k, m, n, n_loop, n_rep
1473 INTEGER,
DIMENSION(:),
POINTER :: bs_k, bs_m, bs_n
1474 LOGICAL :: always_checksum, retain_sparsity, &
1476 REAL(kind=
dp) :: alpha, beta, filter_eps, s_a, s_b, s_c
1480 NULLIFY (bs_m, bs_n, bs_k)
1482 CALL dbcsr_reset_randmat_seed()
1493 CALL section_vals_val_get(input_section,
"keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
1500 CALL section_vals_val_get(input_section,
"ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
1504 matrix_sizes=(/m, n, k/), &
1505 trs=(/transa_p, transb_p/), &
1509 sparsities=(/s_a, s_b, s_c/), &
1514 retain_sparsity=retain_sparsity, &
1515 always_checksum=always_checksum)
1517 END SUBROUTINE run_dbm_tests
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
Test of Electron Repulsion Routines (ERI)
real(kind=dp), parameter threshold
subroutine, public eri_test(iw)
...
Handles all functions related to the CELL.
subroutine, public init_cell(cell, hmat, periodic)
Initialise/readjust a simulation cell after hmat has been changed.
subroutine, public cell_create(cell, hmat, periodic, tag)
allocates and initializes a cell
Handles all functions related to the CELL.
subroutine, public cell_release(cell)
releases the given cell (see doc/ReferenceCounting.html)
Test of Clebsch-Gordon Coefficients.
subroutine, public clebsch_gordon_test()
...
methods related to the blacs parallel environment
subroutine, public cp_blacs_env_release(blacs_env)
releases the given blacs_env
subroutine, public cp_blacs_env_create(blacs_env, para_env, blacs_grid_layout, blacs_repeatable, row_major, grid_2d)
allocates and initializes a type that represent a blacs context
Interface to Minimax-Ewald method for periodic ERI's to be used in CP2K.
subroutine, public cp_eri_mme_perf_acc_test(para_env, iw, eri_mme_test_section)
...
Utility routines to open and close files. Tracking of preconnections.
subroutine, public open_file(file_name, file_status, file_form, file_action, file_position, file_pad, unit_number, debug, skip_get_unit_number, file_access)
Opens the requested file using a free unit number.
subroutine, public close_file(unit_number, file_status, keep_preconnection)
Close an open file given by its logical unit number. Optionally, keep the file and unit preconnected.
basic linear algebra operations for full matrices
subroutine, public cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
subroutine, public cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
Computes all eigenvalues and vectors of a real symmetric matrix significantly faster than syevx,...
subroutine, public cp_fm_syevx(matrix, eigenvectors, eigenvalues, neig, work_syevx)
compute eigenvalues and optionally eigenvectors of a real symmetric matrix using scalapack....
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
integer function, public cp_fm_pilaenv(ictxt, prec)
...
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
various routines to log and control the output. The idea is that decisions about where to log should ...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
routines to handle the output, The idea is to remove the decision of wheter to output and what to out...
integer function, public cp_print_key_unit_nr(logger, basis_section, print_key_path, extension, middle_name, local, log_filename, ignore_should_output, file_form, file_position, file_action, file_status, do_backup, on_file, is_new_file, mpi_io, fout)
...
subroutine, public cp_print_key_finished_output(unit_nr, logger, basis_section, print_key_path, local, ignore_should_output, on_file, mpi_io)
should be called after you finish working with a unit obtained with cp_print_key_unit_nr,...
subroutine, public init_input_type(input_settings, nsmax, rs_grid_section, ilevel, higher_grid_layout)
parses an input section to assign the proper values to the input type
subroutine, public dbm_run_tests(mp_group, io_unit, matrix_sizes, trs, bs_m, bs_n, bs_k, sparsities, alpha, beta, n_loops, eps, retain_sparsity, always_checksum)
Tests the DBM library.
Define type storing the global information of a run. Keep the amount of stored data small....
Defines the basic variable types.
integer, parameter, public dp
Performance tests for basic tasks like matrix multiplies, copy, fft.
subroutine, public lib_test(root_section, para_env, globenv)
Master routine for tests.
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Interface to the message passing library MPI.
Routines to calculate the minimax coefficients in order to approximate 1/x as a sum over exponential ...
subroutine, public validate_exp_minimax(n_r, iw)
Unit test checking that numerical error of minimax approximations generated using any k15 or k53 coef...
Routines to calculate frequency and time grids (integration points and weights) for correlation metho...
subroutine, public test_least_square_ft(nr, iw)
test the singular value decomposition for the computation of integration weights for the Fourier tran...
Interface to the message passing library MPI.
subroutine, public mpi_perf_test(comm, npow, output_unit)
Tests the MPI library.
basic linear algebra operations for full matrixes
Parallel (pseudo)random number generator (RNG) for multiple streams and substreams of random numbers.
subroutine advance(self, e, c)
Advance the state by n steps, i.e. jump n steps forward, if n > 0, or backward if n < 0.
integer, parameter, public uniform
integer, parameter, public halfspace
integer, parameter, public fullspace
This module defines the grid data type and some basic operations on it.
subroutine, public pw_grid_release(pw_grid)
releases the given pw grid
subroutine, public rs_grid_print(rs, iounit)
Print information on grids to output.
subroutine, public rs_grid_create(rs, desc)
...
subroutine, public rs_grid_create_descriptor(desc, pw_grid, input_settings, border_points)
Determine the setup of real space grids - this is divided up into the creation of a descriptor and th...
subroutine, public transfer_pw2rs(rs, pw)
...
subroutine, public rs_grid_release_descriptor(rs_desc)
releases the given rs grid descriptor (see doc/ReferenceCounting.html)
subroutine, public transfer_rs2pw(rs, pw)
...
subroutine, public rs_grid_release(rs_grid)
releases the given rs grid (see doc/ReferenceCounting.html)
subroutine, public rs_grid_zero(rs)
Initialize grid to zero.
Calculates 2-center integrals for different r12 operators comparing the Solid harmonic Gaussian integ...
subroutine, public shg_integrals_perf_acc_test(iw, shg_integrals_test_section)
Unit test for performance and accuracy of the SHG integrals.
Type defining parameters related to the simulation cell.
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
keeps the information about the structure of a full matrix
type of a logger, at the moment it contains just a print level starting at which level it should be l...
contains the initially parsed file and the initial parallel environment
stores all the informations relevant to an mpi environment