51 USE dbcsr_api,
ONLY: dbcsr_reset_randmat_seed,&
95 realspace_grid_desc_type, realspace_grid_input_type, realspace_grid_type,
rs_grid_create, &
99 #include "./base/base_uses.f90"
106 INTEGER :: runtest(100)
107 REAL(KIND=
dp) :: max_memory
108 REAL(KIND=
dp),
PARAMETER :: threshold = 1.0e-8_dp
109 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'library_tests'
122 SUBROUTINE lib_test(root_section, para_env, globenv)
124 TYPE(section_vals_type),
POINTER :: root_section
125 TYPE(mp_para_env_type),
POINTER :: para_env
126 TYPE(global_environment_type),
POINTER :: globenv
128 CHARACTER(LEN=*),
PARAMETER :: routinen =
'lib_test'
130 INTEGER :: handle, iw
132 TYPE(cp_logger_type),
POINTER :: logger
133 TYPE(section_vals_type),
POINTER :: cp_dbcsr_test_section, cp_fm_gemm_test_section, &
134 dbm_test_section, eigensolver_section, eri_mme_test_section, pw_transfer_section, &
135 rs_pw_transfer_section, shg_integrals_test_section
137 CALL timeset(routinen, handle)
143 WRITE (iw,
'(T2,79("*"))')
144 WRITE (iw,
'(A,T31,A,T80,A)')
' *',
' PERFORMANCE TESTS ',
'*'
145 WRITE (iw,
'(T2,79("*"))')
148 CALL test_input(root_section, para_env)
150 IF (runtest(1) /= 0)
CALL copy_test(para_env, iw)
152 IF (runtest(2) /= 0)
CALL matmul_test(para_env, test_matmul=.true., test_dgemm=.false., iw=iw)
153 IF (runtest(5) /= 0)
CALL matmul_test(para_env, test_matmul=.false., test_dgemm=.true., iw=iw)
155 IF (runtest(3) /= 0)
CALL fft_test(para_env, iw, globenv%fftw_plan_type, &
156 globenv%fftw_wisdom_file_name)
158 IF (runtest(4) /= 0)
CALL eri_test(iw)
164 IF (runtest(8) /= 0)
CALL mpi_perf_test(para_env, runtest(8), iw)
174 CALL rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
180 CALL pw_fft_test(para_env, iw, globenv, pw_transfer_section)
186 CALL cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
192 CALL eigensolver_test(para_env, iw, eigensolver_section)
211 CALL cp_dbcsr_tests(para_env, iw, cp_dbcsr_test_section)
218 CALL run_dbm_tests(para_env, iw, dbm_test_section)
223 CALL timestop(handle)
248 SUBROUTINE test_input(root_section, para_env)
249 TYPE(section_vals_type),
POINTER :: root_section
250 TYPE(mp_para_env_type),
POINTER :: para_env
252 TYPE(section_vals_type),
POINTER :: test_section
272 END SUBROUTINE test_input
286 SUBROUTINE copy_test(para_env, iw)
287 TYPE(mp_para_env_type),
POINTER :: para_env
290 INTEGER :: i, ierr, j, len, ntim, siz
291 REAL(kind=
dp) :: perf, t, tend, tstart
292 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: ca, cb
296 siz = abs(runtest(1))
297 IF (para_env%is_source())
WRITE (iw,
'(//,A,/)')
" Test of copy ( F95 ) "
300 IF (8.0_dp*real(len, kind=
dp) > max_memory*0.5_dp)
EXIT
301 ALLOCATE (ca(len), stat=ierr)
303 ALLOCATE (cb(len), stat=ierr)
306 CALL random_number(ca)
307 ntim = nint(1.e7_dp/real(len, kind=
dp))
309 ntim = min(ntim, siz*10000)
314 ca(1) = real(j, kind=
dp)
317 t = tend - tstart + threshold
319 perf = real(ntim, kind=
dp)*real(len, kind=
dp)*1.e-6_dp/t
324 IF (para_env%is_source())
THEN
325 WRITE (iw,
'(A,i2,i10,A,T59,F14.4,A)')
" Copy test: Size = 2^", i, &
326 len/1024,
" Kwords", perf,
" Mcopy/s"
333 END SUBROUTINE copy_test
346 SUBROUTINE matmul_test(para_env, test_matmul, test_dgemm, iw)
347 TYPE(mp_para_env_type),
POINTER :: para_env
348 LOGICAL :: test_matmul, test_dgemm
351 INTEGER :: i, ierr, j, len, ntim, siz
352 REAL(kind=
dp) :: perf, t, tend, tstart, xdum
353 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: ma, mb, mc
357 IF (test_matmul)
THEN
358 siz = abs(runtest(2))
359 IF (para_env%is_source())
WRITE (iw,
'(//,A,/)')
" Test of matmul ( F95 ) "
362 IF (8.0_dp*real(len*len, kind=
dp) > max_memory*0.3_dp)
EXIT
363 ALLOCATE (ma(len, len), stat=ierr)
365 ALLOCATE (mb(len, len), stat=ierr)
367 ALLOCATE (mc(len, len), stat=ierr)
371 CALL random_number(xdum)
373 CALL random_number(xdum)
375 ntim = nint(1.e8_dp/(2.0_dp*real(len, kind=
dp)**3))
377 ntim = min(ntim, siz*200)
380 mc(:, :) = matmul(ma, mb)
381 ma(1, 1) = real(j, kind=
dp)
384 t = tend - tstart + threshold
385 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
386 IF (para_env%is_source())
THEN
387 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
388 " Matrix multiply test: c = a * b Size = ", len, perf,
" Mflop/s"
392 mc(:, :) = mc + matmul(ma, mb)
393 ma(1, 1) = real(j, kind=
dp)
396 t = tend - tstart + threshold
398 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
403 IF (para_env%is_source())
THEN
404 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
405 " Matrix multiply test: a = a * b Size = ", len, perf,
" Mflop/s"
410 mc(:, :) = mc + matmul(ma, transpose(mb))
411 ma(1, 1) = real(j, kind=
dp)
414 t = tend - tstart + threshold
416 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
421 IF (para_env%is_source())
THEN
422 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
423 " Matrix multiply test: c = a * b(T) Size = ", len, perf,
" Mflop/s"
428 mc(:, :) = mc + matmul(transpose(ma), mb)
429 ma(1, 1) = real(j, kind=
dp)
432 t = tend - tstart + threshold
434 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
439 IF (para_env%is_source())
THEN
440 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
441 " Matrix multiply test: c = a(T) * b Size = ", len, perf,
" Mflop/s"
452 siz = abs(runtest(5))
453 IF (para_env%is_source())
WRITE (iw,
'(//,A,/)')
" Test of matmul ( BLAS ) "
456 IF (8.0_dp*real(len*len, kind=
dp) > max_memory*0.3_dp)
EXIT
457 ALLOCATE (ma(len, len), stat=ierr)
459 ALLOCATE (mb(len, len), stat=ierr)
461 ALLOCATE (mc(len, len), stat=ierr)
465 CALL random_number(xdum)
467 CALL random_number(xdum)
469 ntim = nint(1.e8_dp/(2.0_dp*real(len, kind=
dp)**3))
471 ntim = min(ntim, 1000)
475 CALL dgemm(
"N",
"N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
478 t = tend - tstart + threshold
480 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
485 IF (para_env%is_source())
THEN
486 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
487 " Matrix multiply test: c = a * b Size = ", len, perf,
" Mflop/s"
492 CALL dgemm(
"N",
"N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
495 t = tend - tstart + threshold
497 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
502 IF (para_env%is_source())
THEN
503 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
504 " Matrix multiply test: a = a * b Size = ", len, perf,
" Mflop/s"
509 CALL dgemm(
"N",
"T", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
512 t = tend - tstart + threshold
514 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
519 IF (para_env%is_source())
THEN
520 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
521 " Matrix multiply test: c = a * b(T) Size = ", len, perf,
" Mflop/s"
526 CALL dgemm(
"T",
"N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
529 t = tend - tstart + threshold
531 perf = real(ntim, kind=
dp)*2.0_dp*real(len, kind=
dp)**3*1.e-6_dp/t
536 IF (para_env%is_source())
THEN
537 WRITE (iw,
'(A,i6,T59,F14.4,A)') &
538 " Matrix multiply test: c = a(T) * b Size = ", len, perf,
" Mflop/s"
549 END SUBROUTINE matmul_test
561 SUBROUTINE fft_test(para_env, iw, fftw_plan_type, wisdom_file)
563 TYPE(mp_para_env_type),
POINTER :: para_env
564 INTEGER :: iw, fftw_plan_type
565 CHARACTER(LEN=*),
INTENT(IN) :: wisdom_file
567 INTEGER,
PARAMETER :: ndate(3) = (/12, 48, 96/)
569 INTEGER :: iall, ierr, it, j, len, n(3), ntim, &
570 radix_in, radix_out, siz, stat
571 COMPLEX(KIND=dp),
DIMENSION(4, 4, 4) :: zz
572 COMPLEX(KIND=dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: ca, cb, cc
573 CHARACTER(LEN=7) :: method
574 REAL(kind=
dp) :: flops, perf, scale, t, tdiff, tend, &
576 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :) :: ra
580 IF (para_env%is_source())
WRITE (iw,
'(//,A,/)')
" Test of 3D-FFT "
581 siz = abs(runtest(3))
588 CALL init_fft(
"FFTSG", alltoall=.false., fftsg_sizes=.true., wisdom_file=wisdom_file, &
589 pool_limit=10, plan_style=fftw_plan_type)
594 CALL init_fft(
"FFTW3", alltoall=.false., fftsg_sizes=.true., wisdom_file=wisdom_file, &
595 pool_limit=10, plan_style=fftw_plan_type)
600 CALL fft3d(1, n, zz, status=stat)
607 IF (16.0_dp*real(len*len*len, kind=
dp) > max_memory*0.5_dp)
EXIT
608 ALLOCATE (ra(len, len, len), stat=ierr)
609 ALLOCATE (ca(len, len, len), stat=ierr)
610 CALL random_number(ra)
612 CALL random_number(ra)
613 ca(:, :, :) = ca + cmplx(0.0_dp, 1.0_dp, kind=
dp)*ra
614 flops = real(len**3, kind=
dp)*15.0_dp*log(real(len, kind=
dp))
615 ntim = nint(siz*1.e7_dp/flops)
617 ntim = min(ntim, 200)
618 scale = 1.0_dp/real(len**3, kind=
dp)
621 CALL fft3d(
fwfft, n, ca)
622 CALL fft3d(
bwfft, n, ca, scale=scale)
625 t = tend - tstart + threshold
627 perf = real(ntim, kind=
dp)*2.0_dp*flops*1.e-6_dp/t
632 IF (para_env%is_source())
THEN
633 WRITE (iw,
'(T2,A,A,i6,T59,F14.4,A)') &
634 adjustr(method),
" test (in-place) Size = ", len, perf,
" Mflop/s"
639 IF (para_env%is_source())
WRITE (iw, *)
643 ALLOCATE (ra(len, len, len))
644 ALLOCATE (ca(len, len, len))
645 ALLOCATE (cb(len, len, len))
646 ALLOCATE (cc(len, len, len))
647 CALL random_number(ra)
649 CALL random_number(ra)
650 ca(:, :, :) = ca + cmplx(0.0_dp, 1.0_dp, kind=
dp)*ra
652 CALL fft3d(
fwfft, n, ca, cb)
653 tdiff = maxval(abs(ca - cc))
654 IF (tdiff > 1.0e-12_dp)
THEN
655 IF (para_env%is_source()) &
656 WRITE (iw,
'(T2,A,A,A)') adjustr(method),
" FWFFT ", &
657 " Input array is changed in out-of-place FFT !"
659 IF (para_env%is_source()) &
660 WRITE (iw,
'(T2,A,A,A)') adjustr(method),
" FWFFT ", &
661 " Input array is not changed in out-of-place FFT !"
664 CALL fft3d(
bwfft, n, ca, cb)
665 tdiff = maxval(abs(ca - cc))
666 IF (tdiff > 1.0e-12_dp)
THEN
667 IF (para_env%is_source()) &
668 WRITE (iw,
'(T2,A,A,A)') adjustr(method),
" BWFFT ", &
669 " Input array is changed in out-of-place FFT !"
671 IF (para_env%is_source()) &
672 WRITE (iw,
'(T2,A,A,A)') adjustr(method),
" BWFFT ", &
673 " Input array is not changed in out-of-place FFT !"
675 IF (para_env%is_source())
WRITE (iw, *)
685 END SUBROUTINE fft_test
697 SUBROUTINE rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
699 TYPE(mp_para_env_type),
POINTER :: para_env
701 TYPE(global_environment_type),
POINTER :: globenv
702 TYPE(section_vals_type),
POINTER :: rs_pw_transfer_section
704 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rs_pw_transfer_test'
706 INTEGER :: halo_size, handle, i_loop, n_loop, ns_max
707 INTEGER,
DIMENSION(3) :: no, np
708 INTEGER,
DIMENSION(:),
POINTER :: i_vals
710 REAL(kind=
dp) :: tend, tstart
711 TYPE(cell_type),
POINTER :: box
712 TYPE(pw_grid_type),
POINTER :: grid
713 TYPE(pw_r3d_rs_type) :: ca
714 TYPE(realspace_grid_desc_type),
POINTER :: rs_desc
715 TYPE(realspace_grid_input_type) :: input_settings
716 TYPE(realspace_grid_type) :: rs_grid
717 TYPE(section_vals_type),
POINTER :: rs_grid_section
719 CALL timeset(routinen, handle)
722 CALL init_fft(globenv%default_fft_library, alltoall=.false., fftsg_sizes=.true., &
723 pool_limit=globenv%fft_pool_scratch_limit, &
724 wisdom_file=globenv%fftw_wisdom_file_name, &
725 plan_style=globenv%fftw_plan_type)
730 box%hmat = reshape((/20.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 20.0_dp, 0.0_dp, &
731 0.0_dp, 0.0_dp, 20.0_dp/), (/3, 3/))
748 ns_max = 2*halo_size + 1
749 CALL init_input_type(input_settings, ns_max, rs_grid_section, 1, (/-1, -1, -1/))
759 CALL random_number(rs_grid%r)
765 IF (para_env%is_source())
THEN
766 WRITE (iw,
'(T2,A)')
""
767 WRITE (iw,
'(T2,A)')
"Timing rs_pw_transfer routine"
768 WRITE (iw,
'(T2,A)')
""
769 WRITE (iw,
'(T2,A)')
"iteration time[s]"
771 DO i_loop = 1, n_loop
781 IF (para_env%is_source())
THEN
782 WRITE (iw,
'(T2,I9,1X,F12.6)') i_loop, tend - tstart
792 CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
794 CALL timestop(handle)
796 END SUBROUTINE rs_pw_transfer_test
809 SUBROUTINE pw_fft_test(para_env, iw, globenv, pw_transfer_section)
811 TYPE(mp_para_env_type),
POINTER :: para_env
813 TYPE(global_environment_type),
POINTER :: globenv
814 TYPE(section_vals_type),
POINTER :: pw_transfer_section
816 REAL(kind=
dp),
PARAMETER :: toler = 1.e-11_dp
818 INTEGER :: blocked_id, grid_span, i_layout, i_rep, &
819 ig, ip, itmp, n_loop, n_rep, nn, p, q
820 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: layouts
821 INTEGER,
DIMENSION(2) :: distribution_layout
822 INTEGER,
DIMENSION(3) :: no, np
823 INTEGER,
DIMENSION(:),
POINTER :: i_vals
824 LOGICAL :: debug, is_fullspace, odd, &
825 pw_grid_layout_all, spherical
826 REAL(kind=
dp) :: em, et, flops, gsq, perf, t, t_max, &
828 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: t_end, t_start
829 TYPE(cell_type),
POINTER :: box
830 TYPE(pw_c1d_gs_type) :: ca, cc
831 TYPE(pw_c3d_rs_type) :: cb
832 TYPE(pw_grid_type),
POINTER :: grid
836 CALL init_fft(globenv%default_fft_library, alltoall=.false., fftsg_sizes=.true., &
837 pool_limit=globenv%fft_pool_scratch_limit, &
838 wisdom_file=globenv%fftw_wisdom_file_name, &
839 plan_style=globenv%fftw_plan_type)
844 box%hmat = reshape((/10.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 8.0_dp, 0.0_dp, &
845 0.0_dp, 0.0_dp, 7.0_dp/), (/3, 3/))
853 ALLOCATE (t_start(n_loop))
854 ALLOCATE (t_end(n_loop))
860 CALL section_vals_val_get(pw_transfer_section,
"PW_GRID_BLOCKED", i_rep_section=i_rep, i_val=blocked_id)
864 l_val=pw_grid_layout_all)
867 IF (pw_grid_layout_all)
THEN
871 DO p = 2, para_env%num_pe
872 q = para_env%num_pe/p
873 IF (p*q == para_env%num_pe)
THEN
878 ALLOCATE (layouts(2, itmp))
880 DO p = 2, para_env%num_pe
881 q = para_env%num_pe/p
882 IF (p*q == para_env%num_pe)
THEN
884 layouts(:, itmp) = (/p, q/)
888 CALL section_vals_val_get(pw_transfer_section,
"PW_GRID_LAYOUT", i_rep_section=i_rep, i_vals=i_vals)
889 ALLOCATE (layouts(2, 1))
890 layouts(:, 1) = i_vals
893 DO i_layout = 1,
SIZE(layouts, 2)
895 distribution_layout = layouts(:, i_layout)
905 is_fullspace = .false.
908 is_fullspace = .true.
911 is_fullspace = .false.
919 ELSE IF (is_fullspace)
THEN
930 CALL pw_grid_setup(box%hmat, grid, grid_span=grid_span, odd=odd, spherical=spherical, &
931 blocked=blocked_id, npts=np, fft_usage=.true., &
932 rs_dims=distribution_layout, iounit=iw)
950 ca%array(ig) = exp(-gsq)
953 flops = product(no)*30.0_dp*log(real(maxval(no), kind=
dp))
958 CALL pw_transfer(ca, cb, debug)
959 CALL pw_transfer(cb, cc, debug)
964 t = tend - tstart + threshold
966 perf = real(n_loop, kind=
dp)*2.0_dp*flops*1.e-6_dp/t
971 em = maxval(abs(ca%array(:) - cc%array(:)))
972 CALL para_env%max(em)
973 et = sum(abs(ca%array(:) - cc%array(:)))
974 CALL para_env%sum(et)
975 t_min = minval(t_end - t_start)
976 t_max = maxval(t_end - t_start)
978 IF (para_env%is_source())
THEN
980 WRITE (iw,
'(A,T67,E14.6)')
" Parallel FFT Tests: Maximal Error ", em
981 WRITE (iw,
'(A,T67,E14.6)')
" Parallel FFT Tests: Total Error ", et
982 WRITE (iw,
'(A,T67,F14.0)') &
983 " Parallel FFT Tests: Performance [Mflops] ", perf
984 WRITE (iw,
'(A,T67,F14.6)')
" Best time : ", t_min
985 WRITE (iw,
'(A,T67,F14.6)')
" Worst time: ", t_max
990 IF (em > toler .OR. et > toler)
THEN
991 cpwarn(
"The FFT results are not accurate ... starting debug pw_transfer")
992 CALL pw_transfer(ca, cb, .true.)
993 CALL pw_transfer(cb, cc, .true.)
1005 DEALLOCATE (layouts)
1006 DEALLOCATE (t_start)
1013 CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
1015 END SUBROUTINE pw_fft_test
1026 SUBROUTINE eigensolver_test(para_env, iw, eigensolver_section)
1028 TYPE(mp_para_env_type),
POINTER :: para_env
1030 TYPE(section_vals_type),
POINTER :: eigensolver_section
1032 INTEGER :: diag_method, i, i_loop, i_rep, &
1033 init_method, j, n, n_loop, n_rep, &
1035 REAL(kind=
dp) :: t1, t2
1036 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: eigenvalues
1037 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: buffer
1038 TYPE(cp_blacs_env_type),
POINTER :: blacs_env
1039 TYPE(cp_fm_struct_type),
POINTER :: fmstruct
1040 TYPE(cp_fm_type) :: eigenvectors, matrix, work
1041 TYPE(rng_stream_type),
ALLOCATABLE :: rng_stream
1044 WRITE (unit=iw, fmt=
"(/,/,T2,A,/)")
"EIGENSOLVER TEST"
1059 CALL section_vals_val_get(eigensolver_section,
"DIAG_METHOD", i_rep_section=i_rep, i_val=diag_method)
1060 CALL section_vals_val_get(eigensolver_section,
"INIT_METHOD", i_rep_section=i_rep, i_val=init_method)
1064 IF (neig < 0) neig = n
1069 WRITE (iw, *)
"Matrix size", n
1070 WRITE (iw, *)
"Number of eigenvalues", neig
1071 WRITE (iw, *)
"Timing loops", n_loop
1072 SELECT CASE (diag_method)
1074 WRITE (iw, *)
"Diag using syevd"
1076 WRITE (iw, *)
"Diag using syevx"
1081 SELECT CASE (init_method)
1083 WRITE (iw, *)
"using random matrix"
1085 WRITE (iw, *)
"reading from file"
1094 para_env=para_env, &
1095 context=blacs_env, &
1101 matrix_struct=fmstruct, &
1106 matrix_struct=fmstruct, &
1107 name=
"EIGENVECTORS")
1111 matrix_struct=fmstruct, &
1115 ALLOCATE (eigenvalues(n))
1116 eigenvalues = 0.0_dp
1117 ALLOCATE (buffer(1, n))
1120 IF (para_env%is_source())
THEN
1121 SELECT CASE (init_method)
1123 rng_stream = rng_stream_type( &
1124 name=
"rng_stream", &
1126 extended_precision=.true.)
1129 file_action=
"READ", &
1130 file_form=
"FORMATTED", &
1131 file_status=
"OLD", &
1132 unit_number=unit_number)
1137 IF (para_env%is_source())
THEN
1138 SELECT CASE (init_method)
1141 buffer(1, j) = rng_stream%next() - 0.5_dp
1146 READ (unit=unit_number, fmt=*) buffer(1, 1:n)
1149 CALL para_env%bcast(buffer)
1150 SELECT CASE (init_method)
1153 new_values=buffer, &
1162 new_values=buffer, &
1172 new_values=buffer, &
1185 IF (para_env%is_source())
THEN
1186 SELECT CASE (init_method)
1192 DO i_loop = 1, n_loop
1193 eigenvalues = 0.0_dp
1195 CALL cp_fm_to_fm(source=matrix, &
1200 SELECT CASE (diag_method)
1203 eigenvectors=eigenvectors, &
1204 eigenvalues=eigenvalues)
1207 eigenvectors=eigenvectors, &
1208 eigenvalues=eigenvalues, &
1213 IF (iw > 0)
WRITE (iw, *)
"Timing for loop ", i_loop,
" : ", t2 - t1
1217 WRITE (iw, *)
"Eigenvalues: "
1218 WRITE (unit=iw, fmt=
"(T3,5F14.6)") eigenvalues(1:neig)
1219 WRITE (unit=iw, fmt=
"(T3,A4,F16.6)")
"Sum:", sum(eigenvalues(1:neig))
1224 DEALLOCATE (eigenvalues)
1225 CALL cp_fm_release(matrix=work)
1226 CALL cp_fm_release(matrix=eigenvectors)
1227 CALL cp_fm_release(matrix=matrix)
1234 END SUBROUTINE eigensolver_test
1242 SUBROUTINE cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
1244 TYPE(mp_para_env_type),
POINTER :: para_env
1246 TYPE(section_vals_type),
POINTER :: cp_fm_gemm_test_section
1248 CHARACTER(LEN=1) :: transa, transb
1249 INTEGER :: i_loop, i_rep, k, m, n, n_loop, n_rep, ncol_block, ncol_block_actual, &
1250 ncol_global, np, nrow_block, nrow_block_actual, nrow_global
1251 INTEGER,
DIMENSION(:),
POINTER :: grid_2d
1252 LOGICAL :: force_blocksize, row_major, transa_p, &
1254 REAL(kind=
dp) :: t1, t2, t3, t4
1255 TYPE(cp_blacs_env_type),
POINTER :: blacs_env
1256 TYPE(cp_fm_struct_type),
POINTER :: fmstruct_a, fmstruct_b, fmstruct_c
1257 TYPE(cp_fm_type) :: matrix_a, matrix_b, matrix_c
1263 CALL section_vals_val_get(cp_fm_gemm_test_section,
"N_loop", i_rep_section=i_rep, i_val=n_loop)
1269 CALL section_vals_val_get(cp_fm_gemm_test_section,
"transa", i_rep_section=i_rep, l_val=transa_p)
1270 CALL section_vals_val_get(cp_fm_gemm_test_section,
"transb", i_rep_section=i_rep, l_val=transb_p)
1271 CALL section_vals_val_get(cp_fm_gemm_test_section,
"nrow_block", i_rep_section=i_rep, i_val=nrow_block)
1272 CALL section_vals_val_get(cp_fm_gemm_test_section,
"ncol_block", i_rep_section=i_rep, i_val=ncol_block)
1273 CALL section_vals_val_get(cp_fm_gemm_test_section,
"ROW_MAJOR", i_rep_section=i_rep, l_val=row_major)
1274 CALL section_vals_val_get(cp_fm_gemm_test_section,
"GRID_2D", i_rep_section=i_rep, i_vals=grid_2d)
1275 CALL section_vals_val_get(cp_fm_gemm_test_section,
"FORCE_BLOCKSIZE", i_rep_section=i_rep, l_val=force_blocksize)
1278 IF (transa_p) transa =
"T"
1279 IF (transb_p) transb =
"T"
1282 WRITE (iw,
'(T2,A)')
"----------- TESTING PARALLEL MATRIX MULTIPLY -------------"
1283 WRITE (iw,
'(T2,A)', advance=
"NO")
"C = "
1285 WRITE (iw,
'(A)', advance=
"NO")
"TRANSPOSE(A) x"
1287 WRITE (iw,
'(A)', advance=
"NO")
"A x "
1290 WRITE (iw,
'(A)')
"TRANSPOSE(B) "
1292 WRITE (iw,
'(A)')
"B "
1294 WRITE (iw,
'(T2,A,T50,I5,A,I5)')
'requested block size', nrow_block,
' by ', ncol_block
1295 WRITE (iw,
'(T2,A,T50,I5)')
'number of repetitions of cp_fm_gemm ', n_loop
1296 WRITE (iw,
'(T2,A,T50,L5)')
'Row Major', row_major
1297 WRITE (iw,
'(T2,A,T50,2I7)')
'GRID_2D ', grid_2d
1298 WRITE (iw,
'(T2,A,T50,L5)')
'Force blocksize ', force_blocksize
1302 WRITE (iw,
'(T2,A,T50,I5)')
'PILAENV blocksize', np
1308 para_env=para_env, &
1309 row_major=row_major, &
1312 NULLIFY (fmstruct_a)
1314 nrow_global = m; ncol_global = k
1316 nrow_global = k; ncol_global = m
1319 nrow_global=nrow_global, ncol_global=ncol_global, &
1320 nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1321 CALL cp_fm_struct_get(fmstruct_a, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1322 IF (iw > 0)
WRITE (iw,
'(T2,A,I9,A,I9,A,I5,A,I5)')
'matrix A ', nrow_global,
" by ", ncol_global, &
1323 ' using blocks of ', nrow_block_actual,
' by ', ncol_block_actual
1326 nrow_global = n; ncol_global = m
1328 nrow_global = m; ncol_global = n
1330 NULLIFY (fmstruct_b)
1332 nrow_global=nrow_global, ncol_global=ncol_global, &
1333 nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1334 CALL cp_fm_struct_get(fmstruct_b, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1335 IF (iw > 0)
WRITE (iw,
'(T2,A,I9,A,I9,A,I5,A,I5)')
'matrix B ', nrow_global,
" by ", ncol_global, &
1336 ' using blocks of ', nrow_block_actual,
' by ', ncol_block_actual
1338 NULLIFY (fmstruct_c)
1342 nrow_global=nrow_global, ncol_global=ncol_global, &
1343 nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1344 CALL cp_fm_struct_get(fmstruct_c, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1345 IF (iw > 0)
WRITE (iw,
'(T2,A,I9,A,I9,A,I5,A,I5)')
'matrix C ', nrow_global,
" by ", ncol_global, &
1346 ' using blocks of ', nrow_block_actual,
' by ', ncol_block_actual
1348 CALL cp_fm_create(matrix=matrix_a, matrix_struct=fmstruct_a, name=
"MATRIX A")
1349 CALL cp_fm_create(matrix=matrix_b, matrix_struct=fmstruct_b, name=
"MATRIX B")
1350 CALL cp_fm_create(matrix=matrix_c, matrix_struct=fmstruct_c, name=
"MATRIX C")
1352 CALL random_number(matrix_a%local_data)
1353 CALL random_number(matrix_b%local_data)
1354 CALL random_number(matrix_c%local_data)
1359 DO i_loop = 1, n_loop
1361 CALL parallel_gemm(transa, transb, k, n, m, 1.0_dp, matrix_a, matrix_b, 0.0_dp, matrix_c)
1364 WRITE (iw,
'(T2,A,T50,F12.6)')
"cp_fm_gemm timing: ", (t4 - t3)
1371 WRITE (iw,
'(T2,A,T50,F12.6)')
"average cp_fm_gemm timing: ", (t2 - t1)/n_loop
1373 WRITE (iw,
'(T2,A,T50,F12.6)')
"cp_fm_gemm Gflops per MPI task: ", &
1374 2*real(m, kind=
dp)*real(n, kind=
dp)*real(k, kind=
dp)*n_loop/max(0.001_dp, t2 - t1)/1.0e9_dp/para_env%num_pe
1378 CALL cp_fm_release(matrix=matrix_a)
1379 CALL cp_fm_release(matrix=matrix_b)
1380 CALL cp_fm_release(matrix=matrix_c)
1388 END SUBROUTINE cp_fm_gemm_test
1396 SUBROUTINE cp_dbcsr_tests(para_env, iw, input_section)
1398 TYPE(mp_para_env_type),
POINTER :: para_env
1400 TYPE(section_vals_type),
POINTER :: input_section
1402 CHARACTER,
DIMENSION(3) :: types
1403 INTEGER :: data_type, i_rep, k, m, n, n_loop, &
1405 INTEGER,
DIMENSION(:),
POINTER :: bs_k, bs_m, bs_n, nproc
1406 LOGICAL :: always_checksum, retain_sparsity, &
1408 REAL(kind=
dp) :: alpha, beta, filter_eps, s_a, s_b, s_c
1412 NULLIFY (bs_m, bs_n, bs_k)
1414 CALL dbcsr_reset_randmat_seed()
1433 CALL section_vals_val_get(input_section,
"keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
1448 i_rep_section=i_rep, r_val=filter_eps)
1449 CALL section_vals_val_get(input_section,
"ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
1451 CALL dbcsr_run_tests(para_env%get_handle(), iw, nproc, &
1453 (/transa_p, transb_p/), &
1455 (/s_a, s_b, s_c/), &
1457 data_type=data_type, &
1458 test_type=test_type, &
1459 n_loops=n_loop, eps=filter_eps, retain_sparsity=retain_sparsity, &
1460 always_checksum=always_checksum)
1462 END SUBROUTINE cp_dbcsr_tests
1470 SUBROUTINE run_dbm_tests(para_env, iw, input_section)
1472 TYPE(mp_para_env_type),
POINTER :: para_env
1474 TYPE(section_vals_type),
POINTER :: input_section
1476 INTEGER :: i_rep, k, m, n, n_loop, n_rep
1477 INTEGER,
DIMENSION(:),
POINTER :: bs_k, bs_m, bs_n
1478 LOGICAL :: always_checksum, retain_sparsity, &
1480 REAL(kind=
dp) :: alpha, beta, filter_eps, s_a, s_b, s_c
1484 NULLIFY (bs_m, bs_n, bs_k)
1486 CALL dbcsr_reset_randmat_seed()
1497 CALL section_vals_val_get(input_section,
"keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
1504 CALL section_vals_val_get(input_section,
"ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
1508 matrix_sizes=(/m, n, k/), &
1509 trs=(/transa_p, transb_p/), &
1513 sparsities=(/s_a, s_b, s_c/), &
1518 retain_sparsity=retain_sparsity, &
1519 always_checksum=always_checksum)
1521 END SUBROUTINE run_dbm_tests
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
Test of Electron Repulsion Routines (ERI)
subroutine, public eri_test(iw)
...
Handles all functions related to the CELL.
subroutine, public init_cell(cell, hmat, periodic)
Initialise/readjust a simulation cell after hmat has been changed.
subroutine, public cell_create(cell, hmat, periodic, tag)
allocates and initializes a cell
Handles all functions related to the CELL.
subroutine, public cell_release(cell)
releases the given cell (see doc/ReferenceCounting.html)
Test of Clebsch-Gordon Coefficients.
subroutine, public clebsch_gordon_test()
...
methods related to the blacs parallel environment
subroutine, public cp_blacs_env_release(blacs_env)
releases the given blacs_env
subroutine, public cp_blacs_env_create(blacs_env, para_env, blacs_grid_layout, blacs_repeatable, row_major, grid_2d)
allocates and initializes a type that represent a blacs context
Interface to Minimax-Ewald method for periodic ERI's to be used in CP2K.
subroutine, public cp_eri_mme_perf_acc_test(para_env, iw, eri_mme_test_section)
...
Utility routines to open and close files. Tracking of preconnections.
subroutine, public open_file(file_name, file_status, file_form, file_action, file_position, file_pad, unit_number, debug, skip_get_unit_number, file_access)
Opens the requested file using a free unit number.
subroutine, public close_file(unit_number, file_status, keep_preconnection)
Close an open file given by its logical unit number. Optionally, keep the file and unit preconnected.
basic linear algebra operations for full matrices
subroutine, public cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, c_first_row)
computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
subroutine, public cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
Computes all eigenvalues and vectors of a real symmetric matrix significantly faster than syevx,...
subroutine, public cp_fm_syevx(matrix, eigenvectors, eigenvalues, neig, work_syevx)
compute eigenvalues and optionally eigenvectors of a real symmetric matrix using scalapack....
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
integer function, public cp_fm_pilaenv(ictxt, prec)
...
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
various routines to log and control the output. The idea is that decisions about where to log should ...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
routines to handle the output, The idea is to remove the decision of wheter to output and what to out...
integer function, public cp_print_key_unit_nr(logger, basis_section, print_key_path, extension, middle_name, local, log_filename, ignore_should_output, file_form, file_position, file_action, file_status, do_backup, on_file, is_new_file, mpi_io, fout)
...
subroutine, public cp_print_key_finished_output(unit_nr, logger, basis_section, print_key_path, local, ignore_should_output, on_file, mpi_io)
should be called after you finish working with a unit obtained with cp_print_key_unit_nr,...
subroutine, public init_input_type(input_settings, nsmax, rs_grid_section, ilevel, higher_grid_layout)
parses an input section to assign the proper values to the input type
subroutine, public dbm_run_tests(mp_group, io_unit, matrix_sizes, trs, bs_m, bs_n, bs_k, sparsities, alpha, beta, n_loops, eps, retain_sparsity, always_checksum)
Tests the DBM library.
Define type storing the global information of a run. Keep the amount of stored data small....
Defines the basic variable types.
integer, parameter, public dp
Performance tests for basic tasks like matrix multiplies, copy, fft.
subroutine, public lib_test(root_section, para_env, globenv)
Master routine for tests.
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Interface to the message passing library MPI.
Routines to calculate the minimax coefficients in order to approximate 1/x as a sum over exponential ...
subroutine, public validate_exp_minimax(n_R, iw)
Unit test checking that numerical error of minimax approximations generated using any k15 or k53 coef...
Routines to calculate frequency and time grids (integration points and weights) for correlation metho...
subroutine, public test_least_square_ft(nR, iw)
test the singular value decomposition for the computation of integration weights for the Fourier tran...
Interface to the message passing library MPI.
subroutine, public mpi_perf_test(comm, npow, output_unit)
Tests the MPI library.
basic linear algebra operations for full matrixes
Parallel (pseudo)random number generator (RNG) for multiple streams and substreams of random numbers.
integer, parameter, public uniform
integer, parameter, public halfspace
integer, parameter, public fullspace
This module defines the grid data type and some basic operations on it.
subroutine, public pw_grid_release(pw_grid)
releases the given pw grid
subroutine, public pw_grid_setup(cell_hmat, pw_grid, grid_span, cutoff, bounds, bounds_local, npts, spherical, odd, fft_usage, ncommensurate, icommensurate, blocked, ref_grid, rs_dims, iounit)
sets up a pw_grid
subroutine, public pw_grid_create(pw_grid, pe_group, local)
Initialize a PW grid with all defaults.
subroutine, public rs_grid_print(rs, iounit)
Print information on grids to output.
subroutine, public rs_grid_create(rs, desc)
...
subroutine, public rs_grid_create_descriptor(desc, pw_grid, input_settings, border_points)
Determine the setup of real space grids - this is divided up into the creation of a descriptor and th...
subroutine, public transfer_pw2rs(rs, pw)
...
subroutine, public rs_grid_release_descriptor(rs_desc)
releases the given rs grid descriptor (see doc/ReferenceCounting.html)
subroutine, public transfer_rs2pw(rs, pw)
...
subroutine, public rs_grid_release(rs_grid)
releases the given rs grid (see doc/ReferenceCounting.html)
subroutine, public rs_grid_zero(rs)
Initialize grid to zero.
Calculates 2-center integrals for different r12 operators comparing the Solid harmonic Gaussian integ...
subroutine, public shg_integrals_perf_acc_test(iw, shg_integrals_test_section)
Unit test for performance and accuracy of the SHG integrals.